summary refs log tree commit diff
path: root/src/de/lmu/ifi/dbs/elki/algorithm
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java 8
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java 21
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java 12
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java 9
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java 313
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java 6
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java 17
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java 129
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/KMeans.java 307
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java 20
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java 15
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java 21
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java 566
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java 21
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java 310
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java 71
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java 74
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java 49
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java 176
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java 177
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java 213
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java 78
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java 87
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java 26
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java 8
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java 5
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java 8
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java 163
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java 123
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java 9
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java 64
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java 84
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java 66
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java 69
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java 57
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java 10
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java 19
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java 18
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java 43
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java 22
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java 71
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java 74
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java 123
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java 70
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java 35
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java 29
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/OUTRES.java 368
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java 47
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java 195
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/SOD.java 176
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java 20
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java 45
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java 10
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java 30
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java 13
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java 17
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java 15
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java 8
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java 11
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java 18
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java 12
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java 26
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java 20
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java 18
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java 11
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java 7
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java 8
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java 8
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java 212
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java 8
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java 235
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java 13
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java 15
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java 15
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java 2
124 files changed, 4111 insertions, 1470 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java b/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
index 5cfb7073..fc346cd9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
index 61a695eb..30e6e226 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -126,6 +126,9 @@ public abstract class AbstractAlgorithm<R extends Result> implements Algorithm {
if(e.getTargetException() instanceof RuntimeException) {
throw (RuntimeException) e.getTargetException();
}
+ if(e.getTargetException() instanceof AssertionError) {
+ throw (AssertionError) e.getTargetException();
+ }
throw new APIViolationException("Invoking the real 'run' method failed: " + e.getTargetException().toString(), e.getTargetException());
}
}
@@ -147,6 +150,9 @@ public abstract class AbstractAlgorithm<R extends Result> implements Algorithm {
if(e.getTargetException() instanceof RuntimeException) {
throw (RuntimeException) e.getTargetException();
}
+ if(e.getTargetException() instanceof AssertionError) {
+ throw (AssertionError) e.getTargetException();
+ }
throw new APIViolationException("Invoking the real 'run' method failed: " + e.getTargetException().toString(), e.getTargetException());
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
index 394ea55b..70d4ba3a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
index ad49563a..4fa12e11 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
index 40f24914..7c6f0dc5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java b/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
index 86fc3e51..0ecfb228 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm;
*/
import java.text.NumberFormat;
-import java.util.List;
import java.util.Locale;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -34,9 +33,8 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -97,7 +95,7 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
public static final OptionID OUTPUT_ACCURACY_ID = OptionID.getOrCreateOptionID("derivator.accuracy", "Threshold for output accuracy fraction digits.");
/**
- * Optional parameter to specify the treshold for the size of the random
+ * Optional parameter to specify the threshold for the size of the random
* sample to use, must be an integer greater than 0.
* <p/>
* Default value: the size of the complete dataset
@@ -159,17 +157,12 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
DBIDs ids;
if(this.sampleSize > 0) {
if(randomsample) {
- ids = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, 1);
+ ids = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, 1l);
}
else {
DistanceQuery<V, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
- List<DistanceResultPair<D>> queryResults = database.getKNNQuery(distanceQuery, this.sampleSize).getKNNForObject(centroidDV, this.sampleSize);
- ModifiableDBIDs tids = DBIDUtil.newHashSet(this.sampleSize);
- for(DistanceResultPair<D> qr : queryResults) {
- tids.add(qr.getDBID());
- }
- // Cast to non-modifiable
- ids = tids;
+ KNNResult<D> queryResults = database.getKNNQuery(distanceQuery, this.sampleSize).getKNNForObject(centroidDV, this.sampleSize);
+ ids = DBIDUtil.newHashSet(queryResults.asDBIDs());
}
}
else {
@@ -243,7 +236,7 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
// +1 == + B.getColumnDimensionality()
Matrix gaussJordan = new Matrix(transposedWeakEigenvectors.getRowDimensionality(), transposedWeakEigenvectors.getColumnDimensionality() + 1);
gaussJordan.setMatrix(0, transposedWeakEigenvectors.getRowDimensionality() - 1, 0, transposedWeakEigenvectors.getColumnDimensionality() - 1, transposedWeakEigenvectors);
- gaussJordan.setColumnVector(transposedWeakEigenvectors.getColumnDimensionality(), B);
+ gaussJordan.setCol(transposedWeakEigenvectors.getColumnDimensionality(), B);
if(logger.isDebuggingFiner()) {
logger.debugFiner("Gauss-Jordan-Elimination of " + FormatUtil.format(gaussJordan, NF));
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
index e403c623..168c69f1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -39,12 +39,12 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
/**
- * Dummy Algorithm, which just iterates over all points once, doing a 10NN query
+ * Dummy algorithm, which just iterates over all points once, doing a 10NN query
* each. Useful in testing e.g. index structures and as template for custom
- * algorithms. If you are looking for an algorithm that does <em>nothing</em>,
+ * algorithms. While this algorithm doesn't produce a result, it
+ * still performs rather expensive operations. If you are looking for an algorithm that does <em>nothing</em>,
* you must use {@link de.lmu.ifi.dbs.elki.algorithm.NullAlgorithm
- * NullAlgorithm} instead. While this algorithm doesn't produce a result, it
- * still performs rather expensive operations.
+ * NullAlgorithm} instead.
*
* @author Erich Schubert
* @param <O> Vector type
@@ -52,7 +52,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
* @apiviz.uses KNNQuery
*/
@Title("Dummy Algorithm")
-@Description("The algorithm executes a euclidena 10NN query on all data points, and can be used in unit testing")
+@Description("The algorithm executes an Euclidean 10NN query on all data points, and can be used in unit testing")
public class DummyAlgorithm<O extends NumberVector<?, ?>> extends AbstractAlgorithm<Result> {
/**
* The logger for this class.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java b/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
index 35065973..ac1820f9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,9 +32,9 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -117,9 +117,8 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
List<D> knnDistances = new ArrayList<D>(relation.size());
for(DBID id : relation.iterDBIDs()) {
if(random.nextDouble() < percentage) {
- final List<DistanceResultPair<D>> neighbors = knnQuery.getKNNForDBID(id, k);
- final int last = Math.min(k - 1, neighbors.size() - 1);
- knnDistances.add(neighbors.get(last).getDistance());
+ final KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k);
+ knnDistances.add(neighbors.getKNNDistance());
}
}
Collections.sort(knnDistances, Collections.reverseOrder());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java b/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
index 6dac350d..3cbfe143 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -37,20 +37,24 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialNode;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -66,7 +70,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* This algorithm only supports spatial databases based on a spatial index
* structure.
*
+ * Since this method compares the MBR of every single leaf with every other
+ * leaf, it is essentially quadratic in the number of leaves, which may not be
+ * appropriate for large trees.
+ *
* @author Elke Achtert
+ * @author Erich Schubert
+ *
* @param <V> the type of FeatureVector handled by this Algorithm
* @param <D> the type of Distance used by this Algorithm
* @param <N> the type of node used in the spatial index structure
@@ -110,7 +120,7 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
* {@link SpatialPrimitiveDistanceFunction}.
*/
@SuppressWarnings("unchecked")
- public DataStore<KNNList<D>> run(Database database, Relation<V> relation) throws IllegalStateException {
+ public WritableDataStore<KNNList<D>> run(Database database, Relation<V> relation) throws IllegalStateException {
if(!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
}
@@ -121,117 +131,222 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
// FIXME: Ensure were looking at the right relation!
SpatialIndexTree<N, E> index = indexes.iterator().next();
SpatialPrimitiveDistanceFunction<V, D> distFunction = (SpatialPrimitiveDistanceFunction<V, D>) getDistanceFunction();
- DistanceQuery<V, D> distq = database.getDistanceQuery(relation, distFunction);
-
DBIDs ids = relation.getDBIDs();
- WritableDataStore<KNNHeap<D>> knnHeaps = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, KNNHeap.class);
+ // Optimize for double?
+ final boolean doubleOptimize = (getDistanceFunction() instanceof SpatialPrimitiveDoubleDistanceFunction);
- try {
- // data pages of s
- List<E> ps_candidates = index.getLeaves();
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), logger) : null;
- IndefiniteProgress pageprog = logger.isVerbose() ? new IndefiniteProgress("Number of processed data pages", logger) : null;
- if(logger.isDebugging()) {
- logger.debugFine("# ps = " + ps_candidates.size());
- }
- // data pages of r
- List<E> pr_candidates = new ArrayList<E>(ps_candidates);
- if(logger.isDebugging()) {
- logger.debugFine("# pr = " + pr_candidates.size());
- }
- int processed = 0;
- int processedPages = 0;
- boolean up = true;
- for(E pr_entry : pr_candidates) {
- N pr = index.getNode(pr_entry);
- D pr_knn_distance = distq.infiniteDistance();
- if(logger.isDebugging()) {
- logger.debugFine(" ------ PR = " + pr);
- }
- // create for each data object a knn list
- for(int j = 0; j < pr.getNumEntries(); j++) {
- knnHeaps.put(((LeafEntry) pr.getEntry(j)).getDBID(), new KNNHeap<D>(k, distq.infiniteDistance()));
- }
+ // data pages
+ List<E> ps_candidates = new ArrayList<E>(index.getLeaves());
+ // knn heaps
+ List<List<KNNHeap<D>>> heaps = new ArrayList<List<KNNHeap<D>>>(ps_candidates.size());
+ Heap<Task> pq = new Heap<Task>(ps_candidates.size() * ps_candidates.size() / 10);
- if(up) {
- for(E ps_entry : ps_candidates) {
- D distance = distFunction.minDist(pr_entry, ps_entry);
+ // Initialize with the page self-pairing
+ for(int i = 0; i < ps_candidates.size(); i++) {
+ E pr_entry = ps_candidates.get(i);
+ N pr = index.getNode(pr_entry);
+ heaps.add(initHeaps(distFunction, doubleOptimize, pr));
+ }
- if(distance.compareTo(pr_knn_distance) <= 0) {
- N ps = index.getNode(ps_entry);
- pr_knn_distance = processDataPages(distq, pr, ps, knnHeaps, pr_knn_distance);
- }
- }
- up = false;
+ // Build priority queue
+ final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) / 2;
+ if(logger.isDebuggingFine()) {
+ logger.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
+ }
+ FiniteProgress mprogress = logger.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, logger) : null;
+ for(int i = 0; i < ps_candidates.size(); i++) {
+ E pr_entry = ps_candidates.get(i);
+ List<KNNHeap<D>> pr_heaps = heaps.get(i);
+ D pr_knn_distance = computeStopDistance(pr_heaps);
+
+ for(int j = i + 1; j < ps_candidates.size(); j++) {
+ E ps_entry = ps_candidates.get(j);
+ List<KNNHeap<D>> ps_heaps = heaps.get(j);
+ D ps_knn_distance = computeStopDistance(ps_heaps);
+ D minDist = distFunction.minDist(pr_entry, ps_entry);
+ // Resolve immediately:
+ if(minDist.isNullDistance()) {
+ N pr = index.getNode(ps_candidates.get(i));
+ N ps = index.getNode(ps_candidates.get(j));
+ processDataPagesOptimize(distFunction, doubleOptimize, pr_heaps, ps_heaps, pr, ps);
+ }
+ else if(minDist.compareTo(pr_knn_distance) <= 0 || minDist.compareTo(ps_knn_distance) <= 0) {
+ pq.add(new Task(minDist, i, j));
}
+ if(mprogress != null) {
+ mprogress.incrementProcessed(logger);
+ }
+ }
+ }
+ if(mprogress != null) {
+ mprogress.ensureCompleted(logger);
+ }
+ // Process the queue
+ FiniteProgress qprogress = logger.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), logger) : null;
+ IndefiniteProgress fprogress = logger.isVerbose() ? new IndefiniteProgress("Full comparisons", logger) : null;
+ while(!pq.isEmpty()) {
+ Task task = pq.poll();
+ List<KNNHeap<D>> pr_heaps = heaps.get(task.i);
+ List<KNNHeap<D>> ps_heaps = heaps.get(task.j);
+ D pr_knn_distance = computeStopDistance(pr_heaps);
+ D ps_knn_distance = computeStopDistance(ps_heaps);
+ boolean dor = task.mindist.compareTo(pr_knn_distance) <= 0;
+ boolean dos = task.mindist.compareTo(ps_knn_distance) <= 0;
+ if(dor || dos) {
+ N pr = index.getNode(ps_candidates.get(task.i));
+ N ps = index.getNode(ps_candidates.get(task.j));
+ if(dor && dos) {
+ processDataPagesOptimize(distFunction, doubleOptimize, pr_heaps, ps_heaps, pr, ps);
+ }
else {
- for(int s = ps_candidates.size() - 1; s >= 0; s--) {
- E ps_entry = ps_candidates.get(s);
- D distance = distFunction.minDist(pr_entry, ps_entry);
-
- if(distance.compareTo(pr_knn_distance) <= 0) {
- N ps = index.getNode(ps_entry);
- pr_knn_distance = processDataPages(distq, pr, ps, knnHeaps, pr_knn_distance);
- }
+ if(dor) {
+ processDataPagesOptimize(distFunction, doubleOptimize, pr_heaps, null, pr, ps);
+ }
+ else /* dos */{
+ processDataPagesOptimize(distFunction, doubleOptimize, ps_heaps, null, ps, pr);
}
- up = true;
}
+ if(fprogress != null) {
+ fprogress.incrementProcessed(logger);
+ }
+ }
+ if(qprogress != null) {
+ qprogress.incrementProcessed(logger);
+ }
+ }
+ if(qprogress != null) {
+ qprogress.ensureCompleted(logger);
+ }
+ if(fprogress != null) {
+ fprogress.setCompleted(logger);
+ }
- processed += pr.getNumEntries();
+ WritableDataStore<KNNList<D>> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
+ // FiniteProgress progress = logger.isVerbose() ? new
+ // FiniteProgress(this.getClass().getName(), relation.size(), logger) :
+ // null;
+ FiniteProgress pageprog = logger.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), logger) : null;
+ // int processed = 0;
+ for(int i = 0; i < ps_candidates.size(); i++) {
+ N pr = index.getNode(ps_candidates.get(i));
+ List<KNNHeap<D>> pr_heaps = heaps.get(i);
- if(progress != null && pageprog != null) {
- progress.setProcessed(processed, logger);
- pageprog.setProcessed(processedPages++, logger);
- }
+ // Finalize lists
+ for(int j = 0; j < pr.getNumEntries(); j++) {
+ knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
}
+ // Forget heaps and pq
+ heaps.set(i, null);
+ // processed += pr.getNumEntries();
+
+ // if(progress != null) {
+ // progress.setProcessed(processed, logger);
+ // }
if(pageprog != null) {
- pageprog.setCompleted(logger);
- }
- WritableDataStore<KNNList<D>> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
- for(DBID id : ids) {
- knnLists.put(id, knnHeaps.get(id).toKNNList());
+ pageprog.incrementProcessed(logger);
}
- return knnLists;
}
+ // if(progress != null) {
+ // progress.ensureCompleted(logger);
+ // }
+ if(pageprog != null) {
+ pageprog.ensureCompleted(logger);
+ }
+ return knnLists;
+ }
- catch(Exception e) {
- throw new IllegalStateException(e);
+ private List<KNNHeap<D>> initHeaps(SpatialPrimitiveDistanceFunction<V, D> distFunction, final boolean doubleOptimize, N pr) {
+ List<KNNHeap<D>> pr_heaps;
+ // Create for each data object a knn heap
+ pr_heaps = new ArrayList<KNNHeap<D>>(pr.getNumEntries());
+ for(int j = 0; j < pr.getNumEntries(); j++) {
+ pr_heaps.add(new KNNHeap<D>(k, distFunction.getDistanceFactory().infiniteDistance()));
}
+ // Self-join first, as this is expected to improve most and cannot be
+ // pruned.
+ processDataPagesOptimize(distFunction, doubleOptimize, pr_heaps, null, pr, pr);
+ return pr_heaps;
}
/**
* Processes the two data pages pr and ps and determines the k-nearest
* neighbors of pr in ps.
*
- * @param distQ the distance to use
+ * @param distFunction the distance to use
+ * @param doubleOptimize Flag whether to optimize for doubles.
* @param pr the first data page
* @param ps the second data page
- * @param knnLists the knn lists for each data object
- * @param pr_knn_distance the current knn distance of data page pr
- * @return the k-nearest neighbor distance of pr in ps
+ * @param pr_heaps the knn lists for each data object in pr
+ * @param ps_heaps the knn lists for each data object in ps (if ps != pr)
*/
- private D processDataPages(DistanceQuery<V, D> distQ, N pr, N ps, WritableDataStore<KNNHeap<D>> knnLists, D pr_knn_distance) {
- // TODO: optimize for double?
- boolean infinite = pr_knn_distance.isInfiniteDistance();
- for(int i = 0; i < pr.getNumEntries(); i++) {
- DBID r_id = ((LeafEntry) pr.getEntry(i)).getDBID();
- KNNHeap<D> knnList = knnLists.get(r_id);
-
+ private void processDataPagesOptimize(SpatialPrimitiveDistanceFunction<V, D> distFunction, final boolean doubleOptimize, List<KNNHeap<D>> pr_heaps, List<KNNHeap<D>> ps_heaps, N pr, N ps) {
+ if(doubleOptimize) {
+ List<?> khp = (List<?>) pr_heaps;
+ List<?> khs = (List<?>) ps_heaps;
+ processDataPagesDouble((SpatialPrimitiveDoubleDistanceFunction<? super V>) distFunction, pr, ps, (List<KNNHeap<DoubleDistance>>) khp, (List<KNNHeap<DoubleDistance>>) khs);
+ }
+ else {
for(int j = 0; j < ps.getNumEntries(); j++) {
- DBID s_id = ((LeafEntry) ps.getEntry(j)).getDBID();
-
- D distance = distQ.distance(r_id, s_id);
- if(knnList.add(distance, s_id)) {
- // set kNN distance of r
- if(infinite) {
- pr_knn_distance = knnList.getMaximumDistance();
+ final SpatialPointLeafEntry s_e = (SpatialPointLeafEntry) ps.getEntry(j);
+ DBID s_id = s_e.getDBID();
+ for(int i = 0; i < pr.getNumEntries(); i++) {
+ final SpatialPointLeafEntry r_e = (SpatialPointLeafEntry) pr.getEntry(i);
+ D distance = distFunction.minDist(s_e, r_e);
+ pr_heaps.get(i).add(distance, s_id);
+ if(pr != ps && ps_heaps != null) {
+ ps_heaps.get(j).add(distance, r_e.getDBID());
}
- pr_knn_distance = DistanceUtil.max(knnList.getMaximumDistance(), pr_knn_distance);
}
}
}
+ }
+
+ /**
+ * Processes the two data pages pr and ps and determines the k-nearest
+ * neighbors of pr in ps.
+ *
+ * @param df the distance function to use
+ * @param pr the first data page
+ * @param ps the second data page
+ * @param pr_heaps the knn lists for each data object
+ * @param ps_heaps the knn lists for each data object in ps
+ */
+ private void processDataPagesDouble(SpatialPrimitiveDoubleDistanceFunction<? super V> df, N pr, N ps, List<KNNHeap<DoubleDistance>> pr_heaps, List<KNNHeap<DoubleDistance>> ps_heaps) {
+ // Compare pairwise
+ for(int j = 0; j < ps.getNumEntries(); j++) {
+ final SpatialPointLeafEntry s_e = (SpatialPointLeafEntry) ps.getEntry(j);
+ DBID s_id = s_e.getDBID();
+ for(int i = 0; i < pr.getNumEntries(); i++) {
+ final SpatialPointLeafEntry r_e = (SpatialPointLeafEntry) pr.getEntry(i);
+ double distance = df.doubleMinDist(s_e, r_e);
+ pr_heaps.get(i).add(new DoubleDistanceResultPair(distance, s_id));
+ if(pr != ps && ps_heaps != null) {
+ ps_heaps.get(j).add(new DoubleDistanceResultPair(distance, r_e.getDBID()));
+ }
+ }
+ }
+ }
+
+ /**
+ * Compute the maximum stop distance
+ *
+ * @param heaps
+ * @return the k-nearest neighbor distance of pr in ps
+ */
+ private D computeStopDistance(List<KNNHeap<D>> heaps) {
+ // Update pruning distance
+ D pr_knn_distance = null;
+ for(KNNHeap<D> knnList : heaps) {
+ // set kNN distance of r
+ if(pr_knn_distance == null) {
+ pr_knn_distance = knnList.getKNNDistance();
+ }
+ else {
+ pr_knn_distance = DistanceUtil.max(knnList.getKNNDistance(), pr_knn_distance);
+ }
+ }
return pr_knn_distance;
}
@@ -246,6 +361,40 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
}
/**
+ * Task in the processing queue
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class Task implements Comparable<Task> {
+ final D mindist;
+
+ final int i;
+
+ final int j;
+
+ /**
+ * Constructor.
+ *
+ * @param mindist
+ * @param i
+ * @param j
+ */
+ public Task(D mindist, int i, int j) {
+ super();
+ this.mindist = mindist;
+ this.i = i;
+ this.j = j;
+ }
+
+ @Override
+ public int compareTo(Task o) {
+ return mindist.compareTo(o.mindist);
+ }
+ }
+
+ /**
* Parameterization class.
*
* @author Erich Schubert
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java b/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
index 345fda1a..89d2d3e0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
index 6c13e68e..a879c6b2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
index 5712d814..ea441655 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
index 95d88b93..108ba0ed 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -340,9 +340,9 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
}
- if(processedIDs.size() == distFunc.getRelation().size() && noise.size() == 0) {
+ /* if(processedIDs.size() == relation.size() && noise.size() == 0) {
break;
- }
+ } */
}
if(currentCluster.size() >= minpts) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
index e28dbff3..5ec59777 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
index 2576c5f6..b59af555 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,7 +33,6 @@ import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -132,7 +131,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
/**
* Performs the DBSCAN algorithm on the given database.
*/
- public Clustering<Model> run(Database database, Relation<O> relation) {
+ public Clustering<Model> run(Relation<O> relation) {
RangeQuery<O, D> rangeQuery = QueryUtil.getRangeQuery(relation, getDistanceFunction());
final int size = relation.size();
@@ -142,9 +141,9 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(size);
if(size >= minpts) {
- for(DBID id : rangeQuery.getRelation().iterDBIDs()) {
+ for(DBID id : relation.iterDBIDs()) {
if(!processedIDs.contains(id)) {
- expandCluster(database, rangeQuery, id, objprog, clusprog);
+ expandCluster(relation, rangeQuery, id, objprog, clusprog);
}
if(objprog != null && clusprog != null) {
objprog.setProcessed(processedIDs.size(), logger);
@@ -156,7 +155,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
}
}
else {
- for(DBID id : rangeQuery.getRelation().iterDBIDs()) {
+ for(DBID id : relation.iterDBIDs()) {
noise.add(id);
if(objprog != null && clusprog != null) {
objprog.setProcessed(noise.size(), logger);
@@ -189,12 +188,12 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* <p/>
* Border-Objects become members of the first possible cluster.
*
- * @param database the database on which the algorithm is run
+ * @param relation Database relation to run on
* @param rangeQuery Range query to use
* @param startObjectID potential seed of a new potential cluster
* @param objprog the progress object for logging the current status
*/
- protected void expandCluster(Database database, RangeQuery<O, D> rangeQuery, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
+ protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
List<DistanceResultPair<D>> seeds = rangeQuery.getRangeForDBID(startObjectID, epsilon);
// startObject is no core-object
@@ -245,7 +244,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
}
}
- if(processedIDs.size() == rangeQuery.getRelation().size() && noise.size() == 0) {
+ if(processedIDs.size() == relation.size() && noise.size() == 0) {
break;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
index ca401ddc..f1e6c945 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
index c1285659..a70a3f6f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,9 +25,10 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
import java.util.ArrayList;
import java.util.List;
-import java.util.Random;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansInitialization;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.RandomlyGeneratedInitialMeans;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -42,6 +43,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
@@ -58,8 +60,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Provides the EM algorithm (clustering by expectation maximization).
@@ -113,6 +114,11 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
*/
public static final OptionID DELTA_ID = OptionID.getOrCreateOptionID("em.delta", "The termination criterion for maximization of E(M): " + "E(M) - E(M') < em.delta");
+ /**
+ * Parameter to specify the initialization method
+ */
+ public static final OptionID INIT_ID = OptionID.getOrCreateOptionID("kmeans.initialization", "Method to choose the initial means.");
+
private static final double MIN_LOGLIKELIHOOD = -100000;
/**
@@ -121,32 +127,27 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
private double delta;
/**
- * Parameter to specify the random generator seed.
+ * Store the individual probabilities, for use by EMOutlierDetection etc.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("em.seed", "The random number generator seed.");
+ private WritableDataStore<double[]> probClusterIGivenX;
/**
- * Holds the value of {@link #SEED_ID}.
+ * Class to choose the initial means
*/
- private Long seed;
-
- /**
- * Store the individual probabilities, for use by EMOutlierDetection etc.
- */
- private WritableDataStore<double[]> probClusterIGivenX;
+ private KMeansInitialization<V> initializer;
/**
* Constructor.
*
* @param k k parameter
* @param delta delta parameter
- * @param seed Seed parameter
+ * @param initializer Class to choose the initial means
*/
- public EM(int k, double delta, Long seed) {
+ public EM(int k, double delta, KMeansInitialization<V> initializer) {
super();
this.k = k;
this.delta = delta;
- this.seed = seed;
+ this.initializer = initializer;
}
/**
@@ -169,14 +170,14 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
if(logger.isVerbose()) {
logger.verbose("initializing " + k + " models");
}
- List<V> means = initialMeans(relation);
+ List<Vector> means = initializer.chooseInitialMeans(relation, k, EuclideanDistanceFunction.STATIC);
List<Matrix> covarianceMatrices = new ArrayList<Matrix>(k);
List<Double> normDistrFactor = new ArrayList<Double>(k);
List<Matrix> invCovMatr = new ArrayList<Matrix>(k);
List<Double> clusterWeights = new ArrayList<Double>(k);
probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
- int dimensionality = means.get(0).getDimensionality();
+ final int dimensionality = means.get(0).getDimensionality();
for(int i = 0; i < k; i++) {
Matrix m = Matrix.identity(dimensionality, dimensionality);
covarianceMatrices.add(m);
@@ -211,12 +212,12 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
em = emNew;
// recompute models
- List<V> meanSums = new ArrayList<V>(k);
+ List<Vector> meanSums = new ArrayList<Vector>(k);
double[] sumOfClusterProbabilities = new double[k];
for(int i = 0; i < k; i++) {
clusterWeights.set(i, 0.0);
- meanSums.add(means.get(i).nullVector());
+ meanSums.add(new Vector(dimensionality));
covarianceMatrices.set(i, Matrix.zeroMatrix(dimensionality));
}
@@ -226,24 +227,23 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
for(int i = 0; i < k; i++) {
sumOfClusterProbabilities[i] += clusterProbabilities[i];
- V summand = relation.get(id).multiplicate(clusterProbabilities[i]);
- V currentMeanSum = meanSums.get(i).plus(summand);
- meanSums.set(i, currentMeanSum);
+ Vector summand = relation.get(id).getColumnVector().timesEquals(clusterProbabilities[i]);
+ meanSums.get(i).plusEquals(summand);
}
}
final int n = relation.size();
for(int i = 0; i < k; i++) {
clusterWeights.set(i, sumOfClusterProbabilities[i] / n);
- V newMean = meanSums.get(i).multiplicate(1 / sumOfClusterProbabilities[i]);
+ Vector newMean = meanSums.get(i).timesEquals(1 / sumOfClusterProbabilities[i]);
means.set(i, newMean);
}
// covariance matrices
for(DBID id : relation.iterDBIDs()) {
double[] clusterProbabilities = probClusterIGivenX.get(id);
- V instance = relation.get(id);
+ Vector instance = relation.get(id).getColumnVector();
for(int i = 0; i < k; i++) {
- V difference = instance.minus(means.get(i));
- covarianceMatrices.get(i).plusEquals(difference.getColumnVector().times(difference.getRowVector()).times(clusterProbabilities[i]));
+ Vector difference = instance.minus(means.get(i));
+ covarianceMatrices.get(i).plusEquals(difference.timesTranspose(difference).timesEquals(clusterProbabilities[i]));
}
}
for(int i = 0; i < k; i++) {
@@ -281,13 +281,14 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
}
hardClusters.get(maxIndex).add(id);
}
+ final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
Clustering<EMModel<V>> result = new Clustering<EMModel<V>>("EM Clustering", "em-clustering");
// provide models within the result
for(int i = 0; i < k; i++) {
// TODO: re-do labeling.
// SimpleClassLabel label = new SimpleClassLabel();
// label.init(result.canonicalClusterLabel(i));
- Cluster<EMModel<V>> model = new Cluster<EMModel<V>>(hardClusters.get(i), new EMModel<V>(means.get(i), covarianceMatrices.get(i)));
+ Cluster<EMModel<V>> model = new Cluster<EMModel<V>>(hardClusters.get(i), new EMModel<V>(factory.newNumberVector(means.get(i).getArrayRef()), covarianceMatrices.get(i)));
result.addCluster(model);
}
return result;
@@ -308,24 +309,20 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
* @param clusterWeights the weights of the current clusters
* @return the expectation value of the current mixture of distributions
*/
- protected double assignProbabilitiesToInstances(Relation<V> database, List<Double> normDistrFactor, List<V> means, List<Matrix> invCovMatr, List<Double> clusterWeights, WritableDataStore<double[]> probClusterIGivenX) {
+ protected double assignProbabilitiesToInstances(Relation<V> database, List<Double> normDistrFactor, List<Vector> means, List<Matrix> invCovMatr, List<Double> clusterWeights, WritableDataStore<double[]> probClusterIGivenX) {
double emSum = 0.0;
for(DBID id : database.iterDBIDs()) {
- V x = database.get(id);
+ Vector x = database.get(id).getColumnVector();
List<Double> probabilities = new ArrayList<Double>(k);
for(int i = 0; i < k; i++) {
- V difference = x.minus(means.get(i));
- Matrix differenceRow = difference.getRowVector();
- Vector differenceCol = difference.getColumnVector();
- Matrix rowTimesCov = differenceRow.times(invCovMatr.get(i));
- Vector rowTimesCovTimesCol = rowTimesCov.times(differenceCol);
- double power = rowTimesCovTimesCol.get(0, 0) / 2.0;
+ Vector difference = x.minus(means.get(i));
+ double rowTimesCovTimesCol = difference.transposeTimesTimes(invCovMatr.get(i), difference);
+ double power = rowTimesCovTimesCol / 2.0;
double prob = normDistrFactor.get(i) * Math.exp(-power);
if(logger.isDebuggingFinest()) {
- logger.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " differenceRow:\n" + FormatUtil.format(differenceRow, " ") + "\n" + " differenceCol:\n" + FormatUtil.format(differenceCol, " ") + "\n" + " rowTimesCov:\n" + FormatUtil.format(rowTimesCov, " ") + "\n" + " rowTimesCovTimesCol:\n" + FormatUtil.format(rowTimesCovTimesCol, " ") + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), " "));
+ logger.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " difference:\n" + FormatUtil.format(difference, " ") + "\n" + " rowTimesCovTimesCol:\n" + rowTimesCovTimesCol + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), " "));
}
-
probabilities.add(prob);
}
double priorProbability = 0.0;
@@ -356,48 +353,6 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
}
/**
- * Creates {@link #k k} random points distributed uniformly within the
- * attribute ranges of the given database.
- *
- * @param relation the database must contain enough points in order to
- * ascertain the range of attribute values. Less than two points would
- * make no sense. The content of the database is not touched otherwise.
- * @return a list of {@link #k k} random points distributed uniformly within
- * the attribute ranges of the given database
- */
- protected List<V> initialMeans(Relation<V> relation) {
- final Random random;
- if(this.seed != null) {
- random = new Random(this.seed);
- }
- else {
- random = new Random();
- }
- if(relation.size() > 0) {
- final int dim = DatabaseUtil.dimensionality(relation);
- Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
- List<V> means = new ArrayList<V>(k);
- if(logger.isVerbose()) {
- logger.verbose("initializing random vectors");
- }
- for(int i = 0; i < k; i++) {
- double[] r = MathUtil.randomDoubleArray(dim, random);
- // Rescale
- for (int d = 0; d < dim; d++) {
- r[d] = minmax.first.doubleValue(d + 1) + (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)) * r[d];
- }
- // Instantiate
- V randomVector = minmax.first.newInstance(r);
- means.add(randomVector);
- }
- return means;
- }
- else {
- return new ArrayList<V>(0);
- }
- }
-
- /**
* Get the probabilities for a given point.
*
* @param index Point ID
@@ -429,7 +384,7 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
protected double delta;
- protected Long seed;
+ protected KMeansInitialization<V> initializer;
@Override
protected void makeOptions(Parameterization config) {
@@ -439,20 +394,20 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
k = kP.getValue();
}
+ ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+ if(config.grab(initialP)) {
+ initializer = initialP.instantiateClass(config);
+ }
+
DoubleParameter deltaP = new DoubleParameter(DELTA_ID, new GreaterEqualConstraint(0.0), 0.0);
if(config.grab(deltaP)) {
delta = deltaP.getValue();
}
-
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
- }
}
@Override
protected EM<V> makeInstance() {
- return new EM<V>(k, delta, seed);
+ return new EM<V>(k, delta, initializer);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/KMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/KMeans.java
deleted file mode 100644
index 38ea89c2..00000000
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/KMeans.java
+++ /dev/null
@@ -1,307 +0,0 @@
-package de.lmu.ifi.dbs.elki.algorithm.clustering;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2011
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Random;
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.Cluster;
-import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.model.MeanModel;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.math.MathUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
-
-/**
- * Provides the k-means algorithm.
- * <p>
- * Reference: J. MacQueen: Some Methods for Classification and Analysis of
- * Multivariate Observations. <br>
- * In 5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297.
- * </p>
- *
- * @author Arthur Zimek
- *
- * @apiviz.has MeanModel
- *
- * @param <D> a type of {@link Distance} as returned by the used distance
- * function
- * @param <V> a type of {@link NumberVector} as a suitable datatype for this
- * algorithm
- */
-@Title("K-Means")
-@Description("Finds a partitioning into k clusters.")
-@Reference(authors = "J. MacQueen", title = "Some Methods for Classification and Analysis of Multivariate Observations", booktitle = "5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297", url = "http://projecteuclid.org/euclid.bsmsp/1200512992")
-public class KMeans<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<V, D, Clustering<MeanModel<V>>> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
- /**
- * The logger for this class.
- */
- private static final Logging logger = Logging.getLogger(KMeans.class);
-
- /**
- * Parameter to specify the number of clusters to find, must be an integer
- * greater than 0.
- */
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("kmeans.k", "The number of clusters to find.");
-
- /**
- * Parameter to specify the number of clusters to find, must be an integer
- * greater or equal to 0, where 0 means no limit.
- */
- public static final OptionID MAXITER_ID = OptionID.getOrCreateOptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
-
- /**
- * Parameter to specify the random generator seed.
- */
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("kmeans.seed", "The random number generator seed.");
-
- /**
- * Holds the value of {@link #K_ID}.
- */
- private int k;
-
- /**
- * Holds the value of {@link #MAXITER_ID}.
- */
- private int maxiter;
-
- /**
- * Holds the value of {@link #SEED_ID}.
- */
- private Long seed;
-
- /**
- * Constructor.
- *
- * @param distanceFunction distance function
- * @param k k parameter
- * @param maxiter Maxiter parameter
- * @param seed Random generator seed
- */
- public KMeans(PrimitiveDistanceFunction<? super V, D> distanceFunction, int k, int maxiter, Long seed) {
- super(distanceFunction);
- this.k = k;
- this.maxiter = maxiter;
- this.seed = seed;
- }
-
- /**
- * Run k-means
- *
- * @param database Database
- * @param relation relation to use
- * @return result
- * @throws IllegalStateException
- */
- public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) throws IllegalStateException {
- final Random random = (this.seed != null) ? new Random(this.seed) : new Random();
- if(relation.size() > 0) {
- final int dim = DatabaseUtil.dimensionality(relation);
- Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
- List<V> means = new ArrayList<V>(k);
- List<V> oldMeans;
- if(logger.isVerbose()) {
- logger.verbose("initializing random vectors");
- }
- for(int i = 0; i < k; i++) {
- double[] r = MathUtil.randomDoubleArray(dim, random);
- // Rescale
- for (int d = 0; d < dim; d++) {
- r[d] = minmax.first.doubleValue(d + 1) + (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)) * r[d];
- }
- // Instantiate
- V randomVector = minmax.first.newInstance(r);
- means.add(randomVector);
- }
- List<? extends ModifiableDBIDs> clusters;
- clusters = sort(means, relation);
- boolean changed = true;
- int iteration = 1;
- while(changed) {
- if(logger.isVerbose()) {
- logger.verbose("iteration " + iteration);
- }
- oldMeans = new ArrayList<V>(means);
- means = means(clusters, means, relation);
- clusters = sort(means, relation);
- changed = !means.equals(oldMeans);
- iteration++;
-
- if(maxiter > 0 && iteration > maxiter) {
- break;
- }
- }
- Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
- for(int i = 0; i < clusters.size(); i++) {
- DBIDs ids = clusters.get(i);
- MeanModel<V> model = new MeanModel<V>(means.get(i));
- result.addCluster(new Cluster<MeanModel<V>>(ids, model));
- }
- return result;
- }
- else {
- return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
- }
- }
-
- /**
- * Returns the mean vectors of the given clusters in the given database.
- *
- * @param clusters the clusters to compute the means
- * @param means the recent means
- * @param database the database containing the vectors
- * @return the mean vectors of the given clusters in the given database
- */
- protected List<V> means(List<? extends ModifiableDBIDs> clusters, List<V> means, Relation<V> database) {
- List<V> newMeans = new ArrayList<V>(k);
- for(int i = 0; i < k; i++) {
- ModifiableDBIDs list = clusters.get(i);
- V mean = null;
- for(Iterator<DBID> clusterIter = list.iterator(); clusterIter.hasNext();) {
- if(mean == null) {
- mean = database.get(clusterIter.next());
- }
- else {
- mean = mean.plus(database.get(clusterIter.next()));
- }
- }
- if(list.size() > 0) {
- assert mean != null;
- mean = mean.multiplicate(1.0 / list.size());
- }
- else {
- mean = means.get(i);
- }
- newMeans.add(mean);
- }
- return newMeans;
- }
-
- /**
- * Returns a list of clusters. The k<sup>th</sup> cluster contains the ids of
- * those FeatureVectors, that are nearest to the k<sup>th</sup> mean.
- *
- * @param means a list of k means
- * @param database the database to cluster
- * @return list of k clusters
- */
- protected List<? extends ModifiableDBIDs> sort(List<V> means, Relation<V> database) {
- List<ArrayModifiableDBIDs> clusters = new ArrayList<ArrayModifiableDBIDs>(k);
- for(int i = 0; i < k; i++) {
- clusters.add(DBIDUtil.newArray());
- }
-
- for(DBID id : database.iterDBIDs()) {
- List<D> distances = new ArrayList<D>(k);
- V fv = database.get(id);
- int minIndex = 0;
- for(int d = 0; d < k; d++) {
- distances.add(getDistanceFunction().distance(fv, means.get(d)));
- if(distances.get(d).compareTo(distances.get(minIndex)) < 0) {
- minIndex = d;
- }
- }
- clusters.get(minIndex).add(id);
- }
- for(ArrayModifiableDBIDs cluster : clusters) {
- Collections.sort(cluster);
- }
- return clusters;
- }
-
- @Override
- public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
- }
-
- @Override
- protected Logging getLogger() {
- return logger;
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<V, D> {
- protected int k;
-
- protected int maxiter;
-
- protected Long seed;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
- k = kP.getValue();
- }
-
- IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
- }
-
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
- }
- }
-
- @Override
- protected KMeans<V, D> makeInstance() {
- return new KMeans<V, D>(distanceFunction, k, maxiter, seed);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
index 24985e24..2244b07b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
index c233963d..d6c5872a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
index f7bd10c7..41e48b89 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
index e1329888..45b12c43 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -133,18 +133,18 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
public Result run(Database database, Relation<O> relation) {
DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
Class<D> distCls = (Class<D>) getDistanceFunction().getDistanceFactory().getClass();
- WritableRecordStore store = DataStoreUtil.makeRecordStorage(distQuery.getRelation().getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBID.class, distCls);
+ WritableRecordStore store = DataStoreUtil.makeRecordStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBID.class, distCls);
pi = store.getStorage(0, DBID.class);
lambda = store.getStorage(1, distCls);
// Temporary storage for m.
- WritableDataStore<D> m = DataStoreUtil.makeStorage(distQuery.getRelation().getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
+ WritableDataStore<D> m = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustering", distQuery.getRelation().size(), logger) : null;
+ FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustering", relation.size(), logger) : null;
// has to be an array for monotonicity reasons!
- ModifiableDBIDs processedIDs = DBIDUtil.newArray(distQuery.getRelation().size());
+ ModifiableDBIDs processedIDs = DBIDUtil.newArray(relation.size());
// apply the algorithm
- for(DBID id : distQuery.getRelation().iterDBIDs()) {
+ for(DBID id : relation.iterDBIDs()) {
step1(id);
step2(id, processedIDs, distQuery, m);
step3(id, processedIDs, m);
@@ -168,8 +168,8 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
BasicResult result = null;
// Build clusters identified by their target object
- int minc = minclusters != null ? minclusters : distQuery.getRelation().size();
- result = extractClusters(distQuery.getRelation().getDBIDs(), pi, lambda, minc);
+ int minc = minclusters != null ? minclusters : relation.size();
+ result = extractClusters(relation.getDBIDs(), pi, lambda, minc);
result.addChildResult(new MaterializedRelation<DBID>("SLINK pi", "slink-order", TypeUtil.DBID, pi, processedIDs));
result.addChildResult(new MaterializedRelation<D>("SLINK lambda", "slink-order", new SimpleTypeInformation<D>(distCls), lambda, processedIDs));
@@ -288,7 +288,7 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
D stopdist = null;
// sort by lambda
ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
- Collections.sort(order, new CompareByLambda<D>(lambda));
+ order.sort(new CompareByLambda<D>(lambda));
int index = ids.size() - minclusters - 1;
while(index >= 0) {
if(lambda.get(order.get(index)).equals(lambda.get(order.get(index + 1)))) {
@@ -458,7 +458,7 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
// extract a hierarchical clustering
ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
// sort by lambda
- Collections.sort(order, new CompareByLambda<D>(lambda));
+ order.sort(new CompareByLambda<D>(lambda));
D curdist = null;
D stopdist = null;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
index 3bde2932..7c3a13c9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
import java.util.ArrayList;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
@@ -36,6 +35,7 @@ import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -200,8 +200,8 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
* @return the shared nearest neighbors of the specified query object in the
* given database
*/
- protected List<DBID> findSNNNeighbors(SimilarityQuery<O, IntegerDistance> snnInstance, DBID queryObject) {
- List<DBID> neighbors = new LinkedList<DBID>();
+ protected ArrayModifiableDBIDs findSNNNeighbors(SimilarityQuery<O, IntegerDistance> snnInstance, DBID queryObject) {
+ ArrayModifiableDBIDs neighbors = DBIDUtil.newArray();
for(DBID id : snnInstance.getRelation().iterDBIDs()) {
if(snnInstance.similarity(queryObject, id).compareTo(epsilon) >= 0) {
neighbors.add(id);
@@ -222,7 +222,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
* clustering
*/
protected void expandCluster(SimilarityQuery<O, IntegerDistance> snnInstance, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
- List<DBID> seeds = findSNNNeighbors(snnInstance, startObjectID);
+ ArrayModifiableDBIDs seeds = findSNNNeighbors(snnInstance, startObjectID);
// startObject is no core-object
if(seeds.size() < minpts) {
@@ -247,11 +247,10 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
noise.remove(seed);
}
}
- seeds.remove(0);
while(seeds.size() > 0) {
- DBID o = seeds.remove(0);
- List<DBID> neighborhood = findSNNNeighbors(snnInstance, o);
+ DBID o = seeds.remove(seeds.size() - 1);
+ ArrayModifiableDBIDs neighborhood = findSNNNeighbors(snnInstance, o);
if(neighborhood.size() >= minpts) {
for(DBID p : neighborhood) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
index 4a0b391c..b877415e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -55,7 +55,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.datasource.filter.NonNumericFeaturesException;
+import de.lmu.ifi.dbs.elki.datasource.filter.normalization.NonNumericFeaturesException;
import de.lmu.ifi.dbs.elki.distance.distancefunction.WeightedDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -84,7 +84,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
* Provides the CASH algorithm, an subspace clustering algorithm based on the
- * hough transform.
+ * Hough transform.
+ *
+ * <b>Note:</b> CASH requires explicitly setting the input parser to the non-default
+ * {@link de.lmu.ifi.dbs.elki.datasource.parser.ParameterizationFunctionLabelParser}:
+ * (in the MiniGui, set option: dbc.parser ParameterizationFunctionLabelParser).
+ *
* <p>
* Reference: E. Achtert, C. Böhm, J. David, P. Kröger, A. Zimek: Robust
* clustering in arbitrarily oriented subspaces. <br>
@@ -99,7 +104,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
// todo elke hierarchy (later)
@Title("CASH: Robust clustering in arbitrarily oriented subspaces")
-@Description("Subspace clustering algorithm based on the hough transform.")
+@Description("Subspace clustering algorithm based on the Hough transform.")
@Reference(authors = "E. Achtert, C. Böhm, J. David, P. Kröger, A. Zimek", title = "Robust clustering in arbitraily oriented subspaces", booktitle = "Proc. 8th SIAM Int. Conf. on Data Mining (SDM'08), Atlanta, GA, 2008", url = "http://www.siam.org/proceedings/datamining/2008/dm08_69_AchtertBoehmDavidKroegerZimek.pdf")
public class CASH extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
/**
@@ -349,7 +354,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
res.addCluster(c);
noiseIDs.removeDBIDs(interval.getIDs());
clusterIDs.addDBIDs(interval.getIDs());
- processedIDs.addAll(interval.getIDs());
+ processedIDs.addDBIDs(interval.getIDs());
}
// Rebuild heap
@@ -372,13 +377,13 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
if(dim == noiseDim) {
Cluster<Model> c = new Cluster<Model>(noiseIDs, true, ClusterModel.CLUSTER);
res.addCluster(c);
- processedIDs.addAll(noiseIDs);
+ processedIDs.addDBIDs(noiseIDs);
}
else if(noiseIDs.size() >= minPts) {
LinearEquationSystem les = runDerivator(fulldatabase, dim - 1, noiseIDs);
Cluster<Model> c = new Cluster<Model>(noiseIDs, true, new LinearEquationModel(les));
res.addCluster(c);
- processedIDs.addAll(noiseIDs);
+ processedIDs.addDBIDs(noiseIDs);
}
}
@@ -521,7 +526,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
private ParameterizationFunction project(Matrix basis, ParameterizationFunction f) {
// Matrix m = new Matrix(new
// double[][]{f.getPointCoordinates()}).times(basis);
- Matrix m = f.getRowVector().times(basis);
+ Matrix m = f.getColumnVector().transposeTimes(basis);
ParameterizationFunction f_t = new ParameterizationFunction(m.getColumnPackedCopy());
return f_t;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
index 8fc30b3d..575bf117 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
index 75633853..af4f677f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -244,7 +244,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
}
else {
ModifiableDBIDs merged = DBIDUtil.newHashSet(noise.getIDs());
- merged.addAll(clus.getIDs().asCollection());
+ merged.addDBIDs(clus.getIDs());
noise.setIDs(merged);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
index 93d0cc99..98761962 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
index 92723428..1065682c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
new file mode 100644
index 00000000..41ee1f69
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
@@ -0,0 +1,566 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
+
+/**
+ * Linear manifold clustering in high dimensional spaces by stochastic search.
+ *
+ * Reference:
+ * <p>
+ * Robert Haralick, Rave Harpaz<br />
+ * Linear manifold clustering in high dimensional spaces by stochastic search<br/>
+ * In: Pattern Recognition volume 40, Issue 10
+ * </p>
+ *
+ * Implementation note: the LMCLUS algorithm seems to lack good stopping
+ * criteria. We can't entirely reproduce the good results from the original
+ * publication, in particular not on noisy data. But the questionable parts are
+ * as in the original publication, associated thesis and published source code.
+ * The minimum cluster size however can serve as a hidden stopping criterion.
+ *
+ * @author Ernst Waas
+ * @author Erich Schubert
+ */
+@Reference(authors = "Robert Haralick, Rave Harpaz", title = "Linear manifold clustering in high dimensional spaces by stochastic search", booktitle = "Pattern Recognition volume 40, Issue 10", url = "http://dx.doi.org/10.1016/j.patcog.2007.01.020")
+public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging logger = Logging.getLogger(LMCLUS.class);
+
+ /**
+ * Epsilon: accepted probability that no sample consists of points from one cluster.
+ */
+ private final static double NOT_FROM_ONE_CLUSTER_PROBABILITY = 0.2;
+
+ /**
+ * Histogram resolution
+ */
+ private final static int BINS = 50;
+
+ /**
+ * Sensitivity threshold: a separation is only accepted if its goodness exceeds this value.
+ */
+ private final double sensitivityThreshold;
+
+ /**
+ * Maximum cluster dimensionality
+ */
+ private final int maxLMDim;
+
+ /**
+ * Minimum cluster size
+ */
+ private final int minsize;
+
+ /**
+ * Number of sampling rounds to find a good split
+ */
+ private final int samplingLevel;
+
+ /**
+ * Constructor.
+ *
+ * @param maxdim Maximum dimensionality
+ * @param minsize Minimum cluster size
+ * @param samplingLevel Sampling level
+ * @param sensitivityThreshold Threshold
+ */
+ public LMCLUS(int maxdim, int minsize, int samplingLevel, double sensitivityThreshold) {
+ super();
+ this.maxLMDim = maxdim;
+ this.minsize = minsize;
+ this.samplingLevel = samplingLevel;
+ this.sensitivityThreshold = sensitivityThreshold;
+ }
+
+ /**
+ * The main LMCLUS (Linear manifold clustering algorithm) is processed in this
+ * method.
+ *
+ * <PRE>
+ * The algorithm samples random linear manifolds and tries to find clusters in it.
+ * It calculates a distance histogram, searches for a threshold and partitions the
+ * points into two groups: the ones in the cluster and everything else.
+ * Then the best fitting linear manifold is searched and registered as a cluster.
+ * The process is started over until all points are clustered.
+ * The last cluster should contain all the outliers. (or the whole data if no clusters have been found.)
+ * For details see {@link LMCLUS}.
+ * </PRE>
+ *
+ * @param database The database to operate on
+ * @param relation Relation
+ * @return Clustering result
+ * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException
+ */
+ public Clustering<Model> run(Database database, Relation<NumberVector<?, ?>> relation) throws UnableToComplyException {
+ Clustering<Model> ret = new Clustering<Model>("LMCLUS Clustering", "lmclus-clustering");
+ FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), logger) : null;
+ IndefiniteProgress cprogress = logger.isVerbose() ? new IndefiniteProgress("Clusters found", logger) : null;
+ ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
+
+ final int maxdim = Math.min(maxLMDim, DatabaseUtil.dimensionality(relation));
+ int cnum = 0;
+ while(unclustered.size() > minsize) {
+ DBIDs current = unclustered;
+ int lmDim = 1;
+ for(int k = 1; k <= maxdim; k++) {
+ // Implementation note: this while loop is from the original publication
+ // and the published LMCLUS source code. It doesn't make sense to me -
+ // it is lacking a stop criterion other than "cluster is too small" and
+ // "cluster is inseparable"! Additionally, there is no good criterion for
+ // stopping at the appropriate dimensionality either.
+ while(true) {
+ Separation separation = findSeparation(relation, current, k);
+ // logger.verbose("k: " + k + " goodness: " + separation.goodness +
+ // " threshold: " + separation.threshold);
+ if(separation.goodness <= sensitivityThreshold) {
+ break;
+ }
+ ModifiableDBIDs subset = DBIDUtil.newArray(current.size());
+ for(DBID id : current) {
+ if(deviation(relation.get(id).getColumnVector().minusEquals(separation.originV), separation.basis) < separation.threshold) {
+ subset.add(id);
+ }
+ }
+ // logger.verbose("size:"+subset.size());
+ if(subset.size() < minsize) {
+ break;
+ }
+ current = subset;
+ lmDim = k;
+ // System.out.println("Partition: " + subset.size());
+ }
+ }
+ // No more clusters found
+ if(current.size() < minsize || current == unclustered) {
+ break;
+ }
+ // New cluster found
+ // TODO: annotate cluster with dimensionality
+ final Cluster<Model> cluster = new Cluster<Model>(current);
+ cluster.setName("Cluster_" + lmDim + "d_" + cnum);
+ cnum++;
+ ret.addCluster(cluster);
+ // Remove from main working set.
+ unclustered.removeDBIDs(current);
+ if(progress != null) {
+ progress.setProcessed(relation.size() - unclustered.size(), logger);
+ }
+ if(cprogress != null) {
+ cprogress.setProcessed(cnum, logger);
+ }
+ }
+ // Remaining objects are noise
+ if(unclustered.size() > 0) {
+ ret.addCluster(new Cluster<Model>(unclustered, true));
+ }
+ if(progress != null) {
+ progress.setProcessed(relation.size(), logger);
+ progress.ensureCompleted(logger);
+ }
+ if(cprogress != null) {
+ cprogress.setCompleted(logger);
+ }
+ return ret;
+ }
+
+ /**
+ * Deviation from a manifold described by beta.
+ *
+ * @param delta Delta from origin vector
+ * @param beta Manifold
+ * @return Deviation score
+ */
+ private double deviation(Vector delta, Matrix beta) {
+ double a = delta.euclideanLength();
+ double b = beta.transposeTimes(delta).euclideanLength();
+ return Math.sqrt((a * a) - (b * b));
+ }
+
+ /**
+ * This method samples a number of linear manifolds and tries to determine
+ * which one yields the best cluster.
+ *
+ * <PRE>
+ * A number of sample points according to the dimension of the linear manifold are taken.
+ * The basis (B) and the origin(o) of the manifold are calculated.
+ * A distance histogram using the distance function sqrt(||x-o||^2 - ||B^t*(x-o)||^2) is generated.
+ * The best threshold is searched using the evaluate threshold function.
+ * The overall goodness of the threshold is determined.
+ * The process is redone until a specific number of samples is taken.
+ * </PRE>
+ *
+ * @param relation The vector relation
+ * @param currentids Current DBIDs
+ * @param dimension the dimension of the linear manifold to sample.
+ * @return the best separation found, carrying its goodness, threshold, origin
+ * vector and basis.
+ */
+ private Separation findSeparation(Relation<NumberVector<?, ?>> relation, DBIDs currentids, int dimension) {
+ Separation separation = new Separation();
+ // determine the number of samples needed, to secure that with a specific
+ // probability
+ // in at least one sample every sampled point is from the same cluster.
+ int samples = (int) Math.min(Math.log(NOT_FROM_ONE_CLUSTER_PROBABILITY) / (Math.log(1 - Math.pow((1.0d / samplingLevel), dimension))), (double) currentids.size());
+ // System.out.println("Number of samples: " + samples);
+ Random r = new Random();
+ int remaining_retries = 100;
+ for(int i = 1; i <= samples; i++) {
+ DBIDs sample = DBIDUtil.randomSample(currentids, dimension + 1, r.nextLong());
+ final Iterator<DBID> iter = sample.iterator();
+ // Use first as origin
+ DBID origin = iter.next();
+ Vector originV = relation.get(origin).getColumnVector();
+ // Build orthogonal basis from remainder
+ Matrix basis;
+ {
+ List<Vector> vectors = new ArrayList<Vector>(sample.size() - 1);
+ while(iter.hasNext()) {
+ Vector vec = relation.get(iter.next()).getColumnVector();
+ vectors.add(vec.minusEquals(originV));
+ }
+ // generate orthogonal basis
+ basis = generateOrthonormalBasis(vectors);
+ if(basis == null) {
+ // new sample has to be taken.
+ i--;
+ remaining_retries--;
+ if(remaining_retries < 0) {
+ throw new AbortException("Too many retries in sampling, and always a linear dependant data set.");
+ }
+ continue;
+ }
+ }
+ // Generate and fill a histogram.
+ FlexiHistogram<Double, Double> histogram = FlexiHistogram.DoubleSumHistogram(BINS);
+ double w = 1.0 / currentids.size();
+ for(DBID point : currentids) {
+ // Skip sampled points
+ if(sample.contains(point)) {
+ continue;
+ }
+ Vector vec = relation.get(point).getColumnVector().minusEquals(originV);
+ final double distance = deviation(vec, basis);
+ histogram.aggregate(distance, w);
+ }
+ double[] th = findAndEvaluateThreshold(histogram); // evaluate threshold
+ if(th[1] > separation.goodness) {
+ separation.goodness = th[1];
+ separation.threshold = th[0];
+ separation.originV = originV;
+ separation.basis = basis;
+ }
+ }
+ return separation;
+ }
+
+ /**
+ * This Method generates an orthonormal basis from a set of Vectors. It uses
+ * the established Gram-Schmidt algorithm for orthonormalisation:
+ *
+ * <PRE>
+ * u_1 = v_1
+ * u_k = v_k - proj_u1(v_k) - ... - proj_u(k-1)(v_k);
+ *
+ * Where proj_u(v) = <v,u>/<u,u> *u
+ * </PRE>
+ *
+ * @param vectors The set of vectors to generate the orthonormal basis from
+ * @return the orthonormal basis generated by this method.
+ * @throws RuntimeException if the given vectors are not linear independent.
+ */
+ private Matrix generateOrthonormalBasis(List<Vector> vectors) {
+ Vector first = vectors.get(0);
+ first = first.times(1.0 / first.euclideanLength());
+ Matrix ret = new Matrix(first.getDimensionality(), vectors.size());
+ ret.setCol(0, first);
+ for(int i = 1; i < vectors.size(); i++) {
+ // System.out.println("Matrix:" + ret);
+ Vector v_i = vectors.get(i);
+ Vector u_i = v_i.copy();
+ // System.out.println("Vector " + i + ":" + partialSol);
+ for(int j = 0; j < i; j++) {
+ Vector v_j = ret.getCol(j);
+ double f = v_i.transposeTimes(v_j) / v_j.transposeTimes(v_j);
+ if(Double.isNaN(f)) {
+ if(logger.isDebuggingFine()) {
+ logger.debugFine("Zero vector encountered? " + v_j);
+ }
+ return null;
+ }
+ u_i.minusTimesEquals(v_j, f);
+ }
+ // check if the vectors weren't independent
+ final double len_u_i = u_i.euclideanLength();
+ if(len_u_i == 0.0) {
+ if(logger.isDebuggingFine()) {
+ logger.debugFine("Points not independent - no orthonormalization.");
+ }
+ return null;
+ }
+ // System.out.println("Vector " + i + ":" + partialSol);
+ u_i.timesEquals(1 / len_u_i);
+ ret.setCol(i, u_i);
+ }
+ return ret;
+ }
+
+  /**
+   * Evaluate the histogram to find a suitable threshold.
+   *
+   * For every bin index i, the histogram is split into a lower part (bins
+   * 0..i) and an upper part (bins i..n-1). Cumulative weight, mean and
+   * standard deviation of both parts are combined into the criterion
+   *
+   * <PRE>
+   *  J(i) = 1 + 2 * (p1 * (log(sigma1) - log(p1)) + p2 * (log(sigma2) - log(p2)))
+   * </PRE>
+   *
+   * (this appears to be the minimum error thresholding criterion of Kittler
+   * and Illingworth - TODO confirm against the LMCLUS publication). Each local
+   * minimum of J is scored by its depth multiplied with the discriminability
+   * of the two parts, and the best scoring position is returned.
+   *
+   * @param histogram Histogram to evaluate
+   * @return Array of { threshold position, goodness }. When the histogram has
+   *         fewer than two bins or no local minimum exists, the result is
+   *         { NaN, negative infinity }, i.e. "no usable threshold".
+   */
+  private double[] findAndEvaluateThreshold(FlexiHistogram<Double, Double> histogram) {
+    int n = histogram.getNumBins();
+    // With fewer than two bins there is nothing to separate; the difference
+    // jt[1] - jt[0] below would also read past the end of the array.
+    if(n < 2) {
+      return new double[] { Double.NaN, Double.NEGATIVE_INFINITY };
+    }
+    double[] p1 = new double[n];
+    double[] p2 = new double[n];
+    double[] mu1 = new double[n];
+    double[] mu2 = new double[n];
+    double[] sigma1 = new double[n];
+    double[] sigma2 = new double[n];
+    double[] jt = new double[n];
+    // Forward pass: cumulative statistics of bins 0..i
+    {
+      MeanVariance mv = new MeanVariance();
+      Iterator<DoubleObjPair<Double>> forward = histogram.iterator();
+      for(int i = 0; forward.hasNext(); i++) {
+        DoubleObjPair<Double> pair = forward.next();
+        p1[i] = pair.second + ((i > 0) ? p1[i - 1] : 0);
+        // Presumably put(value, weight): the bin index is the value, the bin
+        // content the weight.
+        mv.put(i, pair.second);
+        mu1[i] = mv.getMean();
+        sigma1[i] = mv.getNaiveStddev();
+      }
+    }
+    // Backwards pass: cumulative statistics of bins j..n-1
+    {
+      MeanVariance mv = new MeanVariance();
+      Iterator<DoubleObjPair<Double>> backwards = histogram.reverseIterator();
+      for(int j = n - 1; backwards.hasNext(); j--) {
+        DoubleObjPair<Double> pair = backwards.next();
+        p2[j] = pair.second + ((j + 1 < n) ? p2[j + 1] : 0);
+        mv.put(j, pair.second);
+        mu2[j] = mv.getMean();
+        sigma2[j] = mv.getNaiveStddev();
+      }
+    }
+
+    // Criterion value for every candidate split position
+    for(int i = 0; i < n; i++) {
+      jt[i] = 1.0 + 2 * (p1[i] * (Math.log(sigma1[i]) - Math.log(p1[i])) + p2[i] * (Math.log(sigma2[i]) - Math.log(p2[i])));
+    }
+
+    int bestpos = -1;
+    double bestgoodness = Double.NEGATIVE_INFINITY;
+
+    double devPrev = jt[1] - jt[0];
+    for(int i = 1; i < jt.length - 1; i++) {
+      double devCur = jt[i + 1] - jt[i];
+      // Local minimum found - calculate depth
+      if(devCur >= 0 && devPrev <= 0) {
+        double lowestMaxima = Double.POSITIVE_INFINITY;
+        // Nearest local maximum to the left
+        for(int j = i - 1; j > 0; j--) {
+          if(jt[j - 1] < jt[j]) {
+            lowestMaxima = Math.min(lowestMaxima, jt[j]);
+            break;
+          }
+        }
+        // Nearest local maximum to the right
+        for(int j = i + 1; j < n - 2; j++) {
+          if(jt[j + 1] < jt[j]) {
+            lowestMaxima = Math.min(lowestMaxima, jt[j]);
+            break;
+          }
+        }
+        double localDepth = lowestMaxima - jt[i];
+
+        final double mud = mu1[i] - mu2[i];
+        double discriminability = mud * mud / (sigma1[i] * sigma1[i] + sigma2[i] * sigma2[i]);
+        if(Double.isNaN(discriminability)) {
+          discriminability = -1;
+        }
+        double goodness = localDepth * discriminability;
+        if(goodness > bestgoodness) {
+          bestgoodness = goodness;
+          bestpos = i;
+        }
+      }
+      devPrev = devCur;
+    }
+    // No local minimum scored better than negative infinity: do not query
+    // the histogram with the invalid bin index -1.
+    if(bestpos < 0) {
+      return new double[] { Double.NaN, bestgoodness };
+    }
+    return new double[] { histogram.getBinMax(bestpos), bestgoodness };
+  }
+
+ /**
+ * Get the logger of this class.
+ *
+ * @return the {@code logger} field
+ */
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * Declare the input data this algorithm accepts.
+ *
+ * @return an array with the single entry {@code TypeUtil.NUMBER_VECTOR_FIELD}
+ */
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ /**
+ * Class to represent a linear manifold separation: a goodness score, the
+ * threshold it was obtained at, and the manifold described by an origin
+ * vector and a basis. All fields start out at "nothing found" values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private static class Separation {
+ /**
+ * Goodness of separation. Initialised to negative infinity, the lowest
+ * possible score.
+ */
+ double goodness = Double.NEGATIVE_INFINITY;
+
+ /**
+ * Threshold belonging to {@link #goodness}. Negative infinity until a
+ * separation has been stored.
+ */
+ double threshold = Double.NEGATIVE_INFINITY;
+
+ /**
+ * Basis of manifold; {@code null} until a separation has been stored.
+ */
+ Matrix basis = null;
+
+ /**
+ * Origin vector of the manifold; {@code null} until a separation has been
+ * stored.
+ */
+ Vector originV = null;
+ }
+
+  /**
+   * Parameterizer: reads the LMCLUS options from the configuration and
+   * creates the algorithm instance from them.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * Option: maximum linear manifold dimension to search for.
+     */
+    public static final OptionID MAXDIM_ID = OptionID.getOrCreateOptionID("lmclus.maxdim", "Maximum linear manifold dimension to search.");
+
+    /**
+     * Option: minimum size a cluster must have.
+     */
+    public static final OptionID MINSIZE_ID = OptionID.getOrCreateOptionID("lmclus.minsize", "Minimum cluster size to allow.");
+
+    /**
+     * Option: sampling intensity level, controlling the number of samples.
+     */
+    public static final OptionID SAMPLINGL_ID = OptionID.getOrCreateOptionID("lmclus.sampling-level", "A number used to determine how many samples are taken in each search.");
+
+    /**
+     * Option: global significance threshold for accepting a cluster.
+     */
+    public static final OptionID THRESHOLD_ID = OptionID.getOrCreateOptionID("lmclus.threshold", "Threshold to determine if a cluster was found.");
+
+    /**
+     * Value of {@link #MAXDIM_ID}; stays at {@code Integer.MAX_VALUE} when the
+     * option is not given.
+     */
+    private int maxdim = Integer.MAX_VALUE;
+
+    /**
+     * Value of {@link #MINSIZE_ID}.
+     */
+    private int minsize;
+
+    /**
+     * Value of {@link #SAMPLINGL_ID}.
+     */
+    private int samplingLevel;
+
+    /**
+     * Value of {@link #THRESHOLD_ID}.
+     */
+    private double threshold;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+
+      // Maximum dimensionality, at least 1 (the trailing "true" presumably
+      // marks the option as optional).
+      final IntParameter maxdimParam = new IntParameter(MAXDIM_ID, new GreaterEqualConstraint(1), true);
+      if(config.grab(maxdimParam)) {
+        maxdim = maxdimParam.getValue();
+      }
+
+      // Minimum cluster size, at least 1.
+      final IntParameter minsizeParam = new IntParameter(MINSIZE_ID, new GreaterEqualConstraint(1));
+      if(config.grab(minsizeParam)) {
+        minsize = minsizeParam.getValue();
+      }
+
+      // Sampling level, constructed with the value 100 (presumably its
+      // default).
+      final IntParameter samplingParam = new IntParameter(SAMPLINGL_ID, 100);
+      if(config.grab(samplingParam)) {
+        samplingLevel = samplingParam.getValue();
+      }
+
+      // Significance threshold.
+      final DoubleParameter thresholdParam = new DoubleParameter(THRESHOLD_ID);
+      if(config.grab(thresholdParam)) {
+        threshold = thresholdParam.getValue();
+      }
+    }
+
+    @Override
+    protected LMCLUS makeInstance() {
+      return new LMCLUS(maxdim, minsize, samplingLevel, threshold);
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
index 924e1786..eb5608fc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -50,6 +50,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
@@ -420,14 +421,14 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
/**
* Returns the union of the two specified clusters.
*
- * @param database the database holding the objects
+ * @param relation the database holding the objects
* @param distFunc the distance function
* @param c1 the first cluster
* @param c2 the second cluster
* @param dim the dimensionality of the union cluster
* @return the union of the two specified clusters
*/
- private ORCLUSCluster union(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, ORCLUSCluster c1, ORCLUSCluster c2, int dim) {
+ private ORCLUSCluster union(Relation<V> relation, DistanceQuery<V, DoubleDistance> distFunc, ORCLUSCluster c1, ORCLUSCluster c2, int dim) {
ORCLUSCluster c = new ORCLUSCluster();
c.objectIDs = DBIDUtil.newHashSet(c1.objectIDs);
@@ -436,11 +437,13 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
c.objectIDs = DBIDUtil.newArray(c.objectIDs);
if(c.objectIDs.size() > 0) {
- c.centroid = DatabaseUtil.centroid(database, c.objectIDs);
- c.basis = findBasis(database, distFunc, c, dim);
+ c.centroid = DatabaseUtil.centroid(relation, c.objectIDs);
+ c.basis = findBasis(relation, distFunc, c, dim);
}
else {
- c.centroid = c1.centroid.plus(c2.centroid).multiplicate(0.5);
+ V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+ Vector cent = c1.centroid.getColumnVector().plusEquals(c2.centroid.getColumnVector()).timesEquals(0.5);
+ c.centroid = factory.newNumberVector(cent.getArrayRef());
double[][] doubles = new double[c1.basis.getRowDimensionality()][dim];
for(int i = 0; i < dim; i++) {
doubles[i][i] = 1;
@@ -460,9 +463,9 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
* @return the projection of double vector o in the subspace of cluster c
*/
private V projection(ORCLUSCluster c, V o, V factory) {
- Matrix o_proj = o.getRowVector().times(c.basis);
+ Matrix o_proj = o.getColumnVector().transposeTimes(c.basis);
double[] values = o_proj.getColumnPackedCopy();
- return factory.newInstance(values);
+ return factory.newNumberVector(values);
}
@Override
@@ -523,7 +526,7 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
for(int d = 1; d <= o.getDimensionality(); d++) {
values[d - 1] = o.doubleValue(d);
}
- this.centroid = factory.newInstance(values);
+ this.centroid = factory.newNumberVector(values);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
index 6c5db740..46112498 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
index 62ff658f..86e045cb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
index 8cd156e8..8b6d104c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
index 82a1f1e1..665de632 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
new file mode 100644
index 00000000..d3c73b53
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
@@ -0,0 +1,310 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Abstract base class for k-means implementations.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?, ?>, D, Clustering<MeanModel<V>>> {
+ /**
+ * Parameter to specify the number of clusters to find, must be an integer
+ * greater than 0.
+ */
+ public static final OptionID K_ID = OptionID.getOrCreateOptionID("kmeans.k", "The number of clusters to find.");
+
+ /**
+ * Parameter to specify the number of clusters to find, must be an integer
+ * greater or equal to 0, where 0 means no limit.
+ */
+ public static final OptionID MAXITER_ID = OptionID.getOrCreateOptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
+
+ /**
+ * Parameter to specify the random generator seed.
+ */
+ public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("kmeans.seed", "The random number generator seed.");
+
+ /**
+ * Parameter to specify the initialization method
+ */
+ public static final OptionID INIT_ID = OptionID.getOrCreateOptionID("kmeans.initialization", "Method to choose the initial means.");
+
+ /**
+ * Holds the value of {@link #K_ID}.
+ */
+ protected int k;
+
+ /**
+ * Holds the value of {@link #MAXITER_ID}.
+ */
+ protected int maxiter;
+
+ /**
+ * Method to choose initial means.
+ */
+ protected KMeansInitialization<V> initializer;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction distance function
+ * @param k k parameter
+ * @param maxiter Maxiter parameter
+ */
+ public AbstractKMeans(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ super(distanceFunction);
+ this.k = k;
+ this.maxiter = maxiter;
+ this.initializer = initializer;
+ }
+
+ /**
+ * Returns a list of clusters. The k<sup>th</sup> cluster contains the ids of
+ * those FeatureVectors, that are nearest to the k<sup>th</sup> mean.
+ *
+ * @param relation the database to cluster
+ * @param means a list of k means
+ * @param clusters cluster assignment
+ * @return true when the object was reassigned
+ */
+ protected boolean assignToNearestCluster(Relation<V> relation, List<Vector> means, List<? extends ModifiableDBIDs> clusters) {
+ boolean changed = false;
+
+ if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ @SuppressWarnings("unchecked")
+ final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+ for(DBID id : relation.iterDBIDs()) {
+ double mindist = Double.POSITIVE_INFINITY;
+ V fv = relation.get(id);
+ int minIndex = 0;
+ for(int i = 0; i < k; i++) {
+ double dist = df.doubleDistance(fv, means.get(i));
+ if(dist < mindist) {
+ minIndex = i;
+ mindist = dist;
+ }
+ }
+ if(clusters.get(minIndex).add(id)) {
+ changed = true;
+ // Remove from previous cluster
+ // TODO: keep a list of cluster assignments to save this search?
+ for(int i = 0; i < k; i++) {
+ if(i != minIndex) {
+ if(clusters.get(i).remove(id)) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ else {
+ final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+ for(DBID id : relation.iterDBIDs()) {
+ D mindist = df.getDistanceFactory().infiniteDistance();
+ V fv = relation.get(id);
+ int minIndex = 0;
+ for(int i = 0; i < k; i++) {
+ D dist = df.distance(fv, means.get(i));
+ if(dist.compareTo(mindist) < 0) {
+ minIndex = i;
+ mindist = dist;
+ }
+ }
+ if(clusters.get(minIndex).add(id)) {
+ changed = true;
+ // Remove from previous cluster
+ // TODO: keep a list of cluster assignments to save this search?
+ for(int i = 0; i < k; i++) {
+ if(i != minIndex) {
+ if(clusters.get(i).remove(id)) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ return changed;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ /**
+ * Returns the mean vectors of the given clusters in the given database.
+ *
+ * @param clusters the clusters to compute the means
+ * @param means the recent means
+ * @param database the database containing the vectors
+ * @return the mean vectors of the given clusters in the given database
+ */
+ protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<Vector> means, Relation<V> database) {
+ List<Vector> newMeans = new ArrayList<Vector>(k);
+ for(int i = 0; i < k; i++) {
+ ModifiableDBIDs list = clusters.get(i);
+ Vector mean = null;
+ for(Iterator<DBID> clusterIter = list.iterator(); clusterIter.hasNext();) {
+ if(mean == null) {
+ mean = database.get(clusterIter.next()).getColumnVector();
+ }
+ else {
+ mean.plusEquals(database.get(clusterIter.next()).getColumnVector());
+ }
+ }
+ if(list.size() > 0) {
+ assert mean != null;
+ mean.timesEquals(1.0 / list.size());
+ }
+ else {
+ mean = means.get(i);
+ }
+ newMeans.add(mean);
+ }
+ return newMeans;
+ }
+
+ /**
+ * Compute an incremental update for the mean
+ *
+ * @param mean Mean to update
+ * @param vec Object vector
+ * @param newsize (New) size of cluster
+ * @param op Cluster size change / Weight change
+ */
+ protected void incrementalUpdateMean(Vector mean, V vec, int newsize, double op) {
+ if(newsize == 0) {
+ return; // Keep old mean
+ }
+ Vector delta = vec.getColumnVector();
+ // Compute difference from mean
+ delta.minusEquals(mean);
+ delta.timesEquals(op / newsize);
+ mean.plusEquals(delta);
+ }
+
+ /**
+ * Perform a MacQueen style iteration.
+ *
+ * @param relation Relation
+ * @param means Means
+ * @param clusters Clusters
+ * @return true when the means have changed
+ */
+ protected boolean macQueenIterate(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters) {
+ boolean changed = false;
+
+ if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ // Raw distance function
+ @SuppressWarnings("unchecked")
+ final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+
+ // Incremental update
+ for(DBID id : relation.iterDBIDs()) {
+ double mindist = Double.POSITIVE_INFINITY;
+ V fv = relation.get(id);
+ int minIndex = 0;
+ for(int i = 0; i < k; i++) {
+ double dist = df.doubleDistance(fv, means.get(i));
+ if(dist < mindist) {
+ minIndex = i;
+ mindist = dist;
+ }
+ }
+ // Update the cluster mean incrementally:
+ for(int i = 0; i < k; i++) {
+ ModifiableDBIDs ci = clusters.get(i);
+ if(i == minIndex) {
+ if(ci.add(id)) {
+ incrementalUpdateMean(means.get(i), relation.get(id), ci.size(), +1);
+ changed = true;
+ }
+ }
+ else if(ci.remove(id)) {
+ incrementalUpdateMean(means.get(i), relation.get(id), ci.size() + 1, -1);
+ changed = true;
+ }
+ }
+ }
+ }
+ else {
+ // Raw distance function
+ final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+
+ // Incremental update
+ for(DBID id : relation.iterDBIDs()) {
+ D mindist = df.getDistanceFactory().infiniteDistance();
+ V fv = relation.get(id);
+ int minIndex = 0;
+ for(int i = 0; i < k; i++) {
+ D dist = df.distance(fv, means.get(i));
+ if(dist.compareTo(mindist) < 0) {
+ minIndex = i;
+ mindist = dist;
+ }
+ }
+ // Update the cluster mean incrementally:
+ for(int i = 0; i < k; i++) {
+ ModifiableDBIDs ci = clusters.get(i);
+ if(i == minIndex) {
+ if(ci.add(id)) {
+ incrementalUpdateMean(means.get(i), relation.get(id), ci.size(), +1);
+ changed = true;
+ }
+ }
+ else if(ci.remove(id)) {
+ incrementalUpdateMean(means.get(i), relation.get(id), ci.size() + 1, -1);
+ changed = true;
+ }
+ }
+ }
+ }
+ return changed;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
new file mode 100644
index 00000000..b5f088fb
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
@@ -0,0 +1,71 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+
+/**
+ * Common base class for k-means initialization methods; it stores the random
+ * seed shared by them.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+public abstract class AbstractKMeansInitialization<V extends NumberVector<V, ?>> implements KMeansInitialization<V> {
+  /**
+   * Random seed, as read from {@link AbstractKMeans#SEED_ID}. May be
+   * {@code null}.
+   */
+  protected Long seed;
+
+  /**
+   * Constructor.
+   *
+   * @param seed Random seed.
+   */
+  public AbstractKMeansInitialization(Long seed) {
+    this.seed = seed;
+  }
+
+  /**
+   * Parameterization class, reading the seed option.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public abstract static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+    /**
+     * Random seed; remains {@code null} unless the option was grabbed.
+     */
+    protected Long seed;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      final LongParameter seedParam = new LongParameter(AbstractKMeans.SEED_ID, true);
+      if(config.grab(seedParam)) {
+        seed = seedParam.getValue();
+      }
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
new file mode 100644
index 00000000..78ccd426
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
@@ -0,0 +1,74 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * K-means initialization that takes the first k objects of the relation, in
+ * iteration order, as initial means.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+public class FirstKInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor. No random seed is used, so {@code null} is passed on.
+   */
+  public FirstKInitialMeans() {
+    super(null);
+  }
+
+  /**
+   * Collect the column vectors of the first k objects. When the relation
+   * holds fewer than k objects, the returned list is correspondingly shorter.
+   */
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    final Iterator<DBID> ids = relation.iterDBIDs();
+    final List<Vector> initial = new ArrayList<Vector>(k);
+    while(initial.size() < k && ids.hasNext()) {
+      final DBID next = ids.next();
+      initial.add(relation.get(next).getColumnVector());
+    }
+    return initial;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+    @Override
+    protected FirstKInitialMeans<V> makeInstance() {
+      return new FirstKInitialMeans<V>();
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
new file mode 100644
index 00000000..f4c0d9c7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
@@ -0,0 +1,49 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Strategy interface for choosing the initial means of k-means.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+public interface KMeansInitialization<V extends NumberVector<V, ?>> {
+  /**
+   * Choose the initial means for a k-means run.
+   *
+   * @param relation Relation holding the data
+   * @param k Parameter k, the number of means requested
+   * @param distanceFunction Distance function
+   * @return List of chosen means for k-means
+   */
+  List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction);
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
new file mode 100644
index 00000000..fda1d6c0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -0,0 +1,176 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * k-means clustering with Lloyd-style bulk iterations: all objects are
+ * (re-)assigned to their nearest mean, and only afterwards are the means
+ * recomputed.
+ *
+ * <p>
+ * Reference:<br />
+ * S. Lloyd<br/>
+ * Least squares quantization in PCM<br/>
+ * IEEE Transactions on Information Theory 28 (2)<br/>
+ * previously published as Bell Telephone Laboratories Paper
+ * </p>
+ *
+ * @author Arthur Zimek
+ *
+ * @apiviz.has MeanModel
+ *
+ * @param <V> vector datatype
+ * @param <D> distance value type
+ */
+@Title("K-Means")
+@Description("Finds a partitioning into k clusters.")
+@Reference(authors = "S. Lloyd", title = "Least squares quantization in PCM", booktitle = "IEEE Transactions on Information Theory 28 (2): 129–137.", url = "http://dx.doi.org/10.1109/TIT.1982.1056489")
+public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+  /**
+   * Class logger.
+   */
+  private static final Logging logger = Logging.getLogger(KMeansLloyd.class);
+
+  /**
+   * Constructor.
+   *
+   * @param distanceFunction distance function
+   * @param k k parameter
+   * @param maxiter Maxiter parameter
+   * @param initializer Strategy used to choose the initial means
+   */
+  public KMeansLloyd(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+    super(distanceFunction, k, maxiter, initializer);
+  }
+
+  /**
+   * Run k-means on the given relation.
+   *
+   * @param database Database
+   * @param relation relation to use
+   * @return clustering result; an empty clustering if the relation is empty
+   * @throws IllegalStateException
+   */
+  public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) throws IllegalStateException {
+    // Nothing to cluster: hand back an empty result right away.
+    if(relation.size() <= 0) {
+      return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    }
+    // Seed the means with the configured initialization strategy.
+    List<Vector> centers = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+    // One (initially empty) ID set per cluster.
+    final List<ModifiableDBIDs> assignment = new ArrayList<ModifiableDBIDs>();
+    for(int c = 0; c < k; c++) {
+      assignment.add(DBIDUtil.newHashSet(relation.size() / k));
+    }
+
+    // A non-positive maxiter disables the iteration limit.
+    int round = 0;
+    while(maxiter <= 0 || round < maxiter) {
+      round++;
+      if(logger.isVerbose()) {
+        logger.verbose("K-Means iteration " + round);
+      }
+      // Converged as soon as no cluster assignment changed.
+      if(!assignToNearestCluster(relation, centers, assignment)) {
+        break;
+      }
+      // Otherwise recompute the means from the new assignment.
+      centers = means(assignment, centers, relation);
+    }
+
+    // Wrap each ID set together with its mean into the result object.
+    final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+    final Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    int idx = 0;
+    for(ModifiableDBIDs members : assignment) {
+      final MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(centers.get(idx).getArrayRef()));
+      result.addCluster(new Cluster<MeanModel<V>>(members, model));
+      idx++;
+    }
+    return result;
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return logger;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+    /**
+     * Parameter k.
+     */
+    protected int k;
+
+    /**
+     * Maximum number of iterations; the parameter defaults to 0.
+     */
+    protected int maxiter;
+
+    /**
+     * Initialization strategy for the means.
+     */
+    protected KMeansInitialization<V> initializer;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      // k must be strictly positive.
+      final IntParameter kParam = new IntParameter(K_ID, new GreaterConstraint(0));
+      if(config.grab(kParam)) {
+        k = kParam.getValue();
+      }
+
+      // Initialization strategy; randomly generated means are the default.
+      final ObjectParameter<KMeansInitialization<V>> initParam = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+      if(config.grab(initParam)) {
+        initializer = initParam.instantiateClass(config);
+      }
+
+      // Iteration limit, must be >= 0, default 0.
+      final IntParameter maxiterParam = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
+      if(config.grab(maxiterParam)) {
+        maxiter = maxiterParam.getValue();
+      }
+    }
+
+    @Override
+    protected AbstractKMeans<V, D> makeInstance() {
+      return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
new file mode 100644
index 00000000..56492dd0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
@@ -0,0 +1,177 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Provides the k-means algorithm, using MacQueen style incremental updates.
+ *
+ * <p>
+ * Reference:<br />
+ * J. MacQueen: Some Methods for Classification and Analysis of Multivariate
+ * Observations. <br />
+ * In 5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has MeanModel
+ *
+ * @param <V> vector type to use
+ * @param <D> distance function value type
+ */
+@Title("K-Means")
+@Description("Finds a partitioning into k clusters.")
+@Reference(authors = "J. MacQueen", title = "Some Methods for Classification and Analysis of Multivariate Observations", booktitle = "5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297", url = "http://projecteuclid.org/euclid.bsmsp/1200512992")
+public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging logger = Logging.getLogger(KMeansMacQueen.class);
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction distance function
+ * @param k k parameter
+ * @param maxiter Maxiter parameter
+ */
+ public KMeansMacQueen(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ super(distanceFunction, k, maxiter, initializer);
+ }
+
+ /**
+ * Run k-means
+ *
+ * @param database Database
+ * @param relation relation to use
+ * @return result
+ * @throws IllegalStateException
+ */
+ public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) throws IllegalStateException {
+ if(relation.size() <= 0) {
+ return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+ }
+ // Choose initial means
+ List<Vector> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ // Initialize cluster and assign objects
+ List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ for(int i = 0; i < k; i++) {
+ clusters.add(DBIDUtil.newHashSet(relation.size() / k));
+ }
+ assignToNearestCluster(relation, means, clusters);
+ // Initial recomputation of the means.
+ means = means(clusters, means, relation);
+
+ // Refine result
+ for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+ if(logger.isVerbose()) {
+ logger.verbose("K-Means iteration " + (iteration + 1));
+ }
+ boolean changed = macQueenIterate(relation, means, clusters);
+ if(!changed) {
+ break;
+ }
+ }
+ final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+ Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+ for(int i = 0; i < clusters.size(); i++) {
+ DBIDs ids = clusters.get(i);
+ MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
+ result.addCluster(new Cluster<MeanModel<V>>(ids, model));
+ }
+ return result;
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+ protected int k;
+
+ protected int maxiter;
+
+ protected KMeansInitialization<V> initializer;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+
+ ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+ if(config.grab(initialP)) {
+ initializer = initialP.instantiateClass(config);
+ }
+
+ IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
+ if(config.grab(maxiterP)) {
+ maxiter = maxiterP.getValue();
+ }
+ }
+
+ @Override
+ protected AbstractKMeans<V, D> makeInstance() {
+ return new KMeansMacQueen<V, D>(distanceFunction, k, maxiter, initializer);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
new file mode 100644
index 00000000..c7a2fa1d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
@@ -0,0 +1,213 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+
+/**
+ * K-Means++ initialization for k-means.
+ *
+ * After a first randomly sampled mean, every further mean is drawn from the
+ * data with a weight equal to the squared distance of the object to its
+ * closest already chosen mean.
+ *
+ * Reference:
+ * <p>
+ * D. Arthur, S. Vassilvitskii<br />
+ * k-means++: the advantages of careful seeding<br />
+ * In: Proc. of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms,
+ * SODA 2007
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+@Reference(authors = "D. Arthur, S. Vassilvitskii", title = "k-means++: the advantages of careful seeding", booktitle = "Proc. of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA 2007", url = "http://dx.doi.org/10.1145/1283383.1283494")
+public class KMeansPlusPlusInitialMeans<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   *
+   * @param seed Random seed.
+   */
+  public KMeansPlusPlusInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  /**
+   * Choose k initial means using k-means++ seeding.
+   *
+   * @param relation Relation holding the data vectors
+   * @param k Number of means to choose
+   * @param distanceFunction Distance function; must produce numerical
+   *        distances
+   * @return List of chosen means
+   * @throws AbortException if the distance function is not numerical
+   */
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    // Get a distance query
+    if(!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
+      throw new AbortException("K-Means++ initialization can only be used with numerical distances.");
+    }
+    @SuppressWarnings("unchecked")
+    final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
+    DistanceQuery<V, D> distQ = relation.getDatabase().getDistanceQuery(relation, distF);
+
+    // Choose the first mean via a random sample of size 1
+    List<Vector> means = new ArrayList<Vector>(k);
+
+    Random random = (seed != null) ? new Random(seed) : new Random();
+    DBID first = DBIDUtil.randomSample(relation.getDBIDs(), 1, random.nextLong()).iterator().next();
+    means.add(relation.get(first).getColumnVector());
+
+    ModifiableDBIDs chosen = DBIDUtil.newHashSet(k);
+    chosen.add(first);
+    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
+    // Initialize weights
+    double[] weights = new double[ids.size()];
+    double weightsum = initialWeights(weights, ids, first, distQ);
+    while(means.size() < k) {
+      if(weightsum > Double.MAX_VALUE) {
+        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
+      }
+      if(weightsum < Double.MIN_NORMAL) {
+        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
+      }
+      // Draw the index whose cumulative weight interval contains r. The
+      // helper never returns an index outside the array and never picks a
+      // zero-weight (already chosen) entry while a positive one remains.
+      final int pos = sampleIndex(weights, random.nextDouble() * weightsum);
+      // Add new mean:
+      DBID newmean = ids.get(pos);
+      means.add(relation.get(newmean).getColumnVector());
+      chosen.add(newmean);
+      // Update weights:
+      weights[pos] = 0.0;
+      // Choose optimized version for double distances, if applicable.
+      if(distF instanceof PrimitiveDoubleDistanceFunction) {
+        @SuppressWarnings("unchecked")
+        PrimitiveDoubleDistanceFunction<V> ddist = (PrimitiveDoubleDistanceFunction<V>) distF;
+        weightsum = updateWeights(weights, ids, newmean, ddist, relation);
+      }
+      else {
+        weightsum = updateWeights(weights, ids, newmean, distQ);
+      }
+    }
+
+    return means;
+  }
+
+  /**
+   * Weighted draw: find the first entry with positive weight whose cumulative
+   * weight exceeds r.
+   *
+   * Entries with a weight that is not strictly positive are skipped. If r is
+   * not exceeded by the total (e.g. due to floating point rounding of the
+   * weight sum), the last entry with positive weight is returned. If no entry
+   * has positive weight at all, index 0 is returned so that the caller still
+   * makes progress.
+   *
+   * @param weights Weight list
+   * @param r Threshold, expected in [0; sum of weights)
+   * @return Index into the weight list
+   */
+  private static int sampleIndex(double[] weights, double r) {
+    int lastPositive = 0;
+    for(int i = 0; i < weights.length; i++) {
+      final double w = weights[i];
+      if(!(w > 0.0)) {
+        continue;
+      }
+      lastPositive = i;
+      if(r < w) {
+        return i;
+      }
+      r -= w;
+    }
+    return lastPositive;
+  }
+
+  /**
+   * Initialize the weight list with the squared distances to the first mean.
+   *
+   * @param weights Weight list (output, indexed like ids)
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distQ Distance query
+   * @return Weight sum
+   */
+  protected double initialWeights(double[] weights, ArrayDBIDs ids, DBID latest, DistanceQuery<V, D> distQ) {
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      if(latest.equals(id)) {
+        // The chosen object itself must not be drawn again.
+        weights[i] = 0.0;
+      }
+      else {
+        double d = distQ.distance(latest, id).doubleValue();
+        weights[i] = d * d;
+      }
+      weightsum += weights[i];
+    }
+    return weightsum;
+  }
+
+  /**
+   * Update the weight list: each positive weight becomes the minimum of its
+   * old value and the squared distance to the newly added mean.
+   *
+   * @param weights Weight list (updated in place)
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distQ Distance query
+   * @return Weight sum
+   */
+  protected double updateWeights(double[] weights, ArrayDBIDs ids, DBID latest, DistanceQuery<V, D> distQ) {
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      // Zero weights cannot shrink any further and are skipped.
+      if(weights[i] > 0.0) {
+        double d = distQ.distance(latest, id).doubleValue();
+        weights[i] = Math.min(weights[i], d * d);
+        weightsum += weights[i];
+      }
+    }
+    return weightsum;
+  }
+
+  /**
+   * Update the weight list, variant for double-valued distance functions
+   * that computes distances directly on the vectors.
+   *
+   * @param weights Weight list (updated in place)
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distF Distance function
+   * @param rel Relation used to look up the vectors
+   * @return Weight sum
+   */
+  protected double updateWeights(double[] weights, ArrayDBIDs ids, DBID latest, PrimitiveDoubleDistanceFunction<V> distF, Relation<V> rel) {
+    final V lv = rel.get(latest);
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      // Zero weights cannot shrink any further and are skipped.
+      if(weights[i] > 0.0) {
+        double d = distF.doubleDistance(lv, rel.get(id));
+        weights[i] = Math.min(weights[i], d * d);
+        weightsum += weights[i];
+      }
+    }
+    return weightsum;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+    @Override
+    protected KMeansPlusPlusInitialMeans<V, D> makeInstance() {
+      return new KMeansPlusPlusInitialMeans<V, D>(seed);
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
new file mode 100644
index 00000000..30e59453
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
@@ -0,0 +1,78 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Initialize k-means by randomly choosing k existing elements as cluster
+ * centers.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+public class RandomlyChosenInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   *
+   * @param seed Random seed.
+   */
+  public RandomlyChosenInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  /**
+   * Sample k objects from the relation and use their vectors as means.
+   *
+   * @param relation Relation to sample from
+   * @param k Number of means to choose
+   * @param distanceFunction Distance function (not used by this strategy)
+   * @return Column vectors of the sampled objects
+   */
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), k, seed);
+    final List<Vector> centers = new ArrayList<Vector>(k);
+    for(DBID picked : sample) {
+      final V object = relation.get(picked);
+      centers.add(object.getColumnVector());
+    }
+    return centers;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+    @Override
+    protected RandomlyChosenInitialMeans<V> makeInstance() {
+      return new RandomlyChosenInitialMeans<V>(seed);
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
new file mode 100644
index 00000000..e8a466dd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
@@ -0,0 +1,87 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Initialize k-means by generating random vectors (within the data set's
+ * value range).
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+public class RandomlyGeneratedInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   *
+   * @param seed Random seed.
+   */
+  public RandomlyGeneratedInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  /**
+   * Generate k random vectors, rescaled per dimension with the minimum and
+   * maximum computed from the relation.
+   *
+   * @param relation Relation providing dimensionality and value range
+   * @param k Number of means to generate
+   * @param distanceFunction Distance function (not used by this strategy)
+   * @return Generated means
+   */
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    final int dimensionality = DatabaseUtil.dimensionality(relation);
+    final Pair<V, V> bounds = DatabaseUtil.computeMinMax(relation);
+    final List<Vector> centers = new ArrayList<Vector>(k);
+    // Seeded generator if a seed was configured, otherwise an unseeded one.
+    final Random rnd;
+    if(this.seed != null) {
+      rnd = new Random(this.seed);
+    }
+    else {
+      rnd = new Random();
+    }
+    int generated = 0;
+    while(generated < k) {
+      final double[] coords = MathUtil.randomDoubleArray(dimensionality, rnd);
+      // Rescale: min + (max - min) * random value. The vectors are
+      // addressed with 1-based dimension indexes.
+      for(int d = 0; d < dimensionality; d++) {
+        final double lo = bounds.first.doubleValue(d + 1);
+        final double span = bounds.second.doubleValue(d + 1) - lo;
+        coords[d] = lo + span * coords[d];
+      }
+      centers.add(new Vector(coords));
+      generated++;
+    }
+    return centers;
+  }
+
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+    @Override
+    protected RandomlyGeneratedInitialMeans<V> makeInstance() {
+      return new RandomlyGeneratedInitialMeans<V>(seed);
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
new file mode 100644
index 00000000..2ce625b0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>K-means clustering and variations.</p>
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2012
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
index 660a7a4f..eed031df 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
@@ -1,5 +1,5 @@
/**
- * <p>Clustering algorithms</p>
+ * <p>Clustering algorithms.</p>
*
* Clustering algorithms are supposed to implement the {@link de.lmu.ifi.dbs.elki.algorithm.Algorithm}-Interface.
* The more specialized interface {@link de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm}
@@ -15,7 +15,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
index dfc4e1cd..e3b274a6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
index 987c7eda..c4c1687b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
index 36473cc0..40ab60a8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
index 92c2248c..3f16e907 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
index 22e9c150..4ca5a564 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
index c0edb2ea..963c0922 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -44,7 +44,7 @@ import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.AbstractDimensionsSelectingDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionsSelectingEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -94,7 +94,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
/**
* The distance function to determine the distance between database objects.
* <p>
- * Default value: {@link DimensionsSelectingEuclideanDistanceFunction}
+ * Default value: {@link SubspaceEuclideanDistanceFunction}
* </p>
* <p>
* Key: {@code -subclu.distancefunction}
@@ -477,7 +477,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>> param = new ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>>(DISTANCE_FUNCTION_ID, AbstractDimensionsSelectingDoubleDistanceFunction.class, DimensionsSelectingEuclideanDistanceFunction.class);
+ ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>> param = new ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>>(DISTANCE_FUNCTION_ID, AbstractDimensionsSelectingDoubleDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
if(config.grab(param)) {
distance = param.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
index 1874c9e8..eff71a35 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
index 4b6fa9ad..db687567 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
/**
@@ -265,7 +266,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
resultIntervals.add(this.intervals.last());
resultIntervals.add(other.intervals.last());
- ModifiableDBIDs resultIDs = DBIDUtil.newHashSet(this.ids);
+ HashSetModifiableDBIDs resultIDs = DBIDUtil.newHashSet(this.ids);
resultIDs.retainAll(other.ids);
if(resultIDs.size() / all >= tau) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
index 444cb0e6..7a686190 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
index 168ceadb..2a1eb930 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
@@ -10,7 +10,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
index 02350db3..43c6a218 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -85,9 +85,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
public static final OptionID MULTIPLE_ID = OptionID.getOrCreateOptionID("bylabelclustering.multiple", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
/**
- * Flag to indicate that multiple cluster assignment is possible. If an
- * assignment to multiple clusters is desired, the labels indicating the
- * clusters need to be separated by blanks.
+ * Pattern to recognize noise clusters by.
*/
public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label.");
@@ -144,7 +142,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
ModifiableDBIDs noiseids = DBIDUtil.newArray();
Clustering<Model> result = new Clustering<Model>("By Label Clustering", "bylabel-clustering");
for(Entry<String, ModifiableDBIDs> entry : labelMap.entrySet()) {
- ModifiableDBIDs ids = labelMap.get(entry.getKey());
+ ModifiableDBIDs ids = entry.getValue();
if(ids.size() <= 1) {
noiseids.addDBIDs(ids);
continue;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
index 5b8041d7..228cc7e7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
new file mode 100644
index 00000000..cd45cda2
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
@@ -0,0 +1,163 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorInterface;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+
+/**
+ * Pseudo clustering using annotated models.
+ *
+ * This "algorithm" puts elements into the same cluster when they agree in their
+ * model. I.e. it just uses a predefined clustering, and is mostly useful for
+ * testing and evaluation (e.g. comparing the result of a real algorithm to the
+ * reference result / gold standard used by the generator).
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses Model
+ */
+@Title("Clustering by model")
+@Description("Cluster points by a (pre-assigned!) model. For comparing results with a reference clustering.")
+public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
+ /**
+ * The logger for this class, returned by {@link #getLogger()}.
+ */
+ private static final Logging logger = Logging.getLogger(ByModelClustering.class);
+
+ /**
+ * Option ID of the pattern used to recognize noise clusters by their name.
+ */
+ public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("bymodel.noise", "Pattern to recognize noise models by their label.");
+
+ /**
+ * Holds the value of {@link #NOISE_ID}; {@code null} means no cluster is flagged as noise.
+ */
+ private Pattern noisepattern = null;
+
+ /**
+ * Constructor.
+ *
+ * @param noisepattern Noise pattern, or {@code null} to flag no cluster as noise
+ */
+ public ByModelClustering(Pattern noisepattern) {
+ super();
+ this.noisepattern = noisepattern;
+ }
+
+ /**
+ * Constructor without parameters: uses no noise pattern.
+ */
+ public ByModelClustering() {
+ this(null);
+ }
+
+ /**
+ * Run the actual clustering algorithm: group objects by model and return one cluster per distinct model.
+ *
+ * @param relation The data input we use (the model assigned to each object)
+ */
+ public Clustering<Model> run(Relation<Model> relation) {
+ // Build model mapping: collect the DBIDs of all objects that share a model
+ HashMap<Model, ModifiableDBIDs> modelMap = new HashMap<Model, ModifiableDBIDs>();
+ for(DBID id : relation.iterDBIDs()) {
+ Model model = relation.get(id);
+ ModifiableDBIDs modelids = modelMap.get(model);
+ if(modelids == null) { // first object seen for this model
+ modelids = DBIDUtil.newHashSet();
+ modelMap.put(model, modelids);
+ }
+ modelids.add(id);
+ }
+
+ Clustering<Model> result = new Clustering<Model>("By Model Clustering", "bymodel-clustering");
+ for(Entry<Model, ModifiableDBIDs> entry : modelMap.entrySet()) { // HashMap iteration: cluster order is unspecified
+ final Model model = entry.getKey();
+ final ModifiableDBIDs ids = entry.getValue();
+ final String name = (model instanceof GeneratorInterface) ? ((GeneratorInterface) model).getName() : model.toString(); // generator name if available, else toString()
+ Cluster<Model> c = new Cluster<Model>(name, ids, model);
+ if(noisepattern != null && noisepattern.matcher(name).find()) { // find(): a match anywhere in the name suffices
+ c.setNoise(true);
+ }
+ result.addCluster(c);
+ }
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.MODEL); // the single input relation is restricted to TypeUtil.MODEL
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * Parameterization class: reads the {@link #NOISE_ID} option and builds the algorithm instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ protected Pattern noisepat; // only assigned when the option is grabbed below; otherwise stays null
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ PatternParameter noisepatP = new PatternParameter(NOISE_ID, true); // NOTE(review): 'true' presumably marks the option as optional - confirm
+ if(config.grab(noisepatP)) {
+ noisepat = noisepatP.getValue();
+ }
+ }
+
+ @Override
+ protected ByModelClustering makeInstance() {
+ return new ByModelClustering(noisepat);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
index a316ce57..2e7d006d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
index b85f5445..c497632c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
index 5629855c..5870a736 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
index 50365996..f0b31d32 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,9 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.PriorityQueue;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -38,7 +35,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -46,6 +43,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
@@ -55,10 +54,11 @@ import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFu
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -68,7 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Angle-Based Outlier Detection
@@ -177,10 +177,10 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
public OutlierResult getRanking(Relation<V> relation, int k) {
// Fix a static set of IDs
staticids = DBIDUtil.newArray(relation.getDBIDs());
- Collections.sort(staticids);
+ staticids.sort();
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- PriorityQueue<FCPair<Double, DBID>> pq = new PriorityQueue<FCPair<Double, DBID>>(relation.size(), Collections.reverseOrder());
+ Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder());
// preprocess kNN neighborhoods
assert (k == this.k);
@@ -190,7 +190,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
MeanVariance s = new MeanVariance();
// System.out.println("Processing: " +objKey);
- List<DistanceResultPair<DoubleDistance>> neighbors = knnQuery.getKNNForDBID(objKey, k);
+ KNNResult<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k);
Iterator<DistanceResultPair<DoubleDistance>> iter = neighbors.iterator();
while(iter.hasNext()) {
DBID key1 = iter.next().getDBID();
@@ -214,14 +214,14 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
}
// Sample variance probably would be correct, however the numerical
// instabilities can actually break ABOD here.
- pq.add(new FCPair<Double, DBID>(s.getNaiveVariance(), objKey));
+ pq.add(new DoubleObjPair<DBID>(s.getNaiveVariance(), objKey));
}
DoubleMinMax minmaxabod = new DoubleMinMax();
- WritableDataStore<Double> abodvalues = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
- for(FCPair<Double, DBID> pair : pq) {
- abodvalues.put(pair.getSecond(), pair.getFirst());
- minmaxabod.put(pair.getFirst());
+ WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ for(DoubleObjPair<DBID> pair : pq) {
+ abodvalues.putDouble(pair.getSecond(), pair.first);
+ minmaxabod.put(pair.first);
}
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
@@ -240,16 +240,16 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
public OutlierResult getFastRanking(Relation<V> relation, int k, int sampleSize) {
// Fix a static set of IDs
staticids = DBIDUtil.newArray(relation.getDBIDs());
- Collections.sort(staticids);
+ staticids.sort();
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- PriorityQueue<FCPair<Double, DBID>> pq = new PriorityQueue<FCPair<Double, DBID>>(relation.size(), Collections.reverseOrder());
+ Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder());
// get Candidate Ranking
for(DBID aKey : relation.iterDBIDs()) {
HashMap<DBID, Double> dists = new HashMap<DBID, Double>(relation.size());
// determine kNearestNeighbors and pairwise distances
- PriorityQueue<FCPair<Double, DBID>> nn;
+ Heap<DoubleObjPair<DBID>> nn;
if(!useRNDSample) {
nn = calcDistsandNN(relation, kernelMatrix, sampleSize, aKey, dists);
}
@@ -269,15 +269,15 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
}
// getFilter
double var = getAbofFilter(kernelMatrix, aKey, dists, counter[1], counter[0], neighbors);
- pq.add(new FCPair<Double, DBID>(var, aKey));
+ pq.add(new DoubleObjPair<DBID>(var, aKey));
// System.out.println("prog "+(prog++));
}
// refine Candidates
- PriorityQueue<FCPair<Double, DBID>> resqueue = new PriorityQueue<FCPair<Double, DBID>>(k);
+ Heap<DoubleObjPair<DBID>> resqueue = new Heap<DoubleObjPair<DBID>>(k);
// System.out.println(pq.size() + " objects ordered into candidate list.");
// int v = 0;
while(!pq.isEmpty()) {
- if(resqueue.size() == k && pq.peek().getFirst() > resqueue.peek().getFirst()) {
+ if(resqueue.size() == k && pq.peek().first > resqueue.peek().first) {
break;
}
// double approx = pq.peek().getFirst();
@@ -313,22 +313,22 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
double var = s.getSampleVariance();
// System.out.println(aKey+ " : " + approx +" " + var);
if(resqueue.size() < k) {
- resqueue.add(new FCPair<Double, DBID>(var, aKey));
+ resqueue.add(new DoubleObjPair<DBID>(var, aKey));
}
else {
- if(resqueue.peek().getFirst() > var) {
+ if(resqueue.peek().first > var) {
resqueue.remove();
- resqueue.add(new FCPair<Double, DBID>(var, aKey));
+ resqueue.add(new DoubleObjPair<DBID>(var, aKey));
}
}
}
// System.out.println(v + " Punkte von " + data.size() + " verfeinert !!");
DoubleMinMax minmaxabod = new DoubleMinMax();
- WritableDataStore<Double> abodvalues = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
- for(FCPair<Double, DBID> pair : pq) {
- abodvalues.put(pair.getSecond(), pair.getFirst());
- minmaxabod.put(pair.getFirst());
+ WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ for(DoubleObjPair<DBID> pair : pq) {
+ abodvalues.putDouble(pair.getSecond(), pair.first);
+ minmaxabod.put(pair.first);
}
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
@@ -336,31 +336,6 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
return new OutlierResult(scoreMeta, scoreResult);
}
- // TODO: remove?
- @SuppressWarnings("unused")
- private double[] calcNormalization(Integer xKey, HashMap<Integer, Double> dists) {
- double[] result = new double[2];
- for(Integer yKey : dists.keySet()) {
- if(yKey.equals(xKey)) {
- continue;
- }
- for(Integer zKey : dists.keySet()) {
- if(zKey <= yKey) {
- continue;
- }
- if(zKey.equals(xKey)) {
- continue;
- }
- if(dists.get(yKey) != 0 && dists.get(zKey) != 0) {
- double sqr = Math.sqrt(dists.get(yKey) * dists.get(zKey));
- result[0] += 1 / sqr;
- result[1] += 1 / (dists.get(yKey) * dists.get(zKey) * sqr);
- }
- }
- }
- return result;
- }
-
private double[] calcFastNormalization(DBID x, HashMap<DBID, Double> dists) {
double[] result = new double[2];
@@ -439,7 +414,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
private int mapDBID(DBID aKey) {
// TODO: this is not the most efficient...
- int off = Collections.binarySearch(staticids, aKey);
+ int off = staticids.binarySearch(aKey);
if(off < 0) {
throw new AbortException("Did not find id " + aKey.toString() + " in staticids. " + staticids.contains(aKey));
}
@@ -457,33 +432,33 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
return (kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, ci) - kernelMatrix.getDistance(ai, ci) - kernelMatrix.getDistance(ai, bi));
}
- private PriorityQueue<FCPair<Double, DBID>> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBID aKey, HashMap<DBID, Double> dists) {
- PriorityQueue<FCPair<Double, DBID>> nn = new PriorityQueue<FCPair<Double, DBID>>(sampleSize);
+ private Heap<DoubleObjPair<DBID>> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBID aKey, HashMap<DBID, Double> dists) {
+ Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize);
for(DBID bKey : data.iterDBIDs()) {
double val = calcCos(kernelMatrix, aKey, bKey);
dists.put(bKey, val);
if(nn.size() < sampleSize) {
- nn.add(new FCPair<Double, DBID>(val, bKey));
+ nn.add(new DoubleObjPair<DBID>(val, bKey));
}
else {
- if(val < nn.peek().getFirst()) {
+ if(val < nn.peek().first) {
nn.remove();
- nn.add(new FCPair<Double, DBID>(val, bKey));
+ nn.add(new DoubleObjPair<DBID>(val, bKey));
}
}
}
return nn;
}
- private PriorityQueue<FCPair<Double, DBID>> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBID aKey, HashMap<DBID, Double> dists) {
- PriorityQueue<FCPair<Double, DBID>> nn = new PriorityQueue<FCPair<Double, DBID>>(sampleSize);
+ private Heap<DoubleObjPair<DBID>> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBID aKey, HashMap<DBID, Double> dists) {
+ Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize);
int step = (int) ((double) data.size() / (double) sampleSize);
int counter = 0;
for(DBID bKey : data.iterDBIDs()) {
double val = calcCos(kernelMatrix, aKey, bKey);
dists.put(bKey, val);
if(counter % step == 0) {
- nn.add(new FCPair<Double, DBID>(val, bKey));
+ nn.add(new DoubleObjPair<DBID>(val, bKey));
}
counter++;
}
@@ -499,13 +474,13 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
public void getExplanations(Relation<V> data) {
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, data, staticids);
// PQ for Outlier Ranking
- PriorityQueue<FCPair<Double, DBID>> pq = new PriorityQueue<FCPair<Double, DBID>>(data.size(), Collections.reverseOrder());
- HashMap<DBID, LinkedList<DBID>> explaintab = new HashMap<DBID, LinkedList<DBID>>();
+ Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(data.size(), Collections.reverseOrder());
+ HashMap<DBID, DBIDs> explaintab = new HashMap<DBID, DBIDs>();
// test all objects
for(DBID objKey : data.iterDBIDs()) {
MeanVariance s = new MeanVariance();
// Queue for the best explanation
- PriorityQueue<FCPair<Double, DBID>> explain = new PriorityQueue<FCPair<Double, DBID>>();
+ Heap<DoubleObjPair<DBID>> explain = new Heap<DoubleObjPair<DBID>>();
// determine Object
// for each pair of other objects
Iterator<DBID> iter = data.iterDBIDs();
@@ -529,13 +504,13 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
s2.put(tmp, 1 / sqr);
}
}
- explain.add(new FCPair<Double, DBID>(s2.getSampleVariance(), key1));
+ explain.add(new DoubleObjPair<DBID>(s2.getSampleVariance(), key1));
s.put(s2);
}
// build variance of the observed vectors
- pq.add(new FCPair<Double, DBID>(s.getSampleVariance(), objKey));
+ pq.add(new DoubleObjPair<DBID>(s.getSampleVariance(), objKey));
//
- LinkedList<DBID> expList = new LinkedList<DBID>();
+ ModifiableDBIDs expList = DBIDUtil.newArray();
expList.add(explain.remove().getSecond());
while(!explain.isEmpty()) {
DBID nextKey = explain.remove().getSecond();
@@ -564,26 +539,26 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
if(count > 10) {
break;
}
- double factor = pq.peek().getFirst();
+ double factor = pq.peek().first;
DBID key = pq.remove().getSecond();
System.out.print(data.get(key) + " ");
System.out.println(count + " Factor=" + factor + " " + key);
- LinkedList<DBID> expList = explaintab.get(key);
+ DBIDs expList = explaintab.get(key);
generateExplanation(data, key, expList);
count++;
}
System.out.println("--------------------------------------------");
}
- private void generateExplanation(Relation<V> data, DBID key, LinkedList<DBID> expList) {
- V vect1 = data.get(key);
+ private void generateExplanation(Relation<V> data, DBID key, DBIDs expList) {
+ Vector vect1 = data.get(key).getColumnVector();
Iterator<DBID> iter = expList.iterator();
while(iter.hasNext()) {
System.out.println("Outlier: " + vect1);
- V exp = data.get(iter.next());
+ Vector exp = data.get(iter.next()).getColumnVector();
System.out.println("Most common neighbor: " + exp);
// determine difference Vector
- V vals = exp.minus(vect1);
+ Vector vals = exp.minus(vect1);
System.out.println(vals);
// System.out.println(new FeatureVector(
// "Diff-"+vect1.getPrimaryKey(),vals ));
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
index 3be73ca6..994ce8e2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -54,8 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
*
* <p>
* Reference: <br />
- * Outlier detection for high dimensional data Outlier detection for high
- * dimensional data <br />
+ * Outlier detection for high dimensional data<br />
* C.C. Aggarwal, P. S. Yu<br />
* International Conference on Management of Data Proceedings of the 2001 ACM
* SIGMOD international conference on Management of data 2001, Santa Barbara,
@@ -147,7 +146,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
if(r == phi - 1) {
end = size;
}
- ArrayDBIDs currange = DBIDUtil.newArray(phi + 1);
+ ArrayModifiableDBIDs currange = DBIDUtil.newArray(phi + 1);
for(int i = start; i < end; i++) {
currange.add(axis.get(i).second);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
index 23496389..1d77af3a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
@@ -1,37 +1,37 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2011
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
@@ -79,10 +79,8 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
*
*/
public OutlierResult run(Database database, Relation<O> relation) throws IllegalStateException {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
-
// Run the actual score process
- DataStore<Double> dbodscore = computeOutlierScores(database, distFunc, d);
+ DataStore<Double> dbodscore = computeOutlierScores(database, relation, d);
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
@@ -92,8 +90,13 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
/**
* computes an outlier score for each object of the database.
+ *
+ * @param database Database
+ * @param relation Relation
+ * @param d distance
+ * @return computed scores
*/
- protected abstract DataStore<Double> computeOutlierScores(Database database, DistanceQuery<O, D> distFunc, D d);
+ protected abstract DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D d);
@Override
public TypeInformation[] getInputTypeRestriction() {
@@ -108,8 +111,11 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
* @apiviz.exclude
*/
public static abstract class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Query radius
+ */
protected D d = null;
-
+
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
index aba5576e..5d357744 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2011
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.ArrayList;
import java.util.Arrays;
@@ -35,13 +36,13 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -65,8 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* detect outliers for high dimensional data.
* <p>
* Reference: <br />
- * Outlier detection for high dimensional data Outlier detection for high
- * dimensional data <br />
+ * Outlier detection for high dimensional data<br />
* C.C. Aggarwal, P. S. Yu <br />
* Proceedings of the 2001 ACM SIGMOD international conference on Management of
* data 2001, Santa Barbara, California, United States
@@ -147,23 +147,23 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
Collection<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, seed)).run();
- WritableDataStore<Double> outlierScore = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
for(Individuum ind : individuums) {
DBIDs ids = computeSubspaceForGene(ind.getGene(), ranges);
double sparsityC = sparsity(ids.size(), dbsize, k);
for(DBID id : ids) {
- Double prev = outlierScore.get(id);
- if(prev == null || sparsityC < prev) {
- outlierScore.put(id, sparsityC);
+ double prev = outlierScore.doubleValue(id);
+ if(Double.isNaN(prev) || sparsityC < prev) {
+ outlierScore.putDouble(id, sparsityC);
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
for(DBID id : relation.iterDBIDs()) {
- Double val = outlierScore.get(id);
- if(val == null) {
- outlierScore.put(id, 0.0);
+ double val = outlierScore.doubleValue(id);
+ if(Double.isNaN(val)) {
+ outlierScore.putDouble(id, 0.0);
val = 0.0;
}
minmax.put(val);
@@ -224,9 +224,10 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
this.m = m;
this.dbsize = database.size();
this.dim = DatabaseUtil.dimensionality(database);
- if (seed != null) {
+ if(seed != null) {
this.random = new Random(seed);
- } else {
+ }
+ else {
this.random = new Random();
}
}
@@ -274,7 +275,6 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
/**
* check the termination criterion
- *
*/
private boolean checkConvergence(Collection<Individuum> pop) {
if(pop.size() == 0) {
@@ -683,15 +683,15 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier.Parameterizer {
protected int m = 0;
-
+
protected Long seed = null;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
index af80c264..190211c3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2011
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.ArrayList;
import java.util.Vector;
@@ -29,13 +30,13 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -55,8 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
*
* <p>
* Reference: <br />
- * Outlier detection for high dimensional data Outlier detection for high
- * dimensional data <br />
+ * Outlier detection for high dimensional data<br />
* C.C. Aggarwal, P. S. Yu<br />
* International Conference on Management of Data Proceedings of the 2001 ACM
* SIGMOD international conference on Management of data 2001, Santa Barbara,
@@ -140,7 +140,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
}
}
- WritableDataStore<Double> sparsity = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore sparsity = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
// calculate the sparsity coefficient
for(Vector<IntIntPair> sub : Rk) {
DBIDs ids = computeSubspace(sub, ranges);
@@ -148,18 +148,18 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
if(sparsityC < 0) {
for(DBID id : ids) {
- Double prev = sparsity.get(id);
- if(prev == null || sparsityC < prev) {
- sparsity.put(id, sparsityC);
+ double prev = sparsity.doubleValue(id);
+ if(Double.isNaN(prev) || sparsityC < prev) {
+ sparsity.putDouble(id, sparsityC);
}
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
for(DBID id : relation.iterDBIDs()) {
- Double val = sparsity.get(id);
- if(val == null) {
- sparsity.put(id, 0.0);
+ double val = sparsity.doubleValue(id);
+ if(Double.isNaN(val)) {
+ sparsity.putDouble(id, 0.0);
val = 0.0;
}
minmax.put(val);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
index 0d5f115b..f4b0ba35 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
@@ -1,40 +1,41 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2011
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.Iterator;
-import java.util.List;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -99,11 +100,14 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
@Override
- protected DataStore<Double> computeOutlierScores(Database database, DistanceQuery<O, D> distFunc, D neighborhoodSize) {
+ protected DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
+ DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
+
// maximum number of objects in the D-neighborhood of an outlier
int m = (int) ((distFunc.getRelation().size()) * (1 - p));
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
if(logger.isVerbose()) {
logger.verbose("computing outlier flag");
}
@@ -112,21 +116,20 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
int counter = 0;
// if index exists, kNN query. if the distance to the mth nearest neighbor
// is more than d -> object is outlier
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, m, DatabaseQuery.HINT_OPTIMIZED_ONLY);
if(knnQuery != null) {
for(DBID id : distFunc.getRelation().iterDBIDs()) {
counter++;
- final List<DistanceResultPair<D>> knns = knnQuery.getKNNForDBID(id, m);
+ final KNNResult<D> knns = knnQuery.getKNNForDBID(id, m);
if(logger.isDebugging()) {
logger.debugFine("distance to mth nearest neighbour" + knns.toString());
}
if(knns.get(Math.min(m, knns.size()) - 1).getDistance().compareTo(neighborhoodSize) <= 0) {
// flag as outlier
- scores.put(id, 1.0);
+ scores.putDouble(id, 1.0);
}
else {
// flag as no outlier
- scores.put(id, 0.0);
+ scores.putDouble(id, 0.0);
}
}
if(progressOFlags != null) {
@@ -150,11 +153,11 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
if(count < m) {
// flag as outlier
- scores.put(id, 1.0);
+ scores.putDouble(id, 1.0);
}
else {
// flag as no outlier
- scores.put(id, 0.0);
+ scores.putDouble(id, 0.0);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
index c72675f8..ec83a2a2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
@@ -1,35 +1,37 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2011
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -71,17 +73,18 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
}
@Override
- protected DataStore<Double> computeOutlierScores(Database database, DistanceQuery<O, D> distFunc, D d) {
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ protected DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D d) {
+ DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
final double size = distFunc.getRelation().size();
+
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
// TODO: use bulk when implemented.
for(DBID id : distFunc.getRelation().iterDBIDs()) {
// compute percentage of neighbors in the given neighborhood with size d
double n = (rangeQuery.getRangeForDBID(id, d).size()) / size;
- scores.put(id, 1.0 - n);
+ scores.putDouble(id, 1.0 - n);
}
- scores.toString();
return scores;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
index b1464bbb..92d92036 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,11 +32,11 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
@@ -89,14 +89,14 @@ public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<O
Clustering<EMModel<V>> emresult = emClustering.run(database, relation);
double globmax = 0.0;
- WritableDataStore<Double> emo_score = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.class);
+ WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
for(DBID id : relation.iterDBIDs()) {
double maxProb = Double.POSITIVE_INFINITY;
double[] probs = emClustering.getProbClusterIGivenX(id);
for(double prob : probs) {
maxProb = Math.min(1 - prob, maxProb);
}
- emo_score.put(id, maxProb);
+ emo_score.putDouble(id, maxProb);
globmax = Math.max(maxProb, globmax);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("EM outlier scores", "em-outlier", TypeUtil.DOUBLE, emo_score, relation.getDBIDs());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
index 13f047e7..ae47c100 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,8 +28,9 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -37,7 +38,6 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -95,11 +95,11 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
public OutlierResult run(Relation<V> relation) throws IllegalStateException {
DoubleMinMax mm = new DoubleMinMax();
// resulting scores
- WritableDataStore<Double> oscores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.class);
+ WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// Compute mean and covariance Matrix
CovarianceMatrix temp = CovarianceMatrix.make(relation);
- V mean = temp.getMeanVector(relation);
+ Vector mean = temp.getMeanVector(relation).getColumnVector();
// debugFine(mean.toString());
Matrix covarianceMatrix = temp.destroyToNaiveMatrix();
// debugFine(covarianceMatrix.toString());
@@ -110,21 +110,20 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
// for each object compute Mahalanobis distance
for(DBID id : relation.iterDBIDs()) {
- V x = relation.get(id);
- Vector x_minus_mean = x.minus(mean).getColumnVector();
+ Vector x = relation.get(id).getColumnVector().minusEquals(mean);
// Gaussian PDF
- final double mDist = x_minus_mean.transposeTimes(covarianceTransposed).times(x_minus_mean).get(0, 0);
+ final double mDist = x.transposeTimesTimes(covarianceTransposed, x);
final double prob = fakt * Math.exp(-mDist / 2.0);
mm.put(prob);
- oscores.put(id, prob);
+ oscores.putDouble(id, prob);
}
final OutlierScoreMeta meta;
if(invert) {
double max = mm.getMax() != 0 ? mm.getMax() : 1.;
for(DBID id : relation.iterDBIDs()) {
- oscores.put(id, (max - oscores.get(id)) / max);
+ oscores.putDouble(id, (max - oscores.doubleValue(id)) / max);
}
meta = new BasicOutlierScoreMeta(0.0, 1.0);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
index 520c3673..aa352582 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,19 +30,19 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -137,7 +137,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
// Positive masked collection
DBIDs anomalousObjs = new MaskedDBIDs(objids, bits, false);
// resulting scores
- WritableDataStore<Double> oscores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.class);
+ WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// compute loglikelihood
double logLike = relation.size() * logml + loglikelihoodNormal(normalObjs, relation);
// logger.debugFine("normalsize " + normalObjs.size() + " anormalsize " +
@@ -159,7 +159,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
// if the loglike increases more than a threshold, object stays in
// anomalous set and is flagged as outlier
final double loglikeGain = currentLogLike - logLike;
- oscores.put(curid, loglikeGain);
+ oscores.putDouble(curid, loglikeGain);
minmax.put(loglikeGain);
if(loglikeGain > c) {
@@ -206,7 +206,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
return 0;
}
double prob = 0;
- V mean = DatabaseUtil.centroid(database, objids);
+ Vector mean = DatabaseUtil.centroid(database, objids).getColumnVector();
Matrix covarianceMatrix = DatabaseUtil.covarianceMatrix(database, objids);
// test singulaere matrix
@@ -216,10 +216,8 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(database)) * covarianceDet);
// for each object compute probability and sum
for(DBID id : objids) {
- V x = database.get(id);
-
- Vector x_minus_mean = x.minus(mean).getColumnVector();
- double mDist = x_minus_mean.transposeTimes(covInv).times(x_minus_mean).get(0, 0);
+ Vector x = database.get(id).getColumnVector().minusEquals(mean);
+ double mDist = x.transposeTimesTimes(covInv, x);
prob += Math.log(fakt * Math.exp(-mDist / 2.0));
}
return prob;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
index ee4a77ba..083a72a6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -22,8 +22,6 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -31,19 +29,20 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -133,9 +132,9 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// RNNS
WritableDataStore<ModifiableDBIDs> rnns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
// density
- WritableDataStore<Double> density = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.class);
+ WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// init knns and rnns
- for(DBID id : distFunc.getRelation().iterDBIDs()) {
+ for(DBID id : relation.iterDBIDs()) {
knns.put(id, DBIDUtil.newArray());
rnns.put(id, DBIDUtil.newArray());
}
@@ -149,13 +148,11 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
ModifiableDBIDs s;
if(!processedIDs.contains(id)) {
// TODO: use exactly k neighbors?
- List<DistanceResultPair<D>> list = knnQuery.getKNNForDBID(id, k);
- for(DistanceResultPair<D> d : list) {
- knns.get(id).add(d.getDBID());
- }
+ KNNResult<D> list = knnQuery.getKNNForDBID(id, k);
+ knns.get(id).addDBIDs(list.asDBIDs());
processedIDs.add(id);
s = knns.get(id);
- density.put(id, 1 / list.get(k - 1).getDistance().doubleValue());
+ density.putDouble(id, 1 / list.get(k - 1).getDistance().doubleValue());
}
else {
@@ -164,11 +161,9 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
for(DBID q : s) {
if(!processedIDs.contains(q)) {
// TODO: use exactly k neighbors?
- List<DistanceResultPair<D>> listQ = knnQuery.getKNNForDBID(q, k);
- for(DistanceResultPair<D> dq : listQ) {
- knns.get(q).add(dq.getDBID());
- }
- density.put(q, 1 / listQ.get(k - 1).getDistance().doubleValue());
+ KNNResult<D> listQ = knnQuery.getKNNForDBID(q, k);
+ knns.get(q).addDBIDs(listQ.asDBIDs());
+ density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
processedIDs.add(q);
}
@@ -186,28 +181,28 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// Calculate INFLO for any Object
// IF Object is pruned INFLO=1.0
DoubleMinMax inflominmax = new DoubleMinMax();
- WritableDataStore<Double> inflos = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
- for(DBID id : distFunc.getRelation().iterDBIDs()) {
+ WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ for(DBID id : relation.iterDBIDs()) {
if(!pruned.contains(id)) {
ModifiableDBIDs knn = knns.get(id);
ModifiableDBIDs rnn = rnns.get(id);
- double denP = density.get(id);
- knn.addAll(rnn);
+ double denP = density.doubleValue(id);
+ knn.addDBIDs(rnn);
double den = 0;
for(DBID q : knn) {
- double denQ = density.get(q);
+ double denQ = density.doubleValue(q);
den = den + denQ;
}
den = den / rnn.size();
den = den / denP;
- inflos.put(id, den);
+ inflos.putDouble(id, den);
// update minimum and maximum
inflominmax.put(den);
}
if(pruned.contains(id)) {
- inflos.put(id, 1.0);
+ inflos.putDouble(id, 1.0);
inflominmax.put(1.0);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
index fa89f954..ee748f99 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -22,26 +22,24 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -115,15 +113,13 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
FiniteProgress progressKNNDistance = logger.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), logger) : null;
DoubleMinMax minmax = new DoubleMinMax();
- WritableDataStore<Double> knno_score = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// compute distance to the k nearest neighbor.
- for(DBID id : distanceQuery.getRelation().iterDBIDs()) {
+ for(DBID id : relation.iterDBIDs()) {
// distance to the kth nearest neighbor
- final List<DistanceResultPair<D>> knns = knnQuery.getKNNForDBID(id, k);
- final int last = Math.min(k - 1, knns.size() - 1);
-
- double dkn = knns.get(last).getDistance().doubleValue();
- knno_score.put(id, dkn);
+ final KNNResult<D> knns = knnQuery.getKNNForDBID(id, k);
+ double dkn = knns.getKNNDistance().doubleValue();
+ knno_score.putDouble(id, dkn);
minmax.put(dkn);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
index 02e0789b..e9657e12 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
@@ -1,28 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2011
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-import java.util.List;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -30,18 +29,19 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -118,20 +118,17 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
// compute distance to the k nearest neighbor. n objects with the highest
// distance are flagged as outliers
- WritableDataStore<Double> knnw_score = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
- for(DBID id : distanceQuery.getRelation().iterDBIDs()) {
+ WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ for(DBID id : relation.iterDBIDs()) {
// compute sum of the distances to the k nearest neighbors
- List<DistanceResultPair<D>> knn = knnQuery.getKNNForDBID(id, k);
- D skn = knn.get(0).getDistance();
- final int last = Math.min(k + 1, knn.size());
- for(int i = 1; i < last; i++) {
- skn = skn.plus(knn.get(i).getDistance());
+ final KNNResult<D> knn = knnQuery.getKNNForDBID(id, k);
+ double skn = 0;
+ for(DistanceResultPair<D> r : knn) {
+ skn += r.getDistance().doubleValue();
}
-
- double doubleSkn = skn.getValue().doubleValue();
- knnw_score.put(id, doubleSkn);
- minmax.put(doubleSkn);
+ knnw_score.putDouble(id, skn);
+ minmax.put(skn);
if(progressKNNWeight != null) {
progressKNNWeight.incrementProcessed(logger);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
index 04ffe8cf..d9256428 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
@@ -1,28 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2011
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-import java.util.List;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -30,18 +29,19 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -117,7 +117,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
// track the maximum value for normalization
DoubleMinMax ldofminmax = new DoubleMinMax();
// compute the ldof values
- WritableDataStore<Double> ldofs = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
+ WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// compute LOF_SCORE of each db object
if(logger.isVerbose()) {
@@ -125,17 +125,17 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
FiniteProgress progressLDOFs = logger.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), logger) : null;
- for(DBID id : distFunc.getRelation().iterDBIDs()) {
- List<DistanceResultPair<D>> neighbors = knnQuery.getKNNForDBID(id, k);
+ for(DBID id : relation.iterDBIDs()) {
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k);
int nsize = neighbors.size() - 1;
// skip the point itself
double dxp = 0;
double Dxp = 0;
for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(neighbor1.getDBID() != id) {
+ if(!neighbor1.getDBID().equals(id)) {
dxp += neighbor1.getDistance().doubleValue();
for(DistanceResultPair<D> neighbor2 : neighbors) {
- if(neighbor1.getDBID() != neighbor2.getDBID() && neighbor2.getDBID() != id) {
+ if(!neighbor1.getDBID().equals(neighbor2.getDBID()) && !neighbor2.getDBID().equals(id)) {
Dxp += distFunc.distance(neighbor1.getDBID(), neighbor2.getDBID()).doubleValue();
}
}
@@ -147,7 +147,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(ldof.isNaN() || ldof.isInfinite()) {
ldof = 1.0;
}
- ldofs.put(id, ldof);
+ ldofs.putDouble(id, ldof);
// update maximum
ldofminmax.put(ldof);
@@ -176,11 +176,11 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
protected int k = 0;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
index 57c04be3..cfd8623c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,17 +34,19 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableRecordStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -185,80 +187,66 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
// LOCI main step
FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), logger) : null;
- WritableRecordStore store = DataStoreUtil.makeRecordStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class, Double.class);
- WritableDataStore<Double> mdef_norm = store.getStorage(0, Double.class);
- WritableDataStore<Double> mdef_radius = store.getStorage(1, Double.class);
+ WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmax = new DoubleMinMax();
+
for(DBID id : relation.iterDBIDs()) {
+ final List<DoubleIntPair> cdist = interestingDistances.get(id);
+ final double maxdist = cdist.get(cdist.size() - 1).first;
+ final int maxneig = cdist.get(cdist.size() - 1).second;
+
double maxmdefnorm = 0.0;
double maxnormr = 0;
- List<DoubleIntPair> cdist = interestingDistances.get(id);
- double maxdist = cdist.get(cdist.size() - 1).first;
- int maxneig = cdist.get(cdist.size() - 1).second;
if(maxneig >= nmin) {
D range = distFunc.getDistanceFactory().fromDouble(maxdist);
// Compute the largest neighborhood we will need.
List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(id, range);
+ // Ensure the set is sorted. Should be a no-op with most indexes.
+ Collections.sort(maxneighbors);
+ // For any critical distance, compute the normalized MDEF score.
for(DoubleIntPair c : cdist) {
- double alpha_r = alpha * c.first;
- // compute n(p_i, \alpha * r) from list
- int n_alphar = 0;
- for(DoubleIntPair c2 : cdist) {
- if(c2.first <= alpha_r) {
- n_alphar = c2.second;
- }
- else {
- break;
- }
- }
- // compute \hat{n}(p_i, r, \alpha)
- double nhat_r_alpha = 0.0;
- double sigma_nhat_r_alpha = 0.0;
- // Build the sublist from maxneighbors to match the radius c.first
- List<DistanceResultPair<D>> rneighbors = null;
- for(int i = nmin; i < maxneighbors.size(); i++) {
- DistanceResultPair<D> ne = maxneighbors.get(i);
- if(ne.getDistance().doubleValue() > c.first) {
- rneighbors = maxneighbors.subList(1, i);
- break;
- }
- }
- if(rneighbors == null) {
+ // Only start when minimum size is fulfilled
+ if (c.second < nmin) {
continue;
}
- for(DistanceResultPair<D> rn : rneighbors) {
- List<DoubleIntPair> rncdist = interestingDistances.get(rn.getDBID());
- int rn_alphar = 0;
- for(DoubleIntPair c2 : rncdist) {
- if(c2.first <= alpha_r) {
- rn_alphar = c2.second;
- }
- else {
- break;
- }
+ final double r = c.first;
+ final double alpha_r = alpha * r;
+ // compute n(p_i, \alpha * r) from list (note: alpha_r is different from c!)
+ final int n_alphar = elementsAtRadius(cdist, alpha_r);
+ // compute \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF}
+ MeanVariance mv_n_r_alpha = new MeanVariance();
+ for(DistanceResultPair<D> ne : maxneighbors) {
+ // Stop at radius r
+ if(ne.getDistance().doubleValue() > r) {
+ break;
}
- nhat_r_alpha = nhat_r_alpha + rn_alphar;
- sigma_nhat_r_alpha = sigma_nhat_r_alpha + (rn_alphar * rn_alphar);
+ int rn_alphar = elementsAtRadius(interestingDistances.get(ne.getDBID()), alpha_r);
+ mv_n_r_alpha.put(rn_alphar);
}
- // finalize average and deviation
- nhat_r_alpha = nhat_r_alpha / rneighbors.size();
- sigma_nhat_r_alpha = Math.sqrt(sigma_nhat_r_alpha / rneighbors.size() - nhat_r_alpha * nhat_r_alpha);
- double mdef = 1.0 - (n_alphar / nhat_r_alpha);
- double sigmamdef = sigma_nhat_r_alpha / nhat_r_alpha;
- double mdefnorm = mdef / sigmamdef;
+ // We only use the average and standard deviation
+ final double nhat_r_alpha = mv_n_r_alpha.getMean();
+ final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();
+
+ // Redundant divisions removed.
+ final double mdef = (nhat_r_alpha - n_alphar); // / nhat_r_alpha;
+ final double sigmamdef = sigma_nhat_r_alpha; // / nhat_r_alpha;
+ final double mdefnorm = mdef / sigmamdef;
if(mdefnorm > maxmdefnorm) {
maxmdefnorm = mdefnorm;
- maxnormr = c.first;
+ maxnormr = r;
}
}
}
else {
- // FIXME: when nmin was never fulfilled - what is the proper value then?
+ // FIXME: when nmin was not fulfilled - what is the proper value then?
maxmdefnorm = 1.0;
maxnormr = maxdist;
}
- mdef_norm.put(id, maxmdefnorm);
- mdef_radius.put(id, maxnormr);
+ mdef_norm.putDouble(id, maxmdefnorm);
+ mdef_radius.putDouble(id, maxnormr);
+ minmax.put(maxmdefnorm);
if(progressLOCI != null) {
progressLOCI.incrementProcessed(logger);
}
@@ -267,13 +255,34 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
progressLOCI.ensureCompleted(logger);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
- // TODO: actually provide min and max?
- OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(Double.NaN, Double.NaN, 0.0, Double.POSITIVE_INFINITY, 0.0);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.POSITIVE_INFINITY, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
result.addChildResult(new MaterializedRelation<Double>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs()));
return result;
}
+ /**
+ * Get the number of objects for a given radius, from the list of critical
+ * distances, storing (radius, count) pairs.
+ *
+ * @param criticalDistances (radius, count) pairs; presumably sorted by ascending radius, as the scan stops at the first larger radius (TODO confirm)
+ * @param radius Radius to look up; pairs with a radius up to and including this value are considered
+ * @return Count of the last considered pair whose count is not Integer.MIN_VALUE, or 0 if there is none
+ */
+ protected int elementsAtRadius(List<DoubleIntPair> criticalDistances, final double radius) {
+ int n_r = 0;
+ for(DoubleIntPair c2 : criticalDistances) {
+ if(c2.first > radius) {
+ break;
+ }
+ if(c2.second != Integer.MIN_VALUE) {
+ // Entries with count Integer.MIN_VALUE are skipped; otherwise take this pair's count
+ n_r = c2.second;
+ }
+ }
+ return n_r;
+ }
+ }
+
@Override
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
index 5f5f3568..85e1aef2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
@@ -34,13 +32,14 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -170,8 +169,21 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Constructor using a single distance function.
+ *
+ * Uses the same distance function for neighborhood computation and
+ * reachability distance (standard as in the original publication); same as
+ * {@link #LOF(int, DistanceFunction, DistanceFunction) LOF(int, distanceFunction, distanceFunction)}.
+ *
+ * @param k the value of k
+ * @param distanceFunction the distance function, passed on for both roles
+ */
+ public LOF(int k, DistanceFunction<? super O, D> distanceFunction) {
+ this(k, distanceFunction, distanceFunction);
+ }
+
+ /**
* Performs the Generalized LOF_SCORE algorithm on the given database by
- * calling {@code #doRunInTime(Database)}.
+ * calling {@link #doRunInTime}.
*
* @param relation Data to process
*/
@@ -180,7 +192,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog);
KNNQuery<O, D> kNNRefer = pair.getFirst();
KNNQuery<O, D> kNNReach = pair.getSecond();
- return doRunInTime(kNNRefer, kNNReach, stepprog).getResult();
+ return doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog).getResult();
}
/**
@@ -231,7 +243,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* function
* @param kNNReach the kNN query w.r.t. reachability distance function
*/
- protected LOFResult<O, D> doRunInTime(KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) throws IllegalStateException {
+ protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) throws IllegalStateException {
// Assert we got something
if(kNNRefer == null) {
throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
@@ -244,14 +256,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
if(stepprog != null) {
stepprog.beginStep(2, "Computing LRDs.", logger);
}
- WritableDataStore<Double> lrds = computeLRDs(kNNReach.getRelation().getDBIDs(), kNNReach);
+ WritableDoubleDataStore lrds = computeLRDs(ids, kNNReach);
// compute LOF_SCORE of each db object
if(stepprog != null) {
stepprog.beginStep(3, "Computing LOFs.", logger);
}
- Pair<WritableDataStore<Double>, DoubleMinMax> lofsAndMax = computeLOFs(kNNRefer.getRelation().getDBIDs(), lrds, kNNRefer);
- WritableDataStore<Double> lofs = lofsAndMax.getFirst();
+ Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer);
+ WritableDoubleDataStore lofs = lofsAndMax.getFirst();
// track the maximum value for normalization.
DoubleMinMax lofminmax = lofsAndMax.getSecond();
@@ -260,7 +272,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, kNNRefer.getRelation().getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
@@ -275,22 +287,22 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* reachability distance
* @return the LRDs of the objects
*/
- protected WritableDataStore<Double> computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) {
- WritableDataStore<Double> lrds = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
+ protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) {
+ WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress lrdsProgress = logger.isVerbose() ? new FiniteProgress("LRD", ids.size(), logger) : null;
for(DBID id : ids) {
double sum = 0;
- List<DistanceResultPair<D>> neighbors = knnReach.getKNNForDBID(id, k);
+ KNNResult<D> neighbors = knnReach.getKNNForDBID(id, k);
int nsize = neighbors.size() - (objectIsInKNN ? 0 : 1);
for(DistanceResultPair<D> neighbor : neighbors) {
if(objectIsInKNN || !neighbor.getDBID().equals(id)) {
- List<DistanceResultPair<D>> neighborsNeighbors = knnReach.getKNNForDBID(neighbor.getDBID(), k);
- sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.get(neighborsNeighbors.size() - 1).getDistance().doubleValue());
+ KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor.getDBID(), k);
+ sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
}
}
// Avoid division by 0
- Double lrd = (sum > 0) ? nsize / sum : 0.0;
- lrds.put(id, lrd);
+ double lrd = (sum > 0) ? nsize / sum : 0.0;
+ lrds.putDouble(id, lrd);
if(lrdsProgress != null) {
lrdsProgress.incrementProcessed(logger);
}
@@ -310,17 +322,17 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* reference distance
* @return the LOFs of the objects and the maximum LOF
*/
- protected Pair<WritableDataStore<Double>, DoubleMinMax> computeLOFs(DBIDs ids, DataStore<Double> lrds, KNNQuery<O, D> knnRefer) {
- WritableDataStore<Double> lofs = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, Double.class);
+ protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DataStore<Double> lrds, KNNQuery<O, D> knnRefer) {
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
FiniteProgress progressLOFs = logger.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), logger) : null;
for(DBID id : ids) {
double lrdp = lrds.get(id);
- final Double lof;
+ final double lof;
if(lrdp > 0) {
- List<DistanceResultPair<D>> neighbors = knnRefer.getKNNForDBID(id, k);
+ final KNNResult<D> neighbors = knnRefer.getKNNForDBID(id, k);
int nsize = neighbors.size() - (objectIsInKNN ? 0 : 1);
// skip the point itself
// neighbors.remove(0);
@@ -335,7 +347,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
else {
lof = 1.0;
}
- lofs.put(id, lof);
+ lofs.putDouble(id, lof);
// update minimum and maximum
lofminmax.put(lof);
@@ -346,7 +358,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
if(progressLOFs != null) {
progressLOFs.ensureCompleted(logger);
}
- return new Pair<WritableDataStore<Double>, DoubleMinMax>(lofs, lofminmax);
+ return new Pair<WritableDoubleDataStore, DoubleMinMax>(lofs, lofminmax);
}
@Override
@@ -399,12 +411,12 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
/**
* The LRD values of the objects.
*/
- private final WritableDataStore<Double> lrds;
+ private final WritableDoubleDataStore lrds;
/**
* The LOF values of the objects.
*/
- private final WritableDataStore<Double> lofs;
+ private final WritableDoubleDataStore lofs;
/**
* Encapsulates information generated during a run of the {@link LOF}
@@ -416,7 +428,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* @param lrds the LRD values of the objects
* @param lofs the LOF values of the objects
*/
- public LOFResult(OutlierResult result, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, WritableDataStore<Double> lrds, WritableDataStore<Double> lofs) {
+ public LOFResult(OutlierResult result, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, WritableDoubleDataStore lrds, WritableDoubleDataStore lofs) {
this.result = result;
this.kNNRefer = kNNRefer;
this.kNNReach = kNNReach;
@@ -441,14 +453,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
/**
* @return the LRD values of the objects
*/
- public WritableDataStore<Double> getLrds() {
+ public WritableDoubleDataStore getLrds() {
return lrds;
}
/**
* @return the LOF values of the objects
*/
- public WritableDataStore<Double> getLofs() {
+ public WritableDoubleDataStore getLofs() {
return lofs;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
index dfb32bb9..f1c273f6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -33,12 +31,13 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
@@ -48,8 +47,8 @@ import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
-import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
@@ -226,19 +225,19 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
// Probabilistic distances
- WritableDataStore<Double> pdists = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
+ WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
{// computing PRDs
if(stepprog != null) {
stepprog.beginStep(3, "Computing pdists", logger);
}
FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null;
for(DBID id : relation.iterDBIDs()) {
- List<DistanceResultPair<D>> neighbors = knnReach.getKNNForDBID(id, kreach);
+ final KNNResult<D> neighbors = knnReach.getKNNForDBID(id, kreach);
double sqsum = 0.0;
// use first kref neighbors as reference set
int ks = 0;
for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || neighbor.getDBID() != id) {
+ if(objectIsInKNN || !neighbor.getDBID().equals(id)) {
double d = neighbor.getDistance().doubleValue();
sqsum += d * d;
ks++;
@@ -247,15 +246,15 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
}
}
- Double pdist = lambda * Math.sqrt(sqsum / ks);
- pdists.put(id, pdist);
+ double pdist = lambda * Math.sqrt(sqsum / ks);
+ pdists.putDouble(id, pdist);
if(prdsProgress != null) {
prdsProgress.incrementProcessed(logger);
}
}
}
// Compute PLOF values.
- WritableDataStore<Double> plofs = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
+ WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
MeanVariance mvplof = new MeanVariance();
{// compute LOOP_SCORE of each db object
if(stepprog != null) {
@@ -264,24 +263,24 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
FiniteProgress progressPLOFs = logger.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), logger) : null;
for(DBID id : relation.iterDBIDs()) {
- List<DistanceResultPair<D>> neighbors = knnComp.getKNNForDBID(id, kcomp);
+ final KNNResult<D> neighbors = knnComp.getKNNForDBID(id, kcomp);
MeanVariance mv = new MeanVariance();
// use first kref neighbors as comparison set.
int ks = 0;
for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(objectIsInKNN || neighbor1.getDBID() != id) {
- mv.put(pdists.get(neighbor1.getDBID()));
+ if(objectIsInKNN || !neighbor1.getDBID().equals(id)) {
+ mv.put(pdists.doubleValue(neighbor1.getDBID()));
ks++;
if(ks >= kcomp) {
break;
}
}
}
- double plof = Math.max(pdists.get(id) / mv.getMean(), 1.0);
+ double plof = Math.max(pdists.doubleValue(id) / mv.getMean(), 1.0);
if(Double.isNaN(plof) || Double.isInfinite(plof)) {
plof = 1.0;
}
- plofs.put(id, plof);
+ plofs.putDouble(id, plof);
mvplof.put((plof - 1.0) * (plof - 1.0));
if(progressPLOFs != null) {
@@ -296,7 +295,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
// Compute final LoOP values.
- WritableDataStore<Double> loops = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
{// compute LOOP_SCORE of each db object
if(stepprog != null) {
stepprog.beginStep(5, "Computing LoOP scores", logger);
@@ -304,7 +303,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null;
for(DBID id : relation.iterDBIDs()) {
- loops.put(id, MathUtil.erf((plofs.get(id) - 1) / (nplof * sqrt2)));
+ loops.putDouble(id, NormalDistribution.erf((plofs.doubleValue(id) - 1) / (nplof * sqrt2)));
if(progressLOOPs != null) {
progressLOOPs.incrementProcessed(logger);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
index 369db4d3..2f120c44 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,11 +33,13 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
@@ -111,30 +113,31 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distQuery);
DBIDs ids = relation.getDBIDs();
- WritableDataStore<List<DistanceResultPair<D>>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
- WritableDataStore<Double> coreDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
+ // FIXME: implicit preprocessor.
+ WritableDataStore<KNNResult<D>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNResult.class);
+ WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
WritableDataStore<Integer> minPtsNeighborhoodSize = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Integer.class);
// Pass 1
// N_minpts(id) and core-distance(id)
for(DBID id : relation.iterDBIDs()) {
- List<DistanceResultPair<D>> minptsNeighbours = knnQuery.getKNNForDBID(id, minpts);
- D d = minptsNeighbours.get(minptsNeighbours.size() - 1).getDistance();
+ KNNResult<D> minptsNeighbours = knnQuery.getKNNForDBID(id, minpts);
+ D d = minptsNeighbours.getKNNDistance();
nMinPts.put(id, minptsNeighbours);
- coreDistance.put(id, d.doubleValue());
+ coreDistance.putDouble(id, d.doubleValue());
minPtsNeighborhoodSize.put(id, rangeQuery.getRangeForDBID(id, d).size());
}
// Pass 2
WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
- WritableDataStore<Double> lrds = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
+ WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
for(DBID id : relation.iterDBIDs()) {
List<Double> core = new ArrayList<Double>();
double lrd = 0;
for(DistanceResultPair<D> neighPair : nMinPts.get(id)) {
DBID idN = neighPair.getDBID();
- double coreDist = coreDistance.get(idN);
+ double coreDist = coreDistance.doubleValue(idN);
double dist = distQuery.distance(id, idN).doubleValue();
Double rd = Math.max(coreDist, dist);
lrd = rd + lrd;
@@ -142,22 +145,22 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
}
lrd = (minPtsNeighborhoodSize.get(id) / lrd);
reachDistance.put(id, core);
- lrds.put(id, lrd);
+ lrds.putDouble(id, lrd);
}
// Pass 3
DoubleMinMax ofminmax = new DoubleMinMax();
- WritableDataStore<Double> ofs = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
for(DBID id : relation.iterDBIDs()) {
double of = 0;
for(DistanceResultPair<D> pair : nMinPts.get(id)) {
DBID idN = pair.getDBID();
- double lrd = lrds.get(id);
- double lrdN = lrds.get(idN);
+ double lrd = lrds.doubleValue(id);
+ double lrdN = lrds.doubleValue(idN);
of = of + lrdN / lrd;
}
of = of / minPtsNeighborhoodSize.get(id);
- ofs.put(id, of);
+ ofs.putDouble(id, of);
// update minimum and maximum
ofminmax.put(of);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OUTRES.java
new file mode 100644
index 00000000..912f878a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OUTRES.java
@@ -0,0 +1,368 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Adaptive outlierness for subspace outlier ranking (OUTRES).
+ *
+ * Note: this algorithm seems to have an O(n^3) complexity without appropriate
+ * index structures to accelerate range queries: each object in each tested
+ * subspace will need to know the mean and standard deviation of the density of
+ * the neighbors, which in turn needs another range query.
+ *
+ * Reference:
+ * <p>
+ * E. Müller, M. Schiffer, T. Seidl<br />
+ * Adaptive outlierness for subspace outlier ranking<br />
+ * in: Proc. 19th ACM International Conference on Information and knowledge
+ * management
+ * </p>
+ *
+ * @author Pleintinger Viktoria
+ * @author Erich Schubert
+ */
+@Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management")
+public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging logger = Logging.getLogger(OUTRES.class);
+
+ /**
+ * The epsilon (in 2d) parameter
+ */
+ private final double eps;
+
+ /**
+ * Constant for Kolmogorov-Smirnov at alpha=0.01 (table value)
+ */
+ private static final double K_S_CRITICAL001 = 1.63;
+
+ /**
+ * Constructor.
+ *
+ * @param eps Epsilon: the range value for 2 dimensions; radii for other
+ *        dimensionalities are derived from it by the kernel density
+ *        estimator
+ */
+ public OUTRES(double eps) {
+ super();
+ this.eps = eps;
+ }
+
+ /**
+  * Compute the OUTRES score of every object in the relation.
+  *
+  * Each object is scored by {@link #outresScore} starting from the empty
+  * subspace and dimension 0. Scores are stored per object, and their minimum
+  * and maximum are tracked for the score metadata.
+  *
+  * @param relation Relation to process
+  * @return Outlier detection result
+  */
+ public OutlierResult run(Relation<V> relation) {
+   WritableDoubleDataStore ranks = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+   DoubleMinMax minmax = new DoubleMinMax();
+
+   KernelDensityEstimator kernel = new KernelDensityEstimator(relation);
+   // Scratch bit set shared by all objects; it is reset before each use.
+   BitSet subspace = new BitSet(kernel.dim);
+
+   // Progress is only tracked when verbose logging is enabled.
+   FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), logger) : null;
+
+   for(DBID object : relation.iterDBIDs()) {
+     subspace.clear();
+     double score = outresScore(0, subspace, object, kernel);
+     ranks.putDouble(object, score);
+     minmax.put(score);
+     if(progress != null) {
+       progress.incrementProcessed(logger);
+     }
+   }
+   if(progress != null) {
+     progress.ensureCompleted(logger);
+   }
+
+   OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.);
+   OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<Double>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs()));
+   return outresResult;
+ }
+
+ /**
+ * Recursive scoring step of OUTRES, run for each object.
+ *
+ * Starting at dimension {@code s}, each dimension not yet in the subspace is
+ * added in turn. If the object has more than 2 neighbors in the extended
+ * subspace and the subspace passes {@link #relevantSubspace}, the object's
+ * density is compared to the mean and standard deviation of its neighbors'
+ * densities, and the method recurses with the next start dimension. The
+ * returned score is the product of all factors collected this way.
+ *
+ * The subspace bit set is modified during the call, but every bit set here
+ * is cleared again before returning.
+ *
+ * @param s start dimension
+ * @param subspace Current subspace
+ * @param id Current object ID
+ * @param kernel Kernel
+ * @return Score
+ */
+ public double outresScore(final int s, BitSet subspace, DBID id, KernelDensityEstimator kernel) {
+ double score = 1.0; // Initial score is 1.0
+
+ for(int i = s; i < kernel.dim; i++) {
+ if(subspace.get(i)) { // TODO: needed? Or should we always start with i=0?
+ continue;
+ }
+ // Extend the subspace by dimension i; undone at the end of the iteration.
+ subspace.set(i);
+ final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
+ // NOTE(review): the radius is adjusted for the full data dimensionality
+ // (kernel.dim), not for the cardinality of the current subspace - confirm
+ // that this is intended.
+ final DoubleDistance range = new DoubleDistance(kernel.adjustedEps(kernel.dim));
+ RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
+
+ List<DistanceResultPair<DoubleDistance>> neigh = rq.getRangeForDBID(id, range);
+ if(neigh.size() > 2) {
+ // Relevance test
+ if(relevantSubspace(subspace, neigh, kernel)) {
+ final double density = kernel.subspaceDensity(subspace, neigh);
+ final double deviation;
+ // Compute mean and standard deviation for densities of neighbors.
+ // This needs one further range query per neighbor.
+ MeanVariance meanv = new MeanVariance();
+ for(DistanceResultPair<DoubleDistance> pair : neigh) {
+ List<DistanceResultPair<DoubleDistance>> n2 = rq.getRangeForDBID(pair.getDBID(), range);
+ meanv.put(kernel.subspaceDensity(subspace, n2));
+ }
+ // How far the density lies below the neighbors' mean, in units of two
+ // standard deviations.
+ deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
+ // High deviation:
+ if(deviation >= 1) {
+ score *= (density / deviation);
+ }
+ // Recursion
+ score *= outresScore(i + 1, subspace, id, kernel);
+ }
+ }
+ // Restore the subspace for the next candidate dimension.
+ subspace.clear(i);
+ }
+ return score;
+ }
+
+ /**
+  * Relevance test for a subspace: Kolmogorov-Smirnov test of the neighbor
+  * values against a uniform distribution, separately in each dimension of the
+  * subspace.
+  *
+  * @param subspace Subspace to test
+  * @param neigh Neighbors of the current object in this subspace
+  * @param kernel Kernel density estimator, used for access to the relation
+  * @return {@code false} if the neighbor values look uniformly distributed
+  *         (scattered) in at least one dimension of the subspace,
+  *         {@code true} otherwise
+  */
+ protected boolean relevantSubspace(BitSet subspace, List<DistanceResultPair<DoubleDistance>> neigh, KernelDensityEstimator kernel) {
+   Relation<V> relation = kernel.relation;
+   final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size());
+
+   // nextSetBit returns -1 when exhausted; 0 is a valid dimension index.
+   for(int dim = subspace.nextSetBit(0); dim >= 0; dim = subspace.nextSetBit(dim + 1)) {
+     // TODO: can we save this copy somehow?
+     double[] data = new double[neigh.size()];
+     {
+       int count = 0;
+       for(DistanceResultPair<DoubleDistance> object : neigh) {
+         V vector = relation.get(object.getDBID());
+         // Subspace bit i is read as vector dimension i + 1; presumably the
+         // vector API is 1-indexed.
+         data[count] = vector.doubleValue(dim + 1);
+         count++;
+       }
+       assert (count == neigh.size());
+     }
+     Arrays.sort(data);
+
+     final double min = data[0];
+     final double norm = data[data.length - 1] - min;
+
+     // Kolmogorow-Smirnow-Test against uniform distribution. The smallest and
+     // largest value match the uniform CDF by construction, so only the
+     // interior values are checked.
+     boolean deviates = false;
+     for(int j = 1; j < data.length - 1; j++) {
+       // Floating point division: in integers, j / (length - 1) is always 0.
+       final double delta = (j / (data.length - 1.)) - ((data[j] - min) / norm);
+       if(Math.abs(delta) > crit) {
+         deviates = true;
+         break;
+       }
+     }
+     if(!deviates) {
+       // Uniformity cannot be rejected in this dimension: scattered.
+       return false;
+     }
+   }
+   return true;
+ }
+
+ /**
+  * Kernel density estimation and utility class.
+  *
+  * Bundles the relation, its dimensionality, the kernel function, and a
+  * cache of query radii per dimensionality.
+  *
+  * @author Erich Schubert
+  */
+ protected class KernelDensityEstimator {
+   /**
+    * Actual kernel in use
+    */
+   final KernelDensityFunction kernel = EpanechnikovKernelDensityFunction.KERNEL;
+
+   /**
+    * Relation to retrieve data from
+    */
+   final Relation<V> relation;
+
+   /**
+    * Epsilon values for different subspace dimensionalities, indexed by
+    * dimensionality. Negative entries have not been computed yet.
+    */
+   final double[] epsilons;
+
+   /**
+    * Optimal bandwidth for a dimensionality of 2
+    */
+   final double hopttwo;
+
+   /**
+    * Dimensionality of data set
+    */
+   final int dim;
+
+   /**
+    * Constructor.
+    *
+    * @param relation Relation to apply to
+    */
+   public KernelDensityEstimator(Relation<V> relation) {
+     super();
+     this.relation = relation;
+     dim = DatabaseUtil.dimensionality(relation);
+     hopttwo = optimalBandwidth(2);
+     // Index 2 holds the user-supplied radius, so it must exist even when the
+     // data has fewer than 2 dimensions.
+     epsilons = new double[Math.max(dim, 2) + 1];
+     Arrays.fill(epsilons, Double.NEGATIVE_INFINITY);
+     epsilons[2] = OUTRES.this.eps;
+   }
+
+   /**
+    * Compute density in the given subspace.
+    *
+    * Sums the kernel density of each neighbor distance, scaled by the
+    * bandwidth for the subspace cardinality, and divides by the relation size.
+    *
+    * @param subspace Subspace
+    * @param neighbours Neighbor distance list
+    * @return Density
+    */
+   protected double subspaceDensity(BitSet subspace, List<DistanceResultPair<DoubleDistance>> neighbours) {
+     final double bandwidth = optimalBandwidth(subspace.cardinality());
+
+     // TODO: optimize by moving instanceof outside?
+     double density = 0;
+     for(DistanceResultPair<DoubleDistance> pair : neighbours) {
+       if(pair instanceof DoubleDistanceResultPair) {
+         // Read the primitive distance directly where available.
+         density += kernel.density(((DoubleDistanceResultPair) pair).getDoubleDistance() / bandwidth);
+       }
+       else {
+         density += kernel.density(pair.getDistance().doubleValue() / bandwidth);
+       }
+     }
+
+     return density / relation.size();
+   }
+
+   /**
+    * Compute optimal kernel bandwidth
+    *
+    * @param dim Dimensionality of subspace
+    * @return optimal bandwidth
+    */
+   protected double optimalBandwidth(int dim) {
+     // Pi in the publication is redundant and cancels out!
+     double hopt = 8 * Math.exp(GammaDistribution.logGamma(dim / 2.0 + 1)) * (dim + 4) * Math.pow(2, dim);
+     // Floating point exponent: in integers, -1 / (dim + 4) is 0, which would
+     // make the result independent of the relation size.
+     return hopt * Math.pow(relation.size(), (-1. / (dim + 4)));
+   }
+
+   /**
+    * Rescale the query radius based on the given dimensionality.
+    *
+    * The radius given for 2 dimensions is scaled by the ratio of the optimal
+    * bandwidths; results are cached per dimensionality.
+    *
+    * @param dim Dimensionality
+    * @return Query radius
+    */
+   protected double adjustedEps(int dim) {
+     // Cached
+     double e = epsilons[dim];
+     if(e < 0) {
+       e = epsilons[2] * optimalBandwidth(dim) / hopttwo;
+       epsilons[dim] = e;
+     }
+     return e;
+   }
+ }
+
+ /**
+ * Get the logger of this class.
+ *
+ * @return the static class logger
+ */
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * Get the input type restriction of this algorithm.
+ *
+ * @return array containing only {@code TypeUtil.NUMBER_VECTOR_FIELD}
+ */
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ /**
+ * Parameterization class: reads the epsilon option and builds the
+ * {@link OUTRES} instance from it.
+ *
+ * @author Viktoria Pleintinger
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer {
+ /**
+ * Option ID for Epsilon parameter
+ */
+ public static final OptionID D_ID = OptionID.getOrCreateOptionID("outres.epsilon", "Range value for OUTRES in 2 dimensions.");
+
+ /**
+ * Query radius. Only assigned when the option could be grabbed; otherwise
+ * it keeps the field default of 0.0.
+ */
+ protected double eps;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ // NOTE(review): no constraint is attached to the parameter here, so
+ // non-positive radii are not rejected at this point - confirm intended.
+ final DoubleParameter param = new DoubleParameter(D_ID);
+ if(config.grab(param)) {
+ eps = param.getValue();
+ }
+ }
+
+ @Override
+ protected OUTRES<O> makeInstance() {
+ return new OUTRES<O>(eps);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
index a5115fdf..ad17398c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,7 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
@@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -101,14 +102,14 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
RKNNQuery<O, D> rkNNRefer = queries.getSecond().getFirst();
RKNNQuery<O, D> rkNNReach = queries.getSecond().getSecond();
- LOFResult<O, D> lofResult = super.doRunInTime(kNNRefer, kNNReach, stepprog);
+ LOFResult<O, D> lofResult = super.doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog);
lofResult.setRkNNRefer(rkNNRefer);
lofResult.setRkNNReach(rkNNReach);
// add listener
KNNListener l = new LOFKNNListener(lofResult);
- ((MaterializeKNNPreprocessor<O, D>)((PreprocessorKNNQuery<O, D>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
- ((MaterializeKNNPreprocessor<O, D>)((PreprocessorKNNQuery<O, D>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O, D>)((PreprocessorKNNQuery<O, D, ? extends KNNResult<D>>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O, D>)((PreprocessorKNNQuery<O, D, ? extends KNNResult<D>>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
return lofResult.getResult();
}
@@ -194,8 +195,8 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
@Override
public void kNNsChanged(KNNChangeEvent e) {
- AbstractMaterializeKNNPreprocessor<O, D> p1 = ((PreprocessorKNNQuery<O, D>) lofResult.getKNNRefer()).getPreprocessor();
- AbstractMaterializeKNNPreprocessor<O, D> p2 = ((PreprocessorKNNQuery<O, D>) lofResult.getKNNReach()).getPreprocessor();
+ AbstractMaterializeKNNPreprocessor<O, D, ?> p1 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNRefer()).getPreprocessor();
+ AbstractMaterializeKNNPreprocessor<O, D, ?> p2 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNReach()).getPreprocessor();
if(firstEventReceived == null) {
if(e.getSource().equals(p1) && e.getSource().equals(p2)) {
@@ -266,13 +267,13 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
List<List<DistanceResultPair<D>>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
- ArrayDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
- WritableDataStore<Double> new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
+ ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
+ WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
for(DBID id : affected_lrd_id_candidates) {
- Double new_lrd = new_lrds.get(id);
- Double old_lrd = lofResult.getLrds().get(id);
- if(old_lrd == null || !old_lrd.equals(new_lrd)) {
- lofResult.getLrds().put(id, new_lrd);
+ double new_lrd = new_lrds.doubleValue(id);
+ double old_lrd = lofResult.getLrds().doubleValue(id);
+ if(Double.isNaN(old_lrd) || old_lrd != new_lrd) {
+ lofResult.getLrds().putDouble(id, new_lrd);
affected_lrd_ids.add(id);
}
}
@@ -325,13 +326,13 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
List<List<DistanceResultPair<D>>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
- ArrayDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
- WritableDataStore<Double> new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
+ ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
+ WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
for(DBID id : affected_lrd_id_candidates) {
- Double new_lrd = new_lrds.get(id);
- Double old_lrd = lofResult.getLrds().get(id);
- if(!old_lrd.equals(new_lrd)) {
- lofResult.getLrds().put(id, new_lrd);
+ double new_lrd = new_lrds.doubleValue(id);
+ double old_lrd = lofResult.getLrds().doubleValue(id);
+ if(old_lrd != new_lrd) {
+ lofResult.getLrds().putDouble(id, new_lrd);
affected_lrd_ids.add(id);
}
}
@@ -364,7 +365,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* ids
*/
private ArrayModifiableDBIDs mergeIDs(List<List<DistanceResultPair<D>>> queryResults, DBIDs... ids) {
- ModifiableDBIDs result = DBIDUtil.newTreeSet();
+ ModifiableDBIDs result = DBIDUtil.newHashSet();
for(DBIDs dbids : ids) {
result.addDBIDs(dbids);
}
@@ -383,10 +384,10 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @param lofResult the result of the former LOF run
*/
private void recomputeLOFs(DBIDs ids, LOFResult<O, D> lofResult) {
- Pair<WritableDataStore<Double>, DoubleMinMax> lofsAndMax = computeLOFs(ids, lofResult.getLrds(), lofResult.getKNNRefer());
- WritableDataStore<Double> new_lofs = lofsAndMax.getFirst();
+ Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lofResult.getLrds(), lofResult.getKNNRefer());
+ WritableDoubleDataStore new_lofs = lofsAndMax.getFirst();
for(DBID id : ids) {
- lofResult.getLofs().put(id, new_lofs.get(id));
+ lofResult.getLofs().putDouble(id, new_lofs.doubleValue(id));
}
// track the maximum value for normalization.
DoubleMinMax new_lofminmax = lofsAndMax.getSecond();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
index fb0a89d5..2b122183 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
index e7895bad..befd03ed 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2011
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.ArrayList;
import java.util.Collection;
@@ -35,17 +36,18 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.ReferencePointsResult;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -53,6 +55,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -144,45 +147,51 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
DBIDs ids = relation.getDBIDs();
// storage of distance/score values.
- WritableDataStore<Double> rbod_score = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, Double.class);
- // compute density for one reference point, to initialize the first density
- // value for each object
+ WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT);
- double density = 0;
- V firstRef = refPoints.iterator().next();
- // compute distance vector for the first reference point
- List<DistanceResultPair<D>> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
- // order ascending
- Collections.sort(firstReferenceDists);
- for(int l = 0; l < firstReferenceDists.size(); l++) {
- density = computeDensity(firstReferenceDists, l);
- rbod_score.put(firstReferenceDists.get(l).getDBID(), density);
- }
- // compute density values for all remaining reference points
- for(V refPoint : refPoints) {
- List<DistanceResultPair<D>> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
- // order ascending
- Collections.sort(referenceDists);
- // compute density value for each object
- for(int l = 0; l < referenceDists.size(); l++) {
- density = computeDensity(referenceDists, l);
- if(density < rbod_score.get(referenceDists.get(l).getDBID())) {
- rbod_score.put(referenceDists.get(l).getDBID(), density);
+ // Compute density estimation:
+ {
+ // compute the density for the first reference point to initialize the
+ // density value of each object, then update it with the remaining
+ // reference points
+ final Iterator<V> iter = refPoints.iterator();
+ if(!iter.hasNext()) {
+ throw new AbortException("Cannot compute ROS without reference points!");
+ }
+ V firstRef = iter.next();
+ // compute distance vector for the first reference point
+ List<DistanceResultPair<D>> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
+ for(int l = 0; l < firstReferenceDists.size(); l++) {
+ double density = computeDensity(firstReferenceDists, l);
+ // Initial value
+ rbod_score.putDouble(firstReferenceDists.get(l).getDBID(), density);
+ }
+ // compute density values for all remaining reference points
+ while(iter.hasNext()) {
+ V refPoint = iter.next();
+ List<DistanceResultPair<D>> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
+ // compute density value for each object
+ for(int l = 0; l < referenceDists.size(); l++) {
+ double density = computeDensity(referenceDists, l);
+ // Update minimum
+ if(density < rbod_score.doubleValue(referenceDists.get(l).getDBID())) {
+ rbod_score.putDouble(referenceDists.get(l).getDBID(), density);
+ }
}
}
}
// compute maximum density
double maxDensity = 0.0;
for(DBID id : relation.iterDBIDs()) {
- double dens = rbod_score.get(id);
+ double dens = rbod_score.doubleValue(id);
if(dens > maxDensity) {
maxDensity = dens;
}
}
- // compute REFOD_SCORE
+ // compute ROS
for(DBID id : relation.iterDBIDs()) {
- double score = 1 - (rbod_score.get(id) / maxDensity);
- rbod_score.put(id, score);
+ double score = 1 - (rbod_score.doubleValue(id) / maxDensity);
+ rbod_score.putDouble(id, score);
}
// adds reference points to the result. header information for the
@@ -207,13 +216,13 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
* database object and the object id
*/
protected List<DistanceResultPair<D>> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
+ // TODO: optimize for double distances?
List<DistanceResultPair<D>> referenceDists = new ArrayList<DistanceResultPair<D>>(database.size());
- int counter = 0;
- for(Iterator<DBID> iter = database.iterDBIDs(); iter.hasNext(); counter++) {
- DBID id = iter.next();
- DistanceResultPair<D> referenceDist = new GenericDistanceResultPair<D>(distFunc.distance(id, refPoint), id);
- referenceDists.add(counter, referenceDist);
+ for(DBID id : database.iterDBIDs()) {
+ final D distance = distFunc.distance(id, refPoint);
+ referenceDists.add(new GenericDistanceResultPair<D>(distance, id));
}
+ Collections.sort(referenceDists);
return referenceDists;
}
@@ -230,51 +239,53 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
* @return density for one object and reference point
*/
protected double computeDensity(List<DistanceResultPair<D>> referenceDists, int index) {
- double density = 0.0;
- DistanceResultPair<D> x = referenceDists.get(index);
- double xDist = x.getDistance().doubleValue();
+ final DistanceResultPair<D> x = referenceDists.get(index);
+ final double xDist = x.getDistance().doubleValue();
- int j = 0;
- int n = index - 1;
- int m = index + 1;
- while(j < k) {
- double mdist = 0;
- double ndist = 0;
- if(n >= 0) {
- ndist = referenceDists.get(n).getDistance().doubleValue();
- if(m < referenceDists.size()) {
- mdist = referenceDists.get(m).getDistance().doubleValue();
- if(Math.abs(ndist - xDist) < Math.abs(mdist - xDist)) {
- density += Math.abs(ndist - xDist);
- n--;
- j++;
- }
- else {
- density += Math.abs(mdist - xDist);
- m++;
- j++;
- }
+ int lef = index - 1;
+ int rig = index + 1;
+ Mean mean = new Mean();
+ double lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
+ double rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
+ while(mean.getCount() < k) {
+ if(lef >= 0 && rig < referenceDists.size()) {
+ // Prefer the left or the right neighbor?
+ if(Math.abs(lef_d - xDist) < Math.abs(rig_d - xDist)) {
+ mean.put(Math.abs(lef_d - xDist));
+ // Update left
+ lef--;
+ lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
}
else {
- density += Math.abs(ndist - xDist);
- n--;
- j++;
+ mean.put(Math.abs(rig_d - xDist));
+ // Update right
+ rig++;
+ rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
}
}
- else if(m < referenceDists.size()) {
- mdist = referenceDists.get(m).getDistance().doubleValue();
- density += Math.abs(mdist - xDist);
- m++;
- j++;
- }
else {
- throw new IndexOutOfBoundsException();
+ if(lef >= 0) {
+ // Choose left, since right is not available.
+ mean.put(Math.abs(lef_d - xDist));
+ // update left
+ lef--;
+ lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
+ }
+ else if(rig < referenceDists.size()) {
+ // Choose right, since left is not available
+ mean.put(Math.abs(rig_d - xDist));
+ // Update right
+ rig++;
+ rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
+ }
+ else {
+ // Not enough objects in database?
+ throw new IndexOutOfBoundsException();
+ }
}
}
- double densityDegree = 1.0 / ((1.0 / k) * density);
-
- return densityDegree;
+ return 1.0 / mean.getMean();
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SOD.java
index 5e7184a3..a09bbcfd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SOD.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,16 +35,17 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionsSelectingEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.IntegerDistance;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -54,10 +55,9 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
-import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -69,6 +69,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstrain
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* @author Arthur Zimek
@@ -82,7 +84,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@Title("SOD: Subspace outlier degree")
@Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2")
-public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -93,7 +95,7 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
* considered for learning the subspace properties., must be an integer
* greater than 0.
*/
- public static final OptionID KNN_ID = OptionID.getOrCreateOptionID("sod.knn", "The number of shared nearest neighbors to be considered for learning the subspace properties.");
+ public static final OptionID KNN_ID = OptionID.getOrCreateOptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties.");
/**
* Parameter to indicate the multiplier for the discriminance value for
@@ -102,6 +104,11 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");
/**
+ * Parameter for the similarity function.
+ */
+ public static final OptionID SIM_ID = OptionID.getOrCreateOptionID("sod.similarity", "The similarity function used for the neighborhood set.");
+
+ /**
* Holds the value of {@link #KNN_ID}.
*/
private int knn;
@@ -112,9 +119,9 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
private double alpha;
/**
- * The similarity function.
+ * The similarity function {@link #SIM_ID}.
*/
- private SharedNearestNeighborSimilarityFunction<V> similarityFunction;
+ private SimilarityFunction<V, D> similarityFunction;
/**
* Constructor with parameters.
@@ -123,7 +130,7 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
* @param alpha Alpha parameter
* @param similarityFunction Shared nearest neighbor similarity function
*/
- public SOD(int knn, double alpha, SharedNearestNeighborSimilarityFunction<V> similarityFunction) {
+ public SOD(int knn, double alpha, SimilarityFunction<V, D> similarityFunction) {
super();
this.knn = knn;
this.alpha = alpha;
@@ -136,7 +143,7 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
* @param relation Data relation to process
*/
public OutlierResult run(Relation<V> relation) throws IllegalStateException {
- SimilarityQuery<V, IntegerDistance> snnInstance = similarityFunction.instantiate(relation);
+ SimilarityQuery<V, D> snnInstance = similarityFunction.instantiate(relation);
FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), logger) : null;
WritableDataStore<SODModel<?>> sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
DoubleMinMax minmax = new DoubleMinMax();
@@ -145,7 +152,7 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
if(progress != null) {
progress.incrementProcessed(logger);
}
- DBIDs knnList = getKNN(relation, snnInstance, queryObject).asDBIDs();
+ DBIDs knnList = getNearestNeighbors(relation, snnInstance, queryObject);
SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(queryObject));
sod_models.put(queryObject, model);
minmax.put(model.getSod());
@@ -168,23 +175,30 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
* <p/>
* The query object is excluded from the knn list.
*
- * @param database the database holding the objects
- * @param snnInstance similarity function
+ * @param relation the database holding the objects
+ * @param simQ similarity function
* @param queryObject the query object for which the kNNs should be determined
* @return the k nearest neighbors in terms of the shared nearest neighbor
* distance without the query object
*/
- private KNNList<DoubleDistance> getKNN(Relation<V> database, SimilarityQuery<V, IntegerDistance> snnInstance, DBID queryObject) {
+ private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBID queryObject) {
// similarityFunction.getPreprocessor().getParameters();
- KNNHeap<DoubleDistance> kNearestNeighbors = new KNNHeap<DoubleDistance>(knn, new DoubleDistance(Double.POSITIVE_INFINITY));
- for(Iterator<DBID> iter = database.iterDBIDs(); iter.hasNext();) {
- DBID id = iter.next();
+ Heap<DoubleObjPair<DBID>> nearestNeighbors = new TiedTopBoundedHeap<DoubleObjPair<DBID>>(knn);
+ for(DBID id : relation.iterDBIDs()) {
if(!id.equals(queryObject)) {
- double distance = 1.0 / snnInstance.similarity(queryObject, id).doubleValue();
- kNearestNeighbors.add(new DoubleDistanceResultPair(distance, id));
+ double sim = simQ.similarity(queryObject, id).doubleValue();
+ if(sim > 0) {
+ nearestNeighbors.add(new DoubleObjPair<DBID>(sim, id));
+ }
}
}
- return kNearestNeighbors.toKNNList();
+ // Collect DBIDs
+ ArrayModifiableDBIDs dbids = DBIDUtil.newArray(nearestNeighbors.size());
+ while(nearestNeighbors.size() > 0) {
+ final DoubleObjPair<DBID> next = nearestNeighbors.poll();
+ dbids.add(next.second);
+ }
+ return dbids;
}
@Override
@@ -201,13 +215,13 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
*
*
* @author Arthur Zimek
- * @param <O> the type of DatabaseObjects handled by this Result
+ * @param <V> the type of DatabaseObjects handled by this Result
*/
// TODO: arthur comment
- public static class SODModel<O extends NumberVector<O, ?>> implements TextWriteable, Comparable<SODModel<?>> {
+ public static class SODModel<V extends NumberVector<V, ?>> implements TextWriteable, Comparable<SODModel<?>> {
private double[] centerValues;
- private O center;
+ private V center;
private double[] variances;
@@ -220,61 +234,71 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
/**
* Initialize SOD Model
*
- * @param database Database
+ * @param relation Database
* @param neighborhood Neighborhood
* @param alpha Alpha value
* @param queryObject Query object
*/
- public SODModel(Relation<O> database, DBIDs neighborhood, double alpha, O queryObject) {
- // TODO: store database link?
- centerValues = new double[DatabaseUtil.dimensionality(database)];
- variances = new double[centerValues.length];
- for(DBID id : neighborhood) {
- O databaseObject = database.get(id);
- for(int d = 0; d < centerValues.length; d++) {
- centerValues[d] += databaseObject.doubleValue(d + 1);
+ public SODModel(Relation<V> relation, DBIDs neighborhood, double alpha, V queryObject) {
+ if(neighborhood.size() > 0) {
+ // TODO: store database link?
+ centerValues = new double[DatabaseUtil.dimensionality(relation)];
+ variances = new double[centerValues.length];
+ for(DBID id : neighborhood) {
+ V databaseObject = relation.get(id);
+ for(int d = 0; d < centerValues.length; d++) {
+ centerValues[d] += databaseObject.doubleValue(d + 1);
+ }
}
- }
- for(int d = 0; d < centerValues.length; d++) {
- centerValues[d] /= neighborhood.size();
- }
- for(DBID id : neighborhood) {
- O databaseObject = database.get(id);
for(int d = 0; d < centerValues.length; d++) {
- // distance
- double distance = centerValues[d] - databaseObject.doubleValue(d + 1);
- // variance
- variances[d] += distance * distance;
+ centerValues[d] /= neighborhood.size();
}
- }
- expectationOfVariance = 0;
- for(int d = 0; d < variances.length; d++) {
- variances[d] /= neighborhood.size();
- expectationOfVariance += variances[d];
- }
- expectationOfVariance /= variances.length;
- weightVector = new BitSet(variances.length);
- for(int d = 0; d < variances.length; d++) {
- if(variances[d] < alpha * expectationOfVariance) {
- weightVector.set(d, true);
+ for(DBID id : neighborhood) {
+ V databaseObject = relation.get(id);
+ for(int d = 0; d < centerValues.length; d++) {
+ // distance
+ double distance = centerValues[d] - databaseObject.doubleValue(d + 1);
+ // variance
+ variances[d] += distance * distance;
+ }
}
+ expectationOfVariance = 0;
+ for(int d = 0; d < variances.length; d++) {
+ variances[d] /= neighborhood.size();
+ expectationOfVariance += variances[d];
+ }
+ expectationOfVariance /= variances.length;
+ weightVector = new BitSet(variances.length);
+ for(int d = 0; d < variances.length; d++) {
+ if(variances[d] < alpha * expectationOfVariance) {
+ weightVector.set(d, true);
+ }
+ }
+ center = DatabaseUtil.assumeVectorField(relation).getFactory().newNumberVector(centerValues);
+ sod = subspaceOutlierDegree(queryObject, center, weightVector);
+ }
+ else {
+ center = queryObject;
+ sod = 0.0;
}
- center = DatabaseUtil.assumeVectorField(database).getFactory().newInstance(centerValues);
- sod = subspaceOutlierDegree(queryObject, center, weightVector);
}
/**
- *
+ * Compute SOD score
*
* @param queryObject
* @param center
* @param weightVector
* @return sod value
*/
- private double subspaceOutlierDegree(O queryObject, O center, BitSet weightVector) {
- final DimensionsSelectingEuclideanDistanceFunction df = new DimensionsSelectingEuclideanDistanceFunction(weightVector);
+ private double subspaceOutlierDegree(V queryObject, V center, BitSet weightVector) {
+ final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector);
+ final int card = weightVector.cardinality();
+ if(card == 0) {
+ return 0;
+ }
double distance = df.distance(queryObject, center).doubleValue();
- distance /= weightVector.cardinality();
+ distance /= card;
return distance;
}
@@ -316,7 +340,7 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
* Model result this is a proxy for.
*/
Relation<SODModel<?>> models;
-
+
/**
* The IDs we are defined for
*/
@@ -326,7 +350,7 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
* Constructor.
*
* @param models Models result
- * @param dbids IDs we are defined for
+ * @param dbids IDs we are defined for
*/
public SODProxyScoreResult(Relation<SODModel<?>> models, DBIDs dbids) {
super();
@@ -353,7 +377,7 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
public DBIDs getDBIDs() {
return dbids;
}
-
+
@Override
public IterableIterator<DBID> iterDBIDs() {
return IterableUtil.fromIterator(dbids.iterator());
@@ -402,7 +426,7 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
/**
* Holds the value of {@link #KNN_ID}.
*/
@@ -414,14 +438,19 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
private double alpha = 1.1;
/**
- * The similarity function.
+ * The similarity function - {@link #SIM_ID}.
*/
- private SharedNearestNeighborSimilarityFunction<V> similarityFunction;
+ private SimilarityFunction<V, D> similarityFunction;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter knnP = new IntParameter(KNN_ID, new GreaterConstraint(0), 1);
+ final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<SimilarityFunction<V, D>>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class);
+ if(config.grab(simP)) {
+ similarityFunction = simP.instantiateClass(config);
+ }
+
+ final IntParameter knnP = new IntParameter(KNN_ID, new GreaterConstraint(0));
if(config.grab(knnP)) {
knn = knnP.getValue();
}
@@ -430,14 +459,11 @@ public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlier
if(config.grab(alphaP)) {
alpha = alphaP.getValue();
}
-
- Class<SharedNearestNeighborSimilarityFunction<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(SharedNearestNeighborSimilarityFunction.class);
- similarityFunction = config.tryInstantiate(cls);
}
@Override
- protected SOD<V> makeInstance() {
- return new SOD<V>(knn, alpha, similarityFunction);
+ protected SOD<V, D> makeInstance() {
+ return new SOD<V, D>(knn, alpha, similarityFunction);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
index 5a4503fd..22447454 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -39,14 +39,14 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -140,7 +140,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* @return Result
*/
public OutlierResult run(Database database, Relation<?> relation) {
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
Pattern colSep = Pattern.compile(AbstractParser.WHITESPACE_PATTERN);
DoubleMinMax minmax = new DoubleMinMax();
@@ -156,7 +156,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
else if(line.length() > 0) {
String[] cols = colSep.split(line);
Integer id = null;
- Double score = null;
+ double score = Double.NaN;
for(String str : cols) {
Matcher mi = idpattern.matcher(str);
Matcher ms = scorepattern.matcher(str);
@@ -172,17 +172,17 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
id = Integer.parseInt(str.substring(mi.end()));
}
if(msf) {
- if(score != null) {
+ if(!Double.isNaN(score)) {
throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + str);
}
score = Double.parseDouble(str.substring(ms.end()));
}
}
- if(id != null && score != null) {
- scores.put(DBIDUtil.importInteger(id), score);
+ if(id != null && !Double.isNaN(score)) {
+ scores.putDouble(DBIDUtil.importInteger(id), score);
minmax.put(score);
}
- else if(id == null && score == null) {
+ else if(id == null && Double.isNaN(score)) {
logger.warning("Line did not match either ID nor score nor comment: " + line);
}
else {
@@ -213,7 +213,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
for(DBID id : relation.iterDBIDs()) {
double val = scoresult.get(id); // scores.get(id);
val = scaling.getScaled(val);
- scores.put(id, val);
+ scores.putDouble(id, val);
mm.put(val);
}
meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index d3e738a1..c8da9501 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
import java.util.ArrayList;
import java.util.BitSet;
-import java.util.HashMap;
import java.util.Random;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
@@ -36,11 +35,11 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionsSelectingEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -60,6 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* A simple ensemble method called "Feature bagging" for outlier detection.
@@ -144,8 +144,8 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("LOF iterations", num, logger) : null;
for(int i = 0; i < num; i++) {
BitSet dimset = randomSubspace(dbdim, mindim, maxdim);
- DimensionsSelectingEuclideanDistanceFunction df = new DimensionsSelectingEuclideanDistanceFunction(dimset);
- LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df, df);
+ SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
+ LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df);
// run LOF and collect the result
OutlierResult result = lof.run(relation);
@@ -159,28 +159,34 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
}
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
if(breadth) {
FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null;
- HashMap<IterableIterator<DBID>, Relation<Double>> IDVectorOntoScoreVector = new HashMap<IterableIterator<DBID>, Relation<Double>>();
+ Pair<IterableIterator<DBID>, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
- for(OutlierResult r : results) {
- IDVectorOntoScoreVector.put(r.getOrdering().iter(relation.getDBIDs()), r.getScores());
+ {
+ int i = 0;
+ for(OutlierResult r : results) {
+ IDVectorOntoScoreVector[i] = new Pair<IterableIterator<DBID>, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()), r.getScores());
+ i++;
+ }
}
// Iterating over the *lines* of the AS_t(i)-matrix.
for(int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
- for(IterableIterator<DBID> iter : IDVectorOntoScoreVector.keySet()) {
- if(iter.hasNext()) { // Always true if every algorithm returns a
- // complete result (one score for every DBID).
+ for(Pair<IterableIterator<DBID>, Relation<Double>> pair : IDVectorOntoScoreVector) {
+ IterableIterator<DBID> iter = pair.first;
+ // Always true if every algorithm returns a complete result (one score
+ // for every DBID).
+ if(iter.hasNext()) {
DBID tmpID = iter.next();
- double score = IDVectorOntoScoreVector.get(iter).get(tmpID);
- if(scores.get(tmpID) == null) {
- scores.put(tmpID, score);
+ double score = pair.second.get(tmpID);
+ if(Double.isNaN(scores.doubleValue(tmpID))) {
+ scores.putDouble(tmpID, score);
minmax.put(score);
}
}
@@ -202,9 +208,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
for(DBID id : relation.iterDBIDs()) {
double sum = 0.0;
for(OutlierResult r : results) {
- sum += r.getScores().get(id);
+ final Double s = r.getScores().get(id);
+ if (s != null && !Double.isNaN(s)) {
+ sum += s;
+ }
}
- scores.put(id, sum);
+ scores.putDouble(id, sum);
minmax.put(sum);
if(cprog != null) {
cprog.incrementProcessed(logger);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
index 18f62549..9634cd59 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,12 +33,12 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
@@ -102,13 +102,13 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
((OutlierScalingFunction) scaling).prepare(or);
}
- WritableDataStore<Double> scaledscores = DataStoreUtil.makeStorage(scores.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scaledscores = DataStoreUtil.makeDoubleStorage(scores.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
for(DBID id : scores.iterDBIDs()) {
double val = scores.get(id);
val = scaling.getScaled(val);
- scaledscores.put(id, val);
+ scaledscores.putDouble(id, val);
minmax.put(val);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
index 6565e144..d7e78281 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
index 372383f8..ea5d3ec4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
@@ -9,7 +9,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
index 79fae09d..1caf7582 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
index 4e1e0fa1..f0c05e1e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
index e8a7415b..b4070e0c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,9 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Collections;
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -34,13 +31,14 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -48,8 +46,8 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -123,7 +121,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
* @return Algorithm result
*/
public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) {
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax mm = new DoubleMinMax(0.0, 0.0);
// Outlier detection loop
@@ -131,14 +129,14 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs());
ProxyView<V> proxy = new ProxyView<V>(relationx.getDatabase(), idview, relationx);
- double phialpha = MathUtil.standardNormalProbit(1.0 - alpha / 2);
+ double phialpha = NormalDistribution.standardNormalProbit(1.0 - alpha / 2);
// Detect outliers while significant.
while(true) {
Pair<DBID, Double> candidate = singleIteration(proxy, relationy);
if(candidate.second < phialpha) {
break;
}
- scores.put(candidate.first, candidate.second);
+ scores.putDouble(candidate.first, candidate.second);
if (!Double.isNaN(candidate.second)) {
mm.put(candidate.second);
}
@@ -147,7 +145,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
// Remaining objects are inliers
for(DBID id : idview) {
- scores.put(id, 0.0);
+ scores.putDouble(id, 0.0);
}
}
@@ -170,9 +168,9 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
// We need stable indexed DBIDs
- ArrayDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
+ ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
// Sort, so we can do a binary search below.
- Collections.sort(ids);
+ ids.sort();
// init F,X,Z
Matrix X = new Matrix(ids.size(), 6);
@@ -203,7 +201,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
// Fill the neighborhood matrix F:
{
- List<DistanceResultPair<D>> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
for(DistanceResultPair<D> dpair : neighbors) {
if(id.equals(dpair.getDBID())) {
@@ -216,7 +214,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
final int nweight = -1 / neighborhood.size();
// We need to find the index positions of the neighbors, unfortunately.
for(DBID nid : neighborhood) {
- int pos = Collections.binarySearch(ids, nid);
+ int pos = ids.binarySearch(nid);
assert (pos >= 0);
F.set(pos, i, nweight);
}
@@ -239,7 +237,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
double worstscore = Double.NEGATIVE_INFINITY;
for(int i = 0; i < ids.size(); i++) {
DBID id = ids.get(i);
- double err = E.getRowVector(i).euclideanLength();
+ double err = E.getRow(i).euclideanLength();
// double err = Math.abs(E.get(i, 0));
if(err > worstscore) {
worstscore = err;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
index 516c2ade..68e58ffa 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -60,7 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* <p>
* Implementation note: attribute standardization is not used; this is
* equivalent to using the
- * {@link de.lmu.ifi.dbs.elki.datasource.filter.AttributeWiseVarianceNormalization
+ * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.AttributeWiseVarianceNormalization
* AttributeWiseVarianceNormalization} filter.
* </p>
*
@@ -115,14 +116,12 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
Matrix cmati = covmaker.destroyToSampleMatrix().inverse();
DoubleMinMax minmax = new DoubleMinMax();
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBID id : attributes.iterDBIDs()) {
Vector temp = deltas.get(id).minus(mean);
- final Vector res = temp.transposeTimes(cmati).times(temp);
- assert (res.getDimensionality() == 1);
- double score = res.get(0);
+ final double score = temp.transposeTimesTimes(cmati, temp);
minmax.put(score);
- scores.put(id, score);
+ scores.putDouble(id, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
index 7f397790..9b4534fe 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,7 +29,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -37,10 +37,10 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
-import de.lmu.ifi.dbs.elki.math.statistics.QuickSelect;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -91,7 +91,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
*/
public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
for(DBID id : relation.iterDBIDs()) {
@@ -110,15 +110,14 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
}
if(c > 0) {
- // Note: only use up to c-1, since we may have used a too big array
- median = QuickSelect.median(fi, 0, c - 1);
+ median = QuickSelect.median(fi, 0, c);
}
else {
median = relation.get(id).doubleValue(1);
}
}
double h = relation.get(id).doubleValue(1) - median;
- scores.put(id, h);
+ scores.putDouble(id, h);
mv.put(h);
}
@@ -127,9 +126,9 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
final double stddev = mv.getNaiveStddev();
DoubleMinMax minmax = new DoubleMinMax();
for(DBID id : relation.iterDBIDs()) {
- double score = Math.abs((scores.get(id) - mean) / stddev);
+ double score = Math.abs((scores.doubleValue(id) - mean) / stddev);
minmax.put(score);
- scores.put(id, score);
+ scores.putDouble(id, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
index 649511eb..cbf61c38 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -39,11 +40,11 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.math.statistics.QuickSelect;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
@@ -61,7 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* <p>
* Implementation note: attribute standardization is not used; this is
* equivalent to using the
- * {@link de.lmu.ifi.dbs.elki.datasource.filter.AttributeWiseVarianceNormalization
+ * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.AttributeWiseVarianceNormalization
* AttributeWiseVarianceNormalization} filter.
* </p>
*
@@ -141,14 +142,12 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
Matrix cmati = covmaker.destroyToSampleMatrix().inverse();
DoubleMinMax minmax = new DoubleMinMax();
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBID id : attributes.iterDBIDs()) {
Vector temp = deltas.get(id).minus(mean);
- final Vector res = temp.transposeTimes(cmati).times(temp);
- assert (res.getDimensionality() == 1);
- double score = res.get(0);
+ final double score = temp.transposeTimesTimes(cmati, temp);
minmax.put(score);
- scores.put(id, score);
+ scores.putDouble(id, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
index 6780fcc9..9f19757d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -103,7 +103,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
}
DoubleMinMax minmax = new DoubleMinMax();
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// calculate normalized attribute values
// calculate neighborhood average of normalized attribute values.
@@ -132,7 +132,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
// Note: in the original moran scatterplot, any object with a score < 0 would be an outlier.
final double score = Math.max(-globalZ * localZ, 0);
minmax.put(score);
- scores.put(id, score);
+ scores.putDouble(id, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
index 0fe65fee..a6425d43 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -196,13 +197,13 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
for(int i = 0; i < ids.size(); i++) {
DBID id = ids.get(i);
// Note: matrix times ith unit vector = ith column
- Vector sim = E.getColumnVector(i);
+ Vector sim = E.getCol(i);
similarityVectors.put(id, sim);
}
E = null;
// compute the relevance scores between specified Object and its neighbors
DoubleMinMax minmax = new DoubleMinMax();
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(int i = 0; i < ids.size(); i++) {
DBID id = ids.get(i);
double gmean = 1.0;
@@ -211,13 +212,13 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
if(id.equals(n)) {
continue;
}
- double sim = MathUtil.cosineSimilarity(similarityVectors.get(id), similarityVectors.get(n));
+ double sim = MathUtil.angle(similarityVectors.get(id), similarityVectors.get(n));
gmean *= sim;
cnt++;
}
final double score = Math.pow(gmean, 1.0 / cnt);
minmax.put(score);
- scores.put(id, score);
+ scores.putDouble(id, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("randomwalkec", "RandomWalkEC", TypeUtil.DOUBLE, scores, relation.getDBIDs());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
index d18e4130..8e4ab32c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -97,7 +97,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
*/
public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
- WritableDataStore<Double> means = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP, Double.class);
+ WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
// Calculate average of neighborhood for each object and perform a linear
// regression using the covariance matrix
@@ -122,7 +122,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
m = local;
}
// Store the mean for the score calculation
- means.put(id, m);
+ means.putDouble(id, m);
covm.put(new double[] { local, m });
}
// Finalize covariance matrix, compute linear regression
@@ -137,13 +137,13 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
}
// calculate mean and variance for error
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
for(DBID id : relation.iterDBIDs()) {
// Compute the error from the linear regression
double y_i = relation.get(id).doubleValue(1);
- double e = means.get(id) - (slope * y_i + inter);
- scores.put(id, e);
+ double e = means.doubleValue(id) - (slope * y_i + inter);
+ scores.putDouble(id, e);
mv.put(e);
}
@@ -153,9 +153,9 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
final double mean = mv.getMean();
final double variance = mv.getNaiveStddev();
for(DBID id : relation.iterDBIDs()) {
- double score = Math.abs((scores.get(id) - mean) / variance);
+ double score = Math.abs((scores.doubleValue(id) - mean) / variance);
minmax.put(score);
- scores.put(id, score);
+ scores.putDouble(id, score);
}
}
// build representation
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
index 304203db..573e1526 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -99,7 +99,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance zmv = new MeanVariance();
for(DBID id : relation.iterDBIDs()) {
@@ -121,16 +121,16 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
else {
localdiff = 0.0;
}
- scores.put(id, localdiff);
+ scores.putDouble(id, localdiff);
zmv.put(localdiff);
}
// Normalize scores using mean and variance
DoubleMinMax minmax = new DoubleMinMax();
for(DBID id : relation.iterDBIDs()) {
- double score = Math.abs(scores.get(id) - zmv.getMean()) / zmv.getSampleStddev();
+ double score = Math.abs(scores.doubleValue(id) - zmv.getMean()) / zmv.getSampleStddev();
minmax.put(score);
- scores.put(id, score);
+ scores.putDouble(id, score);
}
// Wrap result
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
index af8762e0..e69d46d4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,16 +29,16 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -96,7 +96,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
DistanceQuery<O, D> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
- WritableDataStore<Double> modifiedDistance = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
+ WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// calculate D-Tilde
for(DBID id : relation.iterDBIDs()) {
double sum = 0;
@@ -114,18 +114,18 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
maxDist = Math.max(maxDist, dist);
}
if(cnt > 1) {
- modifiedDistance.put(id, ((sum - maxDist) / (cnt - 1)));
+ modifiedDistance.putDouble(id, ((sum - maxDist) / (cnt - 1)));
}
else {
// Use regular distance when the d-tilde trick is undefined.
// Note: this can be 0 when there were no neighbors.
- modifiedDistance.put(id, maxDist);
+ modifiedDistance.putDouble(id, maxDist);
}
}
// Second step - compute actual SLOM values
DoubleMinMax slomminmax = new DoubleMinMax();
- WritableDataStore<Double> sloms = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore sloms = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBID id : relation.iterDBIDs()) {
double sum = 0;
@@ -136,18 +136,18 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
if(neighbor.equals(id)) {
continue;
}
- sum += modifiedDistance.get(neighbor);
+ sum += modifiedDistance.doubleValue(neighbor);
cnt++;
}
double slom;
if(cnt > 0) {
// With and without the object itself:
- double avgPlus = (sum + modifiedDistance.get(id)) / (cnt + 1);
+ double avgPlus = (sum + modifiedDistance.doubleValue(id)) / (cnt + 1);
double avg = sum / cnt;
double beta = 0;
for(DBID neighbor : neighbors) {
- final double dist = modifiedDistance.get(neighbor).doubleValue();
+ final double dist = modifiedDistance.doubleValue(neighbor);
if(dist > avgPlus) {
beta += 1;
}
@@ -157,7 +157,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
}
// Include object itself
if(!neighbors.contains(id)) {
- final double dist = modifiedDistance.get(id).doubleValue();
+ final double dist = modifiedDistance.doubleValue(id);
if(dist > avgPlus) {
beta += 1;
}
@@ -176,13 +176,13 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
}
beta = beta / (1 + avg);
- slom = beta * modifiedDistance.get(id);
+ slom = beta * modifiedDistance.doubleValue(id);
}
else {
// No neighbors to compare to - no score.
slom = 0.0;
}
- sloms.put(id, slom);
+ sloms.putDouble(id, slom);
slomminmax.put(slom);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
index e3ede8bb..abc3c481 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,16 +28,16 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -103,8 +103,8 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
DistanceQuery<O, D> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
- WritableDataStore<Double> lrds = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.class);
- WritableDataStore<Double> lofs = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax lofminmax = new DoubleMinMax();
// Compute densities
@@ -118,7 +118,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
if (Double.isNaN(lrd)) {
lrd = 0;
}
- lrds.put(id, lrd);
+ lrds.putDouble(id, lrd);
}
// Compute density quotients
@@ -126,14 +126,14 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
DBIDs neighbors = npred.getNeighborDBIDs(id);
double avg = 0;
for(DBID n : neighbors) {
- avg += lrds.get(n);
+ avg += lrds.doubleValue(n);
}
- final double lrd = (avg / neighbors.size()) / lrds.get(id);
+ final double lrd = (avg / neighbors.size()) / lrds.doubleValue(id);
if (!Double.isNaN(lrd)) {
- lofs.put(id, lrd);
+ lofs.putDouble(id, lrd);
lofminmax.put(lrd);
} else {
- lofs.put(id, 0.0);
+ lofs.putDouble(id, 0.0);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
index d99ecc99..75700bca 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -41,11 +41,11 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.Mean;
-import de.lmu.ifi.dbs.elki.math.statistics.QuickSelect;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -112,8 +112,8 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
assert (DatabaseUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
- WritableDataStore<Double> errors = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP, Double.class);
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), logger) : null;
for(DBID id : relation.iterDBIDs()) {
@@ -142,7 +142,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
tm = relation.get(id).doubleValue(1);
}
// Error: deviation from trimmed mean
- errors.put(id, relation.get(id).doubleValue(1) - tm);
+ errors.putDouble(id, relation.get(id).doubleValue(1) - tm);
if(progress != null) {
progress.incrementProcessed(logger);
@@ -162,7 +162,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
{
int i = 0;
for(DBID id : relation.iterDBIDs()) {
- ei[i] = errors.get(id);
+ ei[i] = errors.doubleValue(id);
i++;
}
}
@@ -181,8 +181,8 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
// calculate score
DoubleMinMax minmax = new DoubleMinMax();
for(DBID id : relation.iterDBIDs()) {
- double score = Math.abs(errors.get(id)) * 0.6745 / median_dev_from_median;
- scores.put(id, score);
+ double score = Math.abs(errors.doubleValue(id)) * 0.6745 / median_dev_from_median;
+ scores.putDouble(id, score);
minmax.put(score);
}
//
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
index df28de24..5898b053 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
index 640c30fe..9ee92d35 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -132,15 +133,15 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
// Expand multiple steps
FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Expanding neighborhoods", database.size(), logger) : null;
for(final DBID id : database.iterDBIDs()) {
- ModifiableDBIDs res = DBIDUtil.newHashSet(id);
+ HashSetModifiableDBIDs res = DBIDUtil.newHashSet(id);
DBIDs todo = id;
for(int i = 0; i < steps; i++) {
ModifiableDBIDs ntodo = DBIDUtil.newHashSet();
for(final DBID oid : todo) {
DBIDs add = innerinst.getNeighborDBIDs(oid);
if(add != null) {
- for (DBID nid: add) {
- if (res.contains(add)) {
+ for(DBID nid : add) {
+ if(res.contains(nid)) {
continue;
}
ntodo.add(nid);
@@ -148,7 +149,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
}
}
}
- if (ntodo.size() == 0) {
+ if(ntodo.size() == 0) {
continue;
}
todo = ntodo;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
index 789a63e7..f2586e2e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
index 6683b3fd..3a6d0e28 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
index fa878778..f5ea7e15 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -22,8 +22,6 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
@@ -36,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -121,7 +120,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
// TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
for(DBID id : relation.iterDBIDs()) {
- List<DistanceResultPair<D>> neighbors = knnQuery.getKNNForDBID(id, k);
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
for(DistanceResultPair<D> dpair : neighbors) {
neighbours.add(dpair.getDBID());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
index eb490642..47ca5ad2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
index b2f70e16..52fc2c46 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
index 0927c026..4378aa2e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
index d0cdfa9c..b147935a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
index ff82dbee..39165cfd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
index df50e592..13bf3f25 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
index 62f083fb..86730404 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -111,7 +111,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
* @return Result
*/
public OutlierResult run(Relation<?> relation) {
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
for(DBID id : relation.iterDBIDs()) {
String label = relation.get(id).toString();
final double score;
@@ -120,7 +120,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
} else {
score = 0.0;
}
- scores.put(id, score);
+ scores.putDouble(id, score);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
index ff93e0ed..509e35e9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,7 +29,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -69,9 +69,9 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen
* @return Result
*/
public OutlierResult run(Relation<?> relation) {
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
for(DBID id : relation.iterDBIDs()) {
- scores.put(id, 1.0);
+ scores.putDouble(id, 1.0);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
new file mode 100644
index 00000000..db40ff30
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
@@ -0,0 +1,212 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.HashSet;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster;
+import de.lmu.ifi.dbs.elki.data.type.NoSupportedDataTypeException;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Extract outlier score from the model the objects were generated by.
+ *
+ * This algorithm can only be applied to data that was freshly generated, so the
+ * generator model information is still available.
+ *
+ * @author Erich Schubert
+ */
+public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * Class logger
+ */
+ private static final Logging logger = Logging.getLogger(TrivialGeneratedOutlier.class);
+
+ /**
+ * Expected share of outliers
+ */
+ public static final OptionID EXPECT_ID = OptionID.getOrCreateOptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive.");
+
+ /**
+ * Expected share of outliers.
+ */
+ double expect = 0.01;
+
+ /**
+ * Constructor.
+ *
+ * @param expect Expected share of outliers
+ */
+ public TrivialGeneratedOutlier(double expect) {
+ super();
+ this.expect = expect;
+ }
+
+ /**
+ * Constructor.
+ */
+ public TrivialGeneratedOutlier() {
+ this(0.01);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD, new SimpleTypeInformation<Model>(Model.class), TypeUtil.GUESSED_LABEL);
+ }
+
+ @Override
+ public OutlierResult run(Database database) throws IllegalStateException {
+ Relation<NumberVector<?, ?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
+ Relation<Model> models = database.getRelation(new SimpleTypeInformation<Model>(Model.class));
+ // Prefer a true class label
+ try {
+ Relation<?> relation = database.getRelation(TypeUtil.CLASSLABEL);
+ return run(models, vecs, relation);
+ }
+ catch(NoSupportedDataTypeException e) {
+ // Otherwise, fall back to any label-like relation.
+ return run(models, vecs, database.getRelation(TypeUtil.GUESSED_LABEL));
+ }
+ }
+
+ /**
+ * Run the algorithm
+ *
+ * @param models Model relation
+ * @param vecs Vector relation
+ * @param labels Label relation
+ * @return Outlier result
+ */
+ public OutlierResult run(Relation<Model> models, Relation<NumberVector<?, ?>> vecs, Relation<?> labels) {
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
+
+ // Adjustment constant
+ final double minscore = expect / (expect + 1);
+
+ HashSet<GeneratorSingleCluster> generators = new HashSet<GeneratorSingleCluster>();
+ for(DBID id : models.iterDBIDs()) {
+ Model model = models.get(id);
+ if(model instanceof GeneratorSingleCluster) {
+ generators.add((GeneratorSingleCluster) model);
+ }
+ }
+ if(generators.size() == 0) {
+ logger.warning("No generator models found for dataset - all points will be considered outliers.");
+ }
+
+ for(DBID id : models.iterDBIDs()) {
+ double score = 0.0;
+ // Convert to a math vector
+ Vector v = vecs.get(id).getColumnVector();
+ for(GeneratorSingleCluster gen : generators) {
+ Vector tv = v;
+ // Transform backwards
+ if(gen.getTransformation() != null) {
+ tv = gen.getTransformation().applyInverse(v);
+ }
+ final int dim = tv.getDimensionality();
+ double lensq = 0.0;
+ int norm = 0;
+ for(int i = 0; i < dim; i++) {
+ Distribution dist = gen.getDistribution(i);
+ if(dist instanceof NormalDistribution) {
+ NormalDistribution d = (NormalDistribution) dist;
+ double delta = (tv.get(i) - d.getMean()) / d.getStddev();
+ lensq += delta * delta;
+ norm += 1;
+ }
+ }
+ if(norm > 0) {
+ // The squared distances are ChiSquared distributed
+ score = Math.max(score, 1 - ChiSquaredDistribution.cdf(lensq, norm));
+ }
+ }
+ // score inversion.
+ score = expect / (expect + score);
+ // adjust to 0 to 1 range:
+ score = (score - minscore) / (1 - minscore);
+ scores.putDouble(id, score);
+ }
+ Relation<Double> scoreres = new MaterializedRelation<Double>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs());
+ OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
+ return new OutlierResult(meta, scoreres);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Expected share of outliers
+ */
+ double expect;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.01);
+ if(config.grab(expectP)) {
+ expect = expectP.getValue();
+ }
+ }
+
+ @Override
+ protected TrivialGeneratedOutlier makeInstance() {
+ return new TrivialGeneratedOutlier(expect);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
index f3ae7e72..cff2ad2c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,7 +29,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -69,9 +69,9 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement
* @return Result
*/
public OutlierResult run(Relation<?> relation) throws IllegalStateException {
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
for(DBID id : relation.iterDBIDs()) {
- scores.put(id, 0.0);
+ scores.putDouble(id, 0.0);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
index fbae60dc..d49d3565 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
index 0dfbc8c1..c18579f0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
@@ -10,7 +10,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
new file mode 100644
index 00000000..1c74621b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
@@ -0,0 +1,235 @@
+package de.lmu.ifi.dbs.elki.algorithm.statistics;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.CollectionResult;
+import de.lmu.ifi.dbs.elki.result.HistogramResult;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+
+/**
+ * Evaluate a distance function's performance by computing the average precision
+ * at k, when ranking the objects by distance.
+ *
+ * @author Erich Schubert
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, CollectionResult<DoubleVector>> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging logger = Logging.getLogger(AveragePrecisionAtK.class);
+
+ /**
+ * The parameter k - the number of neighbors to retrieve.
+ */
+ private int k;
+
+ /**
+ * Relative number of objects to use in sampling.
+ */
+ private double sampling = 1.0;
+
+ /**
+ * Random sampling seed.
+ */
+ private Long seed = null;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param sampling Sampling rate
+ * @param seed Random sampling seed (may be null)
+ */
+ public AveragePrecisionAtK(DistanceFunction<? super V, D> distanceFunction, int k, double sampling, Long seed) {
+ super(distanceFunction);
+ this.k = k;
+ this.sampling = sampling;
+ this.seed = seed;
+ }
+
+ @Override
+ public HistogramResult<DoubleVector> run(Database database) throws IllegalStateException {
+ final Relation<V> relation = database.getRelation(getInputTypeRestriction()[0]);
+ final Relation<Object> lrelation = database.getRelation(getInputTypeRestriction()[1]);
+ final DistanceQuery<V, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ final KNNQuery<V, D> knnQuery = database.getKNNQuery(distQuery, k);
+
+ MeanVariance[] mvs = MeanVariance.newArray(k);
+
+ final DBIDs ids;
+ if (sampling < 1.0) {
+ int size = Math.max(1, (int) (sampling * relation.size()));
+ ids = DBIDUtil.randomSample(relation.getDBIDs(), size, seed);
+ } else {
+ ids = relation.getDBIDs();
+ }
+
+ if(logger.isVerbose()) {
+ logger.verbose("Processing points...");
+ }
+ FiniteProgress objloop = logger.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), logger) : null;
+ // for each object, record the precision at every rank of its kNN result
+ for(DBID id : ids) {
+ KNNResult<D> knn = knnQuery.getKNNForDBID(id, k);
+ Object label = lrelation.get(id);
+
+ int positive = 0;
+ Iterator<DistanceResultPair<D>> ri = knn.iterator();
+ for(int i = 0; i < k && ri.hasNext(); i++) {
+ DBID nid = ri.next().getDBID();
+ Object olabel = lrelation.get(nid);
+ if(label == null) {
+ if(olabel == null) {
+ positive += 1;
+ }
+ }
+ else {
+ if(label.equals(olabel)) {
+ positive += 1;
+ }
+ }
+ final double precision = positive / (double) (i + 1);
+ mvs[i].put(precision);
+ }
+ if(objloop != null) {
+ objloop.incrementProcessed(logger);
+ }
+ }
+ if(objloop != null) {
+ objloop.ensureCompleted(logger);
+ }
+ // Collections.sort(results);
+
+ // Transform the per-rank statistics into DoubleVector rows (mean, sample stddev).
+ Collection<DoubleVector> res = new ArrayList<DoubleVector>(k);
+ for(int i = 0; i < k; i++) {
+ DoubleVector row = new DoubleVector(new double[] { mvs[i].getMean(), mvs[i].getSampleStddev() });
+ res.add(row);
+ }
+ return new HistogramResult<DoubleVector>("Average Precision", "average-precision", res);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), TypeUtil.GUESSED_LABEL);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * Parameter k to compute the average precision at.
+ */
+ private static final OptionID K_ID = OptionID.getOrCreateOptionID("avep.k", "K to compute the average precision at.");
+
+ /**
+ * Parameter to enable sampling
+ */
+ public static final OptionID SAMPLING_ID = OptionID.getOrCreateOptionID("avep.sampling", "Relative amount of object to sample.");
+
+ /**
+ * Parameter to control the sampling random seed
+ */
+ public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("avep.sampling-seed", "Random seed for deterministic sampling.");
+
+ /**
+ * Neighborhood size
+ */
+ protected int k = 20;
+
+ /**
+ * Relative amount of data to sample
+ */
+ protected double sampling = 1.0;
+
+ /**
+ * Random sampling seed.
+ */
+ protected Long seed = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter kP = new IntParameter(K_ID, new GreaterEqualConstraint(2));
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ final DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 1.0, IntervalBoundary.CLOSE), true);
+ if (config.grab(samplingP)) {
+ sampling = samplingP.getValue();
+ }
+ final LongParameter seedP = new LongParameter(SEED_ID, true);
+ if (config.grab(seedP)) {
+ seed = seedP.getValue();
+ }
+ }
+
+ @Override
+ protected AveragePrecisionAtK<V, D> makeInstance() {
+ return new AveragePrecisionAtK<V, D>(distanceFunction, k, sampling, seed);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
index 33201b67..78bbf5f4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -49,10 +49,10 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
-import de.lmu.ifi.dbs.elki.math.AggregatingHistogram;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.math.FlexiHistogram;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.histograms.AggregatingHistogram;
+import de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HistogramResult;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -65,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@@ -249,7 +250,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
// count the number of samples we have in the data
long inum = 0;
long onum = 0;
- for(Pair<Double, Pair<Long, Long>> ppair : histogram) {
+ for(DoubleObjPair<Pair<Long, Long>> ppair : histogram) {
inum += ppair.getSecond().getFirst();
onum += ppair.getSecond().getSecond();
}
@@ -258,12 +259,12 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
assert (bnum == relation.size() * (relation.size() - 1));
Collection<DoubleVector> binstat = new ArrayList<DoubleVector>(numbin);
- for(Pair<Double, Pair<Long, Long>> ppair : histogram) {
+ for(DoubleObjPair<Pair<Long, Long>> ppair : histogram) {
final double icof = (inum == 0) ? 0 : ((double) ppair.getSecond().getFirst()) / inum / histogram.getBinsize();
final double icaf = ((double) ppair.getSecond().getFirst()) / bnum / histogram.getBinsize();
final double ocof = (onum == 0) ? 0 : ((double) ppair.getSecond().getSecond()) / onum / histogram.getBinsize();
final double ocaf = ((double) ppair.getSecond().getSecond()) / bnum / histogram.getBinsize();
- DoubleVector row = new DoubleVector(new double[] { ppair.getFirst(), icof, icaf, ocof, ocaf });
+ DoubleVector row = new DoubleVector(new double[] { ppair.first, icof, icaf, ocof, ocaf });
binstat.add(row);
}
HistogramResult<DoubleVector> result = new HistogramResult<DoubleVector>("Distance Histogram", "distance-histogram", binstat);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
index 58fb5b89..c1eb118d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,7 +27,6 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
-import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
@@ -40,18 +39,18 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.math.AggregatingHistogram;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.histograms.AggregatingHistogram;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
@@ -63,8 +62,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* Evaluate a distance function with respect to kNN queries. For each point, the
@@ -159,7 +158,7 @@ public class EvaluateRankingQuality<V extends NumberVector<V, ?>, D extends Numb
for(int ind = 0; ind < cmem.size(); ind++) {
DBID i1 = cmem.get(ind).getSecond();
- List<DistanceResultPair<D>> knn = knnQuery.getKNNForDBID(i1, relation.size());
+ KNNResult<D> knn = knnQuery.getKNNForDBID(i1, relation.size());
double result = ROC.computeROCAUCDistanceResult(relation.size(), clus, knn);
hist.aggregate(((double) ind) / clus.size(), result);
@@ -176,8 +175,8 @@ public class EvaluateRankingQuality<V extends NumberVector<V, ?>, D extends Numb
// Transform Histogram into a Double Vector array.
Collection<DoubleVector> res = new ArrayList<DoubleVector>(relation.size());
- for(Pair<Double, MeanVariance> pair : hist) {
- DoubleVector row = new DoubleVector(new double[] { pair.getFirst(), pair.getSecond().getCount(), pair.getSecond().getMean(), pair.getSecond().getSampleVariance() });
+ for(DoubleObjPair<MeanVariance> pair : hist) {
+ DoubleVector row = new DoubleVector(new double[] { pair.first, pair.getSecond().getCount(), pair.getSecond().getMean(), pair.getSecond().getSampleVariance() });
res.add(row);
}
return new HistogramResult<DoubleVector>("Ranking Quality Histogram", "ranking-histogram", res);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
index 07e6795a..6d64dc55 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
@@ -36,17 +35,17 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.math.AggregatingHistogram;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.histograms.AggregatingHistogram;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HistogramResult;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -55,7 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Evaluate a distance function with respect to kNN queries. For each point, the
@@ -121,7 +120,7 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
// sort neighbors
for(Cluster<?> clus : split) {
for(DBID i1 : clus.getIDs()) {
- List<DistanceResultPair<D>> knn = knnQuery.getKNNForDBID(i1, relation.size());
+ KNNResult<D> knn = knnQuery.getKNNForDBID(i1, relation.size());
double result = ROC.computeROCAUCDistanceResult(relation.size(), clus, knn);
mv.put(result);
@@ -138,8 +137,8 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
// Transform Histogram into a Double Vector array.
Collection<DoubleVector> res = new ArrayList<DoubleVector>(relation.size());
- for(Pair<Double, Double> pair : hist) {
- DoubleVector row = new DoubleVector(new double[] { pair.getFirst(), pair.getSecond() });
+ for(DoubleObjPair<Double> pair : hist) {
+ DoubleVector row = new DoubleVector(new double[] { pair.first, pair.getSecond() });
res.add(row);
}
HistogramResult<DoubleVector> result = new HistogramResult<DoubleVector>("Ranking Quality Histogram", "ranking-histogram", res);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java
index 3928f01a..e706d586 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java
@@ -8,7 +8,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team