diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java | 239 |
1 files changed, 204 insertions, 35 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java index 5235273c..c6b18812 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,41 +23,53 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedDBSCAN; -import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN; +import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.FourCCorePredicate; +import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.FourCNeighborPredicate; +import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN; import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.data.model.Model; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.distance.distancefunction.LocallyWeightedDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.subspaceproj.FourCSubspaceIndex; import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.LimitEigenPairFilter; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; /** * 4C identifies local subgroups of data objects sharing a uniform correlation. * The algorithm is based on a combination of PCA and density-based clustering * (DBSCAN). + * + * Reference: * <p> - * Reference: Christian Böhm, Karin Kailing, Peer Kröger, Arthur Zimek: + * C. Böhm, K. Kailing, P. Kröger, A. Zimek:<br /> * Computing Clusters of Correlation Connected Objects. <br> * In Proc. ACM SIGMOD Int. Conf. on Management of Data, Paris, France, 2004. * </p> * * @author Arthur Zimek * - * @apiviz.uses FourCSubspaceIndex + * @apiviz.composedOf FourCNeighborPredicate + * @apiviz.composedOf FourCCorePredicate * * @param <V> type of NumberVector handled by this Algorithm */ @Title("4C: Computing Correlation Connected Clusters") -@Description("4C identifies local subgroups of data objects sharing a uniform correlation. " + "The algorithm is based on a combination of PCA and density-based clustering (DBSCAN).") -@Reference(authors = "C. Böhm, K. Kailing, P. Kröger, A. Zimek", title = "Computing Clusters of Correlation Connected Objects", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data, Paris, France, 2004, 455-466", url = "http://dx.doi.org/10.1145/1007568.1007620") -public class FourC<V extends NumberVector<?>> extends AbstractProjectedDBSCAN<Clustering<Model>, V> { +@Description("4C identifies local subgroups of data objects sharing a uniform correlation. " // + + "The algorithm is based on a combination of PCA and density-based clustering (DBSCAN).") +@Reference(authors = "C. Böhm, K. Kailing, P. Kröger, A. Zimek", // +title = "Computing Clusters of Correlation Connected Objects", // +booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data, Paris, France, 2004, 455-466", // +url = "http://dx.doi.org/10.1145/1007568.1007620") +public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * The logger for this class. */ @@ -66,23 +78,10 @@ public class FourC<V extends NumberVector<?>> extends AbstractProjectedDBSCAN<Cl /** * Constructor. * - * @param epsilon Epsilon value - * @param minpts MinPts value - * @param distanceFunction Distance function - * @param lambda Lambda value + * @param settings FourC settings. */ - public FourC(DoubleDistance epsilon, int minpts, LocallyWeightedDistanceFunction<V> distanceFunction, int lambda) { - super(epsilon, minpts, distanceFunction, lambda); - } - - @Override - public String getLongResultName() { - return "4C Clustering"; - } - - @Override - public String getShortResultName() { - return "4c-clustering"; + public FourC(FourC.Settings settings) { + super(new FourCNeighborPredicate<V>(settings), new FourCCorePredicate(settings), false); } @Override @@ -96,26 +95,196 @@ public class FourC<V extends NumberVector<?>> extends AbstractProjectedDBSCAN<Cl } /** + * Class wrapping the 4C parameter settings. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Settings { + /** + * Query radius epsilon. + */ + public double epsilon; + + /** + * Use absolute variance, not relative variance. + */ + public boolean absolute = false; + + /** + * Delta parameter, for selecting strong Eigenvectors. + */ + public double delta = 0.0; + + /** + * Kappa penalty parameter, to punish deviation in low-variance + * Eigenvectors. + */ + public double kappa = 50.; + + /** + * Maximum subspace dimensionality lambda. + */ + public int lambda = Integer.MAX_VALUE; + + /** + * MinPts / mu parameter. + */ + public int minpts; + + /** + * Parameterization class for 4C settings. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * The default value for delta. + */ + public static final double DEFAULT_DELTA = .1; + + /** + * Parameter Kappa: penalty for deviations in preferred dimensions. + */ + public static final OptionID KAPPA_ID = new OptionID("predecon.kappa", "Penalty factor for deviations in preferred (low-variance) dimensions."); + + /** + * Default for kappa parameter. + */ + public static final double KAPPA_DEFAULT = 20.; + + /** + * Parameter Lambda: maximum dimensionality allowed. + */ + public static final OptionID LAMBDA_ID = new OptionID("predecon.lambda", "Maximum dimensionality to consider for core points."); + + /** + * Settings storage. + */ + Settings settings; + + @Override + protected void makeOptions(Parameterization config) { + settings = new Settings(); + configEpsilon(config); + configMinPts(config); + configDelta(config); + configKappa(config); + configLambda(config); + } + + /** + * Configure the epsilon radius parameter. + * + * @param config Parameter source + */ + protected void configEpsilon(Parameterization config) { + DoubleParameter epsilonP = new DoubleParameter(DBSCAN.Parameterizer.EPSILON_ID) // + .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE); + if(config.grab(epsilonP)) { + settings.epsilon = epsilonP.doubleValue(); + } + } + + /** + * Configure the minPts aka "mu" parameter. + * + * @param config Parameter source + */ + protected void configMinPts(Parameterization config) { + IntParameter minptsP = new IntParameter(DBSCAN.Parameterizer.MINPTS_ID) // + .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); + if(config.grab(minptsP)) { + settings.minpts = minptsP.intValue(); + } + } + + /** + * Configure the delta parameter. + * + * @param config Parameter source + */ + protected void configDelta(Parameterization config) { + // Flag for using absolute variances + Flag absoluteF = new Flag(LimitEigenPairFilter.EIGENPAIR_FILTER_ABSOLUTE); + if(config.grab(absoluteF)) { + settings.absolute = absoluteF.isTrue(); + } + + // Parameter delta + DoubleParameter deltaP = new DoubleParameter(LimitEigenPairFilter.EIGENPAIR_FILTER_DELTA) // + .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE); + if(!settings.absolute) { + deltaP.setDefaultValue(DEFAULT_DELTA); + } + else { + deltaP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE); + } + if(config.grab(deltaP)) { + settings.delta = deltaP.doubleValue(); + } + } + + /** + * Configure the kappa parameter. + * + * @param config Parameter source + */ + protected void configKappa(Parameterization config) { + DoubleParameter kappaP = new DoubleParameter(KAPPA_ID) // + .addConstraint(CommonConstraints.GREATER_THAN_ONE_DOUBLE) // + .setDefaultValue(KAPPA_DEFAULT); + if(config.grab(kappaP)) { + settings.kappa = kappaP.doubleValue(); + } + } + + /** + * Configure the delta parameter. + * + * @param config Parameter source + */ + protected void configLambda(Parameterization config) { + IntParameter lambdaP = new IntParameter(LAMBDA_ID) // + .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT) // + .setOptional(true); + if(config.grab(lambdaP)) { + settings.lambda = lambdaP.intValue(); + } + } + + @Override + protected Object makeInstance() { + return settings; + } + } + } + + /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ - public static class Parameterizer<O extends NumberVector<?>> extends AbstractProjectedDBSCAN.Parameterizer<O, DoubleDistance> { + public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer { + /** + * Settings storage. + */ + Settings settings; + @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - configInnerDistance(config); - configEpsilon(config, innerdist); - configMinPts(config); - configOuterDistance(config, epsilon, minpts, FourCSubspaceIndex.Factory.class, innerdist); - configLambda(config); + settings = config.tryInstantiate(FourC.Settings.class); } @Override protected FourC<O> makeInstance() { - return new FourC<>(epsilon, minpts, outerdist, lambda); + return new FourC<>(settings); } } }
\ No newline at end of file |