diff options
author | Erich Schubert <erich@debian.org> | 2012-12-14 20:45:15 +0100 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:35 +0000 |
commit | 357b2761a2c0ded8cad5e4d3c1e667b7639ff7a6 (patch) | |
tree | 3dd8947bb70a67c221adc3cd4359ba1d385e2f3c /src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java | |
parent | 4343785ebed9d4145f417d86d581f18a0d31e4ac (diff) | |
parent | b7b404fd7a726774d442562d11659d7b5368cdb9 (diff) |
Import Debian changes 0.5.5-1
elki (0.5.5-1) unstable; urgency=low
* New upstream release: 0.5.5 interim release.
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java | 196 |
1 files changed, 92 insertions, 104 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java index 416a5ffb..2b02e7d6 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java @@ -40,31 +40,28 @@ import de.lmu.ifi.dbs.elki.database.HashmapDatabase; import de.lmu.ifi.dbs.elki.database.UpdatableDatabase; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.datasource.bundle.SingleObjectBundle; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.result.AprioriResult; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -77,13 +74,15 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * database. * * @author Elke Achtert + * + * @param <V> Vector type */ @Description("Computes the preference vector of objects of a certain database according to the DiSH algorithm.") -public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { +public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { /** - * Logger to use + * Logger to use. */ - protected static final Logging logger = Logging.getLogger(DiSHPreferenceVectorIndex.class); + private static final Logging LOG = Logging.getLogger(DiSHPreferenceVectorIndex.class); /** * Available strategies for determination of the preference vector. @@ -92,17 +91,17 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs */ public enum Strategy { /** - * Apriori strategy + * Apriori strategy. */ APRIORI, /** - * Max intersection strategy + * Max intersection strategy. */ MAX_INTERSECTION } /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. */ protected DoubleDistance[] epsilon; @@ -139,84 +138,76 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class); - if(logger.isDebugging()) { - StringBuffer msg = new StringBuffer(); + if(LOG.isDebugging()) { + StringBuilder msg = new StringBuilder(); msg.append("\n eps ").append(Arrays.asList(epsilon)); msg.append("\n minpts ").append(minpts); msg.append("\n strategy ").append(strategy); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } - try { - long start = System.currentTimeMillis(); - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), logger) : null; - - // only one epsilon value specified - int dim = DatabaseUtil.dimensionality(relation); - if(epsilon.length == 1 && dim != 1) { - DoubleDistance eps = epsilon[0]; - epsilon = new DoubleDistance[dim]; - Arrays.fill(epsilon, eps); - } - - // epsilons as string - RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim); + long start = System.currentTimeMillis(); + FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null; - for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { - StringBuffer msg = new StringBuffer(); - final DBID id = it.getDBID(); + // only one epsilon value specified + int dim = RelationUtil.dimensionality(relation); + if(epsilon.length == 1 && dim != 1) { + DoubleDistance eps = epsilon[0]; + epsilon = new DoubleDistance[dim]; + Arrays.fill(epsilon, eps); + } - if(logger.isDebugging()) { - msg.append("\nid = ").append(id); - // msg.append(" ").append(database.get(id)); - //msg.append(" ").append(database.getObjectLabelQuery().get(id)); - } + // epsilons as string + RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim); - // determine neighbors in each dimension - ModifiableDBIDs[] allNeighbors = ClassGenericsUtil.newArrayOfNull(dim, ModifiableDBIDs.class); - for(int d = 0; d < dim; d++) { - DistanceDBIDResult<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(id, epsilon[d]); - allNeighbors[d] = DBIDUtil.newHashSet(qrList.size()); - for(DistanceResultPair<DoubleDistance> qr : qrList) { - allNeighbors[d].add(qr.getDBID()); - } - } + for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { + StringBuilder msg = new StringBuilder(); - if(logger.isDebugging()) { - for(int d = 0; d < dim; d++) { - msg.append("\n neighbors [").append(d).append("]"); - msg.append(" (").append(allNeighbors[d].size()).append(") = "); - msg.append(allNeighbors[d]); - } - } + if(LOG.isDebugging()) { + msg.append("\nid = ").append(DBIDUtil.toString(it)); + // msg.append(" ").append(database.get(id)); + // msg.append(" ").append(database.getObjectLabelQuery().get(id)); + } - BitSet preferenceVector = determinePreferenceVector(relation, allNeighbors, msg); - storage.put(id, preferenceVector); + // determine neighbors in each dimension + ModifiableDBIDs[] allNeighbors = new ModifiableDBIDs[dim]; + for(int d = 0; d < dim; d++) { + DistanceDBIDResult<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(it, epsilon[d]); + allNeighbors[d] = DBIDUtil.newHashSet(qrList); + } - if(logger.isDebugging()) { - logger.debugFine(msg.toString()); + if(LOG.isDebugging()) { + for(int d = 0; d < dim; d++) { + msg.append("\n neighbors [").append(d).append(']'); + msg.append(" (").append(allNeighbors[d].size()).append(") = "); + msg.append(allNeighbors[d]); } + } - if(progress != null) { - progress.incrementProcessed(logger); - } + try { + storage.put(it, determinePreferenceVector(relation, allNeighbors, msg)); } - if(progress != null) { - progress.ensureCompleted(logger); + catch(UnableToComplyException e) { + throw new IllegalStateException(e); } - long end = System.currentTimeMillis(); - // TODO: re-add timing code! - if(logger.isVerbose()) { - long elapsedTime = end - start; - logger.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); + if(LOG.isDebugging()) { + LOG.debugFine(msg.toString()); + } + + if(progress != null) { + progress.incrementProcessed(LOG); } } - catch(ParameterException e) { - throw new IllegalStateException(e); + if(progress != null) { + progress.ensureCompleted(LOG); } - catch(UnableToComplyException e) { - throw new IllegalStateException(e); + + long end = System.currentTimeMillis(); + // TODO: re-add timing code! + if(LOG.isVerbose()) { + long elapsedTime = end - start; + LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); } } @@ -227,11 +218,10 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param neighborIDs the list of ids of the neighbors in each dimension * @param msg a string buffer for debug messages * @return the preference vector - * @throws de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException * * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException */ - private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuffer msg) throws ParameterException, UnableToComplyException { + private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException { if(strategy.equals(Strategy.APRIORI)) { return determinePreferenceVectorByApriori(relation, neighborIDs, msg); } @@ -250,23 +240,21 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param neighborIDs the list of ids of the neighbors in each dimension * @param msg a string buffer for debug messages * @return the preference vector - * @throws de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException * * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException * */ - private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuffer msg) throws ParameterException, UnableToComplyException { + private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException { int dimensionality = neighborIDs.length; // database for apriori UpdatableDatabase apriori_db = new HashmapDatabase(); - SimpleTypeInformation<?> bitmeta = VectorFieldTypeInformation.get(BitVector.class, dimensionality); + SimpleTypeInformation<?> bitmeta = new VectorFieldTypeInformation<BitVector>(BitVector.class, dimensionality); for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { - DBID id = it.getDBID(); Bit[] bits = new Bit[dimensionality]; boolean allFalse = true; for(int d = 0; d < dimensionality; d++) { - if(neighborIDs[d].contains(id)) { + if(neighborIDs[d].contains(it)) { bits[d] = new Bit(true); allFalse = false; } @@ -286,9 +274,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs // result of apriori List<BitSet> frequentItemsets = aprioriResult.getSolution(); Map<BitSet, Integer> supports = aprioriResult.getSupports(); - if(logger.isDebugging()) { - msg.append("\n Frequent itemsets: " + frequentItemsets); - msg.append("\n All supports: " + supports); + if(LOG.isDebugging()) { + msg.append("\n Frequent itemsets: ").append(frequentItemsets); + msg.append("\n All supports: ").append(supports); } int maxSupport = 0; int maxCardinality = 0; @@ -302,11 +290,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } } - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("\n preference "); msg.append(FormatUtil.format(dimensionality, preferenceVector)); - msg.append("\n"); - logger.debugFine(msg.toString()); + msg.append('\n'); + LOG.debugFine(msg.toString()); } return preferenceVector; @@ -319,7 +307,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param msg a string buffer for debug messages * @return the preference vector */ - private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuffer msg) { + private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuilder msg) { int dimensionality = neighborIDs.length; BitSet preferenceVector = new BitSet(dimensionality); @@ -330,8 +318,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs candidates.put(i, s_i); } } - if(logger.isDebugging()) { - msg.append("\n candidates " + candidates.keySet()); + if(LOG.isDebugging()) { + msg.append("\n candidates ").append(candidates.keySet()); } if(!candidates.isEmpty()) { @@ -355,11 +343,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } } - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("\n preference "); msg.append(FormatUtil.format(dimensionality, preferenceVector)); - msg.append("\n"); - logger.debug(msg.toString()); + msg.append('\n'); + LOG.debug(msg.toString()); } return preferenceVector; @@ -416,20 +404,19 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param dimensionality the dimensionality of the objects * @return the dimension selecting distancefunctions to determine the * preference vectors - * @throws ParameterException */ - private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) throws ParameterException { + private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) { Class<RangeQuery<V, DoubleDistance>> rqcls = ClassGenericsUtil.uglyCastIntoSubclass(RangeQuery.class); RangeQuery<V, DoubleDistance>[] rangeQueries = ClassGenericsUtil.newArrayOfNull(dimensionality, rqcls); for(int d = 0; d < dimensionality; d++) { - rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<V, DoubleDistance>(relation, new DimensionSelectingDistanceFunction(d + 1))); + rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<V, DoubleDistance>(relation, new DimensionSelectingDistanceFunction(d))); } return rangeQueries; } @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -443,7 +430,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } /** - * Factory class + * Factory class. * * @author Erich Schubert * @@ -452,7 +439,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @param <V> Vector type */ - public static class Factory<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> { + public static class Factory<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> { /** * The default value for epsilon. */ @@ -472,7 +459,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Default value: {@link #DEFAULT_EPSILON} * </p> */ - public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("dish.epsilon", "A comma separated list of positive doubles specifying the " + "maximum radius of the neighborhood to be " + "considered in each dimension for determination of " + "the preference vector " + "(default is " + DEFAULT_EPSILON + " in each dimension). " + "If only one value is specified, this value " + "will be used for each dimension."); + public static final OptionID EPSILON_ID = new OptionID("dish.epsilon", "A comma separated list of positive doubles specifying the " + "maximum radius of the neighborhood to be " + "considered in each dimension for determination of " + "the preference vector " + "(default is " + DEFAULT_EPSILON + " in each dimension). " + "If only one value is specified, this value " + "will be used for each dimension."); /** * Option name for {@link #MINPTS_ID}. @@ -493,12 +480,12 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Key: {@code -dish.minpts} * </p> */ - public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID(MINPTS_P, "Positive threshold for minumum numbers of points in the epsilon-" + "neighborhood of a point. " + CONDITION); + public static final OptionID MINPTS_ID = new OptionID(MINPTS_P, "Positive threshold for minumum numbers of points in the epsilon-" + "neighborhood of a point. " + CONDITION); /** * Default strategy. */ - public static Strategy DEFAULT_STRATEGY = Strategy.MAX_INTERSECTION; + public static final Strategy DEFAULT_STRATEGY = Strategy.MAX_INTERSECTION; /** * The strategy for determination of the preference vector, available @@ -512,10 +499,10 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Default value: {@link #DEFAULT_STRATEGY} * </p> */ - public static final OptionID STRATEGY_ID = OptionID.getOrCreateOptionID("dish.strategy", "The strategy for determination of the preference vector, " + "available strategies are: [" + Strategy.APRIORI + "| " + Strategy.MAX_INTERSECTION + "]" + "(default is " + DEFAULT_STRATEGY + ")"); + public static final OptionID STRATEGY_ID = new OptionID("dish.strategy", "The strategy for determination of the preference vector, " + "available strategies are: [" + Strategy.APRIORI + "| " + Strategy.MAX_INTERSECTION + "]" + "(default is " + DEFAULT_STRATEGY + ")"); /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. */ protected DoubleDistance[] epsilon; @@ -549,7 +536,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } /** - * Return the minpts value + * Return the minpts value. * * @return minpts */ @@ -564,9 +551,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. */ protected DoubleDistance[] epsilon; @@ -583,7 +570,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0)); + final IntParameter minptsP = new IntParameter(MINPTS_ID); + minptsP.addConstraint(new GreaterConstraint(0)); if(config.grab(minptsP)) { minpts = minptsP.getValue(); } |