diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/preprocessed/preference')
4 files changed, 151 insertions, 158 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java index 387985ab..56b09dba 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java @@ -40,7 +40,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; * * @param <NV> Number vector */ -public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?, ?>> extends AbstractPreprocessorIndex<NV, BitSet> implements PreferenceVectorIndex<NV> { +public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?>> extends AbstractPreprocessorIndex<NV, BitSet> implements PreferenceVectorIndex<NV> { /** * Constructor. * @@ -53,7 +53,7 @@ public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?, ? /** * Preprocessing step. */ - abstract protected void preprocess(); + protected abstract void preprocess(); @Override public BitSet getPreferenceVector(DBIDRef objid) { @@ -64,14 +64,14 @@ public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?, ? } /** - * Factory class + * Factory class. * * @author Erich Schubert * * @apiviz.stereotype factory * @apiviz.uses AbstractPreferenceVectorIndex oneway - - «create» */ - public static abstract class Factory<V extends NumberVector<?, ?>, I extends PreferenceVectorIndex<V>> implements PreferenceVectorIndex.Factory<V, I>, Parameterizable { + public abstract static class Factory<V extends NumberVector<?>, I extends PreferenceVectorIndex<V>> implements PreferenceVectorIndex.Factory<V, I>, Parameterizable { @Override public abstract I instantiate(Relation<V> relation); diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java index 416a5ffb..2b02e7d6 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java @@ -40,31 +40,28 @@ import de.lmu.ifi.dbs.elki.database.HashmapDatabase; import de.lmu.ifi.dbs.elki.database.UpdatableDatabase; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.datasource.bundle.SingleObjectBundle; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.result.AprioriResult; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -77,13 +74,15 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * database. * * @author Elke Achtert + * + * @param <V> Vector type */ @Description("Computes the preference vector of objects of a certain database according to the DiSH algorithm.") -public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { +public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { /** - * Logger to use + * Logger to use. */ - protected static final Logging logger = Logging.getLogger(DiSHPreferenceVectorIndex.class); + private static final Logging LOG = Logging.getLogger(DiSHPreferenceVectorIndex.class); /** * Available strategies for determination of the preference vector. @@ -92,17 +91,17 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs */ public enum Strategy { /** - * Apriori strategy + * Apriori strategy. */ APRIORI, /** - * Max intersection strategy + * Max intersection strategy. */ MAX_INTERSECTION } /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. */ protected DoubleDistance[] epsilon; @@ -139,84 +138,76 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class); - if(logger.isDebugging()) { - StringBuffer msg = new StringBuffer(); + if(LOG.isDebugging()) { + StringBuilder msg = new StringBuilder(); msg.append("\n eps ").append(Arrays.asList(epsilon)); msg.append("\n minpts ").append(minpts); msg.append("\n strategy ").append(strategy); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } - try { - long start = System.currentTimeMillis(); - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), logger) : null; - - // only one epsilon value specified - int dim = DatabaseUtil.dimensionality(relation); - if(epsilon.length == 1 && dim != 1) { - DoubleDistance eps = epsilon[0]; - epsilon = new DoubleDistance[dim]; - Arrays.fill(epsilon, eps); - } - - // epsilons as string - RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim); + long start = System.currentTimeMillis(); + FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null; - for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { - StringBuffer msg = new StringBuffer(); - final DBID id = it.getDBID(); + // only one epsilon value specified + int dim = RelationUtil.dimensionality(relation); + if(epsilon.length == 1 && dim != 1) { + DoubleDistance eps = epsilon[0]; + epsilon = new DoubleDistance[dim]; + Arrays.fill(epsilon, eps); + } - if(logger.isDebugging()) { - msg.append("\nid = ").append(id); - // msg.append(" ").append(database.get(id)); - //msg.append(" ").append(database.getObjectLabelQuery().get(id)); - } + // epsilons as string + RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim); - // determine neighbors in each dimension - ModifiableDBIDs[] allNeighbors = ClassGenericsUtil.newArrayOfNull(dim, ModifiableDBIDs.class); - for(int d = 0; d < dim; d++) { - DistanceDBIDResult<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(id, epsilon[d]); - allNeighbors[d] = DBIDUtil.newHashSet(qrList.size()); - for(DistanceResultPair<DoubleDistance> qr : qrList) { - allNeighbors[d].add(qr.getDBID()); - } - } + for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { + StringBuilder msg = new StringBuilder(); - if(logger.isDebugging()) { - for(int d = 0; d < dim; d++) { - msg.append("\n neighbors [").append(d).append("]"); - msg.append(" (").append(allNeighbors[d].size()).append(") = "); - msg.append(allNeighbors[d]); - } - } + if(LOG.isDebugging()) { + msg.append("\nid = ").append(DBIDUtil.toString(it)); + // msg.append(" ").append(database.get(id)); + // msg.append(" ").append(database.getObjectLabelQuery().get(id)); + } - BitSet preferenceVector = determinePreferenceVector(relation, allNeighbors, msg); - storage.put(id, preferenceVector); + // determine neighbors in each dimension + ModifiableDBIDs[] allNeighbors = new ModifiableDBIDs[dim]; + for(int d = 0; d < dim; d++) { + DistanceDBIDResult<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(it, epsilon[d]); + allNeighbors[d] = DBIDUtil.newHashSet(qrList); + } - if(logger.isDebugging()) { - logger.debugFine(msg.toString()); + if(LOG.isDebugging()) { + for(int d = 0; d < dim; d++) { + msg.append("\n neighbors [").append(d).append(']'); + msg.append(" (").append(allNeighbors[d].size()).append(") = "); + msg.append(allNeighbors[d]); } + } - if(progress != null) { - progress.incrementProcessed(logger); - } + try { + storage.put(it, determinePreferenceVector(relation, allNeighbors, msg)); } - if(progress != null) { - progress.ensureCompleted(logger); + catch(UnableToComplyException e) { + throw new IllegalStateException(e); } - long end = System.currentTimeMillis(); - // TODO: re-add timing code! - if(logger.isVerbose()) { - long elapsedTime = end - start; - logger.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); + if(LOG.isDebugging()) { + LOG.debugFine(msg.toString()); + } + + if(progress != null) { + progress.incrementProcessed(LOG); } } - catch(ParameterException e) { - throw new IllegalStateException(e); + if(progress != null) { + progress.ensureCompleted(LOG); } - catch(UnableToComplyException e) { - throw new IllegalStateException(e); + + long end = System.currentTimeMillis(); + // TODO: re-add timing code! + if(LOG.isVerbose()) { + long elapsedTime = end - start; + LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); } } @@ -227,11 +218,10 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param neighborIDs the list of ids of the neighbors in each dimension * @param msg a string buffer for debug messages * @return the preference vector - * @throws de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException * * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException */ - private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuffer msg) throws ParameterException, UnableToComplyException { + private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException { if(strategy.equals(Strategy.APRIORI)) { return determinePreferenceVectorByApriori(relation, neighborIDs, msg); } @@ -250,23 +240,21 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param neighborIDs the list of ids of the neighbors in each dimension * @param msg a string buffer for debug messages * @return the preference vector - * @throws de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException * * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException * */ - private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuffer msg) throws ParameterException, UnableToComplyException { + private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException { int dimensionality = neighborIDs.length; // database for apriori UpdatableDatabase apriori_db = new HashmapDatabase(); - SimpleTypeInformation<?> bitmeta = VectorFieldTypeInformation.get(BitVector.class, dimensionality); + SimpleTypeInformation<?> bitmeta = new VectorFieldTypeInformation<BitVector>(BitVector.class, dimensionality); for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { - DBID id = it.getDBID(); Bit[] bits = new Bit[dimensionality]; boolean allFalse = true; for(int d = 0; d < dimensionality; d++) { - if(neighborIDs[d].contains(id)) { + if(neighborIDs[d].contains(it)) { bits[d] = new Bit(true); allFalse = false; } @@ -286,9 +274,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs // result of apriori List<BitSet> frequentItemsets = aprioriResult.getSolution(); Map<BitSet, Integer> supports = aprioriResult.getSupports(); - if(logger.isDebugging()) { - msg.append("\n Frequent itemsets: " + frequentItemsets); - msg.append("\n All supports: " + supports); + if(LOG.isDebugging()) { + msg.append("\n Frequent itemsets: ").append(frequentItemsets); + msg.append("\n All supports: ").append(supports); } int maxSupport = 0; int maxCardinality = 0; @@ -302,11 +290,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } } - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("\n preference "); msg.append(FormatUtil.format(dimensionality, preferenceVector)); - msg.append("\n"); - logger.debugFine(msg.toString()); + msg.append('\n'); + LOG.debugFine(msg.toString()); } return preferenceVector; @@ -319,7 +307,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param msg a string buffer for debug messages * @return the preference vector */ - private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuffer msg) { + private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuilder msg) { int dimensionality = neighborIDs.length; BitSet preferenceVector = new BitSet(dimensionality); @@ -330,8 +318,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs candidates.put(i, s_i); } } - if(logger.isDebugging()) { - msg.append("\n candidates " + candidates.keySet()); + if(LOG.isDebugging()) { + msg.append("\n candidates ").append(candidates.keySet()); } if(!candidates.isEmpty()) { @@ -355,11 +343,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } } - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("\n preference "); msg.append(FormatUtil.format(dimensionality, preferenceVector)); - msg.append("\n"); - logger.debug(msg.toString()); + msg.append('\n'); + LOG.debug(msg.toString()); } return preferenceVector; @@ -416,20 +404,19 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param dimensionality the dimensionality of the objects * @return the dimension selecting distancefunctions to determine the * preference vectors - * @throws ParameterException */ - private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) throws ParameterException { + private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) { Class<RangeQuery<V, DoubleDistance>> rqcls = ClassGenericsUtil.uglyCastIntoSubclass(RangeQuery.class); RangeQuery<V, DoubleDistance>[] rangeQueries = ClassGenericsUtil.newArrayOfNull(dimensionality, rqcls); for(int d = 0; d < dimensionality; d++) { - rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<V, DoubleDistance>(relation, new DimensionSelectingDistanceFunction(d + 1))); + rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<V, DoubleDistance>(relation, new DimensionSelectingDistanceFunction(d))); } return rangeQueries; } @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -443,7 +430,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } /** - * Factory class + * Factory class. * * @author Erich Schubert * @@ -452,7 +439,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @param <V> Vector type */ - public static class Factory<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> { + public static class Factory<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> { /** * The default value for epsilon. */ @@ -472,7 +459,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Default value: {@link #DEFAULT_EPSILON} * </p> */ - public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("dish.epsilon", "A comma separated list of positive doubles specifying the " + "maximum radius of the neighborhood to be " + "considered in each dimension for determination of " + "the preference vector " + "(default is " + DEFAULT_EPSILON + " in each dimension). " + "If only one value is specified, this value " + "will be used for each dimension."); + public static final OptionID EPSILON_ID = new OptionID("dish.epsilon", "A comma separated list of positive doubles specifying the " + "maximum radius of the neighborhood to be " + "considered in each dimension for determination of " + "the preference vector " + "(default is " + DEFAULT_EPSILON + " in each dimension). " + "If only one value is specified, this value " + "will be used for each dimension."); /** * Option name for {@link #MINPTS_ID}. @@ -493,12 +480,12 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Key: {@code -dish.minpts} * </p> */ - public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID(MINPTS_P, "Positive threshold for minumum numbers of points in the epsilon-" + "neighborhood of a point. " + CONDITION); + public static final OptionID MINPTS_ID = new OptionID(MINPTS_P, "Positive threshold for minumum numbers of points in the epsilon-" + "neighborhood of a point. " + CONDITION); /** * Default strategy. */ - public static Strategy DEFAULT_STRATEGY = Strategy.MAX_INTERSECTION; + public static final Strategy DEFAULT_STRATEGY = Strategy.MAX_INTERSECTION; /** * The strategy for determination of the preference vector, available @@ -512,10 +499,10 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Default value: {@link #DEFAULT_STRATEGY} * </p> */ - public static final OptionID STRATEGY_ID = OptionID.getOrCreateOptionID("dish.strategy", "The strategy for determination of the preference vector, " + "available strategies are: [" + Strategy.APRIORI + "| " + Strategy.MAX_INTERSECTION + "]" + "(default is " + DEFAULT_STRATEGY + ")"); + public static final OptionID STRATEGY_ID = new OptionID("dish.strategy", "The strategy for determination of the preference vector, " + "available strategies are: [" + Strategy.APRIORI + "| " + Strategy.MAX_INTERSECTION + "]" + "(default is " + DEFAULT_STRATEGY + ")"); /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. */ protected DoubleDistance[] epsilon; @@ -549,7 +536,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } /** - * Return the minpts value + * Return the minpts value. * * @return minpts */ @@ -564,9 +551,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. */ protected DoubleDistance[] epsilon; @@ -583,7 +570,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0)); + final IntParameter minptsP = new IntParameter(MINPTS_ID); + minptsP.addConstraint(new GreaterConstraint(0)); if(config.grab(minptsP)) { minpts = minptsP.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java index 65f5f61e..fd6aa0bf 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java @@ -30,13 +30,15 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -48,7 +50,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; @@ -60,14 +62,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @author Elke Achtert * * @see HiSC + * + * @param <V> Vector type */ @Title("HiSC Preprocessor") @Description("Computes the preference vector of objects of a certain database according to the HiSC algorithm.") -public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { +public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { /** - * Logger to use + * Logger to use. */ - protected static final Logging logger = Logging.getLogger(HiSCPreferenceVectorIndex.class); + private static final Logging LOG = Logging.getLogger(HiSCPreferenceVectorIndex.class); /** * Holds the value of parameter alpha. @@ -94,49 +98,47 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override protected void preprocess() { - if(relation == null || relation.size() <= 0) { + if (relation == null || relation.size() <= 0) { throw new IllegalArgumentException(ExceptionMessages.DATABASE_EMPTY); } storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class); - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); long start = System.currentTimeMillis(); - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), logger) : null; + FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null; KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, EuclideanDistanceFunction.STATIC, k); for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { - DBID id = it.getDBID(); - - if(logger.isDebugging()) { - msg.append("\n\nid = ").append(id); - ///msg.append(" ").append(database.getObjectLabelQuery().get(id)); + if (LOG.isDebugging()) { + msg.append("\n\nid = ").append(DBIDUtil.toString(it)); + // /msg.append(" ").append(database.getObjectLabelQuery().get(id)); msg.append("\n knns: "); } - KNNResult<DoubleDistance> knns = knnQuery.getKNNForDBID(id, k); - BitSet preferenceVector = determinePreferenceVector(relation, id, knns.asDBIDs(), msg); - storage.put(id, preferenceVector); + KNNResult<DoubleDistance> knns = knnQuery.getKNNForDBID(it, k); + BitSet preferenceVector = determinePreferenceVector(relation, it, knns, msg); + storage.put(it, preferenceVector); - if(progress != null) { - progress.incrementProcessed(logger); + if (progress != null) { + progress.incrementProcessed(LOG); } } - if(progress != null) { - progress.ensureCompleted(logger); + if (progress != null) { + progress.ensureCompleted(LOG); } - if(logger.isDebugging()) { - logger.debugFine(msg.toString()); + if (LOG.isDebugging()) { + LOG.debugFine(msg.toString()); } long end = System.currentTimeMillis(); // TODO: re-add timing code! - if(logger.isVerbose()) { + if (LOG.isVerbose()) { long elapsedTime = end - start; - logger.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); + LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); } } @@ -150,20 +152,20 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param msg a string buffer for debug messages * @return the preference vector */ - private BitSet determinePreferenceVector(Relation<V> relation, DBID id, DBIDs neighborIDs, StringBuffer msg) { + private BitSet determinePreferenceVector(Relation<V> relation, DBIDRef id, DBIDs neighborIDs, StringBuilder msg) { // variances double[] variances = DatabaseUtil.variances(relation, relation.get(id), neighborIDs); // preference vector BitSet preferenceVector = new BitSet(variances.length); - for(int d = 0; d < variances.length; d++) { - if(variances[d] < alpha) { + for (int d = 0; d < variances.length; d++) { + if (variances[d] < alpha) { preferenceVector.set(d); } } - if(msg != null && logger.isDebugging()) { - msg.append("\nalpha " + alpha); + if (msg != null && LOG.isDebugging()) { + msg.append("\nalpha ").append(alpha); msg.append("\nvariances "); msg.append(FormatUtil.format(variances, ", ", 4)); msg.append("\npreference "); @@ -175,7 +177,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -189,7 +191,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } /** - * Factory class + * Factory class. * * @author Erich Schubert * @@ -198,7 +200,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @param <V> Vector type */ - public static class Factory<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex.Factory<V, HiSCPreferenceVectorIndex<V>> { + public static class Factory<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex.Factory<V, HiSCPreferenceVectorIndex<V>> { /** * The default value for alpha. */ @@ -214,7 +216,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Key: {@code -hisc.alpha} * </p> */ - public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("hisc.alpha", "The maximum absolute variance along a coordinate axis."); + public static final OptionID ALPHA_ID = new OptionID("hisc.alpha", "The maximum absolute variance along a coordinate axis."); /** * The number of nearest neighbors considered to determine the preference @@ -227,7 +229,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Default value: three times of the dimensionality of the database objects * </p> */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("hisc.k", "The number of nearest neighbors considered to determine the preference vector. If this value is not defined, k ist set to three times of the dimensionality of the database objects."); + public static final OptionID K_ID = new OptionID("hisc.k", "The number of nearest neighbors considered to determine the preference vector. If this value is not defined, k ist set to three times of the dimensionality of the database objects."); /** * Holds the value of parameter {@link #ALPHA_ID}. @@ -254,10 +256,9 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override public HiSCPreferenceVectorIndex<V> instantiate(Relation<V> relation) { final int usek; - if(k == null) { - usek = 3 * DatabaseUtil.dimensionality(relation); - } - else { + if (k == null) { + usek = 3 * RelationUtil.dimensionality(relation); + } else { usek = k; } return new HiSCPreferenceVectorIndex<V>(relation, alpha, usek); @@ -270,7 +271,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { /** * Holds the value of parameter {@link #ALPHA_ID}. */ @@ -281,17 +282,21 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs */ protected Integer k; - @Override + @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final DoubleParameter ALPHA_PARAM = new DoubleParameter(ALPHA_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_ALPHA); - if(config.grab(ALPHA_PARAM)) { - alpha = ALPHA_PARAM.getValue(); + final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, DEFAULT_ALPHA); + alphaP.addConstraint(new GreaterConstraint(0.0)); + alphaP.addConstraint(new LessConstraint(1.0)); + if (config.grab(alphaP)) { + alpha = alphaP.doubleValue(); } - final IntParameter K_PARAM = new IntParameter(K_ID, new GreaterConstraint(0), true); - if(config.grab(K_PARAM)) { - k = K_PARAM.getValue(); + final IntParameter kP = new IntParameter(K_ID); + kP.addConstraint(new GreaterConstraint(0)); + kP.setOptional(true); + if (config.grab(kP)) { + k = kP.intValue(); } } @@ -301,4 +306,4 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java index a0fba8f3..a212c2cd 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java @@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.index.IndexFactory; * * @param <NV> Vector type */ -public interface PreferenceVectorIndex<NV extends NumberVector<?, ?>> extends Index { +public interface PreferenceVectorIndex<NV extends NumberVector<?>> extends Index { /** * Get the precomputed preference vector for a particular object ID. * @@ -58,7 +58,7 @@ public interface PreferenceVectorIndex<NV extends NumberVector<?, ?>> extends In * @param <V> vector type * @param <I> index type */ - public static interface Factory<V extends NumberVector<?, ?>, I extends PreferenceVectorIndex<V>> extends IndexFactory<V, I> { + public static interface Factory<V extends NumberVector<?>, I extends PreferenceVectorIndex<V>> extends IndexFactory<V, I> { /** * Instantiate the index for a given database. * |