summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
diff options
context:
space:
mode:
author: Erich Schubert <erich@debian.org> 2012-12-14 20:45:15 +0100
committer: Andrej Shadura <andrewsh@debian.org> 2019-03-09 22:30:35 +0000
commit: 357b2761a2c0ded8cad5e4d3c1e667b7639ff7a6 (patch)
tree: 3dd8947bb70a67c221adc3cd4359ba1d385e2f3c /src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
parent: 4343785ebed9d4145f417d86d581f18a0d31e4ac (diff)
parent: b7b404fd7a726774d442562d11659d7b5368cdb9 (diff)
Import Debian changes 0.5.5-1
elki (0.5.5-1) unstable; urgency=low * New upstream release: 0.5.5 interim release.
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java 196
1 files changed, 92 insertions, 104 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
index 416a5ffb..2b02e7d6 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
@@ -40,31 +40,28 @@ import de.lmu.ifi.dbs.elki.database.HashmapDatabase;
import de.lmu.ifi.dbs.elki.database.UpdatableDatabase;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.datasource.bundle.SingleObjectBundle;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.AprioriResult;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -77,13 +74,15 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* database.
*
* @author Elke Achtert
+ *
+ * @param <V> Vector type
*/
@Description("Computes the preference vector of objects of a certain database according to the DiSH algorithm.")
-public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> {
+public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> {
/**
- * Logger to use
+ * Logger to use.
*/
- protected static final Logging logger = Logging.getLogger(DiSHPreferenceVectorIndex.class);
+ private static final Logging LOG = Logging.getLogger(DiSHPreferenceVectorIndex.class);
/**
* Available strategies for determination of the preference vector.
@@ -92,17 +91,17 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
*/
public enum Strategy {
/**
- * Apriori strategy
+ * Apriori strategy.
*/
APRIORI,
/**
- * Max intersection strategy
+ * Max intersection strategy.
*/
MAX_INTERSECTION
}
/**
- * The epsilon value for each dimension;
+ * The epsilon value for each dimension.
*/
protected DoubleDistance[] epsilon;
@@ -139,84 +138,76 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class);
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append("\n eps ").append(Arrays.asList(epsilon));
msg.append("\n minpts ").append(minpts);
msg.append("\n strategy ").append(strategy);
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
- try {
- long start = System.currentTimeMillis();
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), logger) : null;
-
- // only one epsilon value specified
- int dim = DatabaseUtil.dimensionality(relation);
- if(epsilon.length == 1 && dim != 1) {
- DoubleDistance eps = epsilon[0];
- epsilon = new DoubleDistance[dim];
- Arrays.fill(epsilon, eps);
- }
-
- // epsilons as string
- RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim);
+ long start = System.currentTimeMillis();
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null;
- for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
- StringBuffer msg = new StringBuffer();
- final DBID id = it.getDBID();
+ // only one epsilon value specified
+ int dim = RelationUtil.dimensionality(relation);
+ if(epsilon.length == 1 && dim != 1) {
+ DoubleDistance eps = epsilon[0];
+ epsilon = new DoubleDistance[dim];
+ Arrays.fill(epsilon, eps);
+ }
- if(logger.isDebugging()) {
- msg.append("\nid = ").append(id);
- // msg.append(" ").append(database.get(id));
- //msg.append(" ").append(database.getObjectLabelQuery().get(id));
- }
+ // epsilons as string
+ RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim);
- // determine neighbors in each dimension
- ModifiableDBIDs[] allNeighbors = ClassGenericsUtil.newArrayOfNull(dim, ModifiableDBIDs.class);
- for(int d = 0; d < dim; d++) {
- DistanceDBIDResult<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(id, epsilon[d]);
- allNeighbors[d] = DBIDUtil.newHashSet(qrList.size());
- for(DistanceResultPair<DoubleDistance> qr : qrList) {
- allNeighbors[d].add(qr.getDBID());
- }
- }
+ for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ StringBuilder msg = new StringBuilder();
- if(logger.isDebugging()) {
- for(int d = 0; d < dim; d++) {
- msg.append("\n neighbors [").append(d).append("]");
- msg.append(" (").append(allNeighbors[d].size()).append(") = ");
- msg.append(allNeighbors[d]);
- }
- }
+ if(LOG.isDebugging()) {
+ msg.append("\nid = ").append(DBIDUtil.toString(it));
+ // msg.append(" ").append(database.get(id));
+ // msg.append(" ").append(database.getObjectLabelQuery().get(id));
+ }
- BitSet preferenceVector = determinePreferenceVector(relation, allNeighbors, msg);
- storage.put(id, preferenceVector);
+ // determine neighbors in each dimension
+ ModifiableDBIDs[] allNeighbors = new ModifiableDBIDs[dim];
+ for(int d = 0; d < dim; d++) {
+ DistanceDBIDResult<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(it, epsilon[d]);
+ allNeighbors[d] = DBIDUtil.newHashSet(qrList);
+ }
- if(logger.isDebugging()) {
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ for(int d = 0; d < dim; d++) {
+ msg.append("\n neighbors [").append(d).append(']');
+ msg.append(" (").append(allNeighbors[d].size()).append(") = ");
+ msg.append(allNeighbors[d]);
}
+ }
- if(progress != null) {
- progress.incrementProcessed(logger);
- }
+ try {
+ storage.put(it, determinePreferenceVector(relation, allNeighbors, msg));
}
- if(progress != null) {
- progress.ensureCompleted(logger);
+ catch(UnableToComplyException e) {
+ throw new IllegalStateException(e);
}
- long end = System.currentTimeMillis();
- // TODO: re-add timing code!
- if(logger.isVerbose()) {
- long elapsedTime = end - start;
- logger.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
+ if(LOG.isDebugging()) {
+ LOG.debugFine(msg.toString());
+ }
+
+ if(progress != null) {
+ progress.incrementProcessed(LOG);
}
}
- catch(ParameterException e) {
- throw new IllegalStateException(e);
+ if(progress != null) {
+ progress.ensureCompleted(LOG);
}
- catch(UnableToComplyException e) {
- throw new IllegalStateException(e);
+
+ long end = System.currentTimeMillis();
+ // TODO: re-add timing code!
+ if(LOG.isVerbose()) {
+ long elapsedTime = end - start;
+ LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
}
}
@@ -227,11 +218,10 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
* @param neighborIDs the list of ids of the neighbors in each dimension
* @param msg a string buffer for debug messages
* @return the preference vector
- * @throws de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException
*
* @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException
*/
- private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuffer msg) throws ParameterException, UnableToComplyException {
+ private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException {
if(strategy.equals(Strategy.APRIORI)) {
return determinePreferenceVectorByApriori(relation, neighborIDs, msg);
}
@@ -250,23 +240,21 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
* @param neighborIDs the list of ids of the neighbors in each dimension
* @param msg a string buffer for debug messages
* @return the preference vector
- * @throws de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException
*
* @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException
*
*/
- private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuffer msg) throws ParameterException, UnableToComplyException {
+ private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException {
int dimensionality = neighborIDs.length;
// database for apriori
UpdatableDatabase apriori_db = new HashmapDatabase();
- SimpleTypeInformation<?> bitmeta = VectorFieldTypeInformation.get(BitVector.class, dimensionality);
+ SimpleTypeInformation<?> bitmeta = new VectorFieldTypeInformation<BitVector>(BitVector.class, dimensionality);
for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
- DBID id = it.getDBID();
Bit[] bits = new Bit[dimensionality];
boolean allFalse = true;
for(int d = 0; d < dimensionality; d++) {
- if(neighborIDs[d].contains(id)) {
+ if(neighborIDs[d].contains(it)) {
bits[d] = new Bit(true);
allFalse = false;
}
@@ -286,9 +274,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
// result of apriori
List<BitSet> frequentItemsets = aprioriResult.getSolution();
Map<BitSet, Integer> supports = aprioriResult.getSupports();
- if(logger.isDebugging()) {
- msg.append("\n Frequent itemsets: " + frequentItemsets);
- msg.append("\n All supports: " + supports);
+ if(LOG.isDebugging()) {
+ msg.append("\n Frequent itemsets: ").append(frequentItemsets);
+ msg.append("\n All supports: ").append(supports);
}
int maxSupport = 0;
int maxCardinality = 0;
@@ -302,11 +290,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
}
}
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\n preference ");
msg.append(FormatUtil.format(dimensionality, preferenceVector));
- msg.append("\n");
- logger.debugFine(msg.toString());
+ msg.append('\n');
+ LOG.debugFine(msg.toString());
}
return preferenceVector;
@@ -319,7 +307,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
* @param msg a string buffer for debug messages
* @return the preference vector
*/
- private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuffer msg) {
+ private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuilder msg) {
int dimensionality = neighborIDs.length;
BitSet preferenceVector = new BitSet(dimensionality);
@@ -330,8 +318,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
candidates.put(i, s_i);
}
}
- if(logger.isDebugging()) {
- msg.append("\n candidates " + candidates.keySet());
+ if(LOG.isDebugging()) {
+ msg.append("\n candidates ").append(candidates.keySet());
}
if(!candidates.isEmpty()) {
@@ -355,11 +343,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
}
}
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\n preference ");
msg.append(FormatUtil.format(dimensionality, preferenceVector));
- msg.append("\n");
- logger.debug(msg.toString());
+ msg.append('\n');
+ LOG.debug(msg.toString());
}
return preferenceVector;
@@ -416,20 +404,19 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
* @param dimensionality the dimensionality of the objects
* @return the dimension selecting distancefunctions to determine the
* preference vectors
- * @throws ParameterException
*/
- private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) throws ParameterException {
+ private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) {
Class<RangeQuery<V, DoubleDistance>> rqcls = ClassGenericsUtil.uglyCastIntoSubclass(RangeQuery.class);
RangeQuery<V, DoubleDistance>[] rangeQueries = ClassGenericsUtil.newArrayOfNull(dimensionality, rqcls);
for(int d = 0; d < dimensionality; d++) {
- rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<V, DoubleDistance>(relation, new DimensionSelectingDistanceFunction(d + 1)));
+ rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<V, DoubleDistance>(relation, new DimensionSelectingDistanceFunction(d)));
}
return rangeQueries;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -443,7 +430,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
}
/**
- * Factory class
+ * Factory class.
*
* @author Erich Schubert
*
@@ -452,7 +439,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
*
* @param <V> Vector type
*/
- public static class Factory<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> {
+ public static class Factory<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> {
/**
* The default value for epsilon.
*/
@@ -472,7 +459,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
* Default value: {@link #DEFAULT_EPSILON}
* </p>
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("dish.epsilon", "A comma separated list of positive doubles specifying the " + "maximum radius of the neighborhood to be " + "considered in each dimension for determination of " + "the preference vector " + "(default is " + DEFAULT_EPSILON + " in each dimension). " + "If only one value is specified, this value " + "will be used for each dimension.");
+ public static final OptionID EPSILON_ID = new OptionID("dish.epsilon", "A comma separated list of positive doubles specifying the " + "maximum radius of the neighborhood to be " + "considered in each dimension for determination of " + "the preference vector " + "(default is " + DEFAULT_EPSILON + " in each dimension). " + "If only one value is specified, this value " + "will be used for each dimension.");
/**
* Option name for {@link #MINPTS_ID}.
@@ -493,12 +480,12 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
* Key: {@code -dish.minpts}
* </p>
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID(MINPTS_P, "Positive threshold for minumum numbers of points in the epsilon-" + "neighborhood of a point. " + CONDITION);
+ public static final OptionID MINPTS_ID = new OptionID(MINPTS_P, "Positive threshold for minumum numbers of points in the epsilon-" + "neighborhood of a point. " + CONDITION);
/**
* Default strategy.
*/
- public static Strategy DEFAULT_STRATEGY = Strategy.MAX_INTERSECTION;
+ public static final Strategy DEFAULT_STRATEGY = Strategy.MAX_INTERSECTION;
/**
* The strategy for determination of the preference vector, available
@@ -512,10 +499,10 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
* Default value: {@link #DEFAULT_STRATEGY}
* </p>
*/
- public static final OptionID STRATEGY_ID = OptionID.getOrCreateOptionID("dish.strategy", "The strategy for determination of the preference vector, " + "available strategies are: [" + Strategy.APRIORI + "| " + Strategy.MAX_INTERSECTION + "]" + "(default is " + DEFAULT_STRATEGY + ")");
+ public static final OptionID STRATEGY_ID = new OptionID("dish.strategy", "The strategy for determination of the preference vector, " + "available strategies are: [" + Strategy.APRIORI + "| " + Strategy.MAX_INTERSECTION + "]" + "(default is " + DEFAULT_STRATEGY + ")");
/**
- * The epsilon value for each dimension;
+ * The epsilon value for each dimension.
*/
protected DoubleDistance[] epsilon;
@@ -549,7 +536,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
}
/**
- * Return the minpts value
+ * Return the minpts value.
*
* @return minpts
*/
@@ -564,9 +551,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
- * The epsilon value for each dimension;
+ * The epsilon value for each dimension.
*/
protected DoubleDistance[] epsilon;
@@ -583,7 +570,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0));
+ final IntParameter minptsP = new IntParameter(MINPTS_ID);
+ minptsP.addConstraint(new GreaterConstraint(0));
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}