summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/preprocessed/preference')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java | 11
-rw-r--r-- src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java | 125
-rw-r--r-- src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java | 43
-rw-r--r-- src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java | 10
-rw-r--r-- src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/package-info.java | 2
5 files changed, 79 insertions, 112 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java
index dd43e027..80753ace 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.preference;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,15 +23,12 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.preference;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.index.preprocessed.AbstractPreprocessorIndex;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
/**
* Abstract base class for preference vector based algorithms.
@@ -40,7 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
*
* @param <NV> Number vector
*/
-public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?>> extends AbstractPreprocessorIndex<NV, BitSet> implements PreferenceVectorIndex<NV> {
+public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector> extends AbstractPreprocessorIndex<NV, long[]> implements PreferenceVectorIndex<NV> {
/**
* Constructor.
*
@@ -51,7 +48,7 @@ public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?>>
}
@Override
- public BitSet getPreferenceVector(DBIDRef objid) {
+ public long[] getPreferenceVector(DBIDRef objid) {
if(storage == null) {
initialize();
}
@@ -66,7 +63,7 @@ public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?>>
* @apiviz.stereotype factory
* @apiviz.uses AbstractPreferenceVectorIndex oneway - - «create»
*/
- public abstract static class Factory<V extends NumberVector<?>, I extends PreferenceVectorIndex<V>> implements PreferenceVectorIndex.Factory<V, I>, Parameterizable {
+ public abstract static class Factory<V extends NumberVector, I extends PreferenceVectorIndex<V>> implements PreferenceVectorIndex.Factory<V, I> {
@Override
public abstract I instantiate(Relation<V> relation);
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
index 99a13a23..ccc04d45 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.preference;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,17 +25,17 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.preference;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import de.lmu.ifi.dbs.elki.algorithm.APRIORI;
-import de.lmu.ifi.dbs.elki.data.Bit;
+import de.lmu.ifi.dbs.elki.algorithm.itemsetmining.APRIORI;
+import de.lmu.ifi.dbs.elki.algorithm.itemsetmining.Itemset;
import de.lmu.ifi.dbs.elki.data.BitVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.HashmapDatabase;
import de.lmu.ifi.dbs.elki.database.UpdatableDatabase;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
@@ -43,23 +43,21 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.datasource.bundle.SingleObjectBundle;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.OnedimensionalDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.AprioriResult;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
@@ -78,7 +76,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @param <V> Vector type
*/
@Description("Computes the preference vector of objects of a certain database according to the DiSH algorithm.")
-public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> {
+public class DiSHPreferenceVectorIndex<V extends NumberVector> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> {
/**
* Logger to use.
*/
@@ -103,7 +101,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
/**
* The epsilon value for each dimension.
*/
- protected DoubleDistance[] epsilon;
+ protected double[] epsilon;
/**
* Threshold for minimum number of points in the neighborhood.
@@ -123,7 +121,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
* @param minpts MinPts value
* @param strategy Strategy
*/
- public DiSHPreferenceVectorIndex(Relation<V> relation, DoubleDistance[] epsilon, int minpts, Strategy strategy) {
+ public DiSHPreferenceVectorIndex(Relation<V> relation, double[] epsilon, int minpts, Strategy strategy) {
super(relation);
this.epsilon = epsilon;
this.minpts = minpts;
@@ -136,7 +134,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
throw new IllegalArgumentException(ExceptionMessages.DATABASE_EMPTY);
}
- storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class);
+ storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, long[].class);
if(LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
@@ -152,13 +150,13 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
// only one epsilon value specified
int dim = RelationUtil.dimensionality(relation);
if(epsilon.length == 1 && dim != 1) {
- DoubleDistance eps = epsilon[0];
- epsilon = new DoubleDistance[dim];
+ double eps = epsilon[0];
+ epsilon = new double[dim];
Arrays.fill(epsilon, eps);
}
// epsilons as string
- RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim);
+ RangeQuery<V>[] rangeQueries = initRangeQueries(relation, dim);
for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
StringBuilder msg = new StringBuilder();
@@ -172,7 +170,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
// determine neighbors in each dimension
ModifiableDBIDs[] allNeighbors = new ModifiableDBIDs[dim];
for(int d = 0; d < dim; d++) {
- DistanceDBIDList<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(it, epsilon[d]);
+ DoubleDBIDList qrList = rangeQueries[d].getRangeForDBID(it, epsilon[d]);
allNeighbors[d] = DBIDUtil.newHashSet(qrList);
}
@@ -184,24 +182,15 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
}
}
- try {
- storage.put(it, determinePreferenceVector(relation, allNeighbors, msg));
- }
- catch(UnableToComplyException e) {
- throw new IllegalStateException(e);
- }
+ storage.put(it, determinePreferenceVector(relation, allNeighbors, msg));
if(LOG.isDebugging()) {
LOG.debugFine(msg.toString());
}
- if(progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- if(progress != null) {
- progress.ensureCompleted(LOG);
+ LOG.incrementProcessed(progress);
}
+ LOG.ensureCompleted(progress);
long end = System.currentTimeMillis();
// TODO: re-add timing code!
@@ -218,10 +207,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
* @param neighborIDs the list of ids of the neighbors in each dimension
* @param msg a string buffer for debug messages
* @return the preference vector
- *
- * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException
*/
- private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException {
+ private long[] determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) {
if(strategy.equals(Strategy.APRIORI)) {
return determinePreferenceVectorByApriori(relation, neighborIDs, msg);
}
@@ -240,31 +227,25 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
* @param neighborIDs the list of ids of the neighbors in each dimension
* @param msg a string buffer for debug messages
* @return the preference vector
- *
- * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException
- *
*/
- private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException {
+ private long[] determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) {
int dimensionality = neighborIDs.length;
// database for apriori
UpdatableDatabase apriori_db = new HashmapDatabase();
- SimpleTypeInformation<?> bitmeta = new VectorFieldTypeInformation<>(BitVector.class, dimensionality);
+ SimpleTypeInformation<?> bitmeta = VectorFieldTypeInformation.typeRequest(BitVector.class, dimensionality, dimensionality);
for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
- Bit[] bits = new Bit[dimensionality];
+ long[] bits = BitsUtil.zero(dimensionality);
boolean allFalse = true;
for(int d = 0; d < dimensionality; d++) {
if(neighborIDs[d].contains(it)) {
- bits[d] = Bit.TRUE;
+ BitsUtil.setI(bits, d);
allFalse = false;
}
- else {
- bits[d] = Bit.FALSE;
- }
}
if(!allFalse) {
SingleObjectBundle oaa = new SingleObjectBundle();
- oaa.append(bitmeta, new BitVector(bits));
+ oaa.append(bitmeta, new BitVector(bits, dimensionality));
apriori_db.insert(oaa);
}
}
@@ -272,27 +253,24 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
AprioriResult aprioriResult = apriori.run(apriori_db);
// result of apriori
- List<BitSet> frequentItemsets = aprioriResult.getSolution();
- Map<BitSet, Integer> supports = aprioriResult.getSupports();
+ List<Itemset> frequentItemsets = aprioriResult.getItemsets();
if(LOG.isDebugging()) {
msg.append("\n Frequent itemsets: ").append(frequentItemsets);
- msg.append("\n All supports: ").append(supports);
}
int maxSupport = 0;
int maxCardinality = 0;
- BitSet preferenceVector = new BitSet();
- for(BitSet bitSet : frequentItemsets) {
- int cardinality = bitSet.cardinality();
- if((maxCardinality < cardinality) || (maxCardinality == cardinality && maxSupport == supports.get(bitSet))) {
- preferenceVector = bitSet;
- maxCardinality = cardinality;
- maxSupport = supports.get(bitSet);
+ long[] preferenceVector = BitsUtil.zero(dimensionality);
+ for(Itemset itemset : frequentItemsets) {
+ if((maxCardinality < itemset.length()) || (maxCardinality == itemset.length() && maxSupport == itemset.getSupport())) {
+ preferenceVector = itemset.getItems();
+ maxCardinality = itemset.length();
+ maxSupport = itemset.getSupport();
}
}
if(LOG.isDebugging()) {
msg.append("\n preference ");
- msg.append(FormatUtil.format(dimensionality, preferenceVector));
+ msg.append(BitsUtil.toStringLow(preferenceVector, dimensionality));
msg.append('\n');
LOG.debugFine(msg.toString());
}
@@ -307,9 +285,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
* @param msg a string buffer for debug messages
* @return the preference vector
*/
- private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuilder msg) {
+ private long[] determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuilder msg) {
int dimensionality = neighborIDs.length;
- BitSet preferenceVector = new BitSet(dimensionality);
+ long[] preferenceVector = BitsUtil.zero(dimensionality);
Map<Integer, ModifiableDBIDs> candidates = new HashMap<>(dimensionality);
for(int i = 0; i < dimensionality; i++) {
@@ -325,7 +303,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
if(!candidates.isEmpty()) {
int i = max(candidates);
ModifiableDBIDs intersection = candidates.remove(i);
- preferenceVector.set(i);
+ BitsUtil.setI(preferenceVector, i);
while(!candidates.isEmpty()) {
ModifiableDBIDs newIntersection = DBIDUtil.newHashSet();
i = maxIntersection(candidates, intersection, newIntersection);
@@ -338,14 +316,14 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
break;
}
else {
- preferenceVector.set(i);
+ BitsUtil.setI(preferenceVector, i);
}
}
}
if(LOG.isDebugging()) {
msg.append("\n preference ");
- msg.append(FormatUtil.format(dimensionality, preferenceVector));
+ msg.append(BitsUtil.toStringLow(preferenceVector, dimensionality));
msg.append('\n');
LOG.debug(msg.toString());
}
@@ -405,11 +383,12 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
* @return the dimension selecting distancefunctions to determine the
* preference vectors
*/
- private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) {
- Class<RangeQuery<V, DoubleDistance>> rqcls = ClassGenericsUtil.uglyCastIntoSubclass(RangeQuery.class);
- RangeQuery<V, DoubleDistance>[] rangeQueries = ClassGenericsUtil.newArrayOfNull(dimensionality, rqcls);
+ private RangeQuery<V>[] initRangeQueries(Relation<V> relation, int dimensionality) {
+ Database db = relation.getDatabase();
+ Class<RangeQuery<V>> rqcls = ClassGenericsUtil.uglyCastIntoSubclass(RangeQuery.class);
+ RangeQuery<V>[] rangeQueries = ClassGenericsUtil.newArrayOfNull(dimensionality, rqcls);
for(int d = 0; d < dimensionality; d++) {
- rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<>(relation, new DimensionSelectingDistanceFunction(d)));
+ rangeQueries[d] = db.getRangeQuery(new PrimitiveDistanceQuery<>(relation, new OnedimensionalDistanceFunction(d)));
}
return rangeQueries;
}
@@ -444,11 +423,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
*
* @param <V> Vector type
*/
- public static class Factory<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> {
+ public static class Factory<V extends NumberVector> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> {
/**
* The default value for epsilon.
*/
- public static final DoubleDistance DEFAULT_EPSILON = new DoubleDistance(0.001);
+ public static final double DEFAULT_EPSILON = 0.001;
/**
* A comma separated list of positive doubles specifying the maximum radius
@@ -509,7 +488,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
/**
* The epsilon value for each dimension.
*/
- protected DoubleDistance[] epsilon;
+ protected double[] epsilon;
/**
* Threshold for minimum number of points in the neighborhood.
@@ -528,7 +507,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
* @param minpts Minpts
* @param strategy Strategy
*/
- public Factory(DoubleDistance[] epsilon, int minpts, Strategy strategy) {
+ public Factory(double[] epsilon, int minpts, Strategy strategy) {
super();
this.epsilon = epsilon;
this.minpts = minpts;
@@ -556,11 +535,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* The epsilon value for each dimension.
*/
- protected DoubleDistance[] epsilon;
+ protected double[] epsilon;
/**
* Threshold for minimum number of points in the neighborhood.
@@ -584,16 +563,16 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
// parameter epsilon
// todo: constraint auf positive werte
List<Double> defaultEps = new ArrayList<>();
- defaultEps.add(DEFAULT_EPSILON.doubleValue());
+ defaultEps.add(DEFAULT_EPSILON);
final DoubleListParameter epsilonP = new DoubleListParameter(EPSILON_ID, true);
epsilonP.setDefaultValue(defaultEps);
if(config.grab(epsilonP)) {
List<Double> eps_list = epsilonP.getValue();
- epsilon = new DoubleDistance[eps_list.size()];
+ epsilon = new double[eps_list.size()];
for(int d = 0; d < eps_list.size(); d++) {
- epsilon[d] = new DoubleDistance(eps_list.get(d));
- if(epsilon[d].doubleValue() < 0) {
+ epsilon[d] = eps_list.get(d);
+ if(epsilon[d] < 0) {
config.reportError(new WrongParameterValueException(epsilonP, eps_list.toString()));
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java
index 8ead8458..843e4eda 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.preference;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.preference;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.HiSC;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
@@ -34,15 +32,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -66,7 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("HiSC Preprocessor")
@Description("Computes the preference vector of objects of a certain database according to the HiSC algorithm.")
-public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> {
+public class HiSCPreferenceVectorIndex<V extends NumberVector> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> {
/**
* Logger to use.
*/
@@ -101,14 +98,14 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
throw new IllegalArgumentException(ExceptionMessages.DATABASE_EMPTY);
}
- storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class);
+ storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, long[].class);
StringBuilder msg = new StringBuilder();
long start = System.currentTimeMillis();
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null;
- KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, EuclideanDistanceFunction.STATIC, k);
+ KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, EuclideanDistanceFunction.STATIC, k);
for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
if(LOG.isDebugging()) {
@@ -117,17 +114,13 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
msg.append("\n knns: ");
}
- KNNList<DoubleDistance> knns = knnQuery.getKNNForDBID(it, k);
- BitSet preferenceVector = determinePreferenceVector(relation, it, knns, msg);
+ KNNList knns = knnQuery.getKNNForDBID(it, k);
+ long[] preferenceVector = determinePreferenceVector(relation, it, knns, msg);
storage.put(it, preferenceVector);
- if(progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- if(progress != null) {
- progress.ensureCompleted(LOG);
+ LOG.incrementProcessed(progress);
}
+ LOG.ensureCompleted(progress);
if(LOG.isDebugging()) {
LOG.debugFine(msg.toString());
@@ -151,24 +144,24 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
* @param msg a string buffer for debug messages
* @return the preference vector
*/
- private BitSet determinePreferenceVector(Relation<V> relation, DBIDRef id, DBIDs neighborIDs, StringBuilder msg) {
+ private long[] determinePreferenceVector(Relation<V> relation, DBIDRef id, DBIDs neighborIDs, StringBuilder msg) {
// variances
- double[] variances = DatabaseUtil.variances(relation, relation.get(id), neighborIDs);
+ double[] variances = RelationUtil.variances(relation, relation.get(id), neighborIDs);
// preference vector
- BitSet preferenceVector = new BitSet(variances.length);
+ long[] preferenceVector = BitsUtil.zero(variances.length);
for(int d = 0; d < variances.length; d++) {
if(variances[d] < alpha) {
- preferenceVector.set(d);
+ BitsUtil.setI(preferenceVector, d);
}
}
if(msg != null && LOG.isDebugging()) {
msg.append("\nalpha ").append(alpha);
msg.append("\nvariances ");
- msg.append(FormatUtil.format(variances, ", ", 4));
+ msg.append(FormatUtil.format(variances, ", ", FormatUtil.NF4));
msg.append("\npreference ");
- msg.append(FormatUtil.format(variances.length, preferenceVector));
+ msg.append(BitsUtil.toStringLow(preferenceVector, variances.length));
}
return preferenceVector;
@@ -204,7 +197,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
*
* @param <V> Vector type
*/
- public static class Factory<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex.Factory<V, HiSCPreferenceVectorIndex<V>> {
+ public static class Factory<V extends NumberVector> extends AbstractPreferenceVectorIndex.Factory<V, HiSCPreferenceVectorIndex<V>> {
/**
* The default value for alpha.
*/
@@ -276,7 +269,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* Holds the value of parameter {@link #ALPHA_ID}.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java
index 87a9d3dd..45d88dd5 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.preference;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.preference;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -38,14 +36,14 @@ import de.lmu.ifi.dbs.elki.index.IndexFactory;
*
* @param <NV> Vector type
*/
-public interface PreferenceVectorIndex<NV extends NumberVector<?>> extends Index {
+public interface PreferenceVectorIndex<NV extends NumberVector> extends Index {
/**
* Get the precomputed preference vector for a particular object ID.
*
* @param id Object ID
* @return Matrix
*/
- public BitSet getPreferenceVector(DBIDRef id);
+ public long[] getPreferenceVector(DBIDRef id);
/**
* Factory interface
@@ -58,7 +56,7 @@ public interface PreferenceVectorIndex<NV extends NumberVector<?>> extends Index
* @param <V> vector type
* @param <I> index type
*/
- public static interface Factory<V extends NumberVector<?>, I extends PreferenceVectorIndex<V>> extends IndexFactory<V, I> {
+ public static interface Factory<V extends NumberVector, I extends PreferenceVectorIndex<V>> extends IndexFactory<V, I> {
/**
* Instantiate the index for a given database.
*
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/package-info.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/package-info.java
index e840bfd0..f912afb7 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team