Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans')
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java                   36
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java     24
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java                2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java                            8
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java              4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java                      87
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java                   85
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java       19
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java                    67
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java                       71
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java                      95
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java                  14
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java       13
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java    23
14 files changed, 309 insertions, 239 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
index 92862909..47855aad 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
@@ -27,10 +27,12 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
@@ -50,12 +52,14 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
*
* @author Erich Schubert
*
+ * @apiviz.has MeanModel
* @apiviz.composedOf KMeansInitialization
*
* @param <V> Vector type
* @param <D> Distance type
+ * @param <M> Cluster model type
*/
-public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?, ?>, D, Clustering<MeanModel<V>>> implements KMeans {
+public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?>, D, Clustering<M>> implements KMeans, ClusteringAlgorithm<Clustering<M>> {
/**
* Holds the value of {@link #K_ID}.
*/
@@ -79,7 +83,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
* @param maxiter Maxiter parameter
* @param initializer Function to generate the initial means
*/
- public AbstractKMeans(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public AbstractKMeans(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
super(distanceFunction);
this.k = k;
this.maxiter = maxiter;
@@ -95,12 +99,12 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
* @param clusters cluster assignment
* @return true when the object was reassigned
*/
- protected boolean assignToNearestCluster(Relation<V> relation, List<? extends NumberVector<?, ?>> means, List<? extends ModifiableDBIDs> clusters) {
+ protected boolean assignToNearestCluster(Relation<V> relation, List<? extends NumberVector<?>> means, List<? extends ModifiableDBIDs> clusters) {
boolean changed = false;
if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
@SuppressWarnings("unchecked")
- final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+ final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double mindist = Double.POSITIVE_INFINITY;
V fv = relation.get(iditer);
@@ -127,7 +131,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
}
}
else {
- final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+ final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
D mindist = df.getDistanceFactory().infiniteDistance();
V fv = relation.get(iditer);
@@ -158,7 +162,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ return TypeUtil.array(new CombinedTypeInformation(TypeUtil.NUMBER_VECTOR_FIELD, getDistanceFunction().getInputTypeRestriction()));
}
/**
@@ -169,7 +173,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
* @param database the database containing the vectors
* @return the mean vectors of the given clusters in the given database
*/
- protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?, ?>> means, Relation<V> database) {
+ protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> means, Relation<V> database) {
List<Vector> newMeans = new ArrayList<Vector>(k);
for(int i = 0; i < k; i++) {
ModifiableDBIDs list = clusters.get(i);
@@ -200,30 +204,30 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
* @param database the database containing the vectors
* @return the mean vectors of the given clusters in the given database
*/
- protected List<NumberVector<?, ?>> medians(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?, ?>> medians, Relation<V> database) {
+ protected List<NumberVector<?>> medians(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> medians, Relation<V> database) {
final int dim = medians.get(0).getDimensionality();
final SortDBIDsBySingleDimension sorter = new SortDBIDsBySingleDimension(database);
- List<NumberVector<?, ?>> newMedians = new ArrayList<NumberVector<?, ?>>(k);
+ List<NumberVector<?>> newMedians = new ArrayList<NumberVector<?>>(k);
for(int i = 0; i < k; i++) {
ArrayModifiableDBIDs list = DBIDUtil.newArray(clusters.get(i));
if(list.size() > 0) {
Vector mean = new Vector(dim);
for(int d = 0; d < dim; d++) {
- sorter.setDimension(d + 1);
+ sorter.setDimension(d);
DBID id = QuickSelect.median(list, sorter);
- mean.set(d, database.get(id).doubleValue(d + 1));
+ mean.set(d, database.get(id).doubleValue(d));
}
newMedians.add(mean);
}
else {
- newMedians.add((NumberVector<?, ?>) medians.get(i));
+ newMedians.add((NumberVector<?>) medians.get(i));
}
}
return newMedians;
}
/**
- * Compute an incremental update for the mean
+ * Compute an incremental update for the mean.
*
* @param mean Mean to update
* @param vec Object vector
@@ -255,7 +259,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
// Raw distance function
@SuppressWarnings("unchecked")
- final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+ final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
// Incremental update
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -287,7 +291,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
}
else {
// Raw distance function
- final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+ final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
// Incremental update
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -319,4 +323,4 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
}
return changed;
}
-} \ No newline at end of file
+}
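For context on the medians() hunk above (which now uses 0-based dimension indexing), here is a minimal plain-Java sketch of the per-dimension median step. It uses a full sort per dimension instead of ELKI's QuickSelect, and hypothetical double[] vectors rather than the Relation/DBID machinery; it only illustrates the idea.

import java.util.Arrays;
import java.util.List;

// Sketch of the per-dimension median used in k-medians (0-based dims).
final class MedianSketch {
  static double[] median(List<double[]> cluster, int dim) {
    double[] median = new double[dim];
    double[] column = new double[cluster.size()];
    for (int d = 0; d < dim; d++) {            // 0-based, as in the patched code
      for (int i = 0; i < cluster.size(); i++) {
        column[i] = cluster.get(i)[d];
      }
      Arrays.sort(column);                     // QuickSelect would avoid the full sort
      median[d] = column[column.length >> 1];  // upper median for even sizes
    }
    return median;
  }
}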
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
index a8effecd..3a69c806 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
@@ -22,9 +22,10 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* Abstract base class for common k-means initializations.
@@ -35,17 +36,17 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
*/
public abstract class AbstractKMeansInitialization<V> implements KMeansInitialization<V> {
/**
- * Holds the value of {@link KMeans#SEED_ID}.
+ * Random number generator
*/
- protected Long seed;
+ protected RandomFactory rnd;
/**
* Constructor.
*
- * @param seed Random seed.
+ * @param rnd Random number generator.
*/
- public AbstractKMeansInitialization(Long seed) {
- this.seed = seed;
+ public AbstractKMeansInitialization(RandomFactory rnd) {
+ this.rnd = rnd;
}
/**
@@ -56,14 +57,17 @@ public abstract class AbstractKMeansInitialization<V> implements KMeansInitializ
* @apiviz.exclude
*/
public abstract static class Parameterizer<V> extends AbstractParameterizer {
- protected Long seed;
+ /**
+ * Random generator
+ */
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- LongParameter seedP = new LongParameter(KMeans.SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ RandomParameter rndP = new RandomParameter(KMeans.SEED_ID);
+ if(config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
}
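The change above replaces a raw Long seed with a RandomFactory parameter. As a minimal sketch of that pattern (plain Java, hypothetical class name, not ELKI's actual RandomFactory), a factory either hands out a deterministically seeded generator or a fresh one:

import java.util.Random;

// Seed-to-factory pattern: callers ask for a Random instead of carrying a Long seed.
final class SimpleRandomFactory {
  private final Long seed; // null = non-deterministic

  SimpleRandomFactory(Long seed) {
    this.seed = seed;
  }

  Random getRandom() {
    return (seed != null) ? new Random(seed) : new Random();
  }
}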
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
index 7a7f2867..1e51f4d6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
@@ -77,7 +77,7 @@ public class FirstKInitialMeans<V> implements KMeansInitialization<V>, KMedoidsI
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
@Override
protected FirstKInitialMeans<V> makeInstance() {
return new FirstKInitialMeans<V>();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
index 37171d4a..68fc4e48 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
@@ -34,22 +34,22 @@ public interface KMeans {
/**
* Parameter to specify the initialization method
*/
- public static final OptionID INIT_ID = OptionID.getOrCreateOptionID("kmeans.initialization", "Method to choose the initial means.");
+ public static final OptionID INIT_ID = new OptionID("kmeans.initialization", "Method to choose the initial means.");
/**
* Parameter to specify the number of clusters to find, must be an integer
* greater than 0.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("kmeans.k", "The number of clusters to find.");
+ public static final OptionID K_ID = new OptionID("kmeans.k", "The number of clusters to find.");
/**
* Parameter to specify the number of clusters to find, must be an integer
* greater or equal to 0, where 0 means no limit.
*/
- public static final OptionID MAXITER_ID = OptionID.getOrCreateOptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
+ public static final OptionID MAXITER_ID = new OptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
/**
* Parameter to specify the random generator seed.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("kmeans.seed", "The random number generator seed.");
+ public static final OptionID SEED_ID = new OptionID("kmeans.seed", "The random number generator seed.");
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
index 9e5d69f0..54b3a2ce 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
@@ -31,6 +31,8 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
* Interface for initializing K-Means
*
* @author Erich Schubert
+ *
+ * @apiviz.landmark
*
* @param <V> Object type
*/
@@ -44,4 +46,4 @@ public interface KMeansInitialization<V> {
* @return List of chosen means for k-means
*/
public abstract List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction);
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
index b1b40632..f43c2277 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -27,19 +27,20 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -62,7 +63,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Arthur Zimek
*
- * @apiviz.has MeanModel
+ * @apiviz.landmark
+ * @apiviz.has KMeansModel
*
* @param <V> vector datatype
* @param <D> distance value type
@@ -70,11 +72,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@Title("K-Means")
@Description("Finds a partitioning into k clusters.")
@Reference(authors = "S. Lloyd", title = "Least squares quantization in PCM", booktitle = "IEEE Transactions on Information Theory 28 (2): 129–137.", url = "http://dx.doi.org/10.1109/TIT.1982.1056489")
-public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans<V, D, KMeansModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMeansLloyd.class);
+ private static final Logging LOG = Logging.getLogger(KMeansLloyd.class);
/**
* Constructor.
@@ -82,55 +84,56 @@ public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> ex
* @param distanceFunction distance function
* @param k k parameter
* @param maxiter Maxiter parameter
+ * @param initializer Initialization method
*/
- public KMeansLloyd(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public KMeansLloyd(PrimitiveDistanceFunction<NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
super(distanceFunction, k, maxiter, initializer);
}
/**
- * Run k-means
+ * Run k-means.
*
* @param database Database
* @param relation relation to use
* @return result
*/
- public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
- return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+ public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
+ if (relation.size() <= 0) {
+ return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
- List<? extends NumberVector<?, ?>> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
- for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if(logger.isVerbose()) {
- logger.verbose("K-Means iteration " + (iteration + 1));
+ for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("K-Means iteration " + (iteration + 1));
}
boolean changed = assignToNearestCluster(relation, means, clusters);
// Stop if no cluster assignment changed.
- if(!changed) {
+ if (!changed) {
break;
}
// Recompute means.
means = means(clusters, means, relation);
}
// Wrap result
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
- Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
- for(int i = 0; i < clusters.size(); i++) {
- MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
- result.addCluster(new Cluster<MeanModel<V>>(clusters.get(i), model));
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ for (int i = 0; i < clusters.size(); i++) {
+ KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
+ result.addCluster(new Cluster<KMeansModel<V>>(clusters.get(i), model));
}
return result;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -140,35 +143,53 @@ public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> ex
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
+ /**
+ * k Parameter.
+ */
protected int k;
+ /**
+ * Number of iterations.
+ */
protected int maxiter;
+ /**
+ * Initialization method.
+ */
protected KMeansInitialization<V> initializer;
@Override
protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
+ ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
+ if(config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
+ LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
+ }
+ }
+
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
k = kP.getValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
+ IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
}
}
@Override
- protected AbstractKMeans<V, D> makeInstance() {
+ protected KMeansLloyd<V, D> makeInstance() {
return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
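The run() method above implements the classic Lloyd loop: assign every object to its nearest mean, recompute the means, and stop when no assignment changes or the iteration limit is reached. A self-contained plain-Java sketch of that loop on raw double[] data (using squared Euclidean distance, matching the new default; not the ELKI implementation) looks roughly like this:

import java.util.Arrays;

// Sketch of the Lloyd iteration: assign, recompute, stop when stable.
final class LloydSketch {
  static int[] run(double[][] data, double[][] means, int maxiter) {
    int[] assign = new int[data.length];
    Arrays.fill(assign, -1);
    for (int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
      boolean changed = false;
      for (int i = 0; i < data.length; i++) {
        int best = 0;
        double bestd = Double.POSITIVE_INFINITY;
        for (int c = 0; c < means.length; c++) {
          double d = 0;
          for (int j = 0; j < data[i].length; j++) {
            double delta = data[i][j] - means[c][j];
            d += delta * delta;                // squared Euclidean
          }
          if (d < bestd) { bestd = d; best = c; }
        }
        if (assign[i] != best) { assign[i] = best; changed = true; }
      }
      if (!changed) {
        break;                                 // converged
      }
      // Recompute each mean as the centroid of its cluster.
      double[][] sums = new double[means.length][means[0].length];
      int[] counts = new int[means.length];
      for (int i = 0; i < data.length; i++) {
        counts[assign[i]]++;
        for (int j = 0; j < data[i].length; j++) {
          sums[assign[i]][j] += data[i][j];
        }
      }
      for (int c = 0; c < means.length; c++) {
        if (counts[c] > 0) {
          for (int j = 0; j < sums[c].length; j++) {
            means[c][j] = sums[c][j] / counts[c];
          }
        }
      }
    }
    return assign;
  }
}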
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
index c729eb10..0cc7c363 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
@@ -27,21 +27,22 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -62,8 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* </p>
*
* @author Erich Schubert
- *
- * @apiviz.has MeanModel
+ * @apiviz.has KMeansModel
*
* @param <V> vector type to use
* @param <D> distance function value type
@@ -71,11 +71,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@Title("K-Means")
@Description("Finds a partitioning into k clusters.")
@Reference(authors = "J. MacQueen", title = "Some Methods for Classification and Analysis of Multivariate Observations", booktitle = "5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297", url = "http://projecteuclid.org/euclid.bsmsp/1200512992")
-public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans<V, D, KMeansModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMeansMacQueen.class);
+ private static final Logging LOG = Logging.getLogger(KMeansMacQueen.class);
/**
* Constructor.
@@ -83,30 +83,31 @@ public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>>
* @param distanceFunction distance function
* @param k k parameter
* @param maxiter Maxiter parameter
+ * @param initializer Initialization method
*/
- public KMeansMacQueen(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public KMeansMacQueen(PrimitiveDistanceFunction<NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
super(distanceFunction, k, maxiter, initializer);
}
/**
- * Run k-means
+ * Run k-means.
*
* @param database Database
* @param relation relation to use
* @return Clustering result
*/
- public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
- return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+ public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
+ if (relation.size() <= 0) {
+ return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
List<Vector> means = new ArrayList<Vector>(k);
- for(NumberVector<?, ?> nv : initializer.chooseInitialMeans(relation, k, getDistanceFunction())) {
+ for (NumberVector<?> nv : initializer.chooseInitialMeans(relation, k, getDistanceFunction())) {
means.add(nv.getColumnVector());
}
// Initialize cluster and assign objects
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
assignToNearestCluster(relation, means, clusters);
@@ -114,28 +115,28 @@ public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>>
means = means(clusters, means, relation);
// Refine result
- for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if(logger.isVerbose()) {
- logger.verbose("K-Means iteration " + (iteration + 1));
+ for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("K-Means iteration " + (iteration + 1));
}
boolean changed = macQueenIterate(relation, means, clusters);
- if(!changed) {
+ if (!changed) {
break;
}
}
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
- Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
- for(int i = 0; i < clusters.size(); i++) {
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
- MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
- result.addCluster(new Cluster<MeanModel<V>>(ids, model));
+ KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
+ result.addCluster(new Cluster<KMeansModel<V>>(ids, model));
}
return result;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -145,35 +146,53 @@ public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>>
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
+ /**
+ * k Parameter.
+ */
protected int k;
+ /**
+ * Maximum number of iterations.
+ */
protected int maxiter;
+ /**
+ * Initialization method.
+ */
protected KMeansInitialization<V> initializer;
@Override
protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
+ ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
+ if (config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
+ LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
+ }
+ }
+
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
k = kP.getValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
+ IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
maxiter = maxiterP.getValue();
}
}
@Override
- protected AbstractKMeans<V, D> makeInstance() {
+ protected KMeansMacQueen<V, D> makeInstance() {
return new KMeansMacQueen<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
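MacQueen's variant differs from Lloyd's in that means are updated incrementally via macQueenIterate() whenever a single object changes cluster, instead of being recomputed from scratch once per iteration. A sketch of the incremental update, assuming plain double[] means (hypothetical helper, not the ELKI code):

// Incremental mean update: when a point x joins a cluster whose mean was built
// from (newSize - 1) points, the mean can be shifted in place.
final class IncrementalMean {
  static void add(double[] mean, double[] x, int newSize) {
    for (int j = 0; j < mean.length; j++) {
      mean[j] += (x[j] - mean[j]) / newSize;
    }
  }

  // Inverse update when a point leaves a cluster that now holds newSize points.
  static void remove(double[] mean, double[] x, int newSize) {
    for (int j = 0; j < mean.length; j++) {
      mean[j] -= (x[j] - mean[j]) / newSize;
    }
  }
}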
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
index 9afeff6c..a07953da 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
@@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
@@ -62,10 +63,10 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
/**
* Constructor.
*
- * @param seed Random seed.
+ * @param rnd Random generator.
*/
- public KMeansPlusPlusInitialMeans(Long seed) {
- super(seed);
+ public KMeansPlusPlusInitialMeans(RandomFactory rnd) {
+ super(rnd);
}
@Override
@@ -81,8 +82,8 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
// Chose first mean
List<V> means = new ArrayList<V>(k);
- Random random = (seed != null) ? new Random(seed) : new Random();
- DBID first = DBIDUtil.randomSample(relation.getDBIDs(), 1, random.nextLong()).iter().getDBID();
+ Random random = rnd.getRandom();
+ DBID first = DBIDUtil.deref(DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter());
means.add(relation.get(first));
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
@@ -131,8 +132,8 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
// Chose first mean
ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
- Random random = (seed != null) ? new Random(seed) : new Random();
- DBID first = DBIDUtil.randomSample(distQ.getRelation().getDBIDs(), 1, random.nextLong()).iter().getDBID();
+ Random random = rnd.getRandom();
+ DBID first = DBIDUtil.deref(DBIDUtil.randomSample(distQ.getRelation().getDBIDs(), 1, new Random(random.nextLong())).iter());
means.add(first);
ArrayDBIDs ids = DBIDUtil.ensureArray(distQ.getRelation().getDBIDs());
@@ -176,7 +177,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
double weightsum = 0.0;
DBIDIter it = ids.iter();
for(int i = 0; i < weights.length; i++, it.advance()) {
- if(latest.sameDBID(it)) {
+ if(DBIDUtil.equal(latest, it)) {
weights[i] = 0.0;
}
else {
@@ -243,7 +244,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
public static class Parameterizer<V, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
@Override
protected KMeansPlusPlusInitialMeans<V, D> makeInstance() {
- return new KMeansPlusPlusInitialMeans<V, D>(seed);
+ return new KMeansPlusPlusInitialMeans<V, D>(rnd);
}
}
} \ No newline at end of file
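The weight bookkeeping above (zeroing the weight of the most recently chosen point) is part of k-means++ D²-weighted seeding: each further center is drawn with probability proportional to the squared distance to the nearest center chosen so far. A rough, self-contained sketch on double[] data (not the ELKI distance-query API):

import java.util.List;
import java.util.Random;

// D^2 sampling: pick the next center with probability proportional to the
// squared distance to the nearest already-chosen center.
final class KMeansPlusPlusSketch {
  static int sampleNext(double[][] data, List<double[]> centers, Random random) {
    double[] weights = new double[data.length];
    double total = 0;
    for (int i = 0; i < data.length; i++) {
      double best = Double.POSITIVE_INFINITY;
      for (double[] c : centers) {
        double d = 0;
        for (int j = 0; j < c.length; j++) {
          double delta = data[i][j] - c[j];
          d += delta * delta;
        }
        best = Math.min(best, d);
      }
      weights[i] = best;                       // 0 for points already chosen as centers
      total += best;
    }
    double r = random.nextDouble() * total;
    for (int i = 0; i < weights.length; i++) {
      r -= weights[i];
      if (r <= 0) {
        return i;
      }
    }
    return weights.length - 1;                 // numerical fallback
  }
}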
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
index 8c284981..9917337e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
@@ -27,7 +27,6 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -36,10 +35,10 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
@@ -61,18 +60,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
- * @apiviz.has MeanModel
- *
* @param <V> vector datatype
* @param <D> distance value type
*/
@Title("K-Medians")
-@Reference(title = "Clustering via Concave Minimization", authors = "P. S. Bradley, O. L. Mangasarian, W. N. Street", booktitle = "Advances in neural information processing systems", url="http://nips.djvuzone.org/djvu/nips09/0368.djvu")
-public class KMediansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+@Reference(title = "Clustering via Concave Minimization", authors = "P. S. Bradley, O. L. Mangasarian, W. N. Street", booktitle = "Advances in neural information processing systems", url = "http://nips.djvuzone.org/djvu/nips09/0368.djvu")
+public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans<V, D, MeanModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMediansLloyd.class);
+ private static final Logging LOG = Logging.getLogger(KMediansLloyd.class);
/**
* Constructor.
@@ -80,46 +77,47 @@ public class KMediansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>>
* @param distanceFunction distance function
* @param k k parameter
* @param maxiter Maxiter parameter
+ * @param initializer Initialization method
*/
- public KMediansLloyd(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public KMediansLloyd(PrimitiveDistanceFunction<NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
super(distanceFunction, k, maxiter, initializer);
}
/**
- * Run k-medians
+ * Run k-medians.
*
* @param database Database
* @param relation relation to use
* @return result
*/
public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
+ if (relation.size() <= 0) {
return new Clustering<MeanModel<V>>("k-Medians Clustering", "kmedians-clustering");
}
// Choose initial medians
- List<? extends NumberVector<?, ?>> medians = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> medians = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
- for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if(logger.isVerbose()) {
- logger.verbose("K-Medians iteration " + (iteration + 1));
+ for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("K-Medians iteration " + (iteration + 1));
}
boolean changed = assignToNearestCluster(relation, medians, clusters);
// Stop if no cluster assignment changed.
- if(!changed) {
+ if (!changed) {
break;
}
// Recompute medians.
medians = medians(clusters, medians, relation);
}
// Wrap result
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Medians Clustering", "kmedians-clustering");
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(medians.get(i).getColumnVector().getArrayRef()));
result.addCluster(new Cluster<MeanModel<V>>(clusters.get(i), model));
}
@@ -128,7 +126,7 @@ public class KMediansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>>
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -138,35 +136,46 @@ public class KMediansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>>
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
+ /**
+ * k Parameter.
+ */
protected int k;
+ /**
+ * Maximum number of iterations.
+ */
protected int maxiter;
+ /**
+ * Initialization method.
+ */
protected KMeansInitialization<V> initializer;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
- k = kP.getValue();
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
+ IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
}
}
@Override
- protected AbstractKMeans<V, D> makeInstance() {
+ protected KMediansLloyd<V, D> makeInstance() {
return new KMediansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
index a5c3d675..f4398458 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
@@ -78,7 +78,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMedoidsEM.class);
+ private static final Logging LOG = Logging.getLogger(KMedoidsEM.class);
/**
* Holds the value of {@link AbstractKMeans#K_ID}.
@@ -118,7 +118,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
* @return result
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
+ if (relation.size() <= 0) {
return new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
@@ -126,7 +126,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
Mean[] mdists = Mean.newArray(k);
@@ -137,41 +137,41 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
// Swap phase
boolean changed = true;
- while(changed) {
+ while (changed) {
changed = false;
// Try to swap the medoid with a better cluster member:
- for(int i = 0; i < k; i++) {
- DBID med = medoids.get(i);
+ int i = 0;
+ for (DBIDIter miter = medoids.iter(); miter.valid(); miter.advance(), i++) {
DBID best = null;
Mean bestm = mdists[i];
- for(DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
- if(med.sameDBID(iter)) {
+ for (DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(miter, iter)) {
continue;
}
Mean mdist = new Mean();
- for(DBIDIter iter2 = clusters.get(i).iter(); iter2.valid(); iter2.advance()) {
+ for (DBIDIter iter2 = clusters.get(i).iter(); iter2.valid(); iter2.advance()) {
mdist.put(distQ.distance(iter, iter2).doubleValue());
}
- if(mdist.getMean() < bestm.getMean()) {
- best = iter.getDBID();
+ if (mdist.getMean() < bestm.getMean()) {
+ best = DBIDUtil.deref(iter);
bestm = mdist;
}
}
- if(best != null && !med.sameDBID(best)) {
+ if (best != null && !DBIDUtil.equal(miter, best)) {
changed = true;
medoids.set(i, best);
mdists[i] = bestm;
}
}
// Reassign
- if(changed) {
+ if (changed) {
assignToNearestCluster(medoids, mdists, clusters, distQ);
}
}
// Wrap result
Clustering<MedoidModel> result = new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
result.addCluster(new Cluster<MedoidModel>(clusters.get(i), model));
}
@@ -192,24 +192,27 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
boolean changed = false;
double[] dists = new double[k];
- for(DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
int minIndex = 0;
double mindist = Double.POSITIVE_INFINITY;
- for(int i = 0; i < k; i++) {
- dists[i] = distQ.distance(iditer, means.get(i)).doubleValue();
- if(dists[i] < mindist) {
- minIndex = i;
- mindist = dists[i];
+ {
+ int i = 0;
+ for (DBIDIter miter = means.iter(); miter.valid(); miter.advance(), i++) {
+ dists[i] = distQ.distance(iditer, miter).doubleValue();
+ if (dists[i] < mindist) {
+ minIndex = i;
+ mindist = dists[i];
+ }
}
}
- if(clusters.get(minIndex).add(iditer)) {
+ if (clusters.get(minIndex).add(iditer)) {
changed = true;
mdist[minIndex].put(mindist);
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for(int i = 0; i < k; i++) {
- if(i != minIndex) {
- if(clusters.get(i).remove(iditer)) {
+ for (int i = 0; i < k; i++) {
+ if (i != minIndex) {
+ if (clusters.get(i).remove(iditer)) {
mdist[minIndex].put(dists[i], -1);
break;
}
@@ -227,7 +230,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -247,19 +250,21 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter kP = new IntParameter(KMeans.K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
- k = kP.getValue();
+ IntParameter kP = new IntParameter(KMeans.K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<KMedoidsInitialization<V>>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
+ IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
}
}
@@ -268,4 +273,4 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
return new KMedoidsEM<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
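The swap phase above replaces a medoid with the cluster member that has the smallest mean distance to the other members of that cluster. A sketch of that selection on a precomputed distance matrix (plain Java, hypothetical helper, not the ELKI DBID/DistanceQuery code):

// For one cluster, return the member with the smallest mean distance
// to all members of that cluster (the new medoid candidate).
final class MedoidSwapSketch {
  static int bestMedoid(int[] cluster, double[][] dist) {
    int best = cluster[0];
    double bestMean = Double.POSITIVE_INFINITY;
    for (int candidate : cluster) {
      double sum = 0;
      for (int other : cluster) {
        sum += dist[candidate][other];         // includes the zero self-distance
      }
      double mean = sum / cluster.length;
      if (mean < bestMean) {
        bestMean = mean;
        best = candidate;
      }
    }
    return best;
  }
}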
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
index 30c80084..906501e4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
@@ -83,7 +83,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMedoidsPAM.class);
+ private static final Logging LOG = Logging.getLogger(KMedoidsPAM.class);
/**
* Holds the value of {@link AbstractKMeans#K_ID}.
@@ -123,7 +123,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
* @return result
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
+ if (relation.size() <= 0) {
return new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
@@ -132,7 +132,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -143,36 +143,35 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
// Swap phase
boolean changed = true;
- while(changed) {
+ while (changed) {
changed = false;
// Try to swap the medoid with a better cluster member:
double best = 0;
DBID bestid = null;
int bestcluster = -1;
- for(int i = 0; i < k; i++) {
- DBID med = medoids.get(i);
- for(DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
- if(med.sameDBID(iter)) {
+ int i = 0;
+ for (DBIDIter miter = medoids.iter(); miter.valid(); miter.advance(), i++) {
+ for (DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(miter, iter)) {
continue;
}
// double disti = distQ.distance(id, med).doubleValue();
double cost = 0;
- for(int j = 0; j < k; j++) {
- for(DBIDIter iter2 = clusters.get(j).iter(); iter2.valid(); iter2.advance()) {
- double distcur = distQ.distance(iter2, medoids.get(j)).doubleValue();
+ DBIDIter olditer = medoids.iter();
+ for (int j = 0; j < k; j++, olditer.advance()) {
+ for (DBIDIter iter2 = clusters.get(j).iter(); iter2.valid(); iter2.advance()) {
+ double distcur = distQ.distance(iter2, olditer).doubleValue();
double distnew = distQ.distance(iter2, iter).doubleValue();
- if(j == i) {
+ if (j == i) {
// Cases 1 and 2.
double distsec = second.doubleValue(iter2);
- if(distcur > distsec) {
+ if (distcur > distsec) {
// Case 1, other would switch to a third medoid
cost += distsec - distcur; // Always positive!
- }
- else { // Would remain with the candidate
+ } else { // Would remain with the candidate
cost += distnew - distcur; // Could be negative
}
- }
- else {
+ } else {
// Cases 3-4: objects from other clusters
if (distcur < distnew) {
// Case 3: no change
@@ -185,20 +184,20 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
}
if (cost < best) {
best = cost;
- bestid = iter.getDBID();
+ bestid = DBIDUtil.deref(iter);
bestcluster = i;
}
}
}
- if(logger.isDebugging()) {
- logger.debug("Best cost: " + best);
+ if (LOG.isDebugging()) {
+ LOG.debug("Best cost: " + best);
}
- if(bestid != null) {
+ if (bestid != null) {
changed = true;
medoids.set(bestcluster, bestid);
}
// Reassign
- if(changed) {
+ if (changed) {
// TODO: can we save some of these recomputations?
assignToNearestCluster(medoids, ids, second, clusters, distQ);
}
@@ -206,7 +205,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
// Wrap result
Clustering<MedoidModel> result = new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
result.addCluster(new Cluster<MedoidModel>(clusters.get(i), model));
}
@@ -227,28 +226,30 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
protected boolean assignToNearestCluster(ArrayDBIDs means, DBIDs ids, WritableDoubleDataStore second, List<? extends ModifiableDBIDs> clusters, DistanceQuery<V, D> distQ) {
boolean changed = false;
- for(DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
int minIndex = 0;
double mindist = Double.POSITIVE_INFINITY;
double mindist2 = Double.POSITIVE_INFINITY;
- for(int i = 0; i < k; i++) {
- double dist = distQ.distance(iditer, means.get(i)).doubleValue();
- if(dist < mindist) {
- minIndex = i;
- mindist2 = mindist;
- mindist = dist;
- }
- else if(dist < mindist2) {
- mindist2 = dist;
+ {
+ int i = 0;
+ for (DBIDIter miter = means.iter(); miter.valid(); miter.advance(), i++) {
+ double dist = distQ.distance(iditer, miter).doubleValue();
+ if (dist < mindist) {
+ minIndex = i;
+ mindist2 = mindist;
+ mindist = dist;
+ } else if (dist < mindist2) {
+ mindist2 = dist;
+ }
}
}
- if(clusters.get(minIndex).add(iditer)) {
+ if (clusters.get(minIndex).add(iditer)) {
changed = true;
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for(int i = 0; i < k; i++) {
- if(i != minIndex) {
- if(clusters.get(i).remove(iditer)) {
+ for (int i = 0; i < k; i++) {
+ if (i != minIndex) {
+ if (clusters.get(i).remove(iditer)) {
break;
}
}
@@ -266,7 +267,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -286,19 +287,21 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter kP = new IntParameter(KMeans.K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
- k = kP.getValue();
+ IntParameter kP = new IntParameter(KMeans.K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<KMedoidsInitialization<V>>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
+ IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
}
}
@@ -307,4 +310,4 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
return new KMedoidsPAM<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
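The PAM swap loop above evaluates the cost delta of each candidate swap case by case, reusing cached second-nearest distances. An equivalent but slower way to state the same decision rule, shown here only for clarity (plain Java on a distance matrix, not the ELKI code): compute the total assignment cost of a medoid set and accept a swap only if it lowers that total.

// Total PAM cost of a medoid set: sum over all objects of the distance
// to their nearest medoid. A swap is good if it reduces this value.
final class PamCostSketch {
  static double totalCost(int[] medoids, double[][] dist) {
    double cost = 0;
    for (int o = 0; o < dist.length; o++) {
      double best = Double.POSITIVE_INFINITY;
      for (int m : medoids) {
        best = Math.min(best, dist[o][m]);
      }
      cost += best;
    }
    return cost;
  }
}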
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
index 094c37bb..1fc7160e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
@@ -113,7 +113,7 @@ public class PAMInitialMeans<V, D extends NumberDistance<D, ?>> implements KMean
}
if(mean.getMean() < best) {
best = mean.getMean();
- bestid = iter.getDBID();
+ bestid = DBIDUtil.deref(iter);
if(bestd != null) {
bestd.destroy();
}
@@ -133,23 +133,21 @@ public class PAMInitialMeans<V, D extends NumberDistance<D, ?>> implements KMean
DBID bestid = null;
WritableDoubleDataStore bestd = null;
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- if(medids.contains(id)) {
+ if(medids.contains(iter)) {
continue;
}
WritableDoubleDataStore newd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
mean.reset();
for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
- DBID other = iter2.getDBID();
- double dn = distQ.distance(id, other).doubleValue();
- double v = Math.min(dn, mindist.doubleValue(other));
+ double dn = distQ.distance(iter, iter2).doubleValue();
+ double v = Math.min(dn, mindist.doubleValue(iter2));
mean.put(v);
- newd.put(other, v);
+ newd.put(iter2, v);
}
assert (mean.getCount() == ids.size());
if(mean.getMean() < best) {
best = mean.getMean();
- bestid = id;
+ bestid = DBIDUtil.deref(iter);
if(bestd != null) {
bestd.destroy();
}
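The refactored loop above is PAM's greedy BUILD step: the next medoid is the candidate that minimizes the average distance of all objects to their nearest medoid selected so far, with mindist caching each object's current best distance. A compact sketch of the same selection on a distance matrix (hypothetical helper, not the ELKI code):

// Greedy BUILD: pick the candidate that most reduces the average
// distance of all objects to their nearest chosen medoid.
final class PamBuildSketch {
  static int nextMedoid(boolean[] isMedoid, double[] mindist, double[][] dist) {
    int best = -1;
    double bestMean = Double.POSITIVE_INFINITY;
    for (int cand = 0; cand < dist.length; cand++) {
      if (isMedoid[cand]) {
        continue;
      }
      double sum = 0;
      for (int o = 0; o < dist.length; o++) {
        sum += Math.min(dist[cand][o], mindist[o]); // distance if cand were added
      }
      double mean = sum / dist.length;
      if (mean < bestMean) {
        bestMean = mean;
        best = cand;
      }
    }
    return best;
  }
}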
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
index 5b9da923..78e59be7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
@@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
/**
* Initialize K-means by randomly choosing k existing elements as cluster
@@ -44,15 +45,15 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
/**
* Constructor.
*
- * @param seed Random seed.
+ * @param rnd Random generator.
*/
- public RandomlyChosenInitialMeans(Long seed) {
- super(seed);
+ public RandomlyChosenInitialMeans(RandomFactory rnd) {
+ super(rnd);
}
@Override
public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
- DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), k, seed);
+ DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
List<V> means = new ArrayList<V>(k);
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
means.add(relation.get(iter));
@@ -62,7 +63,7 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
@Override
public DBIDs chooseInitialMedoids(int k, DistanceQuery<? super V, ?> distanceFunction) {
- return DBIDUtil.randomSample(distanceFunction.getRelation().getDBIDs(), k, seed);
+ return DBIDUtil.randomSample(distanceFunction.getRelation().getDBIDs(), k, rnd);
}
/**
@@ -76,7 +77,7 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
@Override
protected RandomlyChosenInitialMeans<V> makeInstance() {
- return new RandomlyChosenInitialMeans<V>(seed);
+ return new RandomlyChosenInitialMeans<V>(rnd);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
index 00ed08c4..300f5cb0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
@@ -28,9 +28,11 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -41,29 +43,30 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @param <V> Vector type
*/
-public class RandomlyGeneratedInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+public class RandomlyGeneratedInitialMeans<V extends NumberVector<?>> extends AbstractKMeansInitialization<V> {
/**
* Constructor.
*
- * @param seed Random seed.
+ * @param rnd Random generator.
*/
- public RandomlyGeneratedInitialMeans(Long seed) {
- super(seed);
+ public RandomlyGeneratedInitialMeans(RandomFactory rnd) {
+ super(rnd);
}
@Override
public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
- final int dim = DatabaseUtil.dimensionality(relation);
+ final int dim = RelationUtil.dimensionality(relation);
+ NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
List<V> means = new ArrayList<V>(k);
- final Random random = (this.seed != null) ? new Random(this.seed) : new Random();
+ final Random random = rnd.getRandom();
for(int i = 0; i < k; i++) {
double[] r = MathUtil.randomDoubleArray(dim, random);
// Rescale
for(int d = 0; d < dim; d++) {
- r[d] = minmax.first.doubleValue(d + 1) + (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)) * r[d];
+ r[d] = minmax.first.doubleValue(d) + (minmax.second.doubleValue(d) - minmax.first.doubleValue(d)) * r[d];
}
- means.add(minmax.first.newNumberVector(r));
+ means.add(factory.newNumberVector(r));
}
return means;
}
@@ -75,10 +78,10 @@ public class RandomlyGeneratedInitialMeans<V extends NumberVector<V, ?>> extends
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractKMeansInitialization.Parameterizer<V> {
@Override
protected RandomlyGeneratedInitialMeans<V> makeInstance() {
- return new RandomlyGeneratedInitialMeans<V>(seed);
+ return new RandomlyGeneratedInitialMeans<V>(rnd);
}
}
} \ No newline at end of file
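The rescaling above now uses 0-based dimensions: each random mean is drawn uniformly within the per-dimension [min, max] bounding box of the data. A minimal sketch of that generation step (plain Java, hypothetical helper, not the ELKI factory API):

import java.util.Random;

// Generate k random means uniformly inside the data's bounding box.
final class RandomMeansSketch {
  static double[][] generate(int k, double[] min, double[] max, Random random) {
    double[][] means = new double[k][min.length];
    for (int i = 0; i < k; i++) {
      for (int d = 0; d < min.length; d++) {   // 0-based dimensions
        means[i][d] = min[d] + (max[d] - min[d]) * random.nextDouble();
      }
    }
    return means;
  }
}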