summaryrefslogtreecommitdiff
path: root/src/tutorial
diff options
context:
space:
mode:
Diffstat (limited to 'src/tutorial')
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java59
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java26
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java82
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java80
-rw-r--r--src/tutorial/clustering/SameSizeKMeansAlgorithm.java51
-rw-r--r--src/tutorial/clustering/package-info.java2
-rw-r--r--src/tutorial/distancefunction/MultiLPNorm.java12
-rw-r--r--src/tutorial/distancefunction/TutorialDistanceFunction.java14
-rw-r--r--src/tutorial/distancefunction/package-info.java2
-rw-r--r--src/tutorial/outlier/DistanceStddevOutlier.java50
-rw-r--r--src/tutorial/outlier/ODIN.java26
-rw-r--r--src/tutorial/outlier/SimpleScoreDumper.java8
-rw-r--r--src/tutorial/package-info.java2
13 files changed, 206 insertions, 208 deletions
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
index 7477c064..b1406274 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,7 +46,6 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -69,7 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*
* @param <O> Object type
*/
-public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+public class NaiveAgglomerativeHierarchicalClustering1<O> extends AbstractDistanceBasedAlgorithm<O, Result> {
/**
* Class logger
*/
@@ -86,7 +85,7 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
* @param distanceFunction Distance function to use
* @param numclusters Number of clusters
*/
- public NaiveAgglomerativeHierarchicalClustering1(DistanceFunction<? super O, D> distanceFunction, int numclusters) {
+ public NaiveAgglomerativeHierarchicalClustering1(DistanceFunction<? super O> distanceFunction, int numclusters) {
super(distanceFunction);
this.numclusters = numclusters;
}
@@ -99,7 +98,7 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
* @return Clustering hierarchy
*/
public Result run(Database db, Relation<O> relation) {
- DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
@@ -108,10 +107,10 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
// Compute the initial distance matrix.
double[][] matrix = new double[size][size];
DBIDArrayIter ix = ids.iter(), iy = ids.iter();
- for (int x = 0; ix.valid(); x++, ix.advance()) {
+ for(int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
- for (int y = 0; y < x; y++, iy.advance()) {
- final double dist = dq.distance(ix, iy).doubleValue();
+ for(int y = 0; y < x; y++, iy.advance()) {
+ final double dist = dq.distance(ix, iy);
matrix[x][y] = dist;
matrix[y][x] = dist;
}
@@ -129,18 +128,18 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
// Repeat until everything merged, except the desired number of clusters:
final int stop = size - numclusters;
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
- for (int i = 0; i < stop; i++) {
+ for(int i = 0; i < stop; i++) {
double min = Double.POSITIVE_INFINITY;
int minx = -1, miny = -1;
- for (int x = 0; x < size; x++) {
- if (height[x] < Double.POSITIVE_INFINITY) {
+ for(int x = 0; x < size; x++) {
+ if(height[x] < Double.POSITIVE_INFINITY) {
continue;
}
- for (int y = 0; y < x; y++) {
- if (height[y] < Double.POSITIVE_INFINITY) {
+ for(int y = 0; y < x; y++) {
+ if(height[y] < Double.POSITIVE_INFINITY) {
continue;
}
- if (matrix[x][y] < min) {
+ if(matrix[x][y] < min) {
min = matrix[x][y];
minx = x;
miny = y;
@@ -158,36 +157,33 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
// Merge into cluster
ModifiableDBIDs cx = clusters.get(minx);
ModifiableDBIDs cy = clusters.get(miny);
- if (cy == null) {
+ if(cy == null) {
cy = DBIDUtil.newHashSet();
cy.add(iy);
}
- if (cx == null) {
+ if(cx == null) {
cy.add(ix);
- } else {
+ }
+ else {
cy.addDBIDs(cx);
clusters.remove(minx);
}
clusters.put(miny, cy);
// Update distance matrix for y:
- for (int j = 0; j < size; j++) {
+ for(int j = 0; j < size; j++) {
matrix[j][miny] = Math.min(matrix[j][minx], matrix[j][miny]);
matrix[miny][j] = Math.min(matrix[minx][j], matrix[miny][j]);
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
// Build the clustering result
final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
- for (int x = 0; x < size; x++) {
- if (height[x] < Double.POSITIVE_INFINITY) {
+ for(int x = 0; x < size; x++) {
+ if(height[x] < Double.POSITIVE_INFINITY) {
DBIDs cids = clusters.get(x);
- if (cids == null) {
+ if(cids == null) {
ix.seek(x);
cids = DBIDUtil.deref(ix);
}
@@ -214,10 +210,11 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
*
* @author Erich Schubert
*
+ * @apiviz.exclude
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Desired number of clusters.
*/
@@ -228,13 +225,13 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
super.makeOptions(config);
IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID);
numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(numclustersP)) {
+ if(config.grab(numclustersP)) {
numclusters = numclustersP.intValue();
}
}
@Override
- protected NaiveAgglomerativeHierarchicalClustering1<O, D> makeInstance() {
+ protected NaiveAgglomerativeHierarchicalClustering1<O> makeInstance() {
return new NaiveAgglomerativeHierarchicalClustering1<>(distanceFunction, numclusters);
}
}
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
index d33ca768..0081a845 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,7 +46,6 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -69,7 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*
* @param <O> Object type
*/
-public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+public class NaiveAgglomerativeHierarchicalClustering2<O> extends AbstractDistanceBasedAlgorithm<O, Result> {
/**
* Class logger
*/
@@ -86,7 +85,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
* @param distanceFunction Distance function to use
* @param numclusters Number of clusters
*/
- public NaiveAgglomerativeHierarchicalClustering2(DistanceFunction<? super O, D> distanceFunction, int numclusters) {
+ public NaiveAgglomerativeHierarchicalClustering2(DistanceFunction<? super O> distanceFunction, int numclusters) {
super(distanceFunction);
this.numclusters = numclusters;
}
@@ -99,7 +98,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
* @return Clustering hierarchy
*/
public Result run(Database db, Relation<O> relation) {
- DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
@@ -116,7 +115,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
for (int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
for (int y = 0; y < x; y++, iy.advance()) {
- scratch[pos] = dq.distance(ix, iy).doubleValue();
+ scratch[pos] = dq.distance(ix, iy);
pos++;
}
}
@@ -200,13 +199,9 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
final int jbase = triangleSize(j);
scratch[jbase + miny] = Math.min(scratch[jbase + minx], scratch[jbase + miny]);
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
// Build the clustering result
final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
@@ -251,10 +246,11 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
*
* @author Erich Schubert
*
+ * @apiviz.exclude
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Desired number of clusters.
*/
@@ -271,7 +267,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
}
@Override
- protected NaiveAgglomerativeHierarchicalClustering2<O, D> makeInstance() {
+ protected NaiveAgglomerativeHierarchicalClustering2<O> makeInstance() {
return new NaiveAgglomerativeHierarchicalClustering2<>(distanceFunction, numclusters);
}
}
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
index d3312115..0b1590bd 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -47,7 +47,6 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -81,7 +80,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @param <O> Object type
*/
@Reference(title = "A Review of Classification", authors = "R. M. Cormack", booktitle = "Journal of the Royal Statistical Society. Series A, Vol. 134, No. 3", url = "http://www.jstor.org/stable/2344237")
-public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+public class NaiveAgglomerativeHierarchicalClustering3<O> extends AbstractDistanceBasedAlgorithm<O, Result> {
/**
* Class logger
*/
@@ -94,6 +93,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
* R. M. Cormack, A Review of Classification
*
* @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static enum Linkage {//
SINGLE {
@@ -168,7 +169,7 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
* @param numclusters Number of clusters
* @param linkage Linkage strategy
*/
- public NaiveAgglomerativeHierarchicalClustering3(DistanceFunction<? super O, D> distanceFunction, int numclusters, Linkage linkage) {
+ public NaiveAgglomerativeHierarchicalClustering3(DistanceFunction<? super O> distanceFunction, int numclusters, Linkage linkage) {
super(distanceFunction);
this.numclusters = numclusters;
this.linkage = linkage;
@@ -182,14 +183,14 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
* @return Clustering hierarchy
*/
public Result run(Database db, Relation<O> relation) {
- DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
- if (size > 0x10000) {
+ if(size > 0x10000) {
throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
}
- if (Linkage.SINGLE.equals(linkage)) {
+ if(Linkage.SINGLE.equals(linkage)) {
LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
}
@@ -199,12 +200,12 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
// Position counter - must agree with computeOffset!
int pos = 0;
boolean square = Linkage.WARD.equals(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction()));
- for (int x = 0; ix.valid(); x++, ix.advance()) {
+ for(int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
- for (int y = 0; y < x; y++, iy.advance()) {
- scratch[pos] = dq.distance(ix, iy).doubleValue();
+ for(int y = 0; y < x; y++, iy.advance()) {
+ scratch[pos] = dq.distance(ix, iy);
// Ward uses variances -- i.e. squared values
- if (square) {
+ if(square) {
scratch[pos] *= scratch[pos];
}
pos++;
@@ -223,20 +224,20 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
// Repeat until everything merged, except the desired number of clusters:
final int stop = size - numclusters;
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
- for (int i = 0; i < stop; i++) {
+ for(int i = 0; i < stop; i++) {
double min = Double.POSITIVE_INFINITY;
int minx = -1, miny = -1;
- for (int x = 0; x < size; x++) {
- if (height[x] < Double.POSITIVE_INFINITY) {
+ for(int x = 0; x < size; x++) {
+ if(height[x] < Double.POSITIVE_INFINITY) {
continue;
}
final int xbase = triangleSize(x);
- for (int y = 0; y < x; y++) {
- if (height[y] < Double.POSITIVE_INFINITY) {
+ for(int y = 0; y < x; y++) {
+ if(height[y] < Double.POSITIVE_INFINITY) {
continue;
}
final int idx = xbase + y;
- if (scratch[idx] < min) {
+ if(scratch[idx] < min) {
min = scratch[idx];
minx = x;
miny = y;
@@ -255,15 +256,17 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
ModifiableDBIDs cx = clusters.get(minx);
ModifiableDBIDs cy = clusters.get(miny);
int sizex = 1, sizey = 1; // cluster sizes, for averaging
- if (cy == null) {
+ if(cy == null) {
cy = DBIDUtil.newHashSet();
cy.add(iy);
- } else {
+ }
+ else {
sizey = cy.size();
}
- if (cx == null) {
+ if(cx == null) {
cy.add(ix);
- } else {
+ }
+ else {
sizex = cx.size();
cy.addDBIDs(cx);
clusters.remove(minx);
@@ -276,8 +279,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
// hashmap lookup.
final int xbase = triangleSize(minx), ybase = triangleSize(miny);
// Write to (y, j), with j < y
- for (int j = 0; j < miny; j++) {
- if (height[j] < Double.POSITIVE_INFINITY) {
+ for(int j = 0; j < miny; j++) {
+ if(height[j] < Double.POSITIVE_INFINITY) {
continue;
}
final DBIDs idsj = clusters.get(j);
@@ -285,8 +288,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
scratch[ybase + j] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[ybase + j], sizej, min);
}
// Write to (j, y), with y < j < x
- for (int j = miny + 1; j < minx; j++) {
- if (height[j] < Double.POSITIVE_INFINITY) {
+ for(int j = miny + 1; j < minx; j++) {
+ if(height[j] < Double.POSITIVE_INFINITY) {
continue;
}
final int jbase = triangleSize(j);
@@ -295,8 +298,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[jbase + miny], sizej, min);
}
// Write to (j, y), with y < x < j
- for (int j = minx + 1; j < size; j++) {
- if (height[j] < Double.POSITIVE_INFINITY) {
+ for(int j = minx + 1; j < size; j++) {
+ if(height[j] < Double.POSITIVE_INFINITY) {
continue;
}
final DBIDs idsj = clusters.get(j);
@@ -304,20 +307,16 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
final int jbase = triangleSize(j);
scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
// Build the clustering result
final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
- for (int x = 0; x < size; x++) {
- if (height[x] < Double.POSITIVE_INFINITY) {
+ for(int x = 0; x < size; x++) {
+ if(height[x] < Double.POSITIVE_INFINITY) {
DBIDs cids = clusters.get(x);
- if (cids == null) {
+ if(cids == null) {
ix.seek(x);
cids = DBIDUtil.deref(ix);
}
@@ -355,10 +354,11 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
*
* @author Erich Schubert
*
+ * @apiviz.exclude
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for linkage parameter.
*/
@@ -379,19 +379,19 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
super.makeOptions(config);
IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID);
numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(numclustersP)) {
+ if(config.grab(numclustersP)) {
numclusters = numclustersP.intValue();
}
EnumParameter<Linkage> linkageP = new EnumParameter<>(LINKAGE_ID, Linkage.class);
linkageP.setDefaultValue(Linkage.WARD);
- if (config.grab(linkageP)) {
+ if(config.grab(linkageP)) {
linkage = linkageP.getValue();
}
}
@Override
- protected NaiveAgglomerativeHierarchicalClustering3<O, D> makeInstance() {
+ protected NaiveAgglomerativeHierarchicalClustering3<O> makeInstance() {
return new NaiveAgglomerativeHierarchicalClustering3<>(distanceFunction, numclusters, linkage);
}
}
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java
index 23678ccc..997b657a 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
@@ -43,8 +43,6 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -75,7 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
* @param <O> Object type
*/
@Reference(title = "A Review of Classification", authors = "R. M. Cormack", booktitle = "Journal of the Royal Statistical Society. Series A, Vol. 134, No. 3", url = "http://www.jstor.org/stable/2344237")
-public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, PointerHierarchyRepresentationResult<DoubleDistance>> implements HierarchicalClusteringAlgorithm<DoubleDistance> {
+public class NaiveAgglomerativeHierarchicalClustering4<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchyRepresentationResult> implements HierarchicalClusteringAlgorithm {
/**
* Class logger
*/
@@ -88,6 +86,8 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
* R. M. Cormack, A Review of Classification
*
* @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static enum Linkage {//
SINGLE {
@@ -156,7 +156,7 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
* @param distanceFunction Distance function to use
* @param linkage Linkage strategy
*/
- public NaiveAgglomerativeHierarchicalClustering4(DistanceFunction<? super O, D> distanceFunction, Linkage linkage) {
+ public NaiveAgglomerativeHierarchicalClustering4(DistanceFunction<? super O> distanceFunction, Linkage linkage) {
super(distanceFunction);
this.linkage = linkage;
}
@@ -168,15 +168,15 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
* @param relation Relation
* @return Clustering hierarchy
*/
- public PointerHierarchyRepresentationResult<DoubleDistance> run(Database db, Relation<O> relation) {
- DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
+ DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
- if (size > 0x10000) {
+ if(size > 0x10000) {
throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
}
- if (Linkage.SINGLE.equals(linkage)) {
+ if(Linkage.SINGLE.equals(linkage)) {
LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
}
@@ -186,12 +186,12 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
// Position counter - must agree with computeOffset!
int pos = 0;
boolean square = Linkage.WARD.equals(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction()));
- for (int x = 0; ix.valid(); x++, ix.advance()) {
+ for(int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
- for (int y = 0; y < x; y++, iy.advance()) {
- scratch[pos] = dq.distance(ix, iy).doubleValue();
+ for(int y = 0; y < x; y++, iy.advance()) {
+ scratch[pos] = dq.distance(ix, iy);
// Ward uses variances -- i.e. squared values
- if (square) {
+ if(square) {
scratch[pos] *= scratch[pos];
}
pos++;
@@ -200,9 +200,9 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
// Initialize space for result:
WritableDBIDDataStore parent = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- WritableDoubleDistanceDataStore height = DataStoreUtil.makeDoubleDistanceStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDoubleDataStore height = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
parent.put(it, it);
height.put(it, Double.POSITIVE_INFINITY);
csize.put(it, 1);
@@ -210,20 +210,20 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
// Repeat until everything merged, except the desired number of clusters:
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
- for (int i = 1; i < size; i++) {
+ for(int i = 1; i < size; i++) {
double min = Double.POSITIVE_INFINITY;
int minx = -1, miny = -1;
- for (ix.seek(0); ix.valid(); ix.advance()) {
- if (height.doubleValue(ix) < Double.POSITIVE_INFINITY) {
+ for(ix.seek(0); ix.valid(); ix.advance()) {
+ if(height.doubleValue(ix) < Double.POSITIVE_INFINITY) {
continue;
}
final int xbase = triangleSize(ix.getOffset());
- for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
- if (height.doubleValue(iy) < Double.POSITIVE_INFINITY) {
+ for(iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
+ if(height.doubleValue(iy) < Double.POSITIVE_INFINITY) {
continue;
}
final int idx = xbase + iy.getOffset();
- if (scratch[idx] <= min) {
+ if(scratch[idx] <= min) {
min = scratch[idx];
minx = ix.getOffset();
miny = iy.getOffset();
@@ -244,16 +244,16 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
// Update distance matrix. Note: miny < minx
final int xbase = triangleSize(minx), ybase = triangleSize(miny);
// Write to (y, j), with j < y
- for (ij.seek(0); ij.getOffset() < miny; ij.advance()) {
- if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ for(ij.seek(0); ij.getOffset() < miny; ij.advance()) {
+ if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int sizej = csize.intValue(ij);
scratch[ybase + ij.getOffset()] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, min);
}
// Write to (j, y), with y < j < x
- for (ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) {
- if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ for(ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) {
+ if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int jbase = triangleSize(ij.getOffset());
@@ -261,23 +261,19 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + miny], sizej, min);
}
// Write to (j, y), with y < x < j
- for (ij.seek(minx + 1); ij.valid(); ij.advance()) {
- if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ for(ij.seek(minx + 1); ij.valid(); ij.advance()) {
+ if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int jbase = triangleSize(ij.getOffset());
final int sizej = csize.intValue(ij);
scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
- return new PointerHierarchyRepresentationResult<>(ids, parent, height);
+ return new PointerHierarchyRepresentationResult(ids, parent, height);
}
/**
@@ -301,20 +297,16 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
return LOG;
}
- @Override
- public DoubleDistance getDistanceFactory() {
- return DoubleDistance.FACTORY;
- }
-
/**
* Parameterization class
*
* @author Erich Schubert
*
+ * @apiviz.exclude
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for linkage parameter.
*/
@@ -330,13 +322,13 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
super.makeOptions(config);
EnumParameter<Linkage> linkageP = new EnumParameter<>(LINKAGE_ID, Linkage.class);
linkageP.setDefaultValue(Linkage.WARD);
- if (config.grab(linkageP)) {
+ if(config.grab(linkageP)) {
linkage = linkageP.getValue();
}
}
@Override
- protected NaiveAgglomerativeHierarchicalClustering4<O, D> makeInstance() {
+ protected NaiveAgglomerativeHierarchicalClustering4<O> makeInstance() {
return new NaiveAgglomerativeHierarchicalClustering4<>(distanceFunction, linkage);
}
}
diff --git a/src/tutorial/clustering/SameSizeKMeansAlgorithm.java b/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
index 91da44c5..ad57885e 100644
--- a/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
+++ b/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,8 +29,8 @@ import java.util.Comparator;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.AbstractKMeans;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansInitialization;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansPlusPlusInitialMeans;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansPlusPlusInitialMeans;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -48,12 +48,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerArrayQuickSort;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -78,7 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @param <V> Vector type
*/
-public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends AbstractKMeans<V, DoubleDistance, MeanModel<V>> {
+public class SameSizeKMeansAlgorithm<V extends NumberVector> extends AbstractKMeans<V, MeanModel> {
/**
* Class logger
*/
@@ -92,7 +91,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param maxiter Maximum number of iterations
* @param initializer
*/
- public SameSizeKMeansAlgorithm(PrimitiveDoubleDistanceFunction<? super NumberVector<?>> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public SameSizeKMeansAlgorithm(PrimitiveDistanceFunction<? super NumberVector> distanceFunction, int k, int maxiter, KMeansInitialization<? super V> initializer) {
super(distanceFunction, k, maxiter, initializer);
}
@@ -104,11 +103,11 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @return result
*/
@Override
- public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
+ public Clustering<MeanModel> run(Database database, Relation<V> relation) {
// Database objects to process
final DBIDs ids = relation.getDBIDs();
// Choose initial means
- List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
+ List<Vector> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction(), Vector.FACTORY);
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for(int i = 0; i < k; i++) {
@@ -125,11 +124,10 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
means = refineResult(relation, means, clusters, metas, tids);
// Wrap result
- Clustering<MeanModel<V>> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering");
- final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Clustering<MeanModel> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering");
for(int i = 0; i < clusters.size(); i++) {
- V mean = factory.newNumberVector(means.get(i).getColumnVector().getArrayRef());
- result.addToplevelCluster(new Cluster<>(clusters.get(i), new MeanModel<>(mean)));
+ Vector mean = means.get(i).getColumnVector();
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), new MeanModel(mean)));
}
return result;
}
@@ -141,10 +139,8 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param means Mean vectors
* @return Initialized storage
*/
- protected WritableDataStore<Meta> initializeMeta(Relation<V> relation, List<? extends NumberVector<?>> means) {
- // This is a safe cast - see constructor.
- @SuppressWarnings("unchecked")
- PrimitiveDoubleDistanceFunction<NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) getDistanceFunction();
+ protected WritableDataStore<Meta> initializeMeta(Relation<V> relation, List<? extends NumberVector> means) {
+ PrimitiveDistanceFunction<? super NumberVector> df = getDistanceFunction();
// The actual storage
final WritableDataStore<Meta> metas = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Meta.class);
// Build the metadata, track the two nearest cluster centers.
@@ -152,7 +148,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
Meta c = new Meta(k);
V fv = relation.get(id);
for(int i = 0; i < k; i++) {
- c.dists[i] = df.doubleDistance(fv, means.get(i));
+ c.dists[i] = df.distance(fv, means.get(i));
if(i > 0) {
if(c.dists[i] < c.dists[c.primary]) {
c.primary = i;
@@ -231,14 +227,14 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param metas Metadata storage
* @param df Distance function
*/
- protected void updateDistances(Relation<V> relation, List<? extends NumberVector<?>> means, final WritableDataStore<Meta> metas, PrimitiveDoubleDistanceFunction<NumberVector<?>> df) {
+ protected void updateDistances(Relation<V> relation, List<Vector> means, final WritableDataStore<Meta> metas, PrimitiveDistanceFunction<? super NumberVector> df) {
for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
Meta c = metas.get(id);
V fv = relation.get(id);
// Update distances to means.
c.secondary = -1;
for(int i = 0; i < k; i++) {
- c.dists[i] = df.doubleDistance(fv, means.get(i));
+ c.dists[i] = df.distance(fv, means.get(i));
if(c.primary != i) {
if(c.secondary < 0 || c.dists[i] < c.dists[c.secondary]) {
c.secondary = i;
@@ -259,10 +255,9 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param tids DBIDs array
* @return final means
*/
- protected List<? extends NumberVector<?>> refineResult(Relation<V> relation, List<? extends NumberVector<?>> means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
+ protected List<Vector> refineResult(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
// This is a safe cast - see constructor.
- @SuppressWarnings("unchecked")
- PrimitiveDoubleDistanceFunction<NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) getDistanceFunction();
+ PrimitiveDistanceFunction<? super NumberVector> df = getDistanceFunction();
// Our desired cluster size:
final int minsize = tids.size() / k; // rounded down
final int maxsize = (tids.size() + k - 1) / k; // rounded up
@@ -289,7 +284,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
transfers[i] = DBIDUtil.newArray();
}
- for(int iter = 0; maxiter < 0 || iter < maxiter; iter++) {
+ for(int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
updateDistances(relation, means, metas, df);
tids.sort(comp);
int active = 0; // Track if anything has changed
@@ -461,7 +456,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* k Parameter.
*/
@@ -480,12 +475,12 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
/**
* Distance function
*/
- protected PrimitiveDoubleDistanceFunction<? super NumberVector<?>> distanceFunction;
+ protected PrimitiveDistanceFunction<? super NumberVector> distanceFunction;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<PrimitiveDoubleDistanceFunction<? super NumberVector<?>>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDoubleDistanceFunction.class);
+ ObjectParameter<PrimitiveDistanceFunction<? super NumberVector>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
if(config.grab(distanceFunctionP)) {
distanceFunction = distanceFunctionP.instantiateClass(config);
if(!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
diff --git a/src/tutorial/clustering/package-info.java b/src/tutorial/clustering/package-info.java
index ac7ab950..e2971ef8 100644
--- a/src/tutorial/clustering/package-info.java
+++ b/src/tutorial/clustering/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/tutorial/distancefunction/MultiLPNorm.java b/src/tutorial/distancefunction/MultiLPNorm.java
index b3a96ecb..823e00b1 100644
--- a/src/tutorial/distancefunction/MultiLPNorm.java
+++ b/src/tutorial/distancefunction/MultiLPNorm.java
@@ -3,7 +3,7 @@ package tutorial.distancefunction;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractNumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -14,7 +14,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParamet
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -42,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParamet
*
* @author Erich Schubert
*/
-public class MultiLPNorm extends AbstractVectorDoubleDistanceFunction {
+public class MultiLPNorm extends AbstractNumberVectorDistanceFunction {
/**
* The exponents
*/
@@ -72,7 +72,7 @@ public class MultiLPNorm extends AbstractVectorDoubleDistanceFunction {
}
@Override
- public double doubleDistance(NumberVector<?> o1, NumberVector<?> o2) {
+ public double distance(NumberVector o1, NumberVector o2) {
assert o1.getDimensionality() == ps.length : "Inappropriate dimensionality!";
assert o2.getDimensionality() == ps.length : "Inappropriate dimensionality!";
@@ -87,8 +87,8 @@ public class MultiLPNorm extends AbstractVectorDoubleDistanceFunction {
}
@Override
- public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
- return new VectorFieldTypeInformation<>(NumberVector.class, ps.length);
+ public SimpleTypeInformation<? super NumberVector> getInputTypeRestriction() {
+ return VectorFieldTypeInformation.typeRequest(NumberVector.class, ps.length, ps.length);
}
/**
diff --git a/src/tutorial/distancefunction/TutorialDistanceFunction.java b/src/tutorial/distancefunction/TutorialDistanceFunction.java
index 5e2fbcb6..825a3a0d 100644
--- a/src/tutorial/distancefunction/TutorialDistanceFunction.java
+++ b/src/tutorial/distancefunction/TutorialDistanceFunction.java
@@ -4,7 +4,7 @@ package tutorial.distancefunction;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,8 +25,8 @@ package tutorial.distancefunction;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractNumberVectorDistanceFunction;
/**
* Tutorial example for ELKI.
@@ -37,16 +37,16 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanc
*
* @author Erich Schubert
*/
-public class TutorialDistanceFunction extends AbstractVectorDoubleDistanceFunction {
+public class TutorialDistanceFunction extends AbstractNumberVectorDistanceFunction {
@Override
- public double doubleDistance(NumberVector<?> o1, NumberVector<?> o2) {
+ public double distance(NumberVector o1, NumberVector o2) {
double dx = (o1.doubleValue(0) - o2.doubleValue(0));
double dy = (o1.doubleValue(1) - o2.doubleValue(1));
return dx * dx + Math.abs(dy);
}
@Override
- public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
- return new VectorFieldTypeInformation<>(NumberVector.class, 2);
+ public SimpleTypeInformation<? super NumberVector> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_FIELD_2D;
}
}
diff --git a/src/tutorial/distancefunction/package-info.java b/src/tutorial/distancefunction/package-info.java
index ba64b6bd..c9fb987d 100644
--- a/src/tutorial/distancefunction/package-info.java
+++ b/src/tutorial/distancefunction/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/tutorial/outlier/DistanceStddevOutlier.java b/src/tutorial/outlier/DistanceStddevOutlier.java
index 61859f88..571e1906 100644
--- a/src/tutorial/outlier/DistanceStddevOutlier.java
+++ b/src/tutorial/outlier/DistanceStddevOutlier.java
@@ -1,4 +1,26 @@
package tutorial.outlier;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -11,13 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
@@ -36,9 +58,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @author Erich Schubert
*
* @param <O> Object type
- * @param <D> Distance type
*/
-public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+public class DistanceStddevOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* Class logger
*/
@@ -55,7 +76,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab
* @param distanceFunction Distance function to use
* @param k Number of neighbors to use
*/
- public DistanceStddevOutlier(DistanceFunction<? super O, D> distanceFunction, int k) {
+ public DistanceStddevOutlier(DistanceFunction<? super O> distanceFunction, int k) {
super(distanceFunction);
this.k = k;
}
@@ -69,7 +90,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab
*/
public OutlierResult run(Database database, Relation<O> relation) {
// Get a nearest neighbor query on the relation.
- KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
+ KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
// Output data storage
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
// Track minimum and maximum scores
@@ -77,15 +98,15 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab
// Iterate over all objects
for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ KNNList neighbors = knnq.getKNNForDBID(iter, k);
// Aggregate distances
MeanVariance mv = new MeanVariance();
- for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// Skip the object itself. The 0 is not very informative.
if(DBIDUtil.equal(iter, neighbor)) {
continue;
}
- mv.put(neighbor.getDistance().doubleValue());
+ mv.put(neighbor.doubleValue());
}
// Store score
scores.putDouble(iter, mv.getSampleStddev());
@@ -94,7 +115,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab
// Wrap the result in the standard containers
// Actual min-max, theoretical min-max!
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
- Relation<Double> rel = new MaterializedRelation<>(database, TypeUtil.DOUBLE, relation.getDBIDs(), "stddev-outlier", scores);
+ DoubleRelation rel = new MaterializedDoubleRelation(database, relation.getDBIDs(), "stddev-outlier", scores);
return new OutlierResult(meta, rel);
}
@@ -116,9 +137,8 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab
* @apiviz.exclude
*
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for parameterization.
*/
@@ -141,7 +161,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab
}
@Override
- protected DistanceStddevOutlier<O, D> makeInstance() {
+ protected DistanceStddevOutlier<O> makeInstance() {
return new DistanceStddevOutlier<>(distanceFunction, k);
}
}
diff --git a/src/tutorial/outlier/ODIN.java b/src/tutorial/outlier/ODIN.java
index 04f8ee90..b97be33b 100644
--- a/src/tutorial/outlier/ODIN.java
+++ b/src/tutorial/outlier/ODIN.java
@@ -4,7 +4,7 @@ package tutorial.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,13 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -66,10 +66,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @author Erich Schubert
*
* @param <O> Object type
- * @param <D> Distance type
*/
@Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", title = "Outlier detection using k-nearest neighbour graph", booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", url = "http://dx.doi.org/10.1109/ICPR.2004.1334558")
-public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+public class ODIN<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* Class logger.
*/
@@ -86,7 +85,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
* @param distanceFunction Distance function
* @param k k parameter
*/
- public ODIN(DistanceFunction<? super O, D> distanceFunction, int k) {
+ public ODIN(DistanceFunction<? super O> distanceFunction, int k) {
super(distanceFunction);
this.k = k;
}
@@ -105,8 +104,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
*/
public OutlierResult run(Database database, Relation<O> relation) {
// Get the query functions:
- DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnq = database.getKNNQuery(dq, k);
+ DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnq = database.getKNNQuery(dq, k);
// Get the objects to process, and a data storage for counting and output:
DBIDs ids = relation.getDBIDs();
@@ -115,7 +114,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
// Process all objects
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
// Find the nearest neighbors (using an index, if available!)
- KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ KNNList neighbors = knnq.getKNNForDBID(iter, k);
// For each neighbor, except ourselves, increase the in-degree:
for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) {
if (DBIDUtil.equal(iter, nei)) {
@@ -135,7 +134,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
// By actually specifying theoretical min, max and baseline, we get a better
// visualization (try it out - or see the screenshots in the tutorial)!
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., ids.size() - 1, k);
- Relation<Double> rel = new MaterializedRelation<>("ODIN In-Degree", "odin", TypeUtil.DOUBLE, scores, ids);
+ DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids);
return new OutlierResult(meta, rel);
}
@@ -157,9 +156,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
* @apiviz.exclude
*
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Parameter for the number of nearest neighbors:
*
@@ -189,7 +187,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
}
@Override
- protected ODIN<O, D> makeInstance() {
+ protected ODIN<O> makeInstance() {
return new ODIN<>(distanceFunction, k);
}
}
diff --git a/src/tutorial/outlier/SimpleScoreDumper.java b/src/tutorial/outlier/SimpleScoreDumper.java
index 49c1de0b..6984cccd 100644
--- a/src/tutorial/outlier/SimpleScoreDumper.java
+++ b/src/tutorial/outlier/SimpleScoreDumper.java
@@ -4,7 +4,7 @@ package tutorial.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,7 @@ import java.util.ArrayList;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultHandler;
@@ -44,9 +44,9 @@ public class SimpleScoreDumper implements ResultHandler {
// Get all new outlier results
ArrayList<OutlierResult> ors = ResultUtil.filterResults(newResult, OutlierResult.class);
for (OutlierResult o : ors) {
- Relation<Double> scores = o.getScores();
+ DoubleRelation scores = o.getScores();
for (DBIDIter iter = scores.iterDBIDs(); iter.valid(); iter.advance()) {
- System.out.println(DBIDUtil.toString(iter) + " " + scores.get(iter));
+ System.out.println(DBIDUtil.toString(iter) + " " + scores.doubleValue(iter));
}
}
}
diff --git a/src/tutorial/package-info.java b/src/tutorial/package-info.java
index e37e53f2..33067053 100644
--- a/src/tutorial/package-info.java
+++ b/src/tutorial/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team