diff options
Diffstat (limited to 'src/tutorial')
13 files changed, 206 insertions, 208 deletions
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java index 7477c064..b1406274 100644 --- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java +++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java @@ -4,7 +4,7 @@ package tutorial.clustering; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -46,7 +46,6 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.result.Result; @@ -69,7 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @param <O> Object type */ -public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> { +public class NaiveAgglomerativeHierarchicalClustering1<O> extends AbstractDistanceBasedAlgorithm<O, Result> { /** * Class logger */ @@ -86,7 +85,7 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan * @param distanceFunction Distance function to use * @param numclusters Number of clusters */ - public NaiveAgglomerativeHierarchicalClustering1(DistanceFunction<? super O, D> distanceFunction, int numclusters) { + public NaiveAgglomerativeHierarchicalClustering1(DistanceFunction<? super O> distanceFunction, int numclusters) { super(distanceFunction); this.numclusters = numclusters; } @@ -99,7 +98,7 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan * @return Clustering hierarchy */ public Result run(Database db, Relation<O> relation) { - DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction()); + DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction()); ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); final int size = ids.size(); @@ -108,10 +107,10 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan // Compute the initial distance matrix. double[][] matrix = new double[size][size]; DBIDArrayIter ix = ids.iter(), iy = ids.iter(); - for (int x = 0; ix.valid(); x++, ix.advance()) { + for(int x = 0; ix.valid(); x++, ix.advance()) { iy.seek(0); - for (int y = 0; y < x; y++, iy.advance()) { - final double dist = dq.distance(ix, iy).doubleValue(); + for(int y = 0; y < x; y++, iy.advance()) { + final double dist = dq.distance(ix, iy); matrix[x][y] = dist; matrix[y][x] = dist; } @@ -129,18 +128,18 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan // Repeat until everything merged, except the desired number of clusters: final int stop = size - numclusters; FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null; - for (int i = 0; i < stop; i++) { + for(int i = 0; i < stop; i++) { double min = Double.POSITIVE_INFINITY; int minx = -1, miny = -1; - for (int x = 0; x < size; x++) { - if (height[x] < Double.POSITIVE_INFINITY) { + for(int x = 0; x < size; x++) { + if(height[x] < Double.POSITIVE_INFINITY) { continue; } - for (int y = 0; y < x; y++) { - if (height[y] < Double.POSITIVE_INFINITY) { + for(int y = 0; y < x; y++) { + if(height[y] < Double.POSITIVE_INFINITY) { continue; } - if (matrix[x][y] < min) { + if(matrix[x][y] < min) { min = matrix[x][y]; minx = x; miny = y; @@ -158,36 +157,33 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan // Merge into cluster ModifiableDBIDs cx = clusters.get(minx); ModifiableDBIDs cy = clusters.get(miny); - if (cy == null) { + if(cy == null) { cy = DBIDUtil.newHashSet(); cy.add(iy); } - if (cx == null) { + if(cx == null) { cy.add(ix); - } else { + } + else { cy.addDBIDs(cx); clusters.remove(minx); } clusters.put(miny, cy); // Update distance matrix for y: - for (int j = 0; j < size; j++) { + for(int j = 0; j < size; j++) { matrix[j][miny] = Math.min(matrix[j][minx], matrix[j][miny]); matrix[miny][j] = Math.min(matrix[minx][j], matrix[miny][j]); } - if (prog != null) { - prog.incrementProcessed(LOG); - } - } - if (prog != null) { - prog.ensureCompleted(LOG); + LOG.incrementProcessed(prog); } + LOG.ensureCompleted(prog); // Build the clustering result final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering"); - for (int x = 0; x < size; x++) { - if (height[x] < Double.POSITIVE_INFINITY) { + for(int x = 0; x < size; x++) { + if(height[x] < Double.POSITIVE_INFINITY) { DBIDs cids = clusters.get(x); - if (cids == null) { + if(cids == null) { ix.seek(x); cids = DBIDUtil.deref(ix); } @@ -214,10 +210,11 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan * * @author Erich Schubert * + * @apiviz.exclude + * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Desired number of clusters. */ @@ -228,13 +225,13 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan super.makeOptions(config); IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID); numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); - if (config.grab(numclustersP)) { + if(config.grab(numclustersP)) { numclusters = numclustersP.intValue(); } } @Override - protected NaiveAgglomerativeHierarchicalClustering1<O, D> makeInstance() { + protected NaiveAgglomerativeHierarchicalClustering1<O> makeInstance() { return new NaiveAgglomerativeHierarchicalClustering1<>(distanceFunction, numclusters); } } diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java index d33ca768..0081a845 100644 --- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java +++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java @@ -4,7 +4,7 @@ package tutorial.clustering; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -46,7 +46,6 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.result.Result; @@ -69,7 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @param <O> Object type */ -public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> { +public class NaiveAgglomerativeHierarchicalClustering2<O> extends AbstractDistanceBasedAlgorithm<O, Result> { /** * Class logger */ @@ -86,7 +85,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan * @param distanceFunction Distance function to use * @param numclusters Number of clusters */ - public NaiveAgglomerativeHierarchicalClustering2(DistanceFunction<? super O, D> distanceFunction, int numclusters) { + public NaiveAgglomerativeHierarchicalClustering2(DistanceFunction<? super O> distanceFunction, int numclusters) { super(distanceFunction); this.numclusters = numclusters; } @@ -99,7 +98,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan * @return Clustering hierarchy */ public Result run(Database db, Relation<O> relation) { - DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction()); + DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction()); ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); final int size = ids.size(); @@ -116,7 +115,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan for (int x = 0; ix.valid(); x++, ix.advance()) { iy.seek(0); for (int y = 0; y < x; y++, iy.advance()) { - scratch[pos] = dq.distance(ix, iy).doubleValue(); + scratch[pos] = dq.distance(ix, iy); pos++; } } @@ -200,13 +199,9 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan final int jbase = triangleSize(j); scratch[jbase + miny] = Math.min(scratch[jbase + minx], scratch[jbase + miny]); } - if (prog != null) { - prog.incrementProcessed(LOG); - } - } - if (prog != null) { - prog.ensureCompleted(LOG); + LOG.incrementProcessed(prog); } + LOG.ensureCompleted(prog); // Build the clustering result final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering"); @@ -251,10 +246,11 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan * * @author Erich Schubert * + * @apiviz.exclude + * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Desired number of clusters. */ @@ -271,7 +267,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan } @Override - protected NaiveAgglomerativeHierarchicalClustering2<O, D> makeInstance() { + protected NaiveAgglomerativeHierarchicalClustering2<O> makeInstance() { return new NaiveAgglomerativeHierarchicalClustering2<>(distanceFunction, numclusters); } } diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java index d3312115..0b1590bd 100644 --- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java +++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java @@ -4,7 +4,7 @@ package tutorial.clustering; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -47,7 +47,6 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.result.Result; @@ -81,7 +80,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @param <O> Object type */ @Reference(title = "A Review of Classification", authors = "R. M. Cormack", booktitle = "Journal of the Royal Statistical Society. Series A, Vol. 134, No. 3", url = "http://www.jstor.org/stable/2344237") -public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> { +public class NaiveAgglomerativeHierarchicalClustering3<O> extends AbstractDistanceBasedAlgorithm<O, Result> { /** * Class logger */ @@ -94,6 +93,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan * R. M. Cormack, A Review of Classification * * @author Erich Schubert + * + * @apiviz.exclude */ public static enum Linkage {// SINGLE { @@ -168,7 +169,7 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan * @param numclusters Number of clusters * @param linkage Linkage strategy */ - public NaiveAgglomerativeHierarchicalClustering3(DistanceFunction<? super O, D> distanceFunction, int numclusters, Linkage linkage) { + public NaiveAgglomerativeHierarchicalClustering3(DistanceFunction<? super O> distanceFunction, int numclusters, Linkage linkage) { super(distanceFunction); this.numclusters = numclusters; this.linkage = linkage; @@ -182,14 +183,14 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan * @return Clustering hierarchy */ public Result run(Database db, Relation<O> relation) { - DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction()); + DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction()); ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); final int size = ids.size(); - if (size > 0x10000) { + if(size > 0x10000) { throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow."); } - if (Linkage.SINGLE.equals(linkage)) { + if(Linkage.SINGLE.equals(linkage)) { LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!"); } @@ -199,12 +200,12 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan // Position counter - must agree with computeOffset! int pos = 0; boolean square = Linkage.WARD.equals(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction())); - for (int x = 0; ix.valid(); x++, ix.advance()) { + for(int x = 0; ix.valid(); x++, ix.advance()) { iy.seek(0); - for (int y = 0; y < x; y++, iy.advance()) { - scratch[pos] = dq.distance(ix, iy).doubleValue(); + for(int y = 0; y < x; y++, iy.advance()) { + scratch[pos] = dq.distance(ix, iy); // Ward uses variances -- i.e. squared values - if (square) { + if(square) { scratch[pos] *= scratch[pos]; } pos++; @@ -223,20 +224,20 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan // Repeat until everything merged, except the desired number of clusters: final int stop = size - numclusters; FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null; - for (int i = 0; i < stop; i++) { + for(int i = 0; i < stop; i++) { double min = Double.POSITIVE_INFINITY; int minx = -1, miny = -1; - for (int x = 0; x < size; x++) { - if (height[x] < Double.POSITIVE_INFINITY) { + for(int x = 0; x < size; x++) { + if(height[x] < Double.POSITIVE_INFINITY) { continue; } final int xbase = triangleSize(x); - for (int y = 0; y < x; y++) { - if (height[y] < Double.POSITIVE_INFINITY) { + for(int y = 0; y < x; y++) { + if(height[y] < Double.POSITIVE_INFINITY) { continue; } final int idx = xbase + y; - if (scratch[idx] < min) { + if(scratch[idx] < min) { min = scratch[idx]; minx = x; miny = y; @@ -255,15 +256,17 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan ModifiableDBIDs cx = clusters.get(minx); ModifiableDBIDs cy = clusters.get(miny); int sizex = 1, sizey = 1; // cluster sizes, for averaging - if (cy == null) { + if(cy == null) { cy = DBIDUtil.newHashSet(); cy.add(iy); - } else { + } + else { sizey = cy.size(); } - if (cx == null) { + if(cx == null) { cy.add(ix); - } else { + } + else { sizex = cx.size(); cy.addDBIDs(cx); clusters.remove(minx); @@ -276,8 +279,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan // hashmap lookup. final int xbase = triangleSize(minx), ybase = triangleSize(miny); // Write to (y, j), with j < y - for (int j = 0; j < miny; j++) { - if (height[j] < Double.POSITIVE_INFINITY) { + for(int j = 0; j < miny; j++) { + if(height[j] < Double.POSITIVE_INFINITY) { continue; } final DBIDs idsj = clusters.get(j); @@ -285,8 +288,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan scratch[ybase + j] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[ybase + j], sizej, min); } // Write to (j, y), with y < j < x - for (int j = miny + 1; j < minx; j++) { - if (height[j] < Double.POSITIVE_INFINITY) { + for(int j = miny + 1; j < minx; j++) { + if(height[j] < Double.POSITIVE_INFINITY) { continue; } final int jbase = triangleSize(j); @@ -295,8 +298,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[jbase + miny], sizej, min); } // Write to (j, y), with y < x < j - for (int j = minx + 1; j < size; j++) { - if (height[j] < Double.POSITIVE_INFINITY) { + for(int j = minx + 1; j < size; j++) { + if(height[j] < Double.POSITIVE_INFINITY) { continue; } final DBIDs idsj = clusters.get(j); @@ -304,20 +307,16 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan final int jbase = triangleSize(j); scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min); } - if (prog != null) { - prog.incrementProcessed(LOG); - } - } - if (prog != null) { - prog.ensureCompleted(LOG); + LOG.incrementProcessed(prog); } + LOG.ensureCompleted(prog); // Build the clustering result final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering"); - for (int x = 0; x < size; x++) { - if (height[x] < Double.POSITIVE_INFINITY) { + for(int x = 0; x < size; x++) { + if(height[x] < Double.POSITIVE_INFINITY) { DBIDs cids = clusters.get(x); - if (cids == null) { + if(cids == null) { ix.seek(x); cids = DBIDUtil.deref(ix); } @@ -355,10 +354,11 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan * * @author Erich Schubert * + * @apiviz.exclude + * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Option ID for linkage parameter. */ @@ -379,19 +379,19 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan super.makeOptions(config); IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID); numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); - if (config.grab(numclustersP)) { + if(config.grab(numclustersP)) { numclusters = numclustersP.intValue(); } EnumParameter<Linkage> linkageP = new EnumParameter<>(LINKAGE_ID, Linkage.class); linkageP.setDefaultValue(Linkage.WARD); - if (config.grab(linkageP)) { + if(config.grab(linkageP)) { linkage = linkageP.getValue(); } } @Override - protected NaiveAgglomerativeHierarchicalClustering3<O, D> makeInstance() { + protected NaiveAgglomerativeHierarchicalClustering3<O> makeInstance() { return new NaiveAgglomerativeHierarchicalClustering3<>(distanceFunction, numclusters, linkage); } } diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java index 23678ccc..997b657a 100644 --- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java +++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java @@ -4,7 +4,7 @@ package tutorial.clustering; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; @@ -43,8 +43,6 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; @@ -75,7 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter; * @param <O> Object type */ @Reference(title = "A Review of Classification", authors = "R. M. Cormack", booktitle = "Journal of the Royal Statistical Society. Series A, Vol. 134, No. 3", url = "http://www.jstor.org/stable/2344237") -public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, PointerHierarchyRepresentationResult<DoubleDistance>> implements HierarchicalClusteringAlgorithm<DoubleDistance> { +public class NaiveAgglomerativeHierarchicalClustering4<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchyRepresentationResult> implements HierarchicalClusteringAlgorithm { /** * Class logger */ @@ -88,6 +86,8 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan * R. M. Cormack, A Review of Classification * * @author Erich Schubert + * + * @apiviz.exclude */ public static enum Linkage {// SINGLE { @@ -156,7 +156,7 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan * @param distanceFunction Distance function to use * @param linkage Linkage strategy */ - public NaiveAgglomerativeHierarchicalClustering4(DistanceFunction<? super O, D> distanceFunction, Linkage linkage) { + public NaiveAgglomerativeHierarchicalClustering4(DistanceFunction<? super O> distanceFunction, Linkage linkage) { super(distanceFunction); this.linkage = linkage; } @@ -168,15 +168,15 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan * @param relation Relation * @return Clustering hierarchy */ - public PointerHierarchyRepresentationResult<DoubleDistance> run(Database db, Relation<O> relation) { - DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction()); + public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) { + DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction()); ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); final int size = ids.size(); - if (size > 0x10000) { + if(size > 0x10000) { throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow."); } - if (Linkage.SINGLE.equals(linkage)) { + if(Linkage.SINGLE.equals(linkage)) { LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!"); } @@ -186,12 +186,12 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan // Position counter - must agree with computeOffset! int pos = 0; boolean square = Linkage.WARD.equals(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction())); - for (int x = 0; ix.valid(); x++, ix.advance()) { + for(int x = 0; ix.valid(); x++, ix.advance()) { iy.seek(0); - for (int y = 0; y < x; y++, iy.advance()) { - scratch[pos] = dq.distance(ix, iy).doubleValue(); + for(int y = 0; y < x; y++, iy.advance()) { + scratch[pos] = dq.distance(ix, iy); // Ward uses variances -- i.e. squared values - if (square) { + if(square) { scratch[pos] *= scratch[pos]; } pos++; @@ -200,9 +200,9 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan // Initialize space for result: WritableDBIDDataStore parent = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); - WritableDoubleDistanceDataStore height = DataStoreUtil.makeDoubleDistanceStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); + WritableDoubleDataStore height = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { parent.put(it, it); height.put(it, Double.POSITIVE_INFINITY); csize.put(it, 1); @@ -210,20 +210,20 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan // Repeat until everything merged, except the desired number of clusters: FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null; - for (int i = 1; i < size; i++) { + for(int i = 1; i < size; i++) { double min = Double.POSITIVE_INFINITY; int minx = -1, miny = -1; - for (ix.seek(0); ix.valid(); ix.advance()) { - if (height.doubleValue(ix) < Double.POSITIVE_INFINITY) { + for(ix.seek(0); ix.valid(); ix.advance()) { + if(height.doubleValue(ix) < Double.POSITIVE_INFINITY) { continue; } final int xbase = triangleSize(ix.getOffset()); - for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) { - if (height.doubleValue(iy) < Double.POSITIVE_INFINITY) { + for(iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) { + if(height.doubleValue(iy) < Double.POSITIVE_INFINITY) { continue; } final int idx = xbase + iy.getOffset(); - if (scratch[idx] <= min) { + if(scratch[idx] <= min) { min = scratch[idx]; minx = ix.getOffset(); miny = iy.getOffset(); @@ -244,16 +244,16 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan // Update distance matrix. Note: miny < minx final int xbase = triangleSize(minx), ybase = triangleSize(miny); // Write to (y, j), with j < y - for (ij.seek(0); ij.getOffset() < miny; ij.advance()) { - if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) { + for(ij.seek(0); ij.getOffset() < miny; ij.advance()) { + if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) { continue; } final int sizej = csize.intValue(ij); scratch[ybase + ij.getOffset()] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, min); } // Write to (j, y), with y < j < x - for (ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) { - if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) { + for(ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) { + if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) { continue; } final int jbase = triangleSize(ij.getOffset()); @@ -261,23 +261,19 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + miny], sizej, min); } // Write to (j, y), with y < x < j - for (ij.seek(minx + 1); ij.valid(); ij.advance()) { - if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) { + for(ij.seek(minx + 1); ij.valid(); ij.advance()) { + if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) { continue; } final int jbase = triangleSize(ij.getOffset()); final int sizej = csize.intValue(ij); scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min); } - if (prog != null) { - prog.incrementProcessed(LOG); - } - } - if (prog != null) { - prog.ensureCompleted(LOG); + LOG.incrementProcessed(prog); } + LOG.ensureCompleted(prog); - return new PointerHierarchyRepresentationResult<>(ids, parent, height); + return new PointerHierarchyRepresentationResult(ids, parent, height); } /** @@ -301,20 +297,16 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan return LOG; } - @Override - public DoubleDistance getDistanceFactory() { - return DoubleDistance.FACTORY; - } - /** * Parameterization class * * @author Erich Schubert * + * @apiviz.exclude + * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Option ID for linkage parameter. */ @@ -330,13 +322,13 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan super.makeOptions(config); EnumParameter<Linkage> linkageP = new EnumParameter<>(LINKAGE_ID, Linkage.class); linkageP.setDefaultValue(Linkage.WARD); - if (config.grab(linkageP)) { + if(config.grab(linkageP)) { linkage = linkageP.getValue(); } } @Override - protected NaiveAgglomerativeHierarchicalClustering4<O, D> makeInstance() { + protected NaiveAgglomerativeHierarchicalClustering4<O> makeInstance() { return new NaiveAgglomerativeHierarchicalClustering4<>(distanceFunction, linkage); } } diff --git a/src/tutorial/clustering/SameSizeKMeansAlgorithm.java b/src/tutorial/clustering/SameSizeKMeansAlgorithm.java index 91da44c5..ad57885e 100644 --- a/src/tutorial/clustering/SameSizeKMeansAlgorithm.java +++ b/src/tutorial/clustering/SameSizeKMeansAlgorithm.java @@ -4,7 +4,7 @@ package tutorial.clustering; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,8 +29,8 @@ import java.util.Comparator; import java.util.List; import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.AbstractKMeans; -import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansInitialization; -import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansPlusPlusInitialMeans; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansPlusPlusInitialMeans; import de.lmu.ifi.dbs.elki.data.Cluster; import de.lmu.ifi.dbs.elki.data.Clustering; import de.lmu.ifi.dbs.elki.data.NumberVector; @@ -48,12 +48,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; -import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerArrayQuickSort; import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -78,7 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @param <V> Vector type */ -public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends AbstractKMeans<V, DoubleDistance, MeanModel<V>> { +public class SameSizeKMeansAlgorithm<V extends NumberVector> extends AbstractKMeans<V, MeanModel> { /** * Class logger */ @@ -92,7 +91,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract * @param maxiter Maximum number of iterations * @param initializer */ - public SameSizeKMeansAlgorithm(PrimitiveDoubleDistanceFunction<? super NumberVector<?>> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) { + public SameSizeKMeansAlgorithm(PrimitiveDistanceFunction<? super NumberVector> distanceFunction, int k, int maxiter, KMeansInitialization<? super V> initializer) { super(distanceFunction, k, maxiter, initializer); } @@ -104,11 +103,11 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract * @return result */ @Override - public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) { + public Clustering<MeanModel> run(Database database, Relation<V> relation) { // Database objects to process final DBIDs ids = relation.getDBIDs(); // Choose initial means - List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction()); + List<Vector> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction(), Vector.FACTORY); // Setup cluster assignment store List<ModifiableDBIDs> clusters = new ArrayList<>(); for(int i = 0; i < k; i++) { @@ -125,11 +124,10 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract means = refineResult(relation, means, clusters, metas, tids); // Wrap result - Clustering<MeanModel<V>> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering"); - final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation); + Clustering<MeanModel> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering"); for(int i = 0; i < clusters.size(); i++) { - V mean = factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()); - result.addToplevelCluster(new Cluster<>(clusters.get(i), new MeanModel<>(mean))); + Vector mean = means.get(i).getColumnVector(); + result.addToplevelCluster(new Cluster<>(clusters.get(i), new MeanModel(mean))); } return result; } @@ -141,10 +139,8 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract * @param means Mean vectors * @return Initialized storage */ - protected WritableDataStore<Meta> initializeMeta(Relation<V> relation, List<? extends NumberVector<?>> means) { - // This is a safe cast - see constructor. - @SuppressWarnings("unchecked") - PrimitiveDoubleDistanceFunction<NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) getDistanceFunction(); + protected WritableDataStore<Meta> initializeMeta(Relation<V> relation, List<? extends NumberVector> means) { + PrimitiveDistanceFunction<? super NumberVector> df = getDistanceFunction(); // The actual storage final WritableDataStore<Meta> metas = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Meta.class); // Build the metadata, track the two nearest cluster centers. @@ -152,7 +148,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract Meta c = new Meta(k); V fv = relation.get(id); for(int i = 0; i < k; i++) { - c.dists[i] = df.doubleDistance(fv, means.get(i)); + c.dists[i] = df.distance(fv, means.get(i)); if(i > 0) { if(c.dists[i] < c.dists[c.primary]) { c.primary = i; @@ -231,14 +227,14 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract * @param metas Metadata storage * @param df Distance function */ - protected void updateDistances(Relation<V> relation, List<? extends NumberVector<?>> means, final WritableDataStore<Meta> metas, PrimitiveDoubleDistanceFunction<NumberVector<?>> df) { + protected void updateDistances(Relation<V> relation, List<Vector> means, final WritableDataStore<Meta> metas, PrimitiveDistanceFunction<? super NumberVector> df) { for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) { Meta c = metas.get(id); V fv = relation.get(id); // Update distances to means. c.secondary = -1; for(int i = 0; i < k; i++) { - c.dists[i] = df.doubleDistance(fv, means.get(i)); + c.dists[i] = df.distance(fv, means.get(i)); if(c.primary != i) { if(c.secondary < 0 || c.dists[i] < c.dists[c.secondary]) { c.secondary = i; @@ -259,10 +255,9 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract * @param tids DBIDs array * @return final means */ - protected List<? extends NumberVector<?>> refineResult(Relation<V> relation, List<? extends NumberVector<?>> means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) { + protected List<Vector> refineResult(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) { // This is a safe cast - see constructor. - @SuppressWarnings("unchecked") - PrimitiveDoubleDistanceFunction<NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) getDistanceFunction(); + PrimitiveDistanceFunction<? super NumberVector> df = getDistanceFunction(); // Our desired cluster size: final int minsize = tids.size() / k; // rounded down final int maxsize = (tids.size() + k - 1) / k; // rounded up @@ -289,7 +284,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract transfers[i] = DBIDUtil.newArray(); } - for(int iter = 0; maxiter < 0 || iter < maxiter; iter++) { + for(int iter = 0; maxiter <= 0 || iter < maxiter; iter++) { updateDistances(relation, means, metas, df); tids.sort(comp); int active = 0; // Track if anything has changed @@ -461,7 +456,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer { /** * k Parameter. */ @@ -480,12 +475,12 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract /** * Distance function */ - protected PrimitiveDoubleDistanceFunction<? super NumberVector<?>> distanceFunction; + protected PrimitiveDistanceFunction<? super NumberVector> distanceFunction; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<PrimitiveDoubleDistanceFunction<? super NumberVector<?>>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDoubleDistanceFunction.class); + ObjectParameter<PrimitiveDistanceFunction<? super NumberVector>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class); if(config.grab(distanceFunctionP)) { distanceFunction = distanceFunctionP.instantiateClass(config); if(!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) { diff --git a/src/tutorial/clustering/package-info.java b/src/tutorial/clustering/package-info.java index ac7ab950..e2971ef8 100644 --- a/src/tutorial/clustering/package-info.java +++ b/src/tutorial/clustering/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/tutorial/distancefunction/MultiLPNorm.java b/src/tutorial/distancefunction/MultiLPNorm.java index b3a96ecb..823e00b1 100644 --- a/src/tutorial/distancefunction/MultiLPNorm.java +++ b/src/tutorial/distancefunction/MultiLPNorm.java @@ -3,7 +3,7 @@ package tutorial.distancefunction; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractNumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -14,7 +14,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParamet This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -42,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParamet * * @author Erich Schubert */ -public class MultiLPNorm extends AbstractVectorDoubleDistanceFunction { +public class MultiLPNorm extends AbstractNumberVectorDistanceFunction { /** * The exponents */ @@ -72,7 +72,7 @@ public class MultiLPNorm extends AbstractVectorDoubleDistanceFunction { } @Override - public double doubleDistance(NumberVector<?> o1, NumberVector<?> o2) { + public double distance(NumberVector o1, NumberVector o2) { assert o1.getDimensionality() == ps.length : "Inappropriate dimensionality!"; assert o2.getDimensionality() == ps.length : "Inappropriate dimensionality!"; @@ -87,8 +87,8 @@ public class MultiLPNorm extends AbstractVectorDoubleDistanceFunction { } @Override - public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() { - return new VectorFieldTypeInformation<>(NumberVector.class, ps.length); + public SimpleTypeInformation<? super NumberVector> getInputTypeRestriction() { + return VectorFieldTypeInformation.typeRequest(NumberVector.class, ps.length, ps.length); } /** diff --git a/src/tutorial/distancefunction/TutorialDistanceFunction.java b/src/tutorial/distancefunction/TutorialDistanceFunction.java index 5e2fbcb6..825a3a0d 100644 --- a/src/tutorial/distancefunction/TutorialDistanceFunction.java +++ b/src/tutorial/distancefunction/TutorialDistanceFunction.java @@ -4,7 +4,7 @@ package tutorial.distancefunction; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,8 +25,8 @@ package tutorial.distancefunction; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; -import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractNumberVectorDistanceFunction; /** * Tutorial example for ELKI. @@ -37,16 +37,16 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanc * * @author Erich Schubert */ -public class TutorialDistanceFunction extends AbstractVectorDoubleDistanceFunction { +public class TutorialDistanceFunction extends AbstractNumberVectorDistanceFunction { @Override - public double doubleDistance(NumberVector<?> o1, NumberVector<?> o2) { + public double distance(NumberVector o1, NumberVector o2) { double dx = (o1.doubleValue(0) - o2.doubleValue(0)); double dy = (o1.doubleValue(1) - o2.doubleValue(1)); return dx * dx + Math.abs(dy); } @Override - public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() { - return new VectorFieldTypeInformation<>(NumberVector.class, 2); + public SimpleTypeInformation<? super NumberVector> getInputTypeRestriction() { + return TypeUtil.NUMBER_VECTOR_FIELD_2D; } } diff --git a/src/tutorial/distancefunction/package-info.java b/src/tutorial/distancefunction/package-info.java index ba64b6bd..c9fb987d 100644 --- a/src/tutorial/distancefunction/package-info.java +++ b/src/tutorial/distancefunction/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/tutorial/outlier/DistanceStddevOutlier.java b/src/tutorial/outlier/DistanceStddevOutlier.java index 61859f88..571e1906 100644 --- a/src/tutorial/outlier/DistanceStddevOutlier.java +++ b/src/tutorial/outlier/DistanceStddevOutlier.java @@ -1,4 +1,26 @@ package tutorial.outlier; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; @@ -11,13 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MeanVariance; @@ -36,9 +58,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @author Erich Schubert * * @param <O> Object type - * @param <D> Distance type */ -public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +public class DistanceStddevOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * Class logger */ @@ -55,7 +76,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab * @param distanceFunction Distance function to use * @param k Number of neighbors to use */ - public DistanceStddevOutlier(DistanceFunction<? super O, D> distanceFunction, int k) { + public DistanceStddevOutlier(DistanceFunction<? super O> distanceFunction, int k) { super(distanceFunction); this.k = k; } @@ -69,7 +90,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab */ public OutlierResult run(Database database, Relation<O> relation) { // Get a nearest neighbor query on the relation. - KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k); + KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k); // Output data storage WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB); // Track minimum and maximum scores @@ -77,15 +98,15 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab // Iterate over all objects for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { - KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + KNNList neighbors = knnq.getKNNForDBID(iter, k); // Aggregate distances MeanVariance mv = new MeanVariance(); - for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // Skip the object itself. The 0 is not very informative. if(DBIDUtil.equal(iter, neighbor)) { continue; } - mv.put(neighbor.getDistance().doubleValue()); + mv.put(neighbor.doubleValue()); } // Store score scores.putDouble(iter, mv.getSampleStddev()); @@ -94,7 +115,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab // Wrap the result in the standard containers // Actual min-max, theoretical min-max! OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY); - Relation<Double> rel = new MaterializedRelation<>(database, TypeUtil.DOUBLE, relation.getDBIDs(), "stddev-outlier", scores); + DoubleRelation rel = new MaterializedDoubleRelation(database, relation.getDBIDs(), "stddev-outlier", scores); return new OutlierResult(meta, rel); } @@ -116,9 +137,8 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab * @apiviz.exclude * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Option ID for parameterization. */ @@ -141,7 +161,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab } @Override - protected DistanceStddevOutlier<O, D> makeInstance() { + protected DistanceStddevOutlier<O> makeInstance() { return new DistanceStddevOutlier<>(distanceFunction, k); } } diff --git a/src/tutorial/outlier/ODIN.java b/src/tutorial/outlier/ODIN.java index 04f8ee90..b97be33b 100644 --- a/src/tutorial/outlier/ODIN.java +++ b/src/tutorial/outlier/ODIN.java @@ -4,7 +4,7 @@ package tutorial.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -33,13 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; @@ -66,10 +66,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @author Erich Schubert * * @param <O> Object type - * @param <D> Distance type */ @Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", title = "Outlier detection using k-nearest neighbour graph", booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", url = "http://dx.doi.org/10.1109/ICPR.2004.1334558") -public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +public class ODIN<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * Class logger. */ @@ -86,7 +85,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit * @param distanceFunction Distance function * @param k k parameter */ - public ODIN(DistanceFunction<? super O, D> distanceFunction, int k) { + public ODIN(DistanceFunction<? super O> distanceFunction, int k) { super(distanceFunction); this.k = k; } @@ -105,8 +104,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit */ public OutlierResult run(Database database, Relation<O> relation) { // Get the query functions: - DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction()); - KNNQuery<O, D> knnq = database.getKNNQuery(dq, k); + DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(dq, k); // Get the objects to process, and a data storage for counting and output: DBIDs ids = relation.getDBIDs(); @@ -115,7 +114,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit // Process all objects for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { // Find the nearest neighbors (using an index, if available!) - KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + KNNList neighbors = knnq.getKNNForDBID(iter, k); // For each neighbor, except ourselves, increase the in-degree: for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) { if (DBIDUtil.equal(iter, nei)) { @@ -135,7 +134,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit // By actually specifying theoretical min, max and baseline, we get a better // visualization (try it out - or see the screenshots in the tutorial)! OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., ids.size() - 1, k); - Relation<Double> rel = new MaterializedRelation<>("ODIN In-Degree", "odin", TypeUtil.DOUBLE, scores, ids); + DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids); return new OutlierResult(meta, rel); } @@ -157,9 +156,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit * @apiviz.exclude * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Parameter for the number of nearest neighbors: * @@ -189,7 +187,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit } @Override - protected ODIN<O, D> makeInstance() { + protected ODIN<O> makeInstance() { return new ODIN<>(distanceFunction, k); } } diff --git a/src/tutorial/outlier/SimpleScoreDumper.java b/src/tutorial/outlier/SimpleScoreDumper.java index 49c1de0b..6984cccd 100644 --- a/src/tutorial/outlier/SimpleScoreDumper.java +++ b/src/tutorial/outlier/SimpleScoreDumper.java @@ -4,7 +4,7 @@ package tutorial.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,7 +26,7 @@ import java.util.ArrayList; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; import de.lmu.ifi.dbs.elki.result.HierarchicalResult; import de.lmu.ifi.dbs.elki.result.Result; import de.lmu.ifi.dbs.elki.result.ResultHandler; @@ -44,9 +44,9 @@ public class SimpleScoreDumper implements ResultHandler { // Get all new outlier results ArrayList<OutlierResult> ors = ResultUtil.filterResults(newResult, OutlierResult.class); for (OutlierResult o : ors) { - Relation<Double> scores = o.getScores(); + DoubleRelation scores = o.getScores(); for (DBIDIter iter = scores.iterDBIDs(); iter.valid(); iter.advance()) { - System.out.println(DBIDUtil.toString(iter) + " " + scores.get(iter)); + System.out.println(DBIDUtil.toString(iter) + " " + scores.doubleValue(iter)); } } } diff --git a/src/tutorial/package-info.java b/src/tutorial/package-info.java index e37e53f2..33067053 100644 --- a/src/tutorial/package-info.java +++ b/src/tutorial/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |