diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/distance/similarityfunction')
20 files changed, 1016 insertions, 364 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java index 22790a01..746f719a 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java @@ -49,9 +49,4 @@ public abstract class AbstractDBIDSimilarityFunction<D extends Distance<D>> exte super(); this.database = database; } - - @Override - public boolean isSymmetric() { - return true; - } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java index 2a9c2f88..2360f66e 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java @@ -63,4 +63,4 @@ public abstract class AbstractPrimitiveSimilarityFunction<O, D extends Distance< public <T extends O> SimilarityQuery<T, D> instantiate(Relation<T> relation) { return new PrimitiveSimilarityQuery<>(relation, this); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractVectorDoubleSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractVectorDoubleSimilarityFunction.java new file mode 100644 index 00000000..1f04cc98 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractVectorDoubleSimilarityFunction.java @@ -0,0 +1,50 @@ +package de.lmu.ifi.dbs.elki.distance.similarityfunction; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; + +/** + * Abstract base class for double-valued primitive similarity functions. + * + * @author Erich Schubert + */ +public abstract class AbstractVectorDoubleSimilarityFunction extends AbstractPrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> implements PrimitiveDoubleSimilarityFunction<NumberVector<?>> { + @Override + public DoubleDistance getDistanceFactory() { + return DoubleDistance.FACTORY; + } + + @Override + public DoubleDistance similarity(NumberVector<?> o1, NumberVector<?> o2) { + return new DoubleDistance(doubleSimilarity(o1, o2)); + } + + @Override + public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() { + return TypeUtil.NUMBER_VECTOR_FIELD; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java index 2e0dd52d..741ece82 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java @@ -40,12 +40,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * * @author Arthur Zimek * - * @apiviz.has de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborIndex.Factory + * @apiviz.has + * de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborIndex + * .Factory * @apiviz.has Instance oneway - - «create» * * @param <O> object type */ -public class FractionalSharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBasedSimilarityFunction<O, SharedNearestNeighborIndex<O>, ArrayDBIDs, DoubleDistance> implements NormalizedSimilarityFunction<O, DoubleDistance> { +public class FractionalSharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBasedSimilarityFunction<O, SharedNearestNeighborIndex<O>, ArrayDBIDs, DoubleDistance> implements NormalizedSimilarityFunction<O> { /** * Constructor. * @@ -59,7 +61,7 @@ public class FractionalSharedNearestNeighborSimilarityFunction<O> extends Abstra @Override public <T extends O> Instance<T> instantiate(Relation<T> database) { SharedNearestNeighborIndex<O> indexi = indexFactory.instantiate((Relation<O>) database); - return (Instance<T>) new Instance<>((Relation<O>) database, indexi); + return (Instance<T>) new Instance<>((Relation<O>) database, indexi, this); } /** @@ -73,13 +75,19 @@ public class FractionalSharedNearestNeighborSimilarityFunction<O> extends Abstra */ public static class Instance<T> extends AbstractIndexBasedSimilarityFunction.Instance<T, SharedNearestNeighborIndex<T>, ArrayDBIDs, DoubleDistance> { /** + * Similarity function. + */ + private FractionalSharedNearestNeighborSimilarityFunction<? super T> similarityFunction; + + /** * Constructor. * * @param database Database * @param preprocessor Preprocessor */ - public Instance(Relation<T> database, SharedNearestNeighborIndex<T> preprocessor) { + public Instance(Relation<T> database, SharedNearestNeighborIndex<T> preprocessor, FractionalSharedNearestNeighborSimilarityFunction<? super T> similarityFunction) { super(database, preprocessor); + this.similarityFunction = similarityFunction; } /** @@ -118,6 +126,11 @@ public class FractionalSharedNearestNeighborSimilarityFunction<O> extends Abstra } @Override + public SimilarityFunction<? super T, DoubleDistance> getSimilarityFunction() { + return similarityFunction; + } + + @Override public DoubleDistance getDistanceFactory() { return DoubleDistance.FACTORY; } diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/JaccardPrimitiveSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/JaccardPrimitiveSimilarityFunction.java new file mode 100644 index 00000000..99fa440e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/JaccardPrimitiveSimilarityFunction.java @@ -0,0 +1,205 @@ +package de.lmu.ifi.dbs.elki.distance.similarityfunction; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.data.FeatureVector; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery; +import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * A flexible extension of Jaccard similarity to non-binary vectors. + * + * Jaccard coefficient is commonly defined as {@code |intersection|/|union|}. + * + * We can extend this definition as follows: + * + * {@code |non-zero and equal attributes|/|non-zero attributes|}. + * + * For binary vectors, this will obviously be the same quantity. However, this + * version is more useful for categorical data. + * + * Reference: + * <p> + * P. Jaccard<br /> + * Étude comparative de la distribution florale dans une portion des Alpes et + * des Jura<br /> + * Bulletin del la Société Vaudoise des Sciences Naturelles + * </p> + * + * TODO: add optimized implementations for binary vectors. + * + * @author Erich Schubert + * + * @param <O> Vector type + */ +@Reference(authors = "P. Jaccard", title = "Étude comparative de la distribution florale dans une portion des Alpes et des Jura", booktitle = "Bulletin del la Société Vaudoise des Sciences Naturelles") +public class JaccardPrimitiveSimilarityFunction<O extends FeatureVector<?>> extends AbstractPrimitiveSimilarityFunction<O, DoubleDistance> implements NormalizedPrimitiveSimilarityFunction<O>, PrimitiveDoubleDistanceFunction<O> { + /** + * Constants for checking null. + */ + private static final Integer INTEGER_NULL = Integer.valueOf(0); + + /** + * Constants for checking null. + */ + private static final Double DOUBLE_NULL = Double.valueOf(0.); + + /** + * Empty string. + */ + private static final String STRING_NULL = ""; + + /** + * Constructor. No parameters. + */ + public JaccardPrimitiveSimilarityFunction() { + super(); + } + + @Override + public double doubleSimilarity(O o1, O o2) { + if(o1 instanceof NumberVector && o2 instanceof NumberVector) { + return doubleSimilarityNumberVector((NumberVector<?>) o1, (NumberVector<?>) o2); + } + final int d1 = o1.getDimensionality(), d2 = o2.getDimensionality(); + int intersection = 0, union = 0; + int d = 0; + for(; d < d1 && d < d2; d++) { + Object v1 = o1.getValue(d), v2 = o2.getValue(d); + final boolean n1 = isNull(v1), n2 = isNull(v2); + if(v1 instanceof Double && Double.isNaN((Double) v1)) { + continue; + } + if(v2 instanceof Double && Double.isNaN((Double) v2)) { + continue; + } + if(!n1 || !n2) { + ++union; + if(!n1 && v1.equals(v2)) { + ++intersection; + } + } + } + for(; d < d1; d++) { + if(!isNull(o1.getValue(d))) { + ++union; + } + } + for(; d < d2; d++) { + if(!isNull(o2.getValue(d))) { + ++union; + } + } + return intersection / (double) union; + } + + /** + * Compute Jaccard similarity for two number vectors. + * + * @param o1 First vector + * @param o2 Second vector + * @return Jaccard similarity + */ + public static double doubleSimilarityNumberVector(NumberVector<?> o1, NumberVector<?> o2) { + final int d1 = o1.getDimensionality(), d2 = o2.getDimensionality(); + int intersection = 0, union = 0; + int d = 0; + for(; d < d1 && d < d2; d++) { + double v1 = o1.doubleValue(d), v2 = o2.doubleValue(d); + if(v1 != v1 || v2 != v2) { // Skip NaNs. + continue; + } + if(v1 != 0. || v2 != 0) { + ++union; + if(v1 == v2) { + ++intersection; + } + } + } + for(; d < d1; d++) { + if(o1.doubleValue(d) != 0) { + ++union; + } + } + for(; d < d2; d++) { + if(o2.doubleValue(d) != 0) { + ++union; + } + } + return intersection / (double) union; + } + + @Override + public DoubleDistance similarity(O o1, O o2) { + return new DoubleDistance(doubleSimilarity(o1, o2)); + } + + /** + * Test a value for null. + * + * TODO: delegate to {@link FeatureVector} instead? + * + * @param val Value + * @return true when null + */ + private static boolean isNull(Object val) { + return (val == null) || STRING_NULL.equals(val) || DOUBLE_NULL.equals(val) || INTEGER_NULL.equals(val); + } + + @Override + public DoubleDistance distance(O o1, O o2) { + return new DoubleDistance(1. - doubleSimilarity(o1, o2)); + } + + @Override + public double doubleDistance(O o1, O o2) { + return 1. - doubleSimilarity(o1, o2); + } + + @Override + public boolean isMetric() { + return true; + } + + @Override + public DoubleDistance getDistanceFactory() { + return DoubleDistance.FACTORY; + } + + @Override + public SimpleTypeInformation<? super O> getInputTypeRestriction() { + return TypeUtil.FEATURE_VECTORS; + } + + @Override + public <T extends O> DistanceSimilarityQuery<T, DoubleDistance> instantiate(Relation<T> relation) { + return new PrimitiveDistanceSimilarityQuery<>(relation, this, this); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java index 462279d5..d307bec8 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java @@ -24,9 +24,7 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction; */ import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; -import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -38,11 +36,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * M.-M. Deza and E. Deza<br /> * Dictionary of distances * </p> - * + * * @author Erich Schubert */ @Reference(authors = "M.-M. Deza and E. Deza", title = "Dictionary of distances", booktitle = "Dictionary of distances") -public class Kulczynski1SimilarityFunction extends AbstractPrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> { +public class Kulczynski1SimilarityFunction extends AbstractVectorDoubleSimilarityFunction { /** * Static instance. */ @@ -59,34 +57,10 @@ public class Kulczynski1SimilarityFunction extends AbstractPrimitiveSimilarityFu } @Override - public DoubleDistance getDistanceFactory() { - return DoubleDistance.FACTORY; - } - - @Override - public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() { - return TypeUtil.NUMBER_VECTOR_FIELD; - } - - @Override - public DoubleDistance similarity(NumberVector<?> o1, NumberVector<?> o2) { - return new DoubleDistance(doubleSimilarity(o1, o2)); - } - - /** - * Compute the similarity. - * - * @param v1 First vector - * @param v2 Second vector - * @return Similarity - */ public double doubleSimilarity(NumberVector<?> v1, NumberVector<?> v2) { - final int dim1 = v1.getDimensionality(); - if (dim1 != v2.getDimensionality()) { - throw new IllegalArgumentException("Different dimensionality of FeatureVectors" + "\n first argument: " + v1.toString() + "\n second argument: " + v2.toString() + "\n" + v1.getDimensionality() + "!=" + v2.getDimensionality()); - } + final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(v1, v2); double sumdiff = 0., summin = 0.; - for (int i = 0; i < dim1; i++) { + for (int i = 0; i < dim; i++) { double xi = v1.doubleValue(i), yi = v2.doubleValue(i); sumdiff += Math.abs(xi - yi); summin += Math.min(xi, yi); diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java index a59010b8..8c678601 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java @@ -24,9 +24,7 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction; */ import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; -import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -44,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * @author Erich Schubert */ @Reference(authors = "M.-M. Deza and E. Deza", title = "Dictionary of distances", booktitle = "Dictionary of distances") -public class Kulczynski2SimilarityFunction extends AbstractPrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> { +public class Kulczynski2SimilarityFunction extends AbstractVectorDoubleSimilarityFunction { /** * Static instance. */ @@ -61,40 +59,16 @@ public class Kulczynski2SimilarityFunction extends AbstractPrimitiveSimilarityFu } @Override - public DoubleDistance getDistanceFactory() { - return DoubleDistance.FACTORY; - } - - @Override - public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() { - return TypeUtil.NUMBER_VECTOR_FIELD; - } - - @Override - public DoubleDistance similarity(NumberVector<?> o1, NumberVector<?> o2) { - return new DoubleDistance(doubleSimilarity(o1, o2)); - } - - /** - * Compute the similarity. - * - * @param v1 First vector - * @param v2 Second vector - * @return Similarity - */ public double doubleSimilarity(NumberVector<?> v1, NumberVector<?> v2) { - final int dim1 = v1.getDimensionality(); - if (dim1 != v2.getDimensionality()) { - throw new IllegalArgumentException("Different dimensionality of FeatureVectors" + "\n first argument: " + v1.toString() + "\n second argument: " + v2.toString() + "\n" + v1.getDimensionality() + "!=" + v2.getDimensionality()); - } + final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(v1, v2); double sumx = 0., sumy = 0., summin = 0.; - for (int i = 0; i < dim1; i++) { + for (int i = 0; i < dim; i++) { double xi = v1.doubleValue(i), yi = v2.doubleValue(i); sumx += xi; sumy += yi; summin += Math.min(xi, yi); } - return dim1 * .5 * (dim1 / sumx + dim1 / sumy) * summin; + return dim * .5 * (dim / sumx + dim / sumy) * summin; } /** diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java index 6e8bd76a..83a0edfa 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java @@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; - /** * Marker interface for similarity functions working on primitive objects, and * limited to the 0-1 value range. @@ -32,8 +30,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; * @author Erich Schubert * * @param <O> Object type - * @param <D> Distance type */ -public interface NormalizedPrimitiveSimilarityFunction<O, D extends Distance<D>> extends PrimitiveSimilarityFunction<O, D>, NormalizedSimilarityFunction<O, D> { +public interface NormalizedPrimitiveSimilarityFunction<O> extends PrimitiveDoubleSimilarityFunction<O>, NormalizedSimilarityFunction<O> { // empty marker interface } diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java index dcfd8e74..91e94498 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java @@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; /** * Marker interface to signal that the similarity function is normalized to @@ -31,9 +31,8 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; * * @author Erich Schubert * @param <O> object type - * @param <D> distance type * */ -public interface NormalizedSimilarityFunction<O, D extends Distance<?>> extends SimilarityFunction<O, D> { +public interface NormalizedSimilarityFunction<O> extends SimilarityFunction<O, DoubleDistance> { // Empty - marker interface. } diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveDoubleSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveDoubleSimilarityFunction.java new file mode 100644 index 00000000..2d886706 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveDoubleSimilarityFunction.java @@ -0,0 +1,49 @@ +package de.lmu.ifi.dbs.elki.distance.similarityfunction; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; + +/** + * Interface for similarity functions that can provide a raw double value. + * + * This is for use in performance-critical situations that need to avoid the + * boxing/unboxing cost of regular distance API. + * + * @author Erich Schubert + * + * @param <O> Object type + */ +public interface PrimitiveDoubleSimilarityFunction<O> extends PrimitiveSimilarityFunction<O, DoubleDistance> { + /** + * Computes the similarity between two given Objects according to this + * similarity function. + * + * @param o1 first Object + * @param o2 second Object + * @return the similarity between two given Objects according to this + * similarity function + */ + double doubleSimilarity(O o1, O o2); +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java index d4d84734..5188e9d9 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java @@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; * @param <O> object type * @param <D> distance type */ -public interface PrimitiveSimilarityFunction<O, D extends Distance<D>> extends SimilarityFunction<O, D> { +public interface PrimitiveSimilarityFunction<O, D extends Distance<?>> extends SimilarityFunction<O, D> { /** * Computes the similarity between two given DatabaseObjects according to this * similarity function. diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java index 5faba431..89661f13 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java @@ -40,7 +40,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * * @author Arthur Zimek * - * @apiviz.has de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborIndex.Factory + * @apiviz.has + * de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborIndex + * .Factory * @apiviz.has Instance oneway - - «create» * * @param <O> object type @@ -72,17 +74,15 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas int intersection = 0; DBIDIter iter1 = neighbors1.iter(); DBIDIter iter2 = neighbors2.iter(); - while(iter1.valid() && iter2.valid()) { + while (iter1.valid() && iter2.valid()) { final int comp = DBIDUtil.compare(iter1, iter2); - if(comp == 0) { + if (comp == 0) { intersection++; iter1.advance(); iter2.advance(); - } - else if(comp < 0) { + } else if (comp < 0) { iter1.advance(); - } - else // iter2 < iter1 + } else // iter2 < iter1 { iter2.advance(); } @@ -94,7 +94,7 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas @Override public <T extends O> Instance<T> instantiate(Relation<T> database) { SharedNearestNeighborIndex<O> indexi = indexFactory.instantiate((Relation<O>) database); - return (Instance<T>) new Instance<>((Relation<O>) database, indexi); + return (Instance<T>) new Instance<>((Relation<O>) database, indexi, this); } /** @@ -108,13 +108,19 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas */ public static class Instance<O> extends AbstractIndexBasedSimilarityFunction.Instance<O, SharedNearestNeighborIndex<O>, SetDBIDs, IntegerDistance> { /** + * Similarity function. + */ + private SharedNearestNeighborSimilarityFunction<? super O> similarityFunction; + + /** * Constructor. - * + * * @param database Database * @param preprocessor Index */ - public Instance(Relation<O> database, SharedNearestNeighborIndex<O> preprocessor) { + public Instance(Relation<O> database, SharedNearestNeighborIndex<O> preprocessor, SharedNearestNeighborSimilarityFunction<? super O> similarityFunction) { super(database, preprocessor); + this.similarityFunction = similarityFunction; } @Override @@ -128,6 +134,11 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas public IntegerDistance getDistanceFactory() { return IntegerDistance.FACTORY; } + + @Override + public SimilarityFunction<? super O, IntegerDistance> getSimilarityFunction() { + return similarityFunction; + } } /** @@ -151,4 +162,4 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas return new SharedNearestNeighborSimilarityFunction<>(factory); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/FooKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/FooKernelFunction.java deleted file mode 100644 index d56b3f04..00000000 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/FooKernelFunction.java +++ /dev/null @@ -1,137 +0,0 @@ -package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2013 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; -import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery; -import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractPrimitiveDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; -import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; - -/** - * Provides an experimental KernelDistanceFunction for NumberVectors. Currently - * only supports 2D data and x1^2 ~ x2 correlations. - * - * @author Simon Paradies - */ -public class FooKernelFunction extends AbstractPrimitiveDistanceFunction<NumberVector<?>, DoubleDistance> implements PrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> { - /** - * The default max_degree. - */ - public static final int DEFAULT_MAX_DEGREE = 2; - - /** - * Parameter for the maximum degree - */ - public static final OptionID MAX_DEGREE_ID = new OptionID("fookernel.max_degree", "The max degree of the" + FooKernelFunction.class.getSimpleName() + ". Default: " + DEFAULT_MAX_DEGREE); - - /** - * Degree of the polynomial kernel function - */ - private int max_degree; - - /** - * Constructor. - * - * @param max_degree Maximum degree- - */ - public FooKernelFunction(int max_degree) { - super(); - this.max_degree = max_degree; - } - - /** - * Provides an experimental kernel similarity between the given two vectors. - * - * @param o1 first vector - * @param o2 second vector - * @return the experimental kernel similarity between the given two vectors as - * an instance of {@link DoubleDistance DoubleDistance}. - */ - @Override - public DoubleDistance similarity(final NumberVector<?> o1, final NumberVector<?> o2) { - if(o1.getDimensionality() != o2.getDimensionality()) { - throw new IllegalArgumentException("Different dimensionality of FeatureVectors\n first argument: " + o1.toString() + "\n second argument: " + o2.toString()); - } - double sim = 0.0; - // iterate over differently powered dimensions - for(int degree = 0; degree < max_degree; degree++) { - sim += Math.pow(o1.doubleValue(degree) * o2.doubleValue(degree), degree); - } - return new DoubleDistance(sim); - } - - @Override - public DoubleDistance distance(final NumberVector<?> fv1, final NumberVector<?> fv2) { - return new DoubleDistance(Math.sqrt(similarity(fv1, fv1).doubleValue() + similarity(fv2, fv2).doubleValue() - 2 * similarity(fv1, fv2).doubleValue())); - } - - @Override - public VectorFieldTypeInformation<? super NumberVector<?>> getInputTypeRestriction() { - return TypeUtil.NUMBER_VECTOR_FIELD; - } - - @Override - public DoubleDistance getDistanceFactory() { - return DoubleDistance.FACTORY; - } - - @Override - public <T extends NumberVector<?>> DistanceSimilarityQuery<T, DoubleDistance> instantiate(Relation<T> database) { - return new PrimitiveDistanceSimilarityQuery<>(database, this, this); - } - - /** - * Parameterization class. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - public static class Parameterizer extends AbstractParameterizer { - protected int max_degree = 0; - - @Override - protected void makeOptions(Parameterization config) { - super.makeOptions(config); - final IntParameter max_degreeP = new IntParameter(MAX_DEGREE_ID, DEFAULT_MAX_DEGREE); - if(config.grab(max_degreeP)) { - max_degree = max_degreeP.getValue(); - } - } - - @Override - protected FooKernelFunction makeInstance() { - return new FooKernelFunction(max_degree); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java index ed3731ba..39fb97a5 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java @@ -23,16 +23,18 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collection; -import java.util.Iterator; -import java.util.List; import java.util.logging.Level; -import de.lmu.ifi.dbs.elki.data.FeatureVector; -import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -43,52 +45,146 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; * * @author Simon Paradies * - * @apiviz.uses de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction + * @apiviz.uses PrimitiveSimilarityFunction */ public class KernelMatrix { /** * The kernel matrix */ Matrix kernel; - + + /** + * Static mapping from DBIDs to indexes. + */ + DBIDMap idmap; + /** - * Wraps the matrixArray in a KernelMatrix + * Map a DBID to its offset * - * @param matrixArray two dimensional double array + * TODO: move to shared code. + * + * @author Erich Schubert + * @apiviz.exclude */ - public KernelMatrix(final double[][] matrixArray) { - kernel = new Matrix(matrixArray); + private static interface DBIDMap { + /** + * Get the offset of the DBID in the range. + * + * @param id ID + * @return Offset + */ + int getOffset(DBIDRef id); + + /** + * Get an array iterator, for scanning. + * + * @return Array iterator + */ + DBIDArrayIter iter(); + } + + /** + * Map a DBID to an integer offset, DBIDRange version. + * + * @author Erich Schubert + * @apiviz.exclude + */ + private static class RangeMap implements DBIDMap { + DBIDRange range; + + public RangeMap(DBIDRange range) { + super(); + this.range = range; + } + + @Override + public int getOffset(DBIDRef id) { + return range.getOffset(id); + } + + @Override + public DBIDArrayIter iter() { + return range.iter(); + } + } + + /** + * Map a DBID to an integer offset, Version to support arbitrary DBIDs. + * + * @author Erich Schubert + * @apiviz.exclude + */ + private static class SortedArrayMap implements DBIDMap { + ArrayModifiableDBIDs ids; + + public SortedArrayMap(DBIDs ids) { + super(); + this.ids = DBIDUtil.newArray(ids); + this.ids.sort(); + } + + @Override + public int getOffset(DBIDRef id) { + return ids.binarySearch(id); + } + + @Override + public DBIDArrayIter iter() { + return ids.iter(); + } } /** * Provides a new kernel matrix. * * @param kernelFunction the kernel function used to compute the kernel matrix - * @param database the database for which the kernel matrix is computed - * - * @deprecated ID mapping is not reliable! + * @param relation the database that holds the objects + * @param ids the IDs of those objects for which the kernel matrix is computed */ - @Deprecated - public <O extends FeatureVector<?>> KernelMatrix(final PrimitiveSimilarityFunction<? super O, DoubleDistance> kernelFunction, final Relation<? extends O> database) { - this(kernelFunction, database, DBIDUtil.ensureArray(database.getDBIDs())); + public <O, D extends NumberDistance<?, ?>> KernelMatrix(PrimitiveSimilarityFunction<? super O, D> kernelFunction, final Relation<? extends O> relation, final DBIDs ids) { + LoggingUtil.logExpensive(Level.FINER, "Computing kernel matrix"); + this.kernel = new Matrix(ids.size(), ids.size()); + if(ids instanceof DBIDRange) { + this.idmap = new RangeMap((DBIDRange) ids); + } + else { + this.idmap = new SortedArrayMap(ids); + } + + DBIDArrayIter i1 = this.idmap.iter(), i2 = this.idmap.iter(); + for(i1.seek(0); i1.valid(); i1.advance()) { + O o1 = relation.get(i1); + for(i2.seek(i1.getOffset()); i2.valid(); i2.advance()) { + double value = kernelFunction.similarity(o1, relation.get(i2)).doubleValue(); + kernel.set(i1.getOffset(), i2.getOffset(), value); + kernel.set(i2.getOffset(), i1.getOffset(), value); + } + } } /** * Provides a new kernel matrix. * * @param kernelFunction the kernel function used to compute the kernel matrix - * @param database the database that holds the objects + * @param relation the database that holds the objects * @param ids the IDs of those objects for which the kernel matrix is computed */ - public <O extends FeatureVector<?>> KernelMatrix(final PrimitiveSimilarityFunction<? super O, DoubleDistance> kernelFunction, final Relation<? extends O> database, final ArrayDBIDs ids) { + public <O, D extends NumberDistance<?, ?>> KernelMatrix(SimilarityQuery<? super O, D> kernelFunction, final Relation<? extends O> relation, final DBIDs ids) { LoggingUtil.logExpensive(Level.FINER, "Computing kernel matrix"); kernel = new Matrix(ids.size(), ids.size()); - double value; - for(int idx = 0; idx < ids.size(); idx++) { - for(int idy = idx; idy < ids.size(); idy++) { - value = kernelFunction.similarity(database.get(ids.get(idx)), database.get(ids.get(idy))).doubleValue(); - kernel.set(idx, idy, value); - kernel.set(idy, idx, value); + if(ids instanceof DBIDRange) { + this.idmap = new RangeMap((DBIDRange) ids); + } + else { + this.idmap = new SortedArrayMap(ids); + } + DBIDArrayIter i1 = idmap.iter(), i2 = idmap.iter(); + for(i1.seek(0); i1.valid(); i1.advance()) { + O o1 = relation.get(i1); + for(i2.seek(i1.getOffset()); i2.valid(); i2.advance()) { + double value = kernelFunction.similarity(o1, i2).doubleValue(); + kernel.set(i1.getOffset(), i2.getOffset(), value); + kernel.set(i2.getOffset(), i1.getOffset(), value); } } } @@ -109,8 +205,7 @@ public class KernelMatrix { * @param o2 second ObjectID * @return the distance between the two objects */ - // FIXME: really use objectids! - public double getDistance(final int o1, final int o2) { + public double getDistance(final DBIDRef o1, final DBIDRef o2) { return Math.sqrt(getSquaredDistance(o1, o2)); } @@ -124,40 +219,32 @@ public class KernelMatrix { } /** - * Returns the kernel value of object o1 and object o2 - * - * @param o1 ID of first object - * @param o2 ID of second object - * @return the kernel value of object o1 and object o2 - */ - public double getSimilarity(final int o1, final int o2) { - return kernel.get(o1 - 1, o2 - 1); // correct index shifts. - } - - /** * Returns the squared kernel distance between the two specified objects. * - * @param o1 first ObjectID - * @param o2 second ObjectID + * @param id1 first ObjectID + * @param id2 second ObjectID * @return the distance between the two objects */ - public double getSquaredDistance(final int o1, final int o2) { - return getSimilarity(o1, o1) + getSimilarity(o2, o2) - 2 * getSimilarity(o1, o2); + public double getSquaredDistance(final DBIDRef id1, final DBIDRef id2) { + final int o1 = idmap.getOffset(id1), o2 = idmap.getOffset(id2); + return kernel.get(o1, o1) + kernel.get(o2, o2) - 2 * kernel.get(o1, o2); } /** * Returns the ith kernel matrix column for all objects in ids * - * @param i the column which should be returned + * @param i1 the column which should be returned * @param ids the objects * @return the ith kernel matrix column for all objects in ids */ - public Matrix getSubColumn(final int i, final List<Integer> ids) { + @Deprecated + public Matrix getSubColumn(final DBIDRef i1, final DBIDs ids) { final int[] ID = new int[1]; - ID[0] = i - 1; // correct index shift + ID[0] = idmap.getOffset(i1); final int[] IDs = new int[ids.size()]; - for(int x = 0; x < IDs.length; x++) { - IDs[x] = ids.get(x) - 1; // correct index shift + int i = 0; + for(DBIDIter it = ids.iter(); it.valid(); it.advance(), i++) { + IDs[i] = idmap.getOffset(it); } return kernel.getMatrix(IDs, ID); } @@ -168,17 +255,17 @@ public class KernelMatrix { * @param ids the objects * @return a sub kernel matrix for all objects in ids. */ - public Matrix getSubMatrix(final Collection<Integer> ids) { + public Matrix getSubMatrix(DBIDs ids) { final int[] IDs = new int[ids.size()]; int i = 0; - for(Iterator<Integer> it = ids.iterator(); it.hasNext(); i++) { - IDs[i] = it.next() - 1; // correct index shift + for(DBIDIter it = ids.iter(); it.valid(); it.advance(), i++) { + IDs[i] = idmap.getOffset(it); } return kernel.getMatrix(IDs, IDs); } /** - * Centers the matrix in feature space according to Smola et. Schoelkopf, + * Centers the matrix in feature space according to Smola et Schoelkopf, * Learning with Kernels p. 431 Alters the input matrix. If you still need the * original matrix, use * <code>centeredMatrix = centerKernelMatrix(uncenteredMatrix.copy()) {</code> @@ -187,6 +274,7 @@ public class KernelMatrix { * @return centered matrix (for convenience) */ public static Matrix centerMatrix(final Matrix matrix) { + // FIXME: implement more efficiently. Maybe in matrix class itself? final Matrix normalizingMatrix = new Matrix(matrix.getRowDimensionality(), matrix.getColumnDimensionality(), 1.0 / matrix.getColumnDimensionality()); return matrix.minusEquals(normalizingMatrix.times(matrix)).minusEquals(matrix.times(normalizingMatrix)).plusEquals(normalizingMatrix.times(matrix).times(normalizingMatrix)); } @@ -208,4 +296,15 @@ public class KernelMatrix { public static Matrix centerKernelMatrix(final KernelMatrix kernelMatrix) { return centerMatrix(kernelMatrix.getKernel()); } + + /** + * Get the kernel similarity for the given objects. + * + * @param id1 First object + * @param id2 Second object + * @return Similarity. + */ + public double getSimilarity(DBIDRef id1, DBIDRef id2) { + return kernel.get(idmap.getOffset(id1), idmap.getOffset(id2)); + } } diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LaplaceKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LaplaceKernelFunction.java new file mode 100644 index 00000000..2a5f6028 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LaplaceKernelFunction.java @@ -0,0 +1,100 @@ +package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * Provides the laplace / exponential radial basis function kernel. + * + * @author Erich Schubert + */ +public class LaplaceKernelFunction extends AbstractVectorDoubleSimilarityFunction { + /** + * Scaling factor mgamma. (= - 1/sigma) + */ + private final double mgamma; + + /** + * Constructor. + * + * @param sigma Scaling parameter sigma (as in laplace kernel) + */ + public LaplaceKernelFunction(double sigma) { + super(); + this.mgamma = -.5 / (sigma * sigma); + } + + @Override + public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) { + final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2); + double sim = 0.; + for(int i = 0; i < dim; i++) { + final double v = o1.doubleValue(i) - o2.doubleValue(i); + sim += v * v; + } + return Math.exp(mgamma * Math.sqrt(sim)); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Sigma parameter: standard deviation. + */ + public static final OptionID SIGMA_ID = new OptionID("kernel.laplace.sigma", "Standard deviation of the laplace RBF kernel."); + + /** + * Sigma parameter + */ + protected double sigma = 1.; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + final DoubleParameter sigmaP = new DoubleParameter(SIGMA_ID, 1.); + sigmaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); + if(config.grab(sigmaP)) { + sigma = sigmaP.doubleValue(); + } + } + + @Override + protected LaplaceKernelFunction makeInstance() { + return new LaplaceKernelFunction(sigma); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java index 0ae44b29..a86ad55d 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java @@ -24,74 +24,58 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel; */ import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; -import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery; -import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; -import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractPrimitiveSimilarityFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Provides a linear Kernel function that computes a similarity between the two * feature vectors V1 and V2 defined by V1^T*V2. * + * Note: this is effectively equivalent to using + * {@link de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction} + * * @author Simon Paradies - * @param <O> vector type */ -public class LinearKernelFunction<O extends NumberVector<?>> extends AbstractPrimitiveSimilarityFunction<O, DoubleDistance> implements PrimitiveDistanceFunction<O, DoubleDistance> { +public class LinearKernelFunction extends PolynomialKernelFunction { /** - * Provides a linear Kernel function that computes a similarity between the - * two vectors V1 and V2 defined by V1^T*V2. + * Static instance. */ - public LinearKernelFunction() { - super(); - } + public static final LinearKernelFunction STATIC = new LinearKernelFunction(); /** - * Provides a linear Kernel function that computes a similarity between the - * two feature vectors V1 and V2 definded by V1^T*V2. - * - * @param o1 first feature vector - * @param o2 second feature vector - * @return the linear kernel similarity between the given two vectors as an - * instance of {@link DoubleDistance DoubleDistance}. + * Linear kernel. Use static instance {@link #STATIC}! */ - @Override - public DoubleDistance similarity(final O o1, final O o2) { - if(o1.getDimensionality() != o2.getDimensionality()) { - throw new IllegalArgumentException("Different dimensionality of Feature-Vectors" + "\n first argument: " + o1.toString() + "\n second argument: " + o2.toString()); - } - double sim = 0; - for(int i = 0; i < o1.getDimensionality(); i++) { - sim += o1.doubleValue(i) * o2.doubleValue(i); - } - return new DoubleDistance(sim); - } - - @Override - public DoubleDistance distance(final O fv1, final O fv2) { - return new DoubleDistance(Math.sqrt(similarity(fv1, fv1).doubleValue() + similarity(fv2, fv2).doubleValue() - 2 * similarity(fv1, fv2).doubleValue())); - } - - @Override - public VectorFieldTypeInformation<? super O> getInputTypeRestriction() { - return TypeUtil.NUMBER_VECTOR_FIELD; + @Deprecated + public LinearKernelFunction() { + super(1, 0.); } @Override - public DoubleDistance getDistanceFactory() { - return DoubleDistance.FACTORY; + public double doubleSimilarity(final NumberVector<?> o1, final NumberVector<?> o2) { + final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2); + double sim = 0.; + for (int i = 0; i < dim; i++) { + sim += o1.doubleValue(i) * o2.doubleValue(i); + } + return sim; } @Override - public boolean isMetric() { - return false; + public double doubleDistance(final NumberVector<?> fv1, final NumberVector<?> fv2) { + return Math.sqrt(doubleSimilarity(fv1, fv1) + doubleSimilarity(fv2, fv2) - 2 * doubleSimilarity(fv1, fv2)); } - @Override - public <T extends O> DistanceSimilarityQuery<T, DoubleDistance> instantiate(Relation<T> database) { - return new PrimitiveDistanceSimilarityQuery<>(database, this, this); + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LinearKernelFunction makeInstance() { + return LinearKernelFunction.STATIC; + } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java index 07963962..2b25ba19 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java @@ -24,18 +24,20 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel; */ import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery; import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractPrimitiveDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; -import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction; +import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction; +import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; /** * Provides a polynomial Kernel function that computes a similarity between the @@ -43,66 +45,66 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * * @author Simon Paradies */ -public class PolynomialKernelFunction extends AbstractPrimitiveDistanceFunction<NumberVector<?>, DoubleDistance> implements PrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> { +public class PolynomialKernelFunction extends AbstractVectorDoubleSimilarityFunction implements PrimitiveDoubleDistanceFunction<NumberVector<?>> { /** * The default degree. */ - public static final double DEFAULT_DEGREE = 2.0; + public static final int DEFAULT_DEGREE = 2; /** - * Degree parameter. + * Degree of the polynomial kernel function. */ - public static final OptionID DEGREE_ID = new OptionID("kernel.degree", "The degree of the polynomial kernel function. Default: " + DEFAULT_DEGREE); + private final int degree; /** - * Degree of the polynomial kernel function. + * Bias of the similarity function. */ - private double degree = 0.0; + private final double bias; /** * Constructor. * * @param degree Kernel degree + * @param bias Bias offset */ - public PolynomialKernelFunction(double degree) { + public PolynomialKernelFunction(int degree, double bias) { super(); this.degree = degree; + this.bias = bias; } /** - * Provides the linear kernel similarity between the given two vectors. + * Constructor. * - * @param o1 first vector - * @param o2 second vector - * @return the linear kernel similarity between the given two vectors as an - * instance of {@link DoubleDistance DoubleDistance}. + * @param degree Kernel degree */ - @Override - public DoubleDistance similarity(NumberVector<?> o1, NumberVector<?> o2) { - if(o1.getDimensionality() != o2.getDimensionality()) { - throw new IllegalArgumentException("Different dimensionality of Feature-Vectors" + "\n first argument: " + o1.toString() + "\n second argument: " + o2.toString()); - } + public PolynomialKernelFunction(int degree) { + this(degree, 0.); + } - double sim = 0; - for(int i = 0; i < o1.getDimensionality(); i++) { + @Override + public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) { + final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2); + double sim = 0.; + for(int i = 0; i < dim; i++) { sim += o1.doubleValue(i) * o2.doubleValue(i); } - return new DoubleDistance(Math.pow(sim, degree)); + return MathUtil.powi(sim + bias, degree); } @Override public DoubleDistance distance(final NumberVector<?> fv1, final NumberVector<?> fv2) { - return new DoubleDistance(Math.sqrt(similarity(fv1, fv1).doubleValue() + similarity(fv2, fv2).doubleValue() - 2 * similarity(fv1, fv2).doubleValue())); + return new DoubleDistance(doubleDistance(fv1, fv2)); } @Override - public VectorFieldTypeInformation<? super NumberVector<?>> getInputTypeRestriction() { - return TypeUtil.NUMBER_VECTOR_FIELD; + public boolean isMetric() { + return true; } @Override - public DoubleDistance getDistanceFactory() { - return DoubleDistance.FACTORY; + public double doubleDistance(NumberVector<?> fv1, NumberVector<?> fv2) { + return Math.sqrt(doubleSimilarity(fv1, fv1) + doubleSimilarity(fv2, fv2) - 2 * doubleSimilarity(fv1, fv2)); } @Override @@ -119,22 +121,46 @@ public class PolynomialKernelFunction extends AbstractPrimitiveDistanceFunction< */ public static class Parameterizer extends AbstractParameterizer { /** + * Degree parameter. + */ + public static final OptionID DEGREE_ID = new OptionID("kernel.polynomial.degree", "The degree of the polynomial kernel function. Default: " + DEFAULT_DEGREE); + + /** + * Bias parameter. + */ + public static final OptionID BIAS_ID = new OptionID("kernel.polynomial.bias", "The bias of the polynomial kernel, a constant that is added to the scalar product."); + + /** * Degree of the polynomial kernel function. */ - protected double degree = 0; + protected int degree = 0; + + /** + * Bias parameter. + */ + protected double bias = 0.; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final DoubleParameter degreeP = new DoubleParameter(DEGREE_ID, DEFAULT_DEGREE); + final IntParameter degreeP = new IntParameter(DEGREE_ID, DEFAULT_DEGREE); + degreeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(degreeP)) { - degree = degreeP.getValue(); + degree = degreeP.intValue(); + } + final DoubleParameter biasP = new DoubleParameter(BIAS_ID); + biasP.setOptional(true); + if(config.grab(biasP)) { + bias = biasP.doubleValue(); } } @Override protected PolynomialKernelFunction makeInstance() { - return new PolynomialKernelFunction(degree); + if(degree == 1 && (bias == 0.)) { + return LinearKernelFunction.STATIC; + } + return new PolynomialKernelFunction(degree, bias); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RadialBasisFunctionKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RadialBasisFunctionKernelFunction.java new file mode 100644 index 00000000..a7613a78 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RadialBasisFunctionKernelFunction.java @@ -0,0 +1,102 @@ +package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction; +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * Provides the Gaussian radial basis function kernel (RBF Kernel). + * + * @author Erich Schubert + */ +@Alias({ "rbf" }) +public class RadialBasisFunctionKernelFunction extends AbstractVectorDoubleSimilarityFunction { + /** + * Scaling factor gamma. (= - 1/(2sigma^2)) + */ + private final double gamma; + + /** + * Constructor. + * + * @param sigma Scaling parameter sigma + */ + public RadialBasisFunctionKernelFunction(double sigma) { + super(); + this.gamma = -.5 / (sigma * sigma); + } + + @Override + public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) { + final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2); + double sim = 0.; + for(int i = 0; i < dim; i++) { + final double v = o1.doubleValue(i) - o2.doubleValue(i); + sim += v * v; + } + return Math.exp(gamma * sim); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Sigma parameter: standard deviation. + */ + public static final OptionID SIGMA_ID = new OptionID("kernel.rbf.sigma", "Standard deviation of the Gaussian RBF kernel."); + + /** + * Sigma parameter + */ + protected double sigma = 1.; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + final DoubleParameter sigmaP = new DoubleParameter(SIGMA_ID, 1.); + sigmaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); + if(config.grab(sigmaP)) { + sigma = sigmaP.doubleValue(); + } + } + + @Override + protected RadialBasisFunctionKernelFunction makeInstance() { + return new RadialBasisFunctionKernelFunction(sigma); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RationalQuadraticKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RationalQuadraticKernelFunction.java new file mode 100644 index 00000000..0a3dc45c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RationalQuadraticKernelFunction.java @@ -0,0 +1,101 @@ +package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * Provides the rational quadratic kernel, a less computational approximation of + * the Gaussian RBF kerne ({@link RadialBasisFunctionKernelFunction}). + * + * @author Erich Schubert + */ +public class RationalQuadraticKernelFunction extends AbstractVectorDoubleSimilarityFunction { + /** + * Constant term c. + */ + private final double c; + + /** + * Constructor. + * + * @param c Constant term c. + */ + public RationalQuadraticKernelFunction(double c) { + super(); + this.c = c; + } + + @Override + public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) { + final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2); + double sim = 0.; + for(int i = 0; i < dim; i++) { + final double v = o1.doubleValue(i) - o2.doubleValue(i); + sim += v * v; + } + return 1. - sim / (sim + c); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * C parameter + */ + public static final OptionID C_ID = new OptionID("kernel.rationalquadratic.c", "Constant term in the rational quadratic kernel."); + + /** + * C parameter + */ + protected double c = 1.; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + final DoubleParameter cP = new DoubleParameter(C_ID, 1.); + cP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); + if(config.grab(cP)) { + c = cP.doubleValue(); + } + } + + @Override + protected RationalQuadraticKernelFunction makeInstance() { + return new RationalQuadraticKernelFunction(c); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/SigmoidKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/SigmoidKernelFunction.java new file mode 100644 index 00000000..a88225f9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/SigmoidKernelFunction.java @@ -0,0 +1,110 @@ +package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * Sigmoid kernel function (aka: hyperbolic tangent kernel, multilayer + * perceptron MLP kernel). + * + * @author Erich Schubert + */ +public class SigmoidKernelFunction extends AbstractVectorDoubleSimilarityFunction { + /** + * Scaling factor c, bias theta + */ + private final double c, theta; + + /** + * Constructor. + * + * @param c Scaling factor c. + * @param theta Bias parameter theta. + */ + public SigmoidKernelFunction(double c, double theta) { + super(); + this.c = c; + this.theta = theta; + } + + @Override + public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) { + final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2); + double sim = 0.; + for (int i = 0; i < dim; i++) { + final double v = o1.doubleValue(i) * o2.doubleValue(i); + sim += v; + } + return Math.tanh(c * sim + theta); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * C parameter: scaling + */ + public static final OptionID C_ID = new OptionID("kernel.sigmoid.c", "Sigmoid c parameter (scaling)."); + + /** + * Theta parameter: bias + */ + public static final OptionID THETA_ID = new OptionID("kernel.sigmoid.theta", "Sigmoid theta parameter (bias)."); + + /** + * C parameter, theta parameter + */ + protected double c = 1., theta = 0.; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + final DoubleParameter cP = new DoubleParameter(C_ID, 1.); + if (config.grab(cP)) { + c = cP.doubleValue(); + } + final DoubleParameter thetaP = new DoubleParameter(THETA_ID, 0.); + if (config.grab(thetaP)) { + theta = thetaP.doubleValue(); + } + } + + @Override + protected SigmoidKernelFunction makeInstance() { + return new SigmoidKernelFunction(c, theta); + } + } +} |