package de.lmu.ifi.dbs.elki.distance.distancefunction.probabilistic; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery; import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractNumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Hellinger kernel / Hellinger distance are used with SIFT vectors, and also * known as Bhattacharyya distance / coefficient. * * This distance is appropriate for histograms, and is equal to A) L1 * normalizing the vectors and then B) using Euclidean distance. As this is * usually much faster, this is the recommended way of handling such data. * * Reference: *

* E. Hellinger
* Neue Begründung der Theorie quadratischer Formen von unendlichvielen * Veränderlichen
* Journal für die reine und angewandte Mathematik *

* * TODO: support acceleration for sparse vectors * * @author Erich Schubert */ @Reference(authors = "E. Hellinger", // title = "Neue Begründung der Theorie quadratischer Formen von unendlichvielen Veränderlichen", // booktitle = "Journal für die reine und angewandte Mathematik ", // url = "http://resolver.sub.uni-goettingen.de/purl?GDZPPN002166941") @Alias({ "hellinger", "bhattacharyya" }) public class HellingerDistanceFunction extends AbstractNumberVectorDistanceFunction implements PrimitiveSimilarityFunction { /** * Static instance. */ public static final HellingerDistanceFunction STATIC = new HellingerDistanceFunction(); /** * Hellinger kernel. Use static instance {@link #STATIC}! */ @Deprecated public HellingerDistanceFunction() { super(); } @Override public double distance(final NumberVector fv1, final NumberVector fv2) { final int dim1 = fv1.getDimensionality(), dim2 = fv2.getDimensionality(); final int mindim = (dim1 < dim2) ? dim1 : dim2; double agg = 0.; for(int d = 0; d < mindim; d++) { final double v = Math.sqrt(fv1.doubleValue(d)) - Math.sqrt(fv2.doubleValue(d)); agg += v * v; } for(int d = mindim; d < dim1; d++) { agg += Math.abs(fv1.doubleValue(d)); } for(int d = mindim; d < dim2; d++) { agg += Math.abs(fv2.doubleValue(d)); } return MathUtil.SQRTHALF * Math.sqrt(agg); } @Override public double similarity(final NumberVector o1, final NumberVector o2) { // TODO: accelerate sparse! final int dim1 = o1.getDimensionality(), dim2 = o2.getDimensionality(); final int mindim = (dim1 < dim2) ? dim1 : dim2; double agg = 0.; for(int d = 0; d < mindim; d++) { agg += Math.sqrt(o1.doubleValue(d) * o2.doubleValue(d)); } return agg; } @Override public boolean isMetric() { return true; // as this equals Euclidean in sqrt space } @Override public DistanceSimilarityQuery instantiate(Relation database) { return new PrimitiveDistanceSimilarityQuery<>(database, this, this); } @Override public SimpleTypeInformation getInputTypeRestriction() { return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { @Override protected HellingerDistanceFunction makeInstance() { return HellingerDistanceFunction.STATIC; } } }