summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise')
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/HellingerHistogramNormalization.java97
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMeanVarianceNormalization.java159
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java177
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/LengthNormalization.java106
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/Log1PlusNormalization.java119
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/package-info.java27
6 files changed, 685 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/HellingerHistogramNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/HellingerHistogramNormalization.java
new file mode 100644
index 00000000..b2da96a9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/HellingerHistogramNormalization.java
@@ -0,0 +1,97 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization.instancewise;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.datasource.filter.normalization.AbstractStreamNormalization;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Normalize histograms by scaling them to L1 norm 1, then taking the square
+ * root in each attribute.
+ *
+ * Using Euclidean distance (linear kernel) and this transformation is the same
+ * as using Hellinger distance:
+ * {@link de.lmu.ifi.dbs.elki.distance.distancefunction.probabilistic.HellingerDistanceFunction}
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> vector type
+ */
+@Alias({ "de.lmu.ifi.dbs.elki.datasource.filter.normalization.HellingerHistogramNormalization" })
+public class HellingerHistogramNormalization<V extends NumberVector> extends AbstractStreamNormalization<V> {
+ /**
+ * Static instance.
+ */
+ public static final HellingerHistogramNormalization<NumberVector> STATIC = new HellingerHistogramNormalization<>();
+
+ /**
+ * Constructor.
+ */
+ public HellingerHistogramNormalization() {
+ super();
+ }
+
+ @Override
+ protected V filterSingleObject(V featureVector) {
+ double[] data = new double[featureVector.getDimensionality()];
+ double sum = 0.;
+ for(int d = 0; d < data.length; ++d) {
+ data[d] = featureVector.doubleValue(d);
+ data[d] = data[d] > 0 ? data[d] : -data[d];
+ sum += data[d];
+ }
+ // Normalize and sqrt:
+ if(sum > 0.) {
+ for(int d = 0; d < data.length; ++d) {
+ if(data[d] > 0) {
+ data[d] = Math.sqrt(data[d] / sum);
+ }
+ }
+ }
+ return factory.newNumberVector(data);
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected HellingerHistogramNormalization<NumberVector> makeInstance() {
+ return STATIC;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMeanVarianceNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMeanVarianceNormalization.java
new file mode 100644
index 00000000..05485909
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMeanVarianceNormalization.java
@@ -0,0 +1,159 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization.instancewise;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.filter.normalization.AbstractStreamNormalization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Normalize vectors such that they have zero mean and unit variance.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> vector type
+ */
+public class InstanceMeanVarianceNormalization<V extends NumberVector> extends AbstractStreamNormalization<V> {
+ /**
+ * Multiplicity of the vector.
+ */
+ private int multiplicity;
+
+ /**
+ * Constructor.
+ */
+ public InstanceMeanVarianceNormalization() {
+ super();
+ }
+
+ @Override
+ protected V filterSingleObject(V featureVector) {
+ double[] raw = featureVector.getColumnVector().getArrayRef();
+ if(raw.length == 0) {
+ return factory.newNumberVector(new double[] {});
+ }
+ if(raw.length == 1) {
+ // Constant, but preserve NaNs
+ return factory.newNumberVector(new double[] { raw[0] == raw[0] ? 0. : Double.NaN });
+ }
+ // Multivariate codepath:
+ if(multiplicity > 1) {
+ assert (raw.length % multiplicity == 0) : "Vector length is not divisible by multiplicity?";
+ return factory.newNumberVector(multivariateStandardization(raw));
+ }
+ return factory.newNumberVector(univariateStandardization(raw));
+ }
+
+ protected double[] univariateStandardization(double[] raw) {
+ // Two pass normalization is numerically most stable,
+ // And Java should optimize this well enough.
+ double sum = 0.;
+ for(int i = 0; i < raw.length; ++i) {
+ final double v = raw[i];
+ if(v != v) { // NaN guard
+ continue;
+ }
+ sum += v;
+ }
+ final double mean = sum / raw.length;
+ double ssum = 0.;
+ for(int i = 0; i < raw.length; ++i) {
+ double v = raw[i] - mean;
+ if(v != v) {
+ continue;
+ }
+ ssum += v * v;
+ }
+ final double std = Math.sqrt(ssum) / (raw.length - 1);
+ if(std > 0.) {
+ for(int i = 0; i < raw.length; ++i) {
+ raw[i] = (raw[i] - mean) / std;
+ }
+ }
+ return raw;
+ }
+
+ protected double[] multivariateStandardization(double[] raw) {
+ final int len = raw.length / multiplicity;
+ if(len <= 1) {
+ return raw;
+ }
+ // Two pass normalization is numerically most stable,
+ // And Java should optimize this well enough.
+ double[] mean = new double[multiplicity];
+ for(int i = 0, j = 0; i < raw.length; ++i, j = ++j % multiplicity) {
+ final double v = raw[i];
+ if(v != v) { // NaN guard
+ continue;
+ }
+ mean[j] += v;
+ }
+ for(int j = 0; j < multiplicity; ++j) {
+ mean[j] /= len;
+ }
+ double[] std = new double[multiplicity];
+ for(int i = 0, j = 0; i < raw.length; ++i, j = ++j % multiplicity) {
+ double v = raw[i] - mean[j];
+ if(v != v) {
+ continue;
+ }
+ std[j] += v * v;
+ }
+ for(int j = 0; j < multiplicity; ++j) {
+ std[j] = std[j] > 0. ? Math.sqrt(std[j]) / (len - 1) : 1;
+ }
+ for(int i = 0, j = 0; i < raw.length; ++i, j = ++j % multiplicity) {
+ raw[i] = (raw[i] - mean[j]) / std[j];
+ }
+ return raw;
+ }
+
+ @Override
+ protected void initializeOutputType(SimpleTypeInformation<V> type) {
+ super.initializeOutputType(type);
+ multiplicity = ((VectorTypeInformation<?>) type).getMultiplicity();
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+ @Override
+ protected InstanceMeanVarianceNormalization<V> makeInstance() {
+ return new InstanceMeanVarianceNormalization<>();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java
new file mode 100644
index 00000000..9f8f7680
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java
@@ -0,0 +1,177 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization.instancewise;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.filter.normalization.AbstractStreamNormalization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessGlobalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Normalize vectors such that the smallest attribute is 0, the largest is 1.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> vector type
+ */
+public class InstanceMinMaxNormalization<V extends NumberVector> extends AbstractStreamNormalization<V> {
+ /**
+ * Minimum and maximum values.
+ */
+ private double min, max;
+
+ /**
+ * Multiplicity of the vector.
+ */
+ private int multiplicity;
+
+ /**
+ * Constructor.
+ *
+ * @param min Desired minimum value
+ * @param max Desired maximum value
+ */
+ public InstanceMinMaxNormalization(double min, double max) {
+ super();
+ this.min = min;
+ this.max = max;
+ }
+
+ /**
+ * Constructor, normalizing to {@code [0;1]}
+ */
+ public InstanceMinMaxNormalization() {
+ this(0., 1.);
+ }
+
+ @Override
+ protected V filterSingleObject(V featureVector) {
+ double[] raw = featureVector.getColumnVector().getArrayRef();
+ // Multivariate codepath:
+ if(multiplicity > 1) {
+ assert (raw.length % multiplicity == 0) : "Vector length is not divisible by multiplicity?";
+ double[] mi = new double[multiplicity], ma = new double[multiplicity];
+ for(int i = 0; i < multiplicity; i++) {
+ mi[i] = Double.POSITIVE_INFINITY;
+ ma[i] = Double.NEGATIVE_INFINITY;
+ }
+ for(int i = 0, j = 0; i < raw.length; ++i, j = ++j % multiplicity) {
+ final double v = raw[i];
+ if(v != v) { // NaN guard
+ continue;
+ }
+ mi[j] = (mi[j] < v) ? mi[j] : v;
+ ma[j] = (ma[j] > v) ? ma[j] : v;
+ }
+ for(int j = 0; j < multiplicity; j++) {
+ if(mi[j] < ma[j]) {
+ final double s = (max - min) / (ma[j] - mi[j]);
+ for(int i = 0; i < raw.length; i += multiplicity) {
+ raw[i] = (raw[i] - mi[j]) * s + min;
+ }
+ }
+ }
+ return factory.newNumberVector(raw);
+ }
+ // Default codepath
+ double mi = Double.POSITIVE_INFINITY, ma = Double.NEGATIVE_INFINITY;
+ for(int i = 0; i < raw.length; ++i) {
+ final double v = raw[i];
+ if(v != v) { // NaN guard
+ continue;
+ }
+ mi = (mi < v) ? mi : v;
+ ma = (ma > v) ? ma : v;
+ }
+ if(mi < ma) {
+ final double s = (max - min) / (ma - mi);
+ for(int i = 0; i < raw.length; ++i) {
+ raw[i] = (raw[i] - mi) * s + min;
+ }
+ }
+ return factory.newNumberVector(raw);
+ }
+
+ @Override
+ protected void initializeOutputType(SimpleTypeInformation<V> type) {
+ super.initializeOutputType(type);
+ multiplicity = ((VectorTypeInformation<?>) type).getMultiplicity();
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+ /**
+ * Option ID for minimum value.
+ */
+ public static final OptionID MIN_ID = new OptionID("normalization.min", "Minimum value to assign to objects.");
+
+ /**
+ * Option ID for maximum value.
+ */
+ public static final OptionID MAX_ID = new OptionID("normalization.max", "Maximum value to assign to objects.");
+
+ /**
+ * Minimum and maximum values.
+ */
+ private double min, max;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ DoubleParameter minP = new DoubleParameter(MIN_ID, 0.) //
+ .setOptional(true);
+ if(config.grab(minP)) {
+ min = minP.doubleValue();
+ }
+ DoubleParameter maxP = new DoubleParameter(MAX_ID, 1.) //
+ .setOptional(true);
+ if(config.grab(maxP)) {
+ max = maxP.doubleValue();
+ }
+ config.checkConstraint(new LessGlobalConstraint<>(minP, maxP));
+ }
+
+ @Override
+ protected InstanceMinMaxNormalization<V> makeInstance() {
+ return new InstanceMinMaxNormalization<>(min, max);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/LengthNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/LengthNormalization.java
new file mode 100644
index 00000000..51b2a34b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/LengthNormalization.java
@@ -0,0 +1,106 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization.instancewise;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.datasource.filter.normalization.AbstractStreamNormalization;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.Norm;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Class to perform a normalization on vectors to norm 1.
+ *
+ * @author Heidi Kolb
+ * @author Erich Schubert
+ *
+ * @param <V> vector type
+ */
+@Alias({ "de.lmu.ifi.dbs.elki.datasource.filter.normalization.LengthNormalization"})
+public class LengthNormalization<V extends NumberVector> extends AbstractStreamNormalization<V> {
+ /**
+ * Norm to use.
+ */
+ Norm<? super V> norm;
+
+ /**
+ * Constructor.
+ *
+ * @param norm Norm to use
+ */
+ public LengthNormalization(Norm<? super V> norm) {
+ super();
+ this.norm = norm;
+ }
+
+ @Override
+ protected V filterSingleObject(V featureVector) {
+ final double d = norm.norm(featureVector);
+ return factory.newNumberVector(featureVector.getColumnVector().timesEquals(1 / d).getArrayRef());
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+ /**
+ * Option ID for normalization norm.
+ */
+ public static final OptionID NORM_ID = new OptionID("normalization.norm", "Norm (length function) to use for computing the vector length.");
+
+ /**
+ * Norm to use.
+ */
+ Norm<? super V> norm;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<Norm<? super V>> normP = new ObjectParameter<>(NORM_ID, Norm.class, EuclideanDistanceFunction.class);
+ if(config.grab(normP)) {
+ norm = normP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected LengthNormalization<V> makeInstance() {
+ return new LengthNormalization<>(norm);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/Log1PlusNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/Log1PlusNormalization.java
new file mode 100644
index 00000000..8970e7ef
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/Log1PlusNormalization.java
@@ -0,0 +1,119 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization.instancewise;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.datasource.filter.normalization.AbstractStreamNormalization;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Normalize the data set by applying log(1+|x|*b)/log(b+1) to any value. If the
+ * input data was in [0;1], then the resulting values will be in the same range.
+ *
+ * By default b=1, and thus the transformation is log2(1+|x|).
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> vector type
+ */
+@Alias({ "de.lmu.ifi.dbs.elki.datasource.filter.normalization.Log1PlusNormalization" })
+public class Log1PlusNormalization<V extends NumberVector> extends AbstractStreamNormalization<V> {
+ /**
+ * Static instance.
+ */
+ public static final Log1PlusNormalization<NumberVector> STATIC = new Log1PlusNormalization<>(1.);
+
+ /**
+ * Boosting factor, and scaling coefficient.
+ */
+ protected double boost, scale;
+
+ /**
+ * Constructor.
+ *
+ * @param boost Boosting parameter
+ */
+ public Log1PlusNormalization(double boost) {
+ super();
+ this.boost = boost;
+ this.scale = 1. / Math.log1p(boost);
+ }
+
+ @Override
+ protected V filterSingleObject(V featureVector) {
+ double[] data = new double[featureVector.getDimensionality()];
+ for(int d = 0; d < data.length; ++d) {
+ data[d] = featureVector.doubleValue(d);
+ data[d] = Math.log1p((data[d] > 0 ? data[d] : -data[d]) * boost) * scale;
+ }
+ return factory.newNumberVector(data);
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+ /**
+ * Boosting factor parameter.
+ */
+ public static final OptionID BOOST_ID = new OptionID("log1pscale.boost", "Boosting factor. Larger values will yield a steeper curve.");
+
+ /**
+ * Boosting factor.
+ */
+ protected double boost;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter boostP = new DoubleParameter(BOOST_ID, 1.) //
+ .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(boostP)) {
+ boost = boostP.doubleValue();
+ }
+ }
+
+ @Override
+ protected Log1PlusNormalization<V> makeInstance() {
+ return new Log1PlusNormalization<>(boost);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/package-info.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/package-info.java
new file mode 100644
index 00000000..9ac613c0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Instancewise normalization, where each instance is normalized independently.
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization.instancewise; \ No newline at end of file