summaryrefslogtreecommitdiff
path: root/elki/src/main/java/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java
diff options
context:
space:
mode:
authorAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:41 +0000
committerAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:41 +0000
commit38212b3127e90751fb39cda34250bc11be62b76c (patch)
treedc1397346030e9695bd763dddc93b3be527cd643 /elki/src/main/java/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java
parent337087b668d3a54f3afee3a9adb597a32e9f7e94 (diff)
Import Upstream version 0.7.0
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java')
-rw-r--r--elki/src/main/java/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java177
1 files changed, 177 insertions, 0 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java
new file mode 100644
index 00000000..f0b1ceea
--- /dev/null
+++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/datasource/filter/normalization/instancewise/InstanceMinMaxNormalization.java
@@ -0,0 +1,177 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization.instancewise;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2015
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.filter.normalization.AbstractStreamNormalization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessGlobalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Normalize each vector individually, such that its smallest attribute is
+ * mapped to the desired minimum (default 0) and its largest attribute to the
+ * desired maximum (default 1).
+ *
+ * If the vector type reports a multiplicity greater than 1, the values are
+ * treated as interleaved components: every {@code multiplicity}-th value
+ * (starting at offsets {@code 0 .. multiplicity - 1}) is normalized on its own.
+ *
+ * NaN values are ignored when determining the extrema. A vector (or component)
+ * that does not contain two distinct non-NaN values is left unchanged.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> vector type
+ */
+public class InstanceMinMaxNormalization<V extends NumberVector> extends AbstractStreamNormalization<V> {
+  /**
+   * Desired minimum and maximum values of the normalized output.
+   */
+  private final double min, max;
+
+  /**
+   * Multiplicity of the vector, read from the vector type information in
+   * {@link #initializeOutputType}.
+   */
+  private int multiplicity;
+
+  /**
+   * Constructor.
+   *
+   * @param min Desired minimum value
+   * @param max Desired maximum value
+   */
+  public InstanceMinMaxNormalization(double min, double max) {
+    super();
+    this.min = min;
+    this.max = max;
+  }
+
+  /**
+   * Constructor, normalizing to {@code [0;1]}
+   */
+  public InstanceMinMaxNormalization() {
+    this(0., 1.);
+  }
+
+  /**
+   * Rescale a single vector to the desired value range.
+   *
+   * @param featureVector Vector to normalize
+   * @return New vector, produced by the factory from the rescaled values
+   */
+  @Override
+  protected V filterSingleObject(V featureVector) {
+    // NOTE(review): the array is modified in place; this presumably relies on
+    // getColumnVector() returning a copy of the vector data - confirm.
+    final double[] raw = featureVector.getColumnVector().getArrayRef();
+    if(multiplicity > 1) {
+      // Multivariate codepath: normalize every interleaved component on its own.
+      assert (raw.length % multiplicity == 0) : "Vector length is not divisible by multiplicity?";
+      for(int j = 0; j < multiplicity; j++) {
+        rescale(raw, j, multiplicity);
+      }
+    }
+    else {
+      // Default codepath: the whole vector is a single component.
+      rescale(raw, 0, 1);
+    }
+    return factory.newNumberVector(raw);
+  }
+
+  /**
+   * Rescale, in place, the entries {@code offset, offset + stride, ...} of the
+   * array such that their smallest value becomes {@link #min} and their largest
+   * value becomes {@link #max}.
+   *
+   * @param raw Values to modify
+   * @param offset Index of the first entry to process
+   * @param stride Distance between two consecutive entries to process
+   */
+  private void rescale(double[] raw, int offset, int stride) {
+    double lo = Double.POSITIVE_INFINITY, hi = Double.NEGATIVE_INFINITY;
+    for(int i = offset; i < raw.length; i += stride) {
+      final double v = raw[i];
+      if(v != v) { // NaN guard: NaN must not influence the extrema
+        continue;
+      }
+      lo = (lo < v) ? lo : v;
+      hi = (hi > v) ? hi : v;
+    }
+    // No entries, only NaN entries, or a constant component: the scaling
+    // factor would be undefined, so the values are kept as they are.
+    if(!(lo < hi)) {
+      return;
+    }
+    final double s = (max - min) / (hi - lo);
+    for(int i = offset; i < raw.length; i += stride) {
+      raw[i] = (raw[i] - lo) * s + min;
+    }
+  }
+
+  /**
+   * Initialize the output type, and remember the multiplicity of the vector
+   * type for use in {@link #filterSingleObject}.
+   *
+   * @param type Output type information; cast to {@link VectorTypeInformation}
+   */
+  @Override
+  protected void initializeOutputType(SimpleTypeInformation<V> type) {
+    super.initializeOutputType(type);
+    multiplicity = ((VectorTypeInformation<?>) type).getMultiplicity();
+  }
+
+  /**
+   * Input type restriction of this filter.
+   *
+   * @return {@link TypeUtil#NUMBER_VECTOR_VARIABLE_LENGTH}
+   */
+  @Override
+  protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+    return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+    /**
+     * Option ID for minimum value.
+     */
+    public static final OptionID MIN_ID = new OptionID("normalization.min", "Minimum value to assign to objects.");
+
+    /**
+     * Option ID for maximum value.
+     */
+    public static final OptionID MAX_ID = new OptionID("normalization.max", "Maximum value to assign to objects.");
+
+    /**
+     * Minimum and maximum values.
+     */
+    private double min, max;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      // Both parameters are optional, with defaults 0 and 1.
+      DoubleParameter minP = new DoubleParameter(MIN_ID, 0.) //
+      .setOptional(true);
+      if(config.grab(minP)) {
+        min = minP.doubleValue();
+      }
+      DoubleParameter maxP = new DoubleParameter(MAX_ID, 1.) //
+      .setOptional(true);
+      if(config.grab(maxP)) {
+        max = maxP.doubleValue();
+      }
+      // Constraint relating the minimum parameter to the maximum parameter.
+      config.checkConstraint(new LessGlobalConstraint<>(minP, maxP));
+    }
+
+    @Override
+    protected InstanceMinMaxNormalization<V> makeInstance() {
+      return new InstanceMinMaxNormalization<>(min, max);
+    }
+  }
+} \ No newline at end of file