package de.lmu.ifi.dbs.elki.datasource.filter.normalization.instancewise;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.filter.normalization.AbstractStreamNormalization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Normalize vectors such that the smallest attribute is 0, the largest is 1.
*
* @author Erich Schubert
*
* @param vector type
*/
public class InstanceMinMaxNormalization extends AbstractStreamNormalization {
/**
* Minimum and maximum values.
*/
private double min, max;
/**
* Multiplicity of the vector.
*/
private int multiplicity;
/**
* Constructor.
*
* @param min Desired minimum value
* @param max Desired maximum value
*/
public InstanceMinMaxNormalization(double min, double max) {
super();
this.min = min;
this.max = max;
}
/**
* Constructor, normalizing to {@code [0;1]}
*/
public InstanceMinMaxNormalization() {
this(0., 1.);
}
@Override
protected V filterSingleObject(V featureVector) {
double[] raw = featureVector.getColumnVector().getArrayRef();
// Multivariate codepath:
if(multiplicity > 1) {
assert (raw.length % multiplicity == 0) : "Vector length is not divisible by multiplicity?";
double[] mi = new double[multiplicity], ma = new double[multiplicity];
for(int i = 0; i < multiplicity; i++) {
mi[i] = Double.POSITIVE_INFINITY;
ma[i] = Double.NEGATIVE_INFINITY;
}
for(int i = 0, j = 0; i < raw.length; ++i, j = ++j % multiplicity) {
final double v = raw[i];
if(v != v) { // NaN guard
continue;
}
mi[j] = (mi[j] < v) ? mi[j] : v;
ma[j] = (ma[j] > v) ? ma[j] : v;
}
for(int j = 0; j < multiplicity; j++) {
if(mi[j] < ma[j]) {
final double s = (max - min) / (ma[j] - mi[j]);
for(int i = 0; i < raw.length; i += multiplicity) {
raw[i] = (raw[i] - mi[j]) * s + min;
}
}
}
return factory.newNumberVector(raw);
}
// Default codepath
double mi = Double.POSITIVE_INFINITY, ma = Double.NEGATIVE_INFINITY;
for(int i = 0; i < raw.length; ++i) {
final double v = raw[i];
if(v != v) { // NaN guard
continue;
}
mi = (mi < v) ? mi : v;
ma = (ma > v) ? ma : v;
}
if(mi < ma) {
final double s = (max - min) / (ma - mi);
for(int i = 0; i < raw.length; ++i) {
raw[i] = (raw[i] - mi) * s + min;
}
}
return factory.newNumberVector(raw);
}
@Override
protected void initializeOutputType(SimpleTypeInformation type) {
super.initializeOutputType(type);
multiplicity = ((VectorTypeInformation>) type).getMultiplicity();
}
@Override
protected SimpleTypeInformation super V> getInputTypeRestriction() {
return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
}
/**
* Parameterization class.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
/**
* Option ID for minimum value.
*/
public static final OptionID MIN_ID = new OptionID("normalization.min", "Minimum value to assign to objects.");
/**
* Option ID for maximum value.
*/
public static final OptionID MAX_ID = new OptionID("normalization.max", "Maximum value to assign to objects.");
/**
* Minimum and maximum values.
*/
private double min, max;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter minP = new DoubleParameter(MIN_ID, 0.) //
.setOptional(true);
if(config.grab(minP)) {
min = minP.doubleValue();
}
DoubleParameter maxP = new DoubleParameter(MAX_ID, 1.) //
.setOptional(true);
if(config.grab(maxP)) {
max = maxP.doubleValue();
}
config.checkConstraint(new LessGlobalConstraint<>(minP, maxP));
}
@Override
protected InstanceMinMaxNormalization makeInstance() {
return new InstanceMinMaxNormalization<>(min, max);
}
}
}