diff options
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java')
-rw-r--r-- | elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java | 332 |
1 files changed, 332 insertions, 0 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java new file mode 100644 index 00000000..d7feb2b4 --- /dev/null +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java @@ -0,0 +1,332 @@ +package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2015 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.ArrayList; + +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GlobalParameterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.OnlyOneIsAllowedToBeSetGlobalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter; + +/** + * Scaling that can map arbitrary values to a value in the range of [0:1]. + * + * Transformation is done by linear mapping onto 0:1 using the minimum and + * maximum values. + * + * @author Erich Schubert + */ +public class OutlierLinearScaling implements OutlierScalingFunction { + /** + * Parameter to specify a fixed minimum to use. + * <p> + * Key: {@code -linearscale.min} + * </p> + */ + public static final OptionID MIN_ID = new OptionID("linearscale.min", "Fixed minimum to use in linear scaling."); + + /** + * Parameter to specify the maximum value. + * <p> + * Key: {@code -linearscale.max} + * </p> + */ + public static final OptionID MAX_ID = new OptionID("linearscale.max", "Fixed maximum to use in linear scaling."); + + /** + * Flag to use the mean as minimum for scaling. + * + * <p> + * Key: {@code -linearscale.usemean} + * </p> + */ + public static final OptionID MEAN_ID = new OptionID("linearscale.usemean", "Use the mean as minimum for scaling."); + + /** + * Flag to use ignore zeros when computing the min and max. + * + * <p> + * Key: {@code -linearscale.ignorezero} + * </p> + */ + public static final OptionID NOZEROS_ID = new OptionID("linearscale.ignorezero", "Ignore zero entries when computing the minimum and maximum."); + + /** + * Field storing the Minimum to use + */ + protected Double min = null; + + /** + * Field storing the Maximum value + */ + protected Double max = null; + + /** + * Scaling factor to use (1/ max - min) + */ + double factor; + + /** + * Use the mean for scaling + */ + boolean usemean = false; + + /** + * Ignore zero values + */ + boolean nozeros = false; + + /** + * Constructor. + */ + public OutlierLinearScaling() { + this(null, null, false, false); + } + + /** + * Constructor. + * + * @param min + * @param max + * @param usemean + * @param nozeros + */ + public OutlierLinearScaling(Double min, Double max, boolean usemean, boolean nozeros) { + super(); + this.min = min; + this.max = max; + this.usemean = usemean; + this.nozeros = nozeros; + if (min != null && max != null) { + this.factor = (max - min); + } + } + + @Override + public double getScaled(double value) { + assert (factor != 0) : "prepare() was not run prior to using the scaling function."; + if (value <= min) { + return 0; + } + return Math.min(1, ((value - min) / factor)); + } + + @Override + public void prepare(OutlierResult or) { + if (usemean) { + MeanVariance mv = new MeanVariance(); + DoubleMinMax mm = (max == null) ? new DoubleMinMax() : null; + boolean skippedzeros = false; + DoubleRelation scores = or.getScores(); + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + double val = scores.doubleValue(id); + if (nozeros && val == 0.0) { + skippedzeros = true; + continue; + } + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mv.put(val); + } + if (max == null) { + mm.put(val); + } + } + if (skippedzeros && mm.getMin() == mm.getMax()) { + mm.put(0.0); + mv.put(0.0); + } + min = mv.getMean(); + if (max == null) { + max = mm.getMax(); + } + } else { + if (min == null || max == null) { + boolean skippedzeros = false; + DoubleMinMax mm = new DoubleMinMax(); + DoubleRelation scores = or.getScores(); + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + double val = scores.doubleValue(id); + if (nozeros && val == 0.0) { + skippedzeros = true; + continue; + } + mm.put(val); + } + if (skippedzeros && mm.getMin() == mm.getMax()) { + mm.put(0.0); + } + if (min == null) { + min = mm.getMin(); + } + if (max == null) { + max = mm.getMax(); + } + } + } + factor = (max - min); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (usemean) { + MeanVariance mv = new MeanVariance(); + DoubleMinMax mm = (max == null) ? new DoubleMinMax() : null; + boolean skippedzeros = false; + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (nozeros && val == 0.0) { + skippedzeros = true; + continue; + } + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mv.put(val); + } + if (max == null) { + mm.put(val); + } + } + if (skippedzeros && mm.getMin() == mm.getMax()) { + mm.put(0.0); + mv.put(0.0); + } + min = mv.getMean(); + if (max == null) { + max = mm.getMax(); + } + } else { + if (min == null || max == null) { + boolean skippedzeros = false; + DoubleMinMax mm = new DoubleMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (nozeros && val == 0.0) { + skippedzeros = true; + continue; + } + mm.put(val); + } + if (skippedzeros && mm.getMin() == mm.getMax()) { + mm.put(0.0); + } + if (min == null) { + min = mm.getMin(); + } + if (max == null) { + max = mm.getMax(); + } + } + } + factor = (max - min); + } + + @Override + public double getMin() { + return 0.0; + } + + @Override + public double getMax() { + return 1.0; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Field storing the Minimum to use + */ + protected Double min = null; + + /** + * Field storing the Maximum value + */ + protected Double max = null; + + /** + * Use the mean for scaling + */ + boolean usemean = false; + + /** + * Ignore zero values + */ + boolean nozeros = false; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter minP = new DoubleParameter(MIN_ID); + minP.setOptional(true); + if (config.grab(minP)) { + min = minP.getValue(); + } + + DoubleParameter maxP = new DoubleParameter(MAX_ID); + maxP.setOptional(true); + if (config.grab(maxP)) { + max = maxP.getValue(); + } + + Flag meanF = new Flag(MEAN_ID); + if (config.grab(meanF)) { + usemean = meanF.getValue(); + } + + Flag nozerosF = new Flag(NOZEROS_ID); + if (config.grab(nozerosF)) { + nozeros = nozerosF.getValue(); + } + + // Use-Mean and Minimum value must not be set at the same time! + ArrayList<Parameter<?>> minmean = new ArrayList<>(); + minmean.add(minP); + minmean.add(meanF); + GlobalParameterConstraint gpc = new OnlyOneIsAllowedToBeSetGlobalConstraint(minmean); + config.checkConstraint(gpc); + } + + @Override + protected OutlierLinearScaling makeInstance() { + return new OutlierLinearScaling(min, max, usemean, nozeros); + } + } +} |