package de.lmu.ifi.dbs.elki.datasource.filter.typeconversions; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2015 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import de.lmu.ifi.dbs.elki.data.DoubleVector; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.data.uncertain.WeightedDiscreteUncertainObject; import de.lmu.ifi.dbs.elki.datasource.filter.AbstractConversionFilter; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; /** * Filter to transform a single vector into a set of samples and weights to * interpret as uncertain observation. * * @author Erich Schubert */ public class WeightedUncertainSplitFilter extends AbstractConversionFilter { /** * Class logger. */ private static final Logging LOG = Logging.getLogger(WeightedUncertainSplitFilter.class); /** * Data dimensionality. */ protected int dims; /** * Modulo value for splitting. */ protected int mod; /** * Column in which the probability is stored. */ protected int probcol; /** * Constructor. * * @param dims Number of dimensions * @param probcol Column in which probabilities are stored. */ public WeightedUncertainSplitFilter(int dims, int probcol) { this.dims = dims; if(probcol < -dims || probcol >= dims) { throw new AbortException("Invalid column given for prob: " + probcol); } else if(probcol < 0) { probcol += dims; } this.probcol = probcol; this.mod = dims + 1; } @Override protected WeightedDiscreteUncertainObject filterSingleObject(NumberVector vec) { final int dim = vec.getDimensionality(); if(dim % mod != 0) { throw new AbortException("Vector length " + dim + " not divisible by the number of dimensions + 1 (for probability): " + mod); } final int num = dim / mod; final DoubleVector[] samples = new DoubleVector[num]; final double[] weights = new double[dims]; final double[] buf = new double[dims]; for(int i = 0, j = 0, k = 0, l = 0; i < mod; i++) { if(l++ == probcol) { weights[k] = vec.doubleValue(i); } else { buf[j++] = vec.doubleValue(i); } if(l == mod) { samples[k] = new DoubleVector(buf); j = 0; l = 0; k++; } } return new WeightedDiscreteUncertainObject(samples, weights); } @Override protected SimpleTypeInformation getInputTypeRestriction() { return TypeUtil.NUMBER_VECTOR_FIELD; } @Override protected SimpleTypeInformation convertedType(SimpleTypeInformation in) { final int dim = ((VectorFieldTypeInformation) in).getDimensionality(); if(dim % mod != 0) { throw new AbortException("Vector length " + dim + " not divisible by the number of dimensions + 1 (for probability): " + mod); } return new VectorFieldTypeInformation(WeightedDiscreteUncertainObject.FACTORY, dim); } @Override protected Logging getLogger() { return LOG; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { /** * Parameter for specifying the number of dimensions of the sample. */ public static final OptionID DIM_ID = new OptionID("uncertain.dimensionality", "Dimensionality of the data set (used for splitting)."); /** * Parameter to specify where the probability is stored. */ public static final OptionID PROBCOL_ID = new OptionID("uncertain.probability.column", "Column in which the probability is stored, starting at 0. -1 is the last column."); /** * Field to hold the dimensional constraint. */ protected int dims = 0; /** * Column in which the probability is stored. */ protected int probcol = Integer.MIN_VALUE; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); IntParameter dimsP = new IntParameter(DIM_ID) // .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); IntParameter probP = new IntParameter(PROBCOL_ID); if(config.grab(dimsP)) { dims = dimsP.intValue(); // Add additional constraints: probP.addConstraint(new LessConstraint(dimsP.intValue())); probP.addConstraint(new GreaterEqualConstraint(-dimsP.intValue())); } if(config.grab(probP)) { probcol = probP.intValue(); } } @Override protected WeightedUncertainSplitFilter makeInstance() { return new WeightedUncertainSplitFilter(dims, probcol); } } }