summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/datasource/filter/typeconversions/SplitNumberVectorFilter.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/filter/typeconversions/SplitNumberVectorFilter.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/typeconversions/SplitNumberVectorFilter.java 190
1 files changed, 190 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/typeconversions/SplitNumberVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/typeconversions/SplitNumberVectorFilter.java
new file mode 100644
index 00000000..81f640df
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/typeconversions/SplitNumberVectorFilter.java
@@ -0,0 +1,190 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.typeconversions;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.datasource.filter.FilterUtil;
+import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
+
+/**
+ * Split a number vector column into two vector columns: the first one holds
+ * the selected dimensions (in the order they were given), the second one holds
+ * all remaining dimensions in ascending order. Columns that do not match the
+ * input type restriction are copied to the output unchanged.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses NumberVector
+ *
+ * @param <V> Vector type
+ */
+@Alias({ "de.lmu.ifi.dbs.elki.datasource.filter.normalization.SplitNumberVectorFilter" })
+public class SplitNumberVectorFilter<V extends NumberVector> implements ObjectFilter {
+  /**
+   * Selected dimensions; used as the indexes passed to
+   * {@code NumberVector#doubleValue(int)} when building the first output
+   * column.
+   */
+  final int[] dims;
+
+  /**
+   * Constructor.
+   *
+   * @param dims Dimensions to use. The array is stored as-is, not copied.
+   */
+  public SplitNumberVectorFilter(int[] dims) {
+    super();
+    this.dims = dims;
+  }
+
+  /**
+   * Split every matching vector column of the bundle into two columns.
+   *
+   * @param objects Input bundle
+   * @return the input bundle itself if it holds no data, otherwise a new
+   *         bundle in which each matching column is replaced by two columns
+   * @throws AbortException if no dimensions were selected, or if the selected
+   *         dimensions do not fit the dimensionality of a matching column
+   */
+  @Override
+  public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
+    if(objects.dataLength() == 0) {
+      return objects;
+    }
+    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
+    // The restriction depends only on dims, so build it once for all columns.
+    final TypeInformation restriction = getInputTypeRestriction();
+
+    for(int r = 0; r < objects.metaLength(); r++) {
+      @SuppressWarnings("unchecked")
+      SimpleTypeInformation<Object> type = (SimpleTypeInformation<Object>) objects.meta(r);
+      @SuppressWarnings("unchecked")
+      final List<Object> column = (List<Object>) objects.getColumn(r);
+      if(!restriction.isAssignableFromType(type)) {
+        // Not a column we split: pass it through untouched.
+        bundle.appendColumn(type, column);
+        continue;
+      }
+      // Should be a vector type after above test.
+      @SuppressWarnings("unchecked")
+      final VectorFieldTypeInformation<V> vtype = VectorFieldTypeInformation.class.cast(type);
+      NumberVector.Factory<V> factory = FilterUtil.guessFactory(vtype);
+
+      // Validate the selection first, before anything is sized by it.
+      final int[] odims = remainingDimensions(vtype.getDimensionality());
+
+      // Get the replacement type informations
+      VectorFieldTypeInformation<V> type1 = new VectorFieldTypeInformation<>(factory, dims.length);
+      VectorFieldTypeInformation<V> type2 = new VectorFieldTypeInformation<>(factory, odims.length);
+      final List<V> col1 = new ArrayList<>(column.size());
+      final List<V> col2 = new ArrayList<>(column.size());
+      bundle.appendColumn(type1, col1);
+      bundle.appendColumn(type2, col2);
+
+      // Splitting scan.
+      for(int i = 0; i < objects.dataLength(); i++) {
+        @SuppressWarnings("unchecked")
+        final V obj = (V) column.get(i);
+        double[] part1 = new double[dims.length];
+        // Sized from odims so each vector matches the dimensionality of type2.
+        double[] part2 = new double[odims.length];
+        for(int d = 0; d < dims.length; d++) {
+          part1[d] = obj.doubleValue(dims[d]);
+        }
+        for(int d = 0; d < odims.length; d++) {
+          part2[d] = obj.doubleValue(odims[d]);
+        }
+        col1.add(factory.newNumberVector(part1));
+        col2.add(factory.newNumberVector(part2));
+      }
+    }
+    return bundle;
+  }
+
+  /**
+   * Build the array of dimensions that are <em>not</em> selected, in ascending
+   * order.
+   *
+   * @param dimensionality Dimensionality of the column being split
+   * @return the {@code dimensionality - dims.length} unselected dimensions
+   * @throws AbortException if the selection contains duplicates or entries
+   *         outside {@code [0, dimensionality)}
+   */
+  private int[] remainingDimensions(int dimensionality) {
+    boolean[] selected = new boolean[dimensionality];
+    int distinct = 0;
+    for(int d : dims) {
+      if(d >= 0 && d < dimensionality && !selected[d]) {
+        selected[d] = true;
+        distinct++;
+      }
+    }
+    // A duplicate or out-of-range entry means the two parts cannot add up to
+    // the input dimensionality.
+    if(distinct != dims.length) {
+      throw new AbortException("Dimensionalities not proper!");
+    }
+    int[] odims = new int[dimensionality - dims.length];
+    int i = 0;
+    for(int d = 0; d < dimensionality; d++) {
+      if(!selected[d]) {
+        odims[i++] = d;
+      }
+    }
+    return odims;
+  }
+
+  /**
+   * The input type we use.
+   *
+   * @return type information
+   * @throws AbortException if no dimensions were selected
+   */
+  private TypeInformation getInputTypeRestriction() {
+    if(dims == null || dims.length == 0) {
+      throw new AbortException("No dimensions selected for splitting.");
+    }
+    // Find maximum dimension requested
+    int m = dims[0];
+    for(int i = 1; i < dims.length; i++) {
+      m = Math.max(dims[i], m);
+    }
+    // NOTE(review): m is the largest selected index. If the second argument of
+    // typeRequest is a minimum dimensionality, a column with exactly m
+    // dimensions is accepted here and then rejected by remainingDimensions;
+    // confirm whether m + 1 was intended.
+    return VectorFieldTypeInformation.typeRequest(NumberVector.class, m, Integer.MAX_VALUE);
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <V> Vector type
+   */
+  public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+    /**
+     * The parameter listing the split dimensions.
+     */
+    public static final OptionID SELECTED_ATTRIBUTES_ID = new OptionID("split.dims", "Dimensions to split into the first relation.");
+
+    /**
+     * Dimensions to use. Remains unset when the parameter could not be
+     * grabbed from the configuration.
+     */
+    protected int[] dims;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID);
+      selectedAttributesP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT_LIST);
+      if(config.grab(selectedAttributesP)) {
+        // Unbox the configured list into a primitive array.
+        List<Integer> dimensionList = selectedAttributesP.getValue();
+        dims = new int[dimensionList.size()];
+        for(int i = 0; i < dimensionList.size(); i++) {
+          dims[i] = dimensionList.get(i).intValue();
+        }
+      }
+    }
+
+    @Override
+    protected SplitNumberVectorFilter<V> makeInstance() {
+      return new SplitNumberVectorFilter<>(dims);
+    }
+  }
+}