diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/filter')
33 files changed, 779 insertions, 170 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java index 990458bf..34fb6bad 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -65,10 +65,7 @@ public abstract class AbstractConversionFilter<I, O> implements ObjectFilter { // Get the replacement type information @SuppressWarnings("unchecked") final SimpleTypeInformation<I> castType = (SimpleTypeInformation<I>) type; - @SuppressWarnings("unchecked") - final List<O> castColumn = (List<O>) column; - bundle.appendColumn(convertedType(castType), castColumn); - + // When necessary, perform an initialization scan if(prepareStart(castType)) { for(Object o : column) { @@ -79,6 +76,10 @@ public abstract class AbstractConversionFilter<I, O> implements ObjectFilter { prepareComplete(); } + @SuppressWarnings("unchecked") + final List<O> castColumn = (List<O>) column; + bundle.appendColumn(convertedType(castType), castColumn); + // Normalization scan for(int i = 0; i < objects.dataLength(); i++) { @SuppressWarnings("unchecked") diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractFeatureSelectionFilter.java index d53dfb94..009296b1 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractFeatureSelectionFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractFeatureSelectionFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for 
Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -45,7 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter; * @param <V> the type of FeatureVector contained in both the original and * projected data. */ -public abstract class AbstractFeatureSelectionFilter<V extends FeatureVector<?, ?>> extends AbstractConversionFilter<V, V> { +public abstract class AbstractFeatureSelectionFilter<V extends FeatureVector<?, ?>> extends AbstractStreamConversionFilter<V, V> { /** * <p> * Selected attributes parameter. diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractRandomFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractRandomFeatureSelectionFilter.java index 975d5bd5..b52c7887 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractRandomFeatureSelectionFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractRandomFeatureSelectionFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -45,14 +45,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * The cardinality of the subset of attributes is specified as a parameter. 
* - * * @author Arthur Zimek * @author Erich Schubert * * @param <V> the type of FeatureVector contained in both the original data of * the base parser and the projected data of this ProjectionParser */ -public abstract class AbstractRandomFeatureSelectionFilter<V extends FeatureVector<?, ?>> extends AbstractConversionFilter<V, V> { +public abstract class AbstractRandomFeatureSelectionFilter<V extends FeatureVector<?, ?>> extends AbstractStreamConversionFilter<V, V> { /** * The selected attributes */ @@ -94,13 +93,17 @@ public abstract class AbstractRandomFeatureSelectionFilter<V extends FeatureVect super(); this.k = dim; } - - @Override - protected boolean prepareStart(SimpleTypeInformation<V> in) { + + /** + * Initialize random attributes. + * + * Invoke this from {@link #convertedType}! + * + * @param in Type information. + */ + void initializeRandomAttributes(SimpleTypeInformation<V> in) { int d = ((VectorFieldTypeInformation<V>) in).dimensionality(); selectedAttributes = Util.randomBitSet(k, d, random); - // We don't need the full loop, so return false. - return false; } /** diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java new file mode 100644 index 00000000..1c8acb72 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java @@ -0,0 +1,117 @@ +package de.lmu.ifi.dbs.elki.datasource.filter; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; +import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta; + +/** + * Abstract base class for simple conversion filters such as normalizations and + * projections. + * + * @author Erich Schubert + * + * @param <I> Input object type + * @param <O> Output object type + */ +public abstract class AbstractStreamConversionFilter<I, O> extends AbstractStreamFilter { + /** + * The filtered meta + */ + BundleMeta meta; + + /** + * The column to filter + */ + int column = -1; + + @Override + public BundleMeta getMeta() { + return meta; + } + + @Override + public Object data(int rnum) { + if(rnum != column) { + return source.data(rnum); + } + // Convert: + @SuppressWarnings("unchecked") + final I obj = (I) source.data(rnum); + return filterSingleObject(obj); + } + + @Override + public Event nextEvent() { + Event ev = source.nextEvent(); + if(ev == Event.META_CHANGED) { + if(meta == null) { + meta = new BundleMeta(); + } + BundleMeta origmeta = source.getMeta(); + for(int i = meta.size(); i < origmeta.size(); i++) { + if(column < 0) { + @SuppressWarnings("unchecked") + SimpleTypeInformation<Object> type = (SimpleTypeInformation<Object>) origmeta.get(i); + // Test whether this type matches + if(getInputTypeRestriction().isAssignableFromType(type)) { + @SuppressWarnings("unchecked") + final SimpleTypeInformation<I> castType = (SimpleTypeInformation<I>) type; + meta.add(convertedType(castType)); + column = i; + continue; + } + } + meta.add(origmeta.get(i)); + } + } + return ev; + } + + /** + * Normalize a single instance. 
+ * + * You can implement this as UnsupportedOperationException if you override + * both public "normalize" functions! + * + * @param obj Database object to normalize + * @return Normalized database object + */ + abstract protected O filterSingleObject(I obj); + + /** + * Get the input type restriction used for negotiating the data query. + * + * @return Type restriction + */ + abstract protected SimpleTypeInformation<? super I> getInputTypeRestriction(); + + /** + * Get the output type from the input type after conversion. + * + * @param in input type restriction + * @return output type restriction + */ + abstract protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<I> in); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamFilter.java new file mode 100644 index 00000000..368be1a2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamFilter.java @@ -0,0 +1,50 @@ +package de.lmu.ifi.dbs.elki.datasource.filter; + +import de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource; +import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.datasource.bundle.StreamFromBundle; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/** + * Abstract base class for streaming filters. 
+ * + * @author Erich Schubert + */ +public abstract class AbstractStreamFilter implements StreamFilter { + /** + * Data source + */ + protected BundleStreamSource source = null; + + @Override + public MultipleObjectsBundle filter(MultipleObjectsBundle objects) { + init(new StreamFromBundle(objects)); + return MultipleObjectsBundle.fromStream(this); + } + + @Override + public void init(BundleStreamSource source) { + this.source = source; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterByLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java index b950080d..ebf01cfd 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterByLabelFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,12 +23,11 @@ package de.lmu.ifi.dbs.elki.datasource.filter; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.ArrayList; import java.util.regex.Pattern; import de.lmu.ifi.dbs.elki.data.LabelList; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -43,79 +42,103 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter; * * @apiviz.uses LabelList oneway - - «reads» */ -public class FilterByLabelFilter implements ObjectFilter { +public class ByLabelFilter extends AbstractStreamFilter { /** * Class logger */ - private static final Logging logger = Logging.getLogger(FilterByLabelFilter.class); + private static final Logging logger = Logging.getLogger(ByLabelFilter.class); /** * The filter pattern */ private final Pattern pattern; - + /** * Inversion flag */ private final boolean inverted; /** + * Label column + */ + private int lblcol = -1; + + /** * Constructor. 
* * @param pattern Filter pattern * @param inverted Inversion flag */ - public FilterByLabelFilter(Pattern pattern, boolean inverted) { + public ByLabelFilter(Pattern pattern, boolean inverted) { super(); this.pattern = pattern; this.inverted = inverted; } @Override - public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) { - if(logger.isDebugging()) { - logger.debug("Filtering the data set"); - } + public BundleMeta getMeta() { + return source.getMeta(); + } - // Identify a label column - final int lblcol; - { - int lblc = -1; - for(int i = 0; i < objects.metaLength(); i++) { - if(TypeUtil.GUESSED_LABEL.isAssignableFromType(objects.meta(i))) { - lblc = i; - break; - } - } - lblcol = lblc; // make static - } + @Override + public Object data(int rnum) { + return source.data(rnum); + } - MultipleObjectsBundle bundle = new MultipleObjectsBundle(); - for(int j = 0; j < objects.metaLength(); j++) { - bundle.appendColumn(objects.meta(j), new ArrayList<Object>()); - } - for(int i = 0; i < objects.dataLength(); i++) { - Object l = objects.data(i, lblcol); - if(l instanceof LabelList) { - boolean good = false; - for(String label : (LabelList) l) { - if(pattern.matcher(label).matches()) { - good = true; - break; + @Override + public Event nextEvent() { + while(true) { + Event ev = source.nextEvent(); + switch(ev){ + case END_OF_STREAM: + if (lblcol < 0) { + logger.warning("By label filter was used, but never saw a label relation!"); + } + return Event.END_OF_STREAM; + case META_CHANGED: + // Search for the first label column + if(lblcol < 0) { + BundleMeta meta = source.getMeta(); + for(int i = 0; i < meta.size(); i++) { + if(TypeUtil.GUESSED_LABEL.isAssignableFromType(meta.get(i))) { + lblcol = i; + break; + } } } - if(good == inverted) { - continue; + return Event.META_CHANGED; + case NEXT_OBJECT: + if(lblcol >= 0) { + Object l = source.data(lblcol); + if(l instanceof LabelList) { + boolean good = false; + for(String label : (LabelList) l) { + 
if(pattern.matcher(label).matches()) { + good = true; + break; + } + } + if(good == inverted) { + continue; + } + } + else { + if(!pattern.matcher(l.toString()).matches()) { + continue; + } + } } - } - else { - if(!pattern.matcher(l.toString()).matches()) { - continue; + else { + // No labels known yet. + if(!inverted) { + continue; + } } + return Event.NEXT_OBJECT; + default: + logger.warning("Unknown event: " + ev); } - bundle.appendSimple(objects.getRow(i)); } - return bundle; } /** @@ -167,7 +190,7 @@ public class FilterByLabelFilter implements ObjectFilter { @Override protected Object makeInstance() { - return new FilterByLabelFilter(pattern, inverted); + return new ByLabelFilter(pattern, inverted); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java index 1c9a2274..95596773 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorProjectionFilter.java index 21b00b0d..4793b041 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorProjectionFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorProjectionFilter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -61,7 +61,7 @@ public class DoubleVectorProjectionFilter extends AbstractFeatureSelectionFilter @Override
protected SimpleTypeInformation<? super DoubleVector> convertedType(SimpleTypeInformation<DoubleVector> in) {
- return new VectorFieldTypeInformation<DoubleVector>(DoubleVector.class, getDimensionality(), new DoubleVector(new double[getDimensionality()]));
+ return new VectorFieldTypeInformation<DoubleVector>(DoubleVector.class, DoubleVector.STATIC, getDimensionality(), DoubleVector.STATIC);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorRandomProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorRandomProjectionFilter.java index 802b00d6..5aa31967 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorRandomProjectionFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorRandomProjectionFilter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -61,7 +61,8 @@ public class DoubleVectorRandomProjectionFilter extends AbstractRandomFeatureSel @Override
protected SimpleTypeInformation<? super DoubleVector> convertedType(SimpleTypeInformation<DoubleVector> in) {
- return new VectorFieldTypeInformation<DoubleVector>(DoubleVector.class, k, new DoubleVector(new double[k]));
+ initializeRandomAttributes(in);
+ return new VectorFieldTypeInformation<DoubleVector>(DoubleVector.class, DoubleVector.STATIC, k, DoubleVector.STATIC);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java index c53c2e4d..f48810f5 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -48,7 +48,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; // TODO: use a non-string class for external ids? public class ExternalIDFilter implements ObjectFilter { /** - * Optional parameter that specifies the index of the label to be used as + * Parameter that specifies the index of the label to be used as * external Id, must be an integer equal to or greater than 0. * <p> * Key: {@code -dbc.externalIdIndex} @@ -57,8 +57,7 @@ public class ExternalIDFilter implements ObjectFilter { public static final OptionID EXTERNALID_INDEX_ID = OptionID.getOrCreateOptionID("dbc.externalIdIndex", "The index of the label to be used as external Id."); /** - * The index of the label to be used as external Id, null if no external id - * index is specified. + * The index of the label to be used as external Id. 
*/ private final int externalIdIndex; diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java index b49494e4..c34ecbe7 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,13 +23,10 @@ package de.lmu.ifi.dbs.elki.datasource.filter; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.ArrayList; -import java.util.List; - import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -43,7 +40,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @apiviz.has DBID oneway - - «produces» */ -public class FixedDBIDsFilter implements ObjectFilter { +public class FixedDBIDsFilter extends AbstractStreamFilter { /** * Optional parameter to specify the first object ID to use. 
* <p> @@ -53,9 +50,14 @@ public class FixedDBIDsFilter implements ObjectFilter { public static final OptionID IDSTART_ID = OptionID.getOrCreateOptionID("dbc.startid", "Object ID to start counting with"); /** - * The first ID to assign + * The filtered meta + */ + BundleMeta meta; + + /** + * The next ID to assign */ - final int startid; + int curid = 0; /** * Constructor. @@ -64,22 +66,39 @@ public class FixedDBIDsFilter implements ObjectFilter { */ public FixedDBIDsFilter(int startid) { super(); - this.startid = startid; + this.curid = startid; + } + + @Override + public BundleMeta getMeta() { + return meta; } @Override - public MultipleObjectsBundle filter(MultipleObjectsBundle objects) { - MultipleObjectsBundle bundle = new MultipleObjectsBundle(); - List<DBID> ids = new ArrayList<DBID>(objects.dataLength()); - for(int i = 0; i < objects.dataLength(); i++) { - ids.add(DBIDUtil.importInteger(startid + i)); + public Event nextEvent() { + Event ev = source.nextEvent(); + if(ev == Event.META_CHANGED) { + if(meta == null) { + meta = new BundleMeta(); + meta.add(TypeUtil.DBID); + } + BundleMeta origmeta = source.getMeta(); + // Note -1 for the injected DBID column + for(int i = meta.size() - 1; i < origmeta.size(); i++) { + meta.add(origmeta.get(i)); + } } - bundle.appendColumn(TypeUtil.DBID, ids); - // copy other columns - for(int j = 0; j < objects.metaLength(); j++) { - bundle.appendColumn(objects.meta(j), objects.getColumn(j)); + return ev; + } + + @Override + public Object data(int rnum) { + if(rnum == 0) { + DBID ret = DBIDUtil.importInteger(curid); + curid++; + return ret; } - return bundle; + return source.data(rnum - 1); } /** diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterNoMissingValuesFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java index ceb671df..da5f066f 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterNoMissingValuesFilter.java +++ 
b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -34,16 +34,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * * @author Erich Schubert */ -public class FilterNoMissingValuesFilter implements ObjectFilter { +public class NoMissingValuesFilter implements ObjectFilter { /** * Class logger */ - private static final Logging logger = Logging.getLogger(FilterNoMissingValuesFilter.class); + private static final Logging logger = Logging.getLogger(NoMissingValuesFilter.class); /** * Constructor. */ - public FilterNoMissingValuesFilter() { + public NoMissingValuesFilter() { super(); } @@ -82,7 +82,7 @@ public class FilterNoMissingValuesFilter implements ObjectFilter { public static class Parameterizer extends AbstractParameterizer { @Override protected Object makeInstance() { - return new FilterNoMissingValuesFilter(); + return new NoMissingValuesFilter(); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java index 9275c3c2..264f58fd 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta; import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; /** @@ -34,7 +35,7 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; * * @author Erich Schubert */ -public class NoOpFilter implements ObjectFilter { +public class NoOpFilter extends AbstractStreamFilter { /** * Constructor. */ @@ -46,4 +47,19 @@ public class NoOpFilter implements ObjectFilter { public MultipleObjectsBundle filter(MultipleObjectsBundle objects) { return objects; } -} + + @Override + public BundleMeta getMeta() { + return source.getMeta(); + } + + @Override + public Object data(int rnum) { + return source.data(rnum); + } + + @Override + public Event nextEvent() { + return source.nextEvent(); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java index 1d5c2ba9..b3670e9b 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; */ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.utilities.InspectionUtilFrequentlyScanned; /** * Object filters as part of the input step. @@ -34,7 +35,7 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; * * @apiviz.uses MultipleObjectsBundle oneway - - «filters» */ -public interface ObjectFilter { +public interface ObjectFilter extends InspectionUtilFrequentlyScanned { /** * Filter a set of object packages. * diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java index 6618721f..a8bf2cec 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -50,10 +50,10 @@ public class ShuffleObjectsFilter implements ObjectFilter { * database. If unused, no shuffling will be performed. 
Shuffling takes time * linearly dependent from the size of the database. * <p> - * Key: {@code -dbc.seed} + * Key: {@code -shuffle.seed} * </p> */ - public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("dbc.seed", "Seed for randomly shuffling the rows for the database. If the parameter is not set, no shuffling will be performed."); + public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("shuffle.seed", "Seed for randomly shuffling the rows for the database. If the parameter is not set, no shuffling will be performed."); /** * Seed for randomly shuffling the rows of the database. If null, no shuffling diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java index 74bbe3ac..5aedc79c 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseFloatVectorProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseFloatVectorProjectionFilter.java index 7bbbab2d..06e686c3 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseFloatVectorProjectionFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseFloatVectorProjectionFilter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -22,9 +22,9 @@ You should 
have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import gnu.trove.map.hash.TIntFloatHashMap;
+
import java.util.BitSet;
-import java.util.Collections;
-import java.util.Map;
import de.lmu.ifi.dbs.elki.data.SparseFloatVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
@@ -40,7 +40,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * </p>
*
* @author Arthur Zimek
- *
*/
public class SparseFloatVectorProjectionFilter extends AbstractFeatureSelectionFilter<SparseFloatVector> {
/**
@@ -64,7 +63,7 @@ public class SparseFloatVectorProjectionFilter extends AbstractFeatureSelectionF @Override
protected SimpleTypeInformation<? super SparseFloatVector> convertedType(SimpleTypeInformation<SparseFloatVector> in) {
- final Map<Integer, Float> emptyMap = Collections.emptyMap();
+ final TIntFloatHashMap emptyMap = new TIntFloatHashMap();
return new VectorFieldTypeInformation<SparseFloatVector>(SparseFloatVector.class, getDimensionality(), new SparseFloatVector(emptyMap, getDimensionality()));
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseFloatVectorRandomProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseFloatVectorRandomProjectionFilter.java index f8e999b9..fbf26eea 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseFloatVectorRandomProjectionFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseFloatVectorRandomProjectionFilter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -22,9 +22,6 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collections;
-import java.util.Map;
-
import de.lmu.ifi.dbs.elki.data.SparseFloatVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -60,8 +57,8 @@ public class SparseFloatVectorRandomProjectionFilter extends AbstractRandomFeatu @Override
protected SimpleTypeInformation<? super SparseFloatVector> convertedType(SimpleTypeInformation<SparseFloatVector> in) {
- final Map<Integer, Float> emptyMap = Collections.emptyMap();
- return new VectorFieldTypeInformation<SparseFloatVector>(SparseFloatVector.class, k, new SparseFloatVector(emptyMap, k));
+ initializeRandomAttributes(in);
+ return new VectorFieldTypeInformation<SparseFloatVector>(SparseFloatVector.class, k, new SparseFloatVector(SparseFloatVector.EMPTYMAP, k));
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java new file mode 100644 index 00000000..482fc498 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java @@ -0,0 +1,75 @@ +package de.lmu.ifi.dbs.elki.datasource.filter; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.SparseFloatVector; +import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; + +/** + * Class that turns sparse float vectors into a proper vector field, by setting + * the maximum dimensionality for each vector. + * + * @author Erich Schubert + */ +public class SparseVectorFieldFilter extends AbstractConversionFilter<SparseFloatVector, SparseFloatVector> { + /** + * Maximum dimension + */ + int maxdim = -1; + + /** + * Constructor. 
+ */ + public SparseVectorFieldFilter() { + super(); + } + + @Override + protected boolean prepareStart(SimpleTypeInformation<SparseFloatVector> in) { + return true; + } + + @Override + protected void prepareProcessInstance(SparseFloatVector obj) { + maxdim = Math.max(maxdim, obj.getDimensionality()); + } + + @Override + protected SparseFloatVector filterSingleObject(SparseFloatVector obj) { + assert(maxdim > 0); + obj.setDimensionality(maxdim); + return obj; + } + + @Override + protected SimpleTypeInformation<? super SparseFloatVector> getInputTypeRestriction() { + return TypeUtil.SPARSE_FLOAT_FIELD; + } + + @Override + protected SimpleTypeInformation<? super SparseFloatVector> convertedType(SimpleTypeInformation<SparseFloatVector> in) { + return new VectorFieldTypeInformation<SparseFloatVector>(SparseFloatVector.class, maxdim, SparseFloatVector.STATIC); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java index 25ab7e89..827a5011 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -82,8 +82,8 @@ public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements Ob final VectorFieldTypeInformation<V> vtype = VectorFieldTypeInformation.class.cast(type); // Get the replacement type informations - VectorFieldTypeInformation<V> type1 = new VectorFieldTypeInformation<V>(type.getRestrictionClass(), dims.length, dims.length); - VectorFieldTypeInformation<V> type2 = new VectorFieldTypeInformation<V>(type.getRestrictionClass(), vtype.dimensionality() - dims.length, vtype.dimensionality() - dims.length); + VectorFieldTypeInformation<V> type1 = new VectorFieldTypeInformation<V>(type.getRestrictionClass(), type.getSerializer(), dims.length, dims.length); + VectorFieldTypeInformation<V> type2 = new VectorFieldTypeInformation<V>(type.getRestrictionClass(), type.getSerializer(), vtype.dimensionality() - dims.length, vtype.dimensionality() - dims.length); final List<V> col1 = new ArrayList<V>(column.size()); final List<V> col2 = new ArrayList<V>(column.size()); bundle.appendColumn(type1, col1); @@ -122,8 +122,8 @@ public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements Ob for(int d = 0; d < odims.length; d++) { part2[d] = obj.doubleValue(odims[d]); } - col1.add(obj.newInstance(part1)); - col2.add(obj.newInstance(part2)); + 
col1.add(obj.newNumberVector(part1)); + col2.add(obj.newNumberVector(part2)); } } return bundle; diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java new file mode 100644 index 00000000..e40565f9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java @@ -0,0 +1,43 @@ +package de.lmu.ifi.dbs.elki.datasource.filter; + +import de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Streaming filters are often more efficient (less memory use) and can be used + * in more settings. + * + * @author Erich Schubert + * + * @apiviz.uses BundleStreamSource - - «filters» + */ +public interface StreamFilter extends ObjectFilter, BundleStreamSource { + /** + * Connect to the previous stream. + * + * @param source Stream source + */ + public void init(BundleStreamSource source); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java index ae75a979..3a629760 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractNormalization.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.datasource.filter; +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,6 +25,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.datasource.filter.AbstractConversionFilter; import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem; /** diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AttributeWiseErfNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java index d365a9ae..c0f2a955 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AttributeWiseErfNormalization.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.datasource.filter; +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,7 +26,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; import 
de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; /** * Attribute-wise Normalization using the error function. This mostly makes @@ -55,9 +55,9 @@ public class AttributeWiseErfNormalization<O extends NumberVector<O, ?>> extends protected O filterSingleObject(O obj) { double[] val = new double[obj.getDimensionality()]; for(int i = 0; i < val.length; i++) { - val[i] = MathUtil.erf(obj.doubleValue(i + 1)); + val[i] = NormalDistribution.erf(obj.doubleValue(i + 1)); } - return obj.newInstance(val); + return obj.newNumberVector(val); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AttributeWiseMinMaxNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java index d9a636ec..4cf3c606 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AttributeWiseMinMaxNormalization.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.datasource.filter; +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,14 +24,13 @@ package de.lmu.ifi.dbs.elki.datasource.filter; */ import java.util.ArrayList; -import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; -import de.lmu.ifi.dbs.elki.utilities.Util; +import 
de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.AllOrNoneMustBeSetGlobalConstraint; @@ -118,10 +117,13 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte @Override protected V filterSingleObject(V featureVector) { double[] values = new double[featureVector.getDimensionality()]; + if(minima.length != featureVector.getDimensionality()) { + throw new IllegalArgumentException("FeatureVectors and given Minima/Maxima differ in length."); + } for(int d = 1; d <= featureVector.getDimensionality(); d++) { values[d - 1] = (featureVector.doubleValue(d) - minima[d - 1]) / factor(d); } - return featureVector.newInstance(values); + return featureVector.newNumberVector(values); } @Override @@ -131,7 +133,7 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte for(int d = 1; d <= featureVector.getDimensionality(); d++) { values[d - 1] = (featureVector.doubleValue(d) * (factor(d)) + minima[d - 1]); } - return featureVector.newInstance(values); + return featureVector.newNumberVector(values); } else { throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + maxima.length); @@ -214,13 +216,11 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte super.makeOptions(config); DoubleListParameter minimaP = new DoubleListParameter(MINIMA_ID, true); if(config.grab(minimaP)) { - List<Double> min_list = minimaP.getValue(); - minima = Util.unbox(min_list.toArray(new Double[min_list.size()])); + minima = ArrayLikeUtil.toPrimitiveDoubleArray(minimaP.getValue()); } DoubleListParameter maximaP = new DoubleListParameter(MAXIMA_ID, true); if(config.grab(maximaP)) { - List<Double> 
max_list = maximaP.getValue(); - maxima = Util.unbox(max_list.toArray(new Double[max_list.size()])); + maxima = ArrayLikeUtil.toPrimitiveDoubleArray(maximaP.getValue()); } ArrayList<Parameter<?, ?>> global_1 = new ArrayList<Parameter<?, ?>>(); diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AttributeWiseVarianceNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java index 3ae2fdad..52a0499f 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AttributeWiseVarianceNormalization.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.datasource.filter; +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter; */ import java.util.ArrayList; -import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; @@ -33,7 +32,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; -import de.lmu.ifi.dbs.elki.utilities.Util; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException; @@ -145,7 +144,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex for(int d = 1; d <= featureVector.getDimensionality(); d++) { 
values[d - 1] = normalize(d - 1, featureVector.doubleValue(d)); } - return featureVector.newInstance(values); + return featureVector.newNumberVector(values); } @Override @@ -155,7 +154,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex for(int d = 1; d <= featureVector.getDimensionality(); d++) { values[d - 1] = restore(d - 1, featureVector.doubleValue(d)); } - return featureVector.newInstance(values); + return featureVector.newNumberVector(values); } else { throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + mean.length); @@ -163,11 +162,21 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex } private double normalize(int d, double val) { - return (val - mean[d]) / stddev[d]; + if(mean.length == 1) { + return (val - mean[0]) / stddev[0]; + } + else { + return (val - mean[d]) / stddev[d]; + } } private double restore(int d, double val) { - return (val * stddev[d]) + mean[d]; + if(mean.length == 1) { + return (val * stddev[0]) + mean[0]; + } + else { + return (val * stddev[d]) + mean[d]; + } } @Override @@ -236,11 +245,8 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex config.grab(stddevP); // Note: grab first, then use isDefined, to ensure the stddev is grabbed. 
if(meanP.isDefined() && stddevP.isDefined()) { - List<Double> mean_list = meanP.getValue(); - List<Double> stddev_list = stddevP.getValue(); - - mean = Util.unbox(mean_list.toArray(new Double[mean_list.size()])); - stddev = Util.unbox(stddev_list.toArray(new Double[stddev_list.size()])); + mean = ArrayLikeUtil.toPrimitiveDoubleArray(meanP.getValue()); + stddev = ArrayLikeUtil.toPrimitiveDoubleArray(stddevP.getValue()); for(double d : stddev) { if(d == 0) { diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/InverseDocumentFrequencyNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java index d8ffd71c..41cce2b9 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/InverseDocumentFrequencyNormalization.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.datasource.filter; +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,10 +23,12 @@ package de.lmu.ifi.dbs.elki.datasource.filter; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +import gnu.trove.iterator.TIntDoubleIterator; +import gnu.trove.map.TIntDoubleMap; +import gnu.trove.map.hash.TIntDoubleHashMap; +import gnu.trove.map.hash.TIntFloatHashMap; + import java.util.BitSet; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; import de.lmu.ifi.dbs.elki.data.SparseFloatVector; import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; @@ -44,7 +46,7 @@ public class InverseDocumentFrequencyNormalization extends AbstractNormalization /** * The IDF storage */ - Map<Integer, Number> idf = new HashMap<Integer, Number>(); + TIntDoubleMap idf = new TIntDoubleHashMap(); /** * The number of objects in the dataset @@ -88,18 +90,19 @@ public class InverseDocumentFrequencyNormalization extends AbstractNormalization protected void prepareComplete() { final double dbsize = objcnt; // Compute IDF values - for(Entry<Integer, Number> ent : idf.entrySet()) { + for(TIntDoubleIterator iter = idf.iterator(); iter.hasNext();) { + iter.advance(); // Note: dbsize is a double! 
- ent.setValue(Math.log(dbsize / ent.getValue().intValue())); + iter.setValue(Math.log(dbsize / iter.value())); } } @Override protected SparseFloatVector filterSingleObject(SparseFloatVector featureVector) { BitSet b = featureVector.getNotNullMask(); - Map<Integer, Float> vals = new HashMap<Integer, Float>(); + TIntFloatHashMap vals = new TIntFloatHashMap(); for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) { - vals.put(i, (float) (featureVector.doubleValue(i) * idf.get(i).doubleValue())); + vals.put(i, (float) (featureVector.doubleValue(i) * idf.get(i))); } return new SparseFloatVector(vals, featureVector.getDimensionality()); } @@ -107,9 +110,9 @@ public class InverseDocumentFrequencyNormalization extends AbstractNormalization @Override public SparseFloatVector restore(SparseFloatVector featureVector) { BitSet b = featureVector.getNotNullMask(); - Map<Integer, Float> vals = new HashMap<Integer, Float>(); + TIntFloatHashMap vals = new TIntFloatHashMap(); for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) { - vals.put(i, (float) (featureVector.doubleValue(i) / idf.get(i).doubleValue())); + vals.put(i, (float) (featureVector.doubleValue(i) / idf.get(i))); } return new SparseFloatVector(vals, featureVector.getDimensionality()); } diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java new file mode 100644 index 00000000..6de7eaba --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java @@ -0,0 +1,115 @@ +package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Normalizes each vector to length 1 with respect to a configurable norm.
+ *
+ * The scaling factor is not stored, so the transformation cannot be undone:
+ * {@link #restore} always throws.
+ *
+ * @author Heidi Kolb
+ * @author Erich Schubert
+ *
+ * @param <V> vector type
+ */
+public class LengthNormalization<V extends NumberVector<V, ?>> extends AbstractNormalization<V> {
+  /**
+   * Norm used to measure the vector length.
+   */
+  DoubleNorm<? super V> norm;
+
+  /**
+   * Constructor.
+   *
+   * @param norm Norm to use
+   */
+  public LengthNormalization(DoubleNorm<? super V> norm) {
+    super();
+    this.norm = norm;
+  }
+
+  /**
+   * Scale a vector by the inverse of its length under {@link #norm}.
+   *
+   * @param featureVector vector to normalize
+   * @return the rescaled vector, or the input itself if its length is 0
+   */
+  @Override
+  protected V filterSingleObject(V featureVector) {
+    final double d = norm.doubleNorm(featureVector);
+    // A zero-length vector cannot be scaled to length 1: 1 / d would be
+    // Infinity and the components would turn into NaN. Pass it through.
+    if(d == 0.0) {
+      return featureVector;
+    }
+    return featureVector.newNumberVector(featureVector.getColumnVector().timesEquals(1 / d).getArrayRef());
+  }
+
+  /**
+   * Not supported: the original length of a vector is not retained.
+   *
+   * @param featureVector normalized vector
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public V restore(V featureVector) throws NonNumericFeaturesException {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Not supported for this normalization.
+   *
+   * @param linearEquationSystem equation system to transform
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public LinearEquationSystem transform(LinearEquationSystem linearEquationSystem) {
+    // TODO: transforming equation systems is not implemented here.
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+    return TypeUtil.NUMBER_VECTOR_FIELD;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+    /**
+     * Option ID for normalization norm
+     */
+    public static final OptionID NORM_ID = OptionID.getOrCreateOptionID("normalization.norm", "Norm (length function) to use for computing the vector length.");
+
+    /**
+     * Norm to use; assigned in {@link #makeOptions} when the parameter can be
+     * grabbed.
+     */
+    DoubleNorm<? super V> norm;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      // Restricted to DoubleNorm implementations; EuclideanDistanceFunction
+      // is passed as the third constructor argument.
+      ObjectParameter<DoubleNorm<? super V>> normP = new ObjectParameter<DoubleNorm<? super V>>(NORM_ID, DoubleNorm.class, EuclideanDistanceFunction.class);
+      if(config.grab(normP)) {
+        norm = normP.instantiateClass(config);
+      }
+    }
+
+    @Override
+    protected LengthNormalization<V> makeInstance() {
+      return new LengthNormalization<V>(norm);
+    }
+  }
+}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/NonNumericFeaturesException.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/NonNumericFeaturesException.java index 3206518d..9f26482a 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/NonNumericFeaturesException.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/NonNumericFeaturesException.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.datasource.filter; +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/Normalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/Normalization.java index 417c4456..96f6bdc1 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/Normalization.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/Normalization.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.datasource.filter; +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter; */ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter; import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem; import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java 
b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java new file mode 100644 index 00000000..be8c1166 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java @@ -0,0 +1,113 @@ +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.IntegerVector; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; +import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; + +/** + * Normalize vectors according to their rank in the attributes. + * + * Note: ranks are multiplied by 2, to be able to give ties an integer rank. + * (e.g. 
first two records are tied at "1" then, followed by the next on "4") + * + * @author Erich Schubert + */ +public class RankTieNormalization implements ObjectFilter { + /** + * Constructor. + */ + public RankTieNormalization() { + super(); + } + + @Override + public MultipleObjectsBundle filter(MultipleObjectsBundle objects) { + final int len = objects.dataLength(); + MultipleObjectsBundle bundle = new MultipleObjectsBundle(); + + for(int r = 0; r < objects.metaLength(); r++) { + final SimpleTypeInformation<?> type = objects.meta(r); + final List<?> column = objects.getColumn(r); + if(!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) { + bundle.appendColumn(type, column); + continue; + } + @SuppressWarnings("unchecked") + final List<? extends NumberVector<?, ?>> castColumn = (List<? extends NumberVector<?, ?>>) column; + // Get the replacement type information + final int dim = ((VectorFieldTypeInformation<?>) type).dimensionality(); + final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<IntegerVector>(IntegerVector.class, dim, IntegerVector.STATIC); + + // Output vectors + int[][] posvecs = new int[len][dim]; + // Sort for each dimension + // TODO: an int[] array would be enough, if we could use a comparator... 
+ DoubleIntPair[] sorter = new DoubleIntPair[len]; + for(int i = 0; i < sorter.length; i++) { + sorter[i] = new DoubleIntPair(Double.NaN, -1); + } + for(int d = 1; d <= dim; d++) { + // fill array + for(int i = 0; i < sorter.length; i++) { + sorter[i].first = castColumn.get(i).doubleValue(d); + sorter[i].second = i; + } + // Sort + Arrays.sort(sorter); + // Transfer positions to output vectors + for(int sta = 0; sta < sorter.length;) { + // Compute ties + int end = sta + 1; + while(end < sorter.length && sorter[sta].first == sorter[end].first) { + end++; + } + final int pos = (sta + end - 1); + for(int i = sta; i < end; i++) { + posvecs[sorter[i].second][d - 1] = pos; + } + sta = end; + } + } + + // Prepare output data + final List<IntegerVector> outColumn = new ArrayList<IntegerVector>(len); + for(int i = 0; i < len; i++) { + outColumn.add(new IntegerVector(posvecs[i])); + } + bundle.appendColumn(outType, outColumn); + } + return bundle; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/TFIDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java index 65fab4cb..e279c42c 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/TFIDFNormalization.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.datasource.filter; +package de.lmu.ifi.dbs.elki.datasource.filter.normalization; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,9 +23,9 @@ package de.lmu.ifi.dbs.elki.datasource.filter; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import gnu.trove.map.hash.TIntFloatHashMap; + import java.util.BitSet; -import java.util.HashMap; -import java.util.Map; import de.lmu.ifi.dbs.elki.data.SparseFloatVector; @@ -58,9 +58,9 @@ public class TFIDFNormalization extends InverseDocumentFrequencyNormalization { if(sum <= 0) { sum = 1.0; } - Map<Integer, Float> vals = new HashMap<Integer, Float>(); + TIntFloatHashMap vals = new TIntFloatHashMap(); for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) { - vals.put(i, (float) (featureVector.doubleValue(i) / sum * idf.get(i).doubleValue())); + vals.put(i, (float) (featureVector.doubleValue(i) / sum * idf.get(i))); } return new SparseFloatVector(vals, featureVector.getDimensionality()); } diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/package-info.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/package-info.java new file mode 100644 index 00000000..c0c10a7c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Data normalization.</p> + */ +/* +This file is part of 
ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2012 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java index 0379c7aa..ca52f814 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |