summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/datasource/filter
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/filter')
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java69
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/DropNaNFilter.java (renamed from src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java)10
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java50
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java19
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ReplaceNaNWithRandomFilter.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java33
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java48
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java37
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java10
19 files changed, 422 insertions, 206 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
index 2109761a..66707da6 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
@@ -112,8 +112,9 @@ public class ByLabelFilter extends AbstractStreamFilter {
Object l = source.data(lblcol);
if(l instanceof LabelList) {
boolean good = false;
- for(String label : (LabelList) l) {
- if(pattern.matcher(label).matches()) {
+ final LabelList ll = (LabelList) l;
+ for(int i = 0; i < ll.size(); i++) {
+ if(pattern.matcher(ll.get(i)).matches()) {
good = true;
break;
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
index e8dc69c3..020dcb31 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
@@ -34,7 +34,6 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -49,23 +48,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*/
public class ClassLabelFilter implements ObjectFilter {
/**
- * Optional parameter that specifies the index of the label to be used as
- * class label, must be an integer equal to or greater than 0.
- * <p>
- * Key: {@code -dbc.classLabelIndex}
- * </p>
- */
- public static final OptionID CLASS_LABEL_INDEX_ID = new OptionID("dbc.classLabelIndex", "The index of the label to be used as class label.");
-
- /**
- * Parameter to specify the class of occurring class labels.
- * <p>
- * Key: {@code -dbc.classLabelClass}
- * </p>
- */
- public static final OptionID CLASS_LABEL_CLASS_ID = new OptionID("dbc.classLabelClass", "Class label class to use.");
-
- /**
* The index of the label to be used as class label, null if no class label is
* specified.
*/
@@ -94,10 +76,10 @@ public class ClassLabelFilter implements ObjectFilter {
// Find a labellist column
boolean done = false;
boolean keeplabelcol = false;
- for (int i = 0; i < objects.metaLength(); i++) {
+ for(int i = 0; i < objects.metaLength(); i++) {
SimpleTypeInformation<?> meta = objects.meta(i);
// Skip non-labellist columns - or if we already had a labellist
- if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
+ if(done || !LabelList.class.equals(meta.getRestrictionClass())) {
bundle.appendColumn(meta, objects.getColumn(i));
continue;
}
@@ -107,28 +89,39 @@ public class ClassLabelFilter implements ObjectFilter {
List<ClassLabel> clscol = new ArrayList<>(objects.dataLength());
List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
+ ArrayList<String> lbuf = new ArrayList<>();
// Split the column
- for (Object obj : objects.getColumn(i)) {
- if (obj != null) {
+ for(Object obj : objects.getColumn(i)) {
+ if(obj != null) {
LabelList ll = (LabelList) obj;
+ // Negative indexes count from the end of the label list (-1 is the last label).
+ int off = (classLabelIndex >= 0) ? classLabelIndex : (ll.size() + classLabelIndex);
try {
- ClassLabel lbl = classLabelFactory.makeFromString(ll.remove(classLabelIndex));
+ ClassLabel lbl = classLabelFactory.makeFromString(ll.get(off));
clscol.add(lbl);
- } catch (Exception e) {
+ }
+ catch(Exception e) {
throw new AbortException("Cannot initialize class labels: " + e.getMessage(), e);
}
- lblcol.add(ll);
- if (ll.size() > 0) {
+ lbuf.clear();
+ for(int j = 0; j < ll.size(); j++) {
+ if(j == off) {
+ continue;
+ }
+ lbuf.add(ll.get(j));
+ }
+ lblcol.add(LabelList.make(lbuf));
+ if(lbuf.size() > 0) {
keeplabelcol = true;
}
- } else {
+ }
+ else {
clscol.add(null);
lblcol.add(null);
}
}
bundle.appendColumn(classLabelFactory.getTypeInformation(), clscol);
// Only add the label column when it's not empty.
- if (keeplabelcol) {
+ if(keeplabelcol) {
bundle.appendColumn(meta, lblcol);
}
}
@@ -144,6 +137,23 @@ public class ClassLabelFilter implements ObjectFilter {
*/
public static class Parameterizer extends AbstractParameterizer {
/**
+ * Optional parameter that specifies the index of the label to be used as
+ * class label. The first label is 0; negative indexes are relative to the end.
+ * <p>
+ * Key: {@code -dbc.classLabelIndex}
+ * </p>
+ */
+ public static final OptionID CLASS_LABEL_INDEX_ID = new OptionID("dbc.classLabelIndex", "The index of the label to be used as class label. The first label is 0, negative indexes are relative to the end.");
+
+ /**
+ * Parameter to specify the class of occurring class labels.
+ * <p>
+ * Key: {@code -dbc.classLabelClass}
+ * </p>
+ */
+ public static final OptionID CLASS_LABEL_CLASS_ID = new OptionID("dbc.classLabelClass", "Class label class to use.");
+
+ /**
* The index of the label to be used as class label, null if no class label
* is specified.
*/
@@ -159,12 +169,11 @@ public class ClassLabelFilter implements ObjectFilter {
super.makeOptions(config);
// parameter class label index
final IntParameter classLabelIndexParam = new IntParameter(CLASS_LABEL_INDEX_ID);
- classLabelIndexParam.addConstraint(new GreaterEqualConstraint(0));
final ObjectParameter<ClassLabel.Factory<?>> classlabelClassParam = new ObjectParameter<>(CLASS_LABEL_CLASS_ID, ClassLabel.Factory.class, SimpleClassLabel.Factory.class);
config.grab(classLabelIndexParam);
config.grab(classlabelClassParam);
- if (classLabelIndexParam.isDefined() && classlabelClassParam.isDefined()) {
+ if(classLabelIndexParam.isDefined() && classlabelClassParam.isDefined()) {
classLabelIndex = classLabelIndexParam.intValue();
classLabelFactory = classlabelClassParam.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java
index 97624ac8..517eb301 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java
@@ -129,8 +129,9 @@ public class ClassLabelFromPatternFilter extends AbstractStreamFilter {
continue;
}
if (o instanceof LabelList) {
- for (String l : (LabelList) o) {
- if (pattern.matcher(l).find()) {
+ final LabelList ll = (LabelList) o;
+ for(int j = 0; j < ll.size(); j++) {
+ if (pattern.matcher(ll.get(j)).find()) {
return positive;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/DropNaNFilter.java
index 769f3009..fb9cf83e 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/DropNaNFilter.java
@@ -43,11 +43,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
*
* @author Erich Schubert
*/
-public class NaNFilter extends AbstractStreamFilter {
+public class DropNaNFilter extends AbstractStreamFilter {
/**
* Class logger
*/
- private static final Logging LOG = Logging.getLogger(NaNFilter.class);
+ private static final Logging LOG = Logging.getLogger(DropNaNFilter.class);
/**
* Columns to check.
@@ -57,7 +57,7 @@ public class NaNFilter extends AbstractStreamFilter {
/**
* Constructor.
*/
- public NaNFilter() {
+ public DropNaNFilter() {
super();
}
@@ -178,8 +178,8 @@ public class NaNFilter extends AbstractStreamFilter {
*/
public static class Parameterizer extends AbstractParameterizer {
@Override
- protected Object makeInstance() {
- return new NaNFilter();
+ protected DropNaNFilter makeInstance() {
+ return new DropNaNFilter();
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
index 926ebe99..17538dc9 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
@@ -33,7 +33,6 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -48,15 +47,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
// TODO: use a non-string class for external ids?
public class ExternalIDFilter implements ObjectFilter {
/**
- * Parameter that specifies the index of the label to be used as external Id,
- * must be an integer equal to or greater than 0.
- * <p>
- * Key: {@code -dbc.externalIdIndex}
- * </p>
- */
- public static final OptionID EXTERNALID_INDEX_ID = new OptionID("dbc.externalIdIndex", "The index of the label to be used as external Id.");
-
- /**
* The index of the label to be used as external Id.
*/
private final int externalIdIndex;
@@ -77,10 +67,10 @@ public class ExternalIDFilter implements ObjectFilter {
// Find a labellist column
boolean done = false;
boolean keeplabelcol = false;
- for (int i = 0; i < objects.metaLength(); i++) {
+ for(int i = 0; i < objects.metaLength(); i++) {
SimpleTypeInformation<?> meta = objects.meta(i);
// Skip non-labellist columns - or if we already had a labellist
- if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
+ if(done || !LabelList.class.equals(meta.getRestrictionClass())) {
bundle.appendColumn(meta, objects.getColumn(i));
continue;
}
@@ -91,15 +81,25 @@ public class ExternalIDFilter implements ObjectFilter {
List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
// Split the column
- for (Object obj : objects.getColumn(i)) {
- if (obj != null) {
+ ArrayList<String> lbuf = new ArrayList<>();
+ for(Object obj : objects.getColumn(i)) {
+ if(obj != null) {
LabelList ll = (LabelList) obj;
- eidcol.add(new ExternalID(ll.remove(externalIdIndex)));
- lblcol.add(ll);
- if (ll.size() > 0) {
+ // Negative indexes count from the end of the label list (-1 is the last label).
+ int off = externalIdIndex >= 0 ? externalIdIndex : (ll.size() + externalIdIndex);
+ eidcol.add(new ExternalID(ll.get(off)));
+ lbuf.clear();
+ for(int j = 0; j < ll.size(); j++) {
+ if(j == off) {
+ continue;
+ }
+ lbuf.add(ll.get(j));
+ }
+ lblcol.add(LabelList.make(lbuf));
+ // Test the remaining labels: ll itself still contains the external id.
+ if(lbuf.size() > 0) {
keeplabelcol = true;
}
- } else {
+ }
+ else {
eidcol.add(null);
lblcol.add(null);
}
@@ -107,7 +107,7 @@ public class ExternalIDFilter implements ObjectFilter {
bundle.appendColumn(TypeUtil.EXTERNALID, eidcol);
// Only add the label column when it's not empty.
- if (keeplabelcol) {
+ if(keeplabelcol) {
bundle.appendColumn(meta, lblcol);
}
}
@@ -122,14 +122,22 @@ public class ExternalIDFilter implements ObjectFilter {
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter that specifies the index of the label to be used as external
+ * Id, starting at 0. Negative numbers are counted from the end.
+ * <p>
+ * Key: {@code -dbc.externalIdIndex}
+ * </p>
+ */
+ public static final OptionID EXTERNALID_INDEX_ID = new OptionID("dbc.externalIdIndex", "The index of the label to be used as external Id. The first label is 0; negative indexes are relative to the end.");
+
int externalIdIndex = -1;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter externalIdIndexParam = new IntParameter(EXTERNALID_INDEX_ID);
- externalIdIndexParam.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(externalIdIndexParam)) {
+ if(config.grab(externalIdIndexParam)) {
externalIdIndex = externalIdIndexParam.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
index 7f09b905..ce02fc29 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
@@ -42,14 +42,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
public class FixedDBIDsFilter extends AbstractStreamFilter {
/**
- * Optional parameter to specify the first object ID to use.
- * <p>
- * Key: {@code -dbc.startid}
- * </p>
- */
- public static final OptionID IDSTART_ID = new OptionID("dbc.startid", "Object ID to start counting with");
-
- /**
* The filtered meta
*/
BundleMeta meta;
@@ -109,6 +101,13 @@ public class FixedDBIDsFilter extends AbstractStreamFilter {
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Optional parameter to specify the first object ID to use.
+ * <p>
+ * Key: {@code -dbc.startid}
+ * </p>
+ */
+ public static final OptionID IDSTART_ID = new OptionID("dbc.startid", "Object ID to start counting with");
int startid = -1;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
index 37f8f8d9..453d294e 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -75,7 +75,7 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
public HistogramJitterFilter(double jitter, RandomFactory rnd) {
super();
this.jitter = jitter;
- this.rnd = new ExponentialDistribution(1, rnd.getRandom());
+ this.rnd = new ExponentialDistribution(1, rnd.getSingleThreadedRandom());
}
@Override
@@ -83,7 +83,7 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
final int dim = obj.getDimensionality();
// Compute the total sum.
double osum = 0;
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
osum += obj.doubleValue(i);
}
// Actual maximum jitter amount:
@@ -91,13 +91,13 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
// Generate jitter vector
double[] raw = new double[dim];
double jsum = 0; // Sum of jitter
- for (int i = 0; i < raw.length; i++) {
+ for(int i = 0; i < raw.length; i++) {
raw[i] = rnd.nextRandom() * maxjitter;
jsum += raw[i];
}
final double mix = jsum / osum;
// Combine the two vector
- for (int i = 0; i < raw.length; i++) {
+ for(int i = 0; i < raw.length; i++) {
raw[i] = raw[i] + (1 - mix) * obj.doubleValue(i);
}
return factory.newNumberVector(raw);
@@ -146,12 +146,12 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter jitterP = new DoubleParameter(JITTER_ID);
- jitterP.addConstraint(new GreaterEqualConstraint(Double.valueOf(0.0)));
- if (config.grab(jitterP)) {
+ jitterP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(jitterP)) {
jitter = jitterP.getValue().doubleValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
index 5c8d07d0..a7e44d4d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
@@ -29,8 +29,7 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -60,7 +59,7 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
public RandomSamplingStreamFilter(double prob, RandomFactory rnd) {
super();
this.prob = prob;
- this.random = rnd.getRandom();
+ this.random = rnd.getSingleThreadedRandom();
}
@Override
@@ -75,15 +74,15 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
@Override
public Event nextEvent() {
- while (true) {
+ while(true) {
Event ev = source.nextEvent();
- switch(ev) {
+ switch(ev){
case END_OF_STREAM:
return ev;
case META_CHANGED:
return ev;
case NEXT_OBJECT:
- if (random.nextDouble() < prob) {
+ if(random.nextDouble() < prob) {
return ev;
}
continue;
@@ -123,13 +122,13 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter probP = new DoubleParameter(PROB_ID);
- probP.addConstraint(new GreaterEqualConstraint(0.0));
- probP.addConstraint(new LessEqualConstraint(1.0));
- if (config.grab(probP)) {
+ probP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ probP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(probP)) {
prob = probP.getValue().doubleValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ReplaceNaNWithRandomFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ReplaceNaNWithRandomFilter.java
new file mode 100644
index 00000000..9029d8ea
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ReplaceNaNWithRandomFilter.java
@@ -0,0 +1,220 @@
+package de.lmu.ifi.dbs.elki.datasource.filter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * A filter to replace all NaN values in dense number vector columns with
+ * random values drawn from a configurable distribution.
+ *
+ * Note: currently, only dense vector columns are supported.
+ *
+ * TODO: add support for sparse vectors.
+ *
+ * @author Erich Schubert
+ */
+public class ReplaceNaNWithRandomFilter extends AbstractStreamFilter {
+  /**
+   * Class logger
+   */
+  private static final Logging LOG = Logging.getLogger(ReplaceNaNWithRandomFilter.class);
+
+  /**
+   * Vector factories of the columns to process, indexed by column number.
+   * Entries are {@code null} for columns that are passed through unchanged.
+   */
+  private NumberVector.Factory<?, ?>[] densecols = null;
+
+  /**
+   * Distribution to generate replacement values with.
+   */
+  private Distribution dist;
+
+  /**
+   * Row cache: the (possibly repaired) column values of the current object.
+   */
+  private ArrayList<Object> rows = new ArrayList<>();
+
+  /**
+   * Constructor.
+   *
+   * @param dist Distribution to sample replacement values from
+   */
+  public ReplaceNaNWithRandomFilter(Distribution dist) {
+    super();
+    this.dist = dist;
+  }
+
+  @Override
+  public BundleMeta getMeta() {
+    return source.getMeta();
+  }
+
+  @Override
+  public Object data(int rnum) {
+    return rows.get(rnum);
+  }
+
+  @Override
+  public Event nextEvent() {
+    while (true) {
+      Event ev = source.nextEvent();
+      switch(ev) {
+      case END_OF_STREAM:
+        return ev;
+      case META_CHANGED:
+        updateMeta(source.getMeta());
+        return ev;
+      case NEXT_OBJECT:
+        // The stream may deliver objects before any META_CHANGED was seen.
+        if (densecols == null) {
+          updateMeta(source.getMeta());
+        }
+        rows.clear();
+        for (int j = 0; j < densecols.length; j++) {
+          Object o = source.data(j);
+          if (densecols[j] != null) {
+            o = replaceNaN(densecols[j], o);
+          }
+          rows.add(o);
+        }
+        return ev;
+      }
+    }
+  }
+
+  /**
+   * Replace the NaN values of a single vector by random values.
+   *
+   * @param factory Factory of the column, used to build the repaired vector
+   * @param obj Column value, a number vector or {@code null}
+   * @return A new vector with every NaN replaced, or {@code obj} itself when
+   *         it is {@code null} or does not contain any NaN
+   */
+  private Object replaceNaN(NumberVector.Factory<?, ?> factory, Object obj) {
+    if (obj == null) {
+      return null;
+    }
+    final NumberVector<?> v = (NumberVector<?>) obj;
+    double[] ro = null; // replacement, allocated on the first NaN only
+    for (int d = 0; d < v.getDimensionality(); d++) {
+      if (Double.isNaN(v.doubleValue(d))) {
+        if (ro == null) {
+          // NOTE(review): presumably getColumnVector() returns a copy, so the
+          // input vector is not modified - confirm.
+          ro = v.getColumnVector().getArrayRef();
+        }
+        ro[d] = dist.nextRandom();
+      }
+    }
+    // Only build a new vector when something was actually replaced.
+    return (ro != null) ? factory.newNumberVector(ro) : obj;
+  }
+
+  /**
+   * Process an updated meta record.
+   *
+   * Rebuilds {@link #densecols} with the factories of all float and double
+   * vector field columns.
+   *
+   * @param meta Meta record
+   * @throws AbortException when the bundle contains a sparse vector column
+   */
+  private void updateMeta(BundleMeta meta) {
+    final int cols = meta.size();
+    densecols = new NumberVector.Factory<?, ?>[cols];
+    for (int i = 0; i < cols; i++) {
+      if (TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(meta.get(i))) {
+        throw new AbortException("Filtering sparse vectors is not yet supported by this filter. Please contribute.");
+      }
+      if (TypeUtil.FLOAT_VECTOR_FIELD.isAssignableFromType(meta.get(i)) || TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(meta.get(i))) {
+        VectorFieldTypeInformation<?> vmeta = (VectorFieldTypeInformation<?>) meta.get(i);
+        densecols[i] = (NumberVector.Factory<?, ?>) vmeta.getFactory();
+      }
+    }
+  }
+
+  @Override
+  public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
+    if (LOG.isDebuggingFinest()) {
+      LOG.debugFinest("Replacing NaN values with random values.");
+    }
+
+    updateMeta(objects.meta());
+    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
+    for (int j = 0; j < objects.metaLength(); j++) {
+      bundle.appendColumn(objects.meta(j), new ArrayList<>());
+    }
+    for (int i = 0; i < objects.dataLength(); i++) {
+      final Object[] row = objects.getRow(i);
+      for (int j = 0; j < densecols.length; j++) {
+        if (densecols[j] != null) {
+          row[j] = replaceNaN(densecols[j], row[j]);
+        }
+      }
+      bundle.appendSimple(row);
+    }
+    return bundle;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * Parameter to specify the distribution to sample replacement values from.
+     */
+    public static final OptionID REPLACEMENT_DISTRIBUTION = new OptionID("nanfilter.replacement", "Distribution to sample replacement values from.");
+
+    /**
+     * Distribution to generate replacement values with.
+     */
+    private Distribution dist;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      ObjectParameter<Distribution> distP = new ObjectParameter<>(REPLACEMENT_DISTRIBUTION, Distribution.class);
+      if (config.grab(distP)) {
+        dist = distP.instantiateClass(config);
+      }
+    }
+
+    @Override
+    protected ReplaceNaNWithRandomFilter makeInstance() {
+      return new ReplaceNaNWithRandomFilter(dist);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
index b8bf968b..8afa8290 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
@@ -76,7 +76,7 @@ public class ShuffleObjectsFilter implements ObjectFilter {
if (LOG.isDebugging()) {
LOG.debug("Shuffling the data set");
}
- final Random random = rnd.getRandom();
+ final Random random = rnd.getSingleThreadedRandom();
final int size = objects.dataLength();
final int[] offsets = new int[size];
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
index 8146bd5b..6ac046ec 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
@@ -35,8 +35,7 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -67,17 +66,17 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
- if (objects.dataLength() == 0) {
+ if(objects.dataLength() == 0) {
return objects;
}
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
- for (int r = 0; r < objects.metaLength(); r++) {
+ for(int r = 0; r < objects.metaLength(); r++) {
@SuppressWarnings("unchecked")
SimpleTypeInformation<Object> type = (SimpleTypeInformation<Object>) objects.meta(r);
@SuppressWarnings("unchecked")
final List<Object> column = (List<Object>) objects.getColumn(r);
- if (!getInputTypeRestriction().isAssignableFromType(type)) {
+ if(!getInputTypeRestriction().isAssignableFromType(type)) {
bundle.appendColumn(type, column);
continue;
}
@@ -98,16 +97,16 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
int[] odims = new int[vtype.getDimensionality() - dims.length];
{
int i = 0;
- for (int d = 0; d < vtype.getDimensionality(); d++) {
+ for(int d = 0; d < vtype.getDimensionality(); d++) {
boolean found = false;
- for (int j = 0; j < dims.length; j++) {
- if (dims[j] == d) {
+ for(int j = 0; j < dims.length; j++) {
+ if(dims[j] == d) {
found = true;
break;
}
}
- if (!found) {
- if (i >= odims.length) {
+ if(!found) {
+ if(i >= odims.length) {
throw new AbortException("Dimensionalities not proper!");
}
odims[i] = d;
@@ -116,15 +115,15 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
}
}
// Splitting scan.
- for (int i = 0; i < objects.dataLength(); i++) {
+ for(int i = 0; i < objects.dataLength(); i++) {
@SuppressWarnings("unchecked")
final V obj = (V) column.get(i);
double[] part1 = new double[dims.length];
double[] part2 = new double[obj.getDimensionality() - dims.length];
- for (int d = 0; d < dims.length; d++) {
+ for(int d = 0; d < dims.length; d++) {
part1[d] = obj.doubleValue(dims[d]);
}
- for (int d = 0; d < odims.length; d++) {
+ for(int d = 0; d < odims.length; d++) {
part2[d] = obj.doubleValue(odims[d]);
}
col1.add(factory.newNumberVector(part1));
@@ -142,7 +141,7 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
private TypeInformation getInputTypeRestriction() {
// Find maximum dimension requested
int m = dims[0];
- for (int i = 1; i < dims.length; i++) {
+ for(int i = 1; i < dims.length; i++) {
m = Math.max(dims[i], m);
}
return new VectorFieldTypeInformation<>(NumberVector.class, m, Integer.MAX_VALUE);
@@ -170,11 +169,11 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID);
- selectedAttributesP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
- if (config.grab(selectedAttributesP)) {
+ selectedAttributesP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT_LIST);
+ if(config.grab(selectedAttributesP)) {
List<Integer> dimensionList = selectedAttributesP.getValue();
dims = new int[dimensionList.size()];
- for (int i = 0; i < dimensionList.size(); i++) {
+ for(int i = 0; i < dimensionList.size(); i++) {
dims[i] = dimensionList.get(i).intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java
index 8fd46336..dd86cc5a 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java
@@ -123,7 +123,7 @@ public class AttributeWiseCDFNormalization<V extends NumberVector<?>> implements
// We iterate over dimensions, this kind of filter needs fast random
// access.
- Adapter<V> adapter = new Adapter<>();
+ Adapter adapter = new Adapter();
for (int d = 0; d < dim; d++) {
adapter.dim = d;
if (estimators.size() == 1) {
@@ -208,50 +208,56 @@ public class AttributeWiseCDFNormalization<V extends NumberVector<?>> implements
return result.toString();
}
- private static class Adapter<V extends NumberVector<?>> implements NumberArrayAdapter<Double, List<V>> {
+ /**
+ * Array adapter class for vectors.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private static class Adapter implements NumberArrayAdapter<Double, List<? extends NumberVector<?>>> {
/**
* Dimension to process.
*/
-
int dim;
@Override
- public int size(List<V> array) {
+ public int size(List<? extends NumberVector<?>> array) {
return array.size();
}
@Override
- public Double get(List<V> array, int off) throws IndexOutOfBoundsException {
+ public Double get(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return getDouble(array, off);
}
@Override
- public double getDouble(List<V> array, int off) throws IndexOutOfBoundsException {
+ public double getDouble(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).doubleValue(dim);
}
@Override
- public float getFloat(List<V> array, int off) throws IndexOutOfBoundsException {
+ public float getFloat(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).floatValue(dim);
}
@Override
- public int getInteger(List<V> array, int off) throws IndexOutOfBoundsException {
+ public int getInteger(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).intValue(dim);
}
@Override
- public short getShort(List<V> array, int off) throws IndexOutOfBoundsException {
+ public short getShort(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).shortValue(dim);
}
@Override
- public long getLong(List<V> array, int off) throws IndexOutOfBoundsException {
+ public long getLong(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).longValue(dim);
}
@Override
- public byte getByte(List<V> array, int off) throws IndexOutOfBoundsException {
+ public byte getByte(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).byteValue(dim);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
index 31f72660..47b6db5f 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
-
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -38,8 +36,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.AllOrNoneMustBeS
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.EqualSizeGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ListParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
/**
* Class to perform and undo a normalization on real vectors with respect to
@@ -97,24 +93,24 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
protected void prepareProcessInstance(V featureVector) {
// First object? Then initialize.
- if (minima.length == 0 || maxima.length == 0) {
+ if(minima.length == 0 || maxima.length == 0) {
int dimensionality = featureVector.getDimensionality();
minima = new double[dimensionality];
maxima = new double[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
maxima[i] = -Double.MAX_VALUE;
minima[i] = Double.MAX_VALUE;
}
}
- if (minima.length != featureVector.getDimensionality()) {
+ if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors differ in length.");
}
- for (int d = 0; d < featureVector.getDimensionality(); d++) {
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
final double val = featureVector.doubleValue(d);
- if (val > maxima[d]) {
+ if(val > maxima[d]) {
maxima[d] = val;
}
- if (val < minima[d]) {
+ if(val < minima[d]) {
minima[d] = val;
}
}
@@ -123,10 +119,10 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
protected V filterSingleObject(V featureVector) {
double[] values = new double[featureVector.getDimensionality()];
- if (minima.length != featureVector.getDimensionality()) {
+ if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors and given Minima/Maxima differ in length.");
}
- for (int d = 0; d < featureVector.getDimensionality(); d++) {
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
values[d] = (featureVector.doubleValue(d) - minima[d]) / factor(d);
}
return factory.newNumberVector(values);
@@ -134,13 +130,14 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
public V restore(V featureVector) throws NonNumericFeaturesException {
- if (featureVector.getDimensionality() == maxima.length && featureVector.getDimensionality() == minima.length) {
+ if(featureVector.getDimensionality() == maxima.length && featureVector.getDimensionality() == minima.length) {
double[] values = new double[featureVector.getDimensionality()];
- for (int d = 0; d < featureVector.getDimensionality(); d++) {
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
values[d] = (featureVector.doubleValue(d) * (factor(d)) + minima[d]);
}
return factory.newNumberVector(values);
- } else {
+ }
+ else {
throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + maxima.length);
}
}
@@ -166,10 +163,10 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
int[] row = linearEquationSystem.getRowPermutations();
int[] col = linearEquationSystem.getColumnPermutations();
- for (int i = 0; i < coeff.length; i++) {
- for (int r = 0; r < coeff.length; r++) {
+ for(int i = 0; i < coeff.length; i++) {
+ for(int r = 0; r < coeff.length; r++) {
double sum = 0.0;
- for (int c = 0; c < coeff[0].length; c++) {
+ for(int c = 0; c < coeff[0].length; c++) {
sum += minima[c] * coeff[row[r]][col[c]] / factor(c);
coeff[row[r]][col[c]] = coeff[row[r]][col[c]] / factor(c);
}
@@ -224,23 +221,16 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleListParameter minimaP = new DoubleListParameter(MINIMA_ID, true);
- if (config.grab(minimaP)) {
+ if(config.grab(minimaP)) {
minima = ArrayLikeUtil.toPrimitiveDoubleArray(minimaP.getValue());
}
DoubleListParameter maximaP = new DoubleListParameter(MAXIMA_ID, true);
- if (config.grab(maximaP)) {
+ if(config.grab(maximaP)) {
maxima = ArrayLikeUtil.toPrimitiveDoubleArray(maximaP.getValue());
}
- ArrayList<Parameter<?>> global_1 = new ArrayList<>();
- global_1.add(minimaP);
- global_1.add(maximaP);
- config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
-
- ArrayList<ListParameter<?>> global = new ArrayList<>();
- global.add(minimaP);
- global.add(maximaP);
- config.checkConstraint(new EqualSizeGlobalConstraint(global));
+ config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(minimaP, maximaP));
+ config.checkConstraint(new EqualSizeGlobalConstraint(minimaP, maximaP));
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
index 072d1a68..a24cae25 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
-
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -40,8 +38,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.AllOrNoneMustBeS
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.EqualSizeGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ListParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
/**
* Class to perform and undo a normalization on real vectors with respect to
@@ -186,6 +182,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> exten
/**
* Restore a single dimension.
+ *
* @param d Dimension
* @param val Value
* @return Normalized value
@@ -280,15 +277,8 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> exten
}
}
- ArrayList<Parameter<?>> global_1 = new ArrayList<>();
- global_1.add(meanP);
- global_1.add(stddevP);
- config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
-
- ArrayList<ListParameter<?>> global = new ArrayList<>();
- global.add(meanP);
- global.add(stddevP);
- config.checkConstraint(new EqualSizeGlobalConstraint(global));
+ config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(meanP, stddevP));
+ config.checkConstraint(new EqualSizeGlobalConstraint(meanP, stddevP));
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
index 94bcb32f..21263890 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
@@ -26,9 +26,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
import gnu.trove.iterator.TIntDoubleIterator;
import gnu.trove.map.TIntDoubleMap;
import gnu.trove.map.hash.TIntDoubleHashMap;
-
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -78,10 +75,10 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
@Override
protected void prepareProcessInstance(V featureVector) {
- BitSet b = featureVector.getNotNullMask();
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- if(featureVector.doubleValue(i) >= 0.0) {
- idf.put(i, idf.get(i) + 1);
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ if(featureVector.iterDoubleValue(it) >= 0.) {
+ final int dim = featureVector.iterDim(it);
+ idf.put(dim, idf.get(dim) + 1);
}
}
objcnt += 1;
@@ -100,20 +97,20 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
@Override
protected V filterSingleObject(V featureVector) {
- BitSet b = featureVector.getNotNullMask();
TIntDoubleHashMap vals = new TIntDoubleHashMap();
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- vals.put(i, (float) (featureVector.doubleValue(i) * idf.get(i)));
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ final int dim = featureVector.iterDim(it);
+ vals.put(dim, featureVector.iterDoubleValue(it) * idf.get(dim));
}
return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
@Override
public V restore(V featureVector) {
- BitSet b = featureVector.getNotNullMask();
TIntDoubleHashMap vals = new TIntDoubleHashMap();
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- vals.put(i, (float) (featureVector.doubleValue(i) / idf.get(i)));
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ final int dim = featureVector.iterDim(it);
+ vals.put(dim, featureVector.iterDoubleValue(it) / idf.get(dim));
}
return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
index 5110d6fe..09b73aa4 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
@@ -24,9 +24,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
*/
import gnu.trove.map.hash.TIntDoubleHashMap;
-
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -58,17 +55,17 @@ public class TFIDFNormalization<V extends SparseNumberVector<?>> extends Inverse
@Override
protected V filterSingleObject(V featureVector) {
- BitSet b = featureVector.getNotNullMask();
double sum = 0.0;
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- sum += featureVector.doubleValue(i);
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ sum += featureVector.iterDoubleValue(it);
}
if(sum <= 0) {
sum = 1.0;
}
TIntDoubleHashMap vals = new TIntDoubleHashMap();
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- vals.put(i, (float) (featureVector.doubleValue(i) / sum * idf.get(i)));
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ final int dim = featureVector.iterDim(it);
+ vals.put(dim, featureVector.iterDoubleValue(it) / sum * idf.get(dim));
}
return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
index 742eb977..462db9eb 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -62,7 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberVector<?>> implements ObjectFilter {
/**
- * r: the dimension to which the data should be reduced
+ * The dimensionality to which the data should be reduced.
*/
protected int tdim;
@@ -79,23 +79,23 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
final int dataLength = objects.dataLength();
- if (dataLength == 0) {
+ if(dataLength == 0) {
return objects;
}
List<? extends ClassLabel> classcolumn = null;
// First of all, identify a class label column.
- for (int r = 0; r < objects.metaLength(); r++) {
+ for(int r = 0; r < objects.metaLength(); r++) {
SimpleTypeInformation<?> type = objects.meta(r);
List<?> column = objects.getColumn(r);
- if (TypeUtil.CLASSLABEL.isAssignableFromType(type)) {
+ if(TypeUtil.CLASSLABEL.isAssignableFromType(type)) {
@SuppressWarnings("unchecked")
final List<? extends ClassLabel> castcolumn = (List<? extends ClassLabel>) column;
classcolumn = castcolumn;
break;
}
}
- if (classcolumn == null) {
+ if(classcolumn == null) {
getLogger().warning("No class label column found (try " + ClassLabelFilter.class.getSimpleName() + ") -- cannot run " + this.getClass().getSimpleName());
return objects;
}
@@ -103,10 +103,10 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
boolean somesuccess = false;
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
// Secondly, look for columns to train the projection on.
- for (int r = 0; r < objects.metaLength(); r++) {
+ for(int r = 0; r < objects.metaLength(); r++) {
SimpleTypeInformation<?> type = objects.meta(r);
List<?> column = objects.getColumn(r);
- if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
+ if(!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
bundle.appendColumn(type, column);
continue;
}
@@ -117,8 +117,8 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
NumberVector.Factory<V, ?> factory = (NumberVector.Factory<V, ?>) vtype.getFactory();
int dim = vtype.getDimensionality();
- if (tdim > dim) {
- if (getLogger().isVerbose()) {
+ if(tdim > dim) {
+ if(getLogger().isVerbose()) {
getLogger().verbose("Setting projection dimension to original dimension: projection dimension: " + tdim + " larger than original dimension: " + dim);
}
tdim = dim;
@@ -126,21 +126,22 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
try {
Matrix proj = computeProjectionMatrix(vectorcolumn, classcolumn, dim);
- for (int i = 0; i < dataLength; i++) {
+ for(int i = 0; i < dataLength; i++) {
final Vector pv = proj.times(vectorcolumn.get(i).getColumnVector());
V filteredObj = factory.newNumberVector(pv, ArrayLikeUtil.VECTORADAPTER);
vectorcolumn.set(i, filteredObj);
}
bundle.appendColumn(convertedType(type, factory), column);
somesuccess = true;
- } catch (Exception e) {
+ }
+ catch(Exception e) {
getLogger().error("Projection failed -- continuing with unprojected data!", e);
bundle.appendColumn(type, column);
continue;
}
}
- if (!somesuccess) {
+ if(!somesuccess) {
getLogger().warning("No vector field of fixed dimensionality found.");
return objects;
}
@@ -179,15 +180,15 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
* Partition the bundle based on the class label.
*
* @param classcolumn
- * @return
+ * @return Partitioned data set.
*/
protected <O> Map<O, TIntList> partition(List<? extends O> classcolumn) {
Map<O, TIntList> classes = new HashMap<>();
Iterator<? extends O> iter = classcolumn.iterator();
- for (int i = 0; iter.hasNext(); i++) {
+ for(int i = 0; iter.hasNext(); i++) {
O lbl = iter.next();
TIntList ids = classes.get(lbl);
- if (ids == null) {
+ if(ids == null) {
ids = new TIntArrayList();
classes.put(lbl, ids);
}
@@ -220,9 +221,9 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter dimP = new IntParameter(P_ID, 2);
- dimP.addConstraint(new GreaterConstraint(0));
+ dimP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(dimP)) {
+ if(config.grab(dimP)) {
tdim = dimP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
index 720c88df..e6d0d15d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
@@ -35,8 +35,7 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -150,11 +149,11 @@ public class NumberVectorFeatureSelectionFilter<V extends NumberVector<?>> exten
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID);
- selectedAttributesP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
- if (config.grab(selectedAttributesP)) {
+ selectedAttributesP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT_LIST);
+ if(config.grab(selectedAttributesP)) {
selectedAttributes = new BitSet();
List<Integer> dimensionList = selectedAttributesP.getValue();
- for (int d : dimensionList) {
+ for(int d : dimensionList) {
selectedAttributes.set(d);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
index 9b1ddbff..4086270c 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
@@ -36,7 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.Util;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -106,7 +106,7 @@ public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>>
*/
void initializeRandomAttributes(SimpleTypeInformation<V> in) {
int d = ((VectorFieldTypeInformation<V>) in).getDimensionality();
- selectedAttributes = Util.randomBitSet(k, d, rnd.getRandom());
+ selectedAttributes = Util.randomBitSet(k, d, rnd.getSingleThreadedRandom());
}
/**
@@ -156,12 +156,12 @@ public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>>
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(NUMBER_SELECTED_ATTRIBUTES_ID, 1);
- kP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue().intValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}