summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/filter/normalization')
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java45
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java6
9 files changed, 97 insertions, 85 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
index 5b6c02e3..2dcf09f8 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
@@ -23,9 +23,10 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
-import de.lmu.ifi.dbs.elki.datasource.filter.AbstractConversionFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorConversionFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
/**
@@ -35,7 +36,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
*
* @param <O> Object type processed
*/
-public abstract class AbstractNormalization<O> extends AbstractConversionFilter<O, O> implements Normalization<O> {
+public abstract class AbstractNormalization<O extends NumberVector<?>> extends AbstractVectorConversionFilter<O, O> implements Normalization<O> {
/**
* Initializes the option handler and the parameter map.
*/
@@ -45,11 +46,12 @@ public abstract class AbstractNormalization<O> extends AbstractConversionFilter<
@Override
protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<O> in) {
+ initializeOutputType(in);
return in;
}
@Override
- public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) throws NonNumericFeaturesException {
+ public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) {
return super.filter(objects);
}
@@ -61,8 +63,6 @@ public abstract class AbstractNormalization<O> extends AbstractConversionFilter<
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
- result.append("normalization class: ").append(getClass().getName());
- return result.toString();
+ return getClass().getName();
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
index c1524788..a1e2c55e 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
@@ -23,9 +23,10 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
-import de.lmu.ifi.dbs.elki.datasource.filter.AbstractStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
/**
@@ -35,7 +36,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
*
* @param <O> Object type processed
*/
-public abstract class AbstractStreamNormalization<O> extends AbstractStreamConversionFilter<O, O> implements Normalization<O> {
+public abstract class AbstractStreamNormalization<O extends NumberVector<?>> extends AbstractVectorStreamConversionFilter<O, O> implements Normalization<O> {
/**
* Initializes the option handler and the parameter map.
*/
@@ -45,11 +46,12 @@ public abstract class AbstractStreamNormalization<O> extends AbstractStreamConve
@Override
protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<O> in) {
+ initializeOutputType(in);
return in;
}
@Override
- public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) throws NonNumericFeaturesException {
+ public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) {
return super.filter(objects);
}
@@ -61,7 +63,7 @@ public abstract class AbstractStreamNormalization<O> extends AbstractStreamConve
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append("normalization class: ").append(getClass().getName());
return result.toString();
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
index c0f2a955..f5e24bca 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
*
* @apiviz.uses NumberVector
*/
-public class AttributeWiseErfNormalization<O extends NumberVector<O, ?>> extends AbstractNormalization<O> {
+public class AttributeWiseErfNormalization<O extends NumberVector<?>> extends AbstractNormalization<O> {
/**
* Constructor.
*/
@@ -47,7 +47,7 @@ public class AttributeWiseErfNormalization<O extends NumberVector<O, ?>> extends
}
@Override
- public O restore(O featureVector) throws NonNumericFeaturesException {
+ public O restore(O featureVector) {
throw new UnsupportedOperationException("Not implemented yet.");
}
@@ -55,9 +55,9 @@ public class AttributeWiseErfNormalization<O extends NumberVector<O, ?>> extends
protected O filterSingleObject(O obj) {
double[] val = new double[obj.getDimensionality()];
for(int i = 0; i < val.length; i++) {
- val[i] = NormalDistribution.erf(obj.doubleValue(i + 1));
+ val[i] = NormalDistribution.erf(obj.doubleValue(i));
}
- return obj.newNumberVector(val);
+ return factory.newNumberVector(val);
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
index 4cf3c606..62c0bf12 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
@@ -50,16 +50,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
* @apiviz.uses NumberVector
*/
// TODO: extract superclass AbstractAttributeWiseNormalization
-public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> extends AbstractNormalization<V> {
+public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends AbstractNormalization<V> {
/**
* Parameter for minimum.
*/
- public static final OptionID MINIMA_ID = OptionID.getOrCreateOptionID("normalize.min", "a comma separated concatenation of the minimum values in each dimension that are mapped to 0. If no value is specified, the minimum value of the attribute range in this dimension will be taken.");
+ public static final OptionID MINIMA_ID = new OptionID("normalize.min", "a comma separated concatenation of the minimum values in each dimension that are mapped to 0. If no value is specified, the minimum value of the attribute range in this dimension will be taken.");
/**
* Parameter for maximum.
*/
- public static final OptionID MAXIMA_ID = OptionID.getOrCreateOptionID("normalize.max", "a comma separated concatenation of the maximum values in each dimension that are mapped to 1. If no value is specified, the maximum value of the attribute range in this dimension will be taken.");
+ public static final OptionID MAXIMA_ID = new OptionID("normalize.max", "a comma separated concatenation of the maximum values in each dimension that are mapped to 1. If no value is specified, the maximum value of the attribute range in this dimension will be taken.");
/**
* Stores the maximum in each dimension.
@@ -103,13 +103,13 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors differ in length.");
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
final double val = featureVector.doubleValue(d);
- if(val > maxima[d - 1]) {
- maxima[d - 1] = val;
+ if(val > maxima[d]) {
+ maxima[d] = val;
}
- if(val < minima[d - 1]) {
- minima[d - 1] = val;
+ if(val < minima[d]) {
+ minima[d] = val;
}
}
}
@@ -120,20 +120,20 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors and given Minima/Maxima differ in length.");
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- values[d - 1] = (featureVector.doubleValue(d) - minima[d - 1]) / factor(d);
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ values[d] = (featureVector.doubleValue(d) - minima[d]) / factor(d);
}
- return featureVector.newNumberVector(values);
+ return factory.newNumberVector(values);
}
@Override
public V restore(V featureVector) throws NonNumericFeaturesException {
if(featureVector.getDimensionality() == maxima.length && featureVector.getDimensionality() == minima.length) {
double[] values = new double[featureVector.getDimensionality()];
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- values[d - 1] = (featureVector.doubleValue(d) * (factor(d)) + minima[d - 1]);
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ values[d] = (featureVector.doubleValue(d) * (factor(d)) + minima[d]);
}
- return featureVector.newNumberVector(values);
+ return factory.newNumberVector(values);
}
else {
throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + maxima.length);
@@ -151,7 +151,7 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
* @return a factor for normalization in a certain dimension
*/
private double factor(int dimension) {
- return maxima[dimension - 1] != minima[dimension - 1] ? maxima[dimension - 1] - minima[dimension - 1] : maxima[dimension - 1] != 0 ? maxima[dimension - 1] : 1;
+ return maxima[dimension] > minima[dimension] ? maxima[dimension] - minima[dimension] : maxima[dimension] > 0 ? maxima[dimension] : 1;
}
@Override
@@ -161,13 +161,12 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
int[] row = linearEquationSystem.getRowPermutations();
int[] col = linearEquationSystem.getColumnPermutations();
- // noinspection ForLoopReplaceableByForEach
for(int i = 0; i < coeff.length; i++) {
for(int r = 0; r < coeff.length; r++) {
double sum = 0.0;
for(int c = 0; c < coeff[0].length; c++) {
- sum += minima[c] * coeff[row[r]][col[c]] / factor(c + 1);
- coeff[row[r]][col[c]] = coeff[row[r]][col[c]] / factor(c + 1);
+ sum += minima[c] * coeff[row[r]][col[c]] / factor(c);
+ coeff[row[r]][col[c]] = coeff[row[r]][col[c]] / factor(c);
}
rhs[row[r]] = rhs[row[r]] + sum;
}
@@ -179,11 +178,11 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append("normalization class: ").append(getClass().getName());
- result.append("\n");
+ result.append('\n');
result.append("normalization minima: ").append(FormatUtil.format(minima));
- result.append("\n");
+ result.append('\n');
result.append("normalization maxima: ").append(FormatUtil.format(maxima));
return result.toString();
}
@@ -200,7 +199,7 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* Stores the maximum in each dimension.
*/
@@ -223,7 +222,7 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
maxima = ArrayLikeUtil.toPrimitiveDoubleArray(maximaP.getValue());
}
- ArrayList<Parameter<?, ?>> global_1 = new ArrayList<Parameter<?, ?>>();
+ ArrayList<Parameter<?>> global_1 = new ArrayList<Parameter<?>>();
global_1.add(minimaP);
global_1.add(maximaP);
config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
index 52a0499f..0671231d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
@@ -53,21 +53,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
* @apiviz.uses NumberVector
*/
// TODO: extract superclass AbstractAttributeWiseNormalization
-public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> extends AbstractNormalization<V> {
+public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> extends AbstractNormalization<V> {
/**
* Class logger.
*/
- public static final Logging logger = Logging.getLogger(AttributeWiseVarianceNormalization.class);
+ private static final Logging LOG = Logging.getLogger(AttributeWiseVarianceNormalization.class);
/**
* Parameter for means.
*/
- public static final OptionID MEAN_ID = OptionID.getOrCreateOptionID("normalize.mean", "a comma separated concatenation of the mean values in each dimension that are mapped to 0. If no value is specified, the mean value of the attribute range in this dimension will be taken.");
+ public static final OptionID MEAN_ID = new OptionID("normalize.mean", "a comma separated concatenation of the mean values in each dimension that are mapped to 0. If no value is specified, the mean value of the attribute range in this dimension will be taken.");
/**
* Parameter for stddevs.
*/
- public static final OptionID STDDEV_ID = OptionID.getOrCreateOptionID("normalize.stddev", "a comma separated concatenation of the standard deviations in each dimension that are scaled to 1. If no value is specified, the standard deviation of the attribute range in this dimension will be taken.");
+ public static final OptionID STDDEV_ID = new OptionID("normalize.stddev", "a comma separated concatenation of the standard deviations in each dimension that are scaled to 1. If no value is specified, the standard deviation of the attribute range in this dimension will be taken.");
/**
* Stores the mean in each dimension.
@@ -108,14 +108,14 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
int dimensionality = featureVector.getDimensionality();
mvs = MeanVariance.newArray(dimensionality);
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- mvs[d - 1].put(featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ mvs[d].put(featureVector.doubleValue(d));
}
}
@Override
protected void prepareComplete() {
- StringBuffer buf = logger.isVerbose() ? new StringBuffer() : null;
+ StringBuilder buf = LOG.isVerbose() ? new StringBuilder() : null;
final int dimensionality = mvs.length;
mean = new double[dimensionality];
stddev = new double[dimensionality];
@@ -134,33 +134,40 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
}
mvs = null;
if(buf != null) {
- logger.debugFine(buf.toString());
+ LOG.debugFine(buf.toString());
}
}
@Override
protected V filterSingleObject(V featureVector) {
double[] values = new double[featureVector.getDimensionality()];
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- values[d - 1] = normalize(d - 1, featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ values[d] = normalize(d, featureVector.doubleValue(d));
}
- return featureVector.newNumberVector(values);
+ return factory.newNumberVector(values);
}
@Override
public V restore(V featureVector) throws NonNumericFeaturesException {
if(featureVector.getDimensionality() == mean.length) {
double[] values = new double[featureVector.getDimensionality()];
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- values[d - 1] = restore(d - 1, featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ values[d] = restore(d, featureVector.doubleValue(d));
}
- return featureVector.newNumberVector(values);
+ return factory.newNumberVector(values);
}
else {
throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + mean.length);
}
}
+ /**
+ * Normalize a single dimension.
+ *
+ * @param d Dimension
+ * @param val Value
+ * @return Normalized value
+ */
private double normalize(int d, double val) {
if(mean.length == 1) {
return (val - mean[0]) / stddev[0];
@@ -170,6 +177,12 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
}
}
+ /**
+ * Restore a single dimension.
+ * @param d Dimension
+ * @param val Value
+ * @return Restored value
+ */
private double restore(int d, double val) {
if(mean.length == 1) {
return (val * stddev[0]) + mean[0];
@@ -208,11 +221,11 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append("normalization class: ").append(getClass().getName());
- result.append("\n");
+ result.append('\n');
result.append("normalization means: ").append(FormatUtil.format(mean));
- result.append("\n");
+ result.append('\n');
result.append("normalization stddevs: ").append(FormatUtil.format(stddev));
return result.toString();
@@ -225,7 +238,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* Stores the mean in each dimension.
*/
@@ -255,7 +268,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
}
}
- ArrayList<Parameter<?, ?>> global_1 = new ArrayList<Parameter<?, ?>>();
+ ArrayList<Parameter<?>> global_1 = new ArrayList<Parameter<?>>();
global_1.add(meanP);
global_1.add(stddevP);
config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
index 9350426b..24f3a850 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
@@ -40,15 +40,17 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
* @author Erich Schubert
*
* @apiviz.uses SparseNumberVector
+ *
+ * @param <V> Vector type
*/
-public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<V, ?>> extends AbstractNormalization<V> {
+public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<?>> extends AbstractNormalization<V> {
/**
- * The IDF storage
+ * The IDF storage.
*/
TIntDoubleMap idf = new TIntDoubleHashMap();
/**
- * The number of objects in the dataset
+ * The number of objects in the dataset.
*/
int objcnt = 0;
@@ -73,13 +75,7 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
BitSet b = featureVector.getNotNullMask();
for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
if(featureVector.doubleValue(i) >= 0.0) {
- Number c = idf.get(i);
- if(c == null) {
- idf.put(i, 1);
- }
- else {
- idf.put(i, c.intValue() + 1);
- }
+ idf.put(i, idf.get(i) + 1);
}
}
objcnt += 1;
@@ -103,7 +99,7 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
vals.put(i, (float) (featureVector.doubleValue(i) * idf.get(i)));
}
- return featureVector.newNumberVector(vals, featureVector.getDimensionality());
+ return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
@Override
@@ -113,7 +109,7 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
vals.put(i, (float) (featureVector.doubleValue(i) / idf.get(i)));
}
- return featureVector.newNumberVector(vals, featureVector.getDimensionality());
+ return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
index 2edeebf9..457cc6eb 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
@@ -42,14 +42,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @param <V> vector type
*/
-public class LengthNormalization<V extends NumberVector<V, ?>> extends AbstractStreamNormalization<V> {
+public class LengthNormalization<V extends NumberVector<?>> extends AbstractStreamNormalization<V> {
/**
- * Norm to use
+ * Norm to use.
*/
DoubleNorm<? super V> norm;
/**
- * Constructor
+ * Constructor.
*
* @param norm Norm to use
*/
@@ -61,11 +61,11 @@ public class LengthNormalization<V extends NumberVector<V, ?>> extends AbstractS
@Override
protected V filterSingleObject(V featureVector) {
final double d = norm.doubleNorm(featureVector);
- return featureVector.newNumberVector(featureVector.getColumnVector().timesEquals(1 / d).getArrayRef());
+ return factory.newNumberVector(featureVector.getColumnVector().timesEquals(1 / d).getArrayRef());
}
@Override
- public V restore(V featureVector) throws NonNumericFeaturesException {
+ public V restore(V featureVector) {
throw new UnsupportedOperationException();
}
@@ -87,14 +87,14 @@ public class LengthNormalization<V extends NumberVector<V, ?>> extends AbstractS
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
- * Option ID for normalization norm
+ * Option ID for normalization norm.
*/
- public static final OptionID NORM_ID = OptionID.getOrCreateOptionID("normalization.norm", "Norm (length function) to use for computing the vector length.");
+ public static final OptionID NORM_ID = new OptionID("normalization.norm", "Norm (length function) to use for computing the vector length.");
/**
- * Norm to use
+ * Norm to use.
*/
DoubleNorm<? super V> norm;
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
index be8c1166..519a3743 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
@@ -65,10 +65,10 @@ public class RankTieNormalization implements ObjectFilter {
continue;
}
@SuppressWarnings("unchecked")
- final List<? extends NumberVector<?, ?>> castColumn = (List<? extends NumberVector<?, ?>>) column;
+ final List<? extends NumberVector<?>> castColumn = (List<? extends NumberVector<?>>) column;
// Get the replacement type information
- final int dim = ((VectorFieldTypeInformation<?>) type).dimensionality();
- final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<IntegerVector>(IntegerVector.class, dim, IntegerVector.STATIC);
+ final int dim = ((VectorFieldTypeInformation<?>) type).getDimensionality();
+ final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<IntegerVector>(IntegerVector.STATIC, dim);
// Output vectors
int[][] posvecs = new int[len][dim];
@@ -78,7 +78,7 @@ public class RankTieNormalization implements ObjectFilter {
for(int i = 0; i < sorter.length; i++) {
sorter[i] = new DoubleIntPair(Double.NaN, -1);
}
- for(int d = 1; d <= dim; d++) {
+ for(int d = 0; d < dim; d++) {
// fill array
for(int i = 0; i < sorter.length; i++) {
sorter[i].first = castColumn.get(i).doubleValue(d);
@@ -90,12 +90,12 @@ public class RankTieNormalization implements ObjectFilter {
for(int sta = 0; sta < sorter.length;) {
// Compute ties
int end = sta + 1;
- while(end < sorter.length && sorter[sta].first == sorter[end].first) {
+ while(end < sorter.length && !(sorter[sta].first < sorter[end].first)) {
end++;
}
final int pos = (sta + end - 1);
for(int i = sta; i < end; i++) {
- posvecs[sorter[i].second][d - 1] = pos;
+ posvecs[sorter[i].second][d] = pos;
}
sta = end;
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
index 031cfb4c..5d203c6b 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
@@ -39,8 +39,10 @@ import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
* Restore will only undo the IDF part of the normalization!
*
* @author Erich Schubert
+ *
+ * @param <V> Vector type
*/
-public class TFIDFNormalization<V extends SparseNumberVector<V, ?>> extends InverseDocumentFrequencyNormalization<V> {
+public class TFIDFNormalization<V extends SparseNumberVector<?>> extends InverseDocumentFrequencyNormalization<V> {
/**
* Constructor.
*/
@@ -62,6 +64,6 @@ public class TFIDFNormalization<V extends SparseNumberVector<V, ?>> extends Inve
for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
vals.put(i, (float) (featureVector.doubleValue(i) / sum * idf.get(i)));
}
- return featureVector.newNumberVector(vals, featureVector.getDimensionality());
+ return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
} \ No newline at end of file