summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java244
1 files changed, 232 insertions, 12 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java b/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java
index 662767b7..925af2c2 100644
--- a/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.database.relation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,11 +23,19 @@ package de.lmu.ifi.dbs.elki.database.relation;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.AbstractCollection;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+
import de.lmu.ifi.dbs.elki.data.FeatureVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
/**
* Utility functions for handling database relation.
@@ -35,6 +43,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
* @author Erich Schubert
*
* @apiviz.uses Relation oneway
+ * @apiviz.has CollectionFromRelation
+ * @apiviz.has RelationObjectIterator
*/
public final class RelationUtil {
/**
@@ -54,7 +64,8 @@ public final class RelationUtil {
public static <V extends FeatureVector<?>> VectorFieldTypeInformation<V> assumeVectorField(Relation<V> relation) {
try {
return ((VectorFieldTypeInformation<V>) relation.getDataTypeInformation());
- } catch (Exception e) {
+ }
+ catch(Exception e) {
throw new UnsupportedOperationException("Expected a vector field, got type information: " + relation.getDataTypeInformation().toString(), e);
}
}
@@ -64,13 +75,12 @@ public final class RelationUtil {
*
* @param relation relation
* @param <V> Vector type
- * @param <N> Number type
* @return Number vector factory of the relation's vector field type
*/
- public static <V extends NumberVector<? extends N>, N extends Number> NumberVector.Factory<V, N> getNumberVectorFactory(Relation<V> relation) {
+ public static <V extends NumberVector> NumberVector.Factory<V> getNumberVectorFactory(Relation<V> relation) {
final VectorFieldTypeInformation<V> type = assumeVectorField(relation);
@SuppressWarnings("unchecked")
- final NumberVector.Factory<V, N> factory = (NumberVector.Factory<V, N>) type.getFactory();
+ final NumberVector.Factory<V> factory = (NumberVector.Factory<V>) type.getFactory();
return factory;
}
@@ -83,12 +93,61 @@ public final class RelationUtil {
public static int dimensionality(Relation<? extends FeatureVector<?>> relation) {
try {
return ((VectorFieldTypeInformation<? extends FeatureVector<?>>) relation.getDataTypeInformation()).getDimensionality();
- } catch (Exception e) {
+ }
+ catch(Exception e) {
return -1;
}
}
/**
+ * Computes the per-dimension minimum and maximum over all vectors of a
+ * relation.
+ *
+ * Dimensions for which no smaller/larger value was seen keep their initial
+ * sentinel ({@code Double.MAX_VALUE} for the minimum,
+ * {@code -Double.MAX_VALUE} for the maximum).
+ *
+ * @param relation the relation holding the vectors
+ * @return array of two vectors: index 0 holds the minima, index 1 the maxima
+ */
+ public static double[][] computeMinMax(Relation<? extends NumberVector> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
+ final double[] mins = new double[dim];
+ final double[] maxs = new double[dim];
+ java.util.Arrays.fill(mins, Double.MAX_VALUE);
+ java.util.Arrays.fill(maxs, -Double.MAX_VALUE);
+ for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ final NumberVector vec = relation.get(it);
+ for(int d = 0; d < dim; d++) {
+ final double val = vec.doubleValue(d);
+ // Plain comparisons (not Math.min/max) so NaN values are ignored.
+ if(val < mins[d]) {
+ mins[d] = val;
+ }
+ if(val > maxs[d]) {
+ maxs[d] = val;
+ }
+ }
+ }
+ return new double[][] { mins, maxs };
+ }
+
+ /**
+ * Computes, for each dimension, the mean squared deviation of the selected
+ * objects from a reference vector.
+ *
+ * @param database the relation storing the objects
+ * @param centroid the centroid or reference vector; its dimensionality
+ *        determines the length of the result
+ * @param ids the ids of the objects to include
+ * @return the variances in each dimension of the specified objects
+ */
+ public static double[] variances(Relation<? extends NumberVector> database, NumberVector centroid, DBIDs ids) {
+ final int count = ids.size();
+ final int dim = centroid.getDimensionality();
+ final double[] result = new double[dim];
+
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final NumberVector vec = database.get(it);
+ for(int d = 0; d < dim; d++) {
+ final double delta = vec.doubleValue(d) - centroid.doubleValue(d);
+ // Divide each term individually, as the accumulation order affects
+ // floating point rounding.
+ result[d] += delta * delta / count;
+ }
+ }
+ return result;
+ }
+
+ /**
* <em>Copy</em> a relation into a double matrix.
*
* This is <em>not recommended</em> unless you need to modify the data
@@ -98,15 +157,15 @@ public final class RelationUtil {
* @param ids IDs, with well-defined order (i.e. array)
* @return Data matrix
*/
- public static double[][] relationAsMatrix(final Relation<? extends NumberVector<?>> relation, ArrayDBIDs ids) {
+ public static double[][] relationAsMatrix(final Relation<? extends NumberVector> relation, ArrayDBIDs ids) {
final int rowdim = ids.size();
final int coldim = dimensionality(relation);
double[][] mat = new double[rowdim][coldim];
int r = 0;
- for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance(), r++) {
- NumberVector<?> vec = relation.get(iter);
+ for(DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance(), r++) {
+ NumberVector vec = relation.get(iter);
double[] row = mat[r];
- for (int c = 0; c < coldim; c++) {
+ for(int c = 0; c < coldim; c++) {
row[c] = vec.doubleValue(c);
}
}
@@ -124,10 +183,171 @@ public final class RelationUtil {
*/
public static <V extends FeatureVector<?>> String getColumnLabel(Relation<? extends V> rel, int col) {
String lbl = assumeVectorField(rel).getLabel(col);
- if (lbl != null) {
+ if(lbl != null) {
return lbl;
- } else {
+ }
+ else {
return "Column " + col;
}
}
+
+ /**
+ * Unchecked cast of a relation from one vector type parameter to another,
+ * needed in some situations because of Generics.
+ *
+ * The same relation instance is returned; no data is copied or converted.
+ *
+ * @param <V> Base vector type
+ * @param <T> Derived vector type (is actually V, too)
+ * @param database Relation to cast
+ * @return The given relation, typed as {@code Relation<V>}
+ */
+ @SuppressWarnings("unchecked")
+ public static <V extends NumberVector, T extends NumberVector> Relation<V> relationUglyVectorCast(Relation<T> database) {
+ final Relation<V> recast = (Relation<V>) database;
+ return recast;
+ }
+
+ /**
+ * Iterator adapter that walks a DBID iterator and returns the corresponding
+ * objects retrieved from a relation.
+ *
+ * Removal is not supported.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type returned by the iterator
+ */
+ public static class RelationObjectIterator<O> implements Iterator<O> {
+ /**
+ * The underlying DBID iterator.
+ */
+ final DBIDIter iter;
+
+ /**
+ * The relation the objects are retrieved from.
+ */
+ final Relation<? extends O> database;
+
+ /**
+ * Full Constructor.
+ *
+ * @param iter DBID iterator selecting the objects to return
+ * @param database Relation to retrieve the objects from
+ */
+ public RelationObjectIterator(DBIDIter iter, Relation<? extends O> database) {
+ super();
+ this.iter = iter;
+ this.database = database;
+ }
+
+ /**
+ * Simplified constructor, iterating over all DBIDs of the relation.
+ *
+ * @param database Relation to iterate
+ */
+ public RelationObjectIterator(Relation<? extends O> database) {
+ super();
+ this.database = database;
+ this.iter = database.iterDBIDs();
+ }
+
+ @Override
+ public boolean hasNext() {
+ return iter.valid();
+ }
+
+ /**
+ * Return the object at the current position and advance.
+ *
+ * @return Next object
+ * @throws java.util.NoSuchElementException if the iteration is exhausted
+ */
+ @Override
+ public O next() {
+ // The Iterator contract requires NoSuchElementException once exhausted,
+ // instead of querying the relation with an invalid DBID iterator.
+ if(!iter.valid()) {
+ throw new java.util.NoSuchElementException();
+ }
+ O ret = database.get(iter);
+ iter.advance();
+ return ret;
+ }
+
+ /**
+ * Not supported.
+ *
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /**
+ * Collection view of a relation. Objects are fetched from the relation while
+ * iterating, and the size is delegated to the relation.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ */
+ public static class CollectionFromRelation<O> extends AbstractCollection<O> implements Collection<O> {
+ /**
+ * The relation backing this view.
+ */
+ Relation<? extends O> db;
+
+ /**
+ * Constructor.
+ *
+ * @param db Relation to wrap
+ */
+ public CollectionFromRelation(Relation<? extends O> db) {
+ this.db = db;
+ }
+
+ @Override
+ public int size() {
+ return db.size();
+ }
+
+ @Override
+ public Iterator<O> iterator() {
+ final Iterator<O> it = new RelationObjectIterator<O>(db);
+ return it;
+ }
+ }
+
+ /**
+ * Comparator ordering object references by their value in a double
+ * relation, smallest value first.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class AscendingByDoubleRelation implements Comparator<DBIDRef> {
+ /**
+ * Relation providing the sort keys.
+ */
+ private final DoubleRelation scores;
+
+ /**
+ * Constructor.
+ *
+ * @param scores Scores for sorting
+ */
+ public AscendingByDoubleRelation(DoubleRelation scores) {
+ this.scores = scores;
+ }
+
+ @Override
+ public int compare(DBIDRef id1, DBIDRef id2) {
+ final double first = scores.doubleValue(id1);
+ final double second = scores.doubleValue(id2);
+ return Double.compare(first, second);
+ }
+ }
+
+ /**
+ * Comparator ordering object references by their value in a double
+ * relation, largest value first.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class DescendingByDoubleRelation implements Comparator<DBIDRef> {
+ /**
+ * Relation providing the sort keys.
+ */
+ private final DoubleRelation scores;
+
+ /**
+ * Constructor.
+ *
+ * @param scores Scores for sorting
+ */
+ public DescendingByDoubleRelation(DoubleRelation scores) {
+ this.scores = scores;
+ }
+
+ @Override
+ public int compare(DBIDRef id1, DBIDRef id2) {
+ // Arguments are swapped relative to the ascending order.
+ final double second = scores.doubleValue(id2);
+ final double first = scores.doubleValue(id1);
+ return Double.compare(second, first);
+ }
+ }
}