summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/data/BitVector.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/data/BitVector.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/BitVector.java309
1 files changed, 226 insertions, 83 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/data/BitVector.java b/src/de/lmu/ifi/dbs/elki/data/BitVector.java
index de0edc2d..0ec4a081 100644
--- a/src/de/lmu/ifi/dbs/elki/data/BitVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/BitVector.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.data;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,25 +23,28 @@ package de.lmu.ifi.dbs.elki.data;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.iterator.TIntDoubleIterator;
+import gnu.trove.map.TIntDoubleMap;
+
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.BitSet;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
-import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.utilities.io.ByteBufferSerializer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/**
- * Provides a BitVector wrapping a BitSet.
+ * Vector using a dense bit set encoding, based on {@code long[]} storage.
*
* @author Arthur Zimek
*
* @apiviz.composedOf Bit
*/
-public class BitVector extends AbstractNumberVector<Bit> {
+public class BitVector extends AbstractNumberVector implements SparseNumberVector {
/**
* Static instance.
*/
@@ -55,71 +58,109 @@ public class BitVector extends AbstractNumberVector<Bit> {
/**
* Storing the bits.
*/
- private final BitSet bits;
+ private final long[] bits;
/**
* Dimensionality of this bit vector.
*/
- private final int dimensionality;
+ private int dimensionality;
/**
- * Provides a new BitVector corresponding to the specified bits and of the
+ * Create a new BitVector corresponding to the specified bits and of the
* specified dimensionality.
*
- * @param bits the bits to be set in this BitVector
+ * @param bits the bits to be set in this BitVector.
* @param dimensionality the dimensionality of this BitVector
- * @throws IllegalArgumentException if the specified dimensionality is to
- * small to match the given BitSet
*/
- public BitVector(BitSet bits, int dimensionality) throws IllegalArgumentException {
- if (dimensionality < bits.length()) {
- throw new IllegalArgumentException("Specified dimensionality " + dimensionality + " is to low for specified BitSet of length " + bits.length());
- }
+ public BitVector(long[] bits, int dimensionality) {
this.bits = bits;
this.dimensionality = dimensionality;
}
- /**
- * Provides a new BitVector corresponding to the bits in the given array.
- *
- * @param bits an array of bits specifying the bits in this bit vector
- */
- public BitVector(Bit[] bits) {
- this.bits = new BitSet(bits.length);
- for (int i = 0; i < bits.length; i++) {
- this.bits.set(i, bits[i].bitValue());
- }
- this.dimensionality = bits.length;
- }
-
@Override
public int getDimensionality() {
return dimensionality;
}
@Override
+ public void setDimensionality(int dimensionality) {
+ this.dimensionality = dimensionality;
+ }
+
+ /**
+ * Get the value of a single bit.
+ *
+ * @param dimension Bit number to get
+ * @return {@code true} when set
+ */
+ public boolean booleanValue(int dimension) {
+ return BitsUtil.get(bits, dimension);
+ }
+
+ @Override
@Deprecated
public Bit getValue(int dimension) {
- if (dimension < 1 || dimension > dimensionality) {
- throw new IllegalArgumentException("illegal dimension: " + dimension);
- }
- return new Bit(bits.get(dimension - 1));
+ return new Bit(booleanValue(dimension));
}
@Override
public double doubleValue(int dimension) {
- if (dimension < 0 || dimension >= dimensionality) {
- throw new IllegalArgumentException("illegal dimension: " + dimension);
- }
- return bits.get(dimension) ? 1.0 : 0.0;
+ return BitsUtil.get(bits, dimension) ? 1. : 0.;
}
@Override
public long longValue(int dimension) {
- if (dimension < 0 || dimension >= dimensionality) {
- throw new IllegalArgumentException("illegal dimension: " + dimension);
- }
- return bits.get(dimension) ? 1 : 0;
+ return BitsUtil.get(bits, dimension) ? 1L : 0L;
+ }
+
+ @Override
+ public int iter() {
+ return BitsUtil.nextSetBit(bits, 0);
+ }
+
+ @Override
+ public int iterAdvance(int iter) {
+ return BitsUtil.nextSetBit(bits, iter + 1);
+ }
+
+ @Override
+ public boolean iterValid(int iter) {
+ return iter >= 0;
+ }
+
+ @Override
+ public int iterDim(int iter) {
+ return iter; // Identity
+ }
+
+ @Override
+ public double iterDoubleValue(int iter) {
+ return 1.; // When properly used: always true!
+ }
+
+ @Override
+ public float iterFloatValue(int iter) {
+ return 1.f; // When properly used: always true!
+ }
+
+ @Override
+ public int iterIntValue(int iter) {
+ return 1; // When properly used: always true!
+ }
+
+ @Override
+ public short iterShortValue(int iter) {
+ return 1; // When properly used: always true!
+ }
+
+ @Override
+ public long iterLongValue(int iter) {
+ return 1L; // When properly used: always true!
+ }
+
+ @Override
+ public byte iterByteValue(int iter) {
+ return 1; // When properly used: always true!
}
/**
@@ -136,8 +177,8 @@ public class BitVector extends AbstractNumberVector<Bit> {
@Override
public Vector getColumnVector() {
double[] values = new double[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
- values[i] = bits.get(i) ? 1 : 0;
+ for(int i = 0; i < dimensionality; i++) {
+ values[i] = BitsUtil.get(bits, i) ? 1 : 0;
}
return new Vector(values);
}
@@ -150,13 +191,17 @@ public class BitVector extends AbstractNumberVector<Bit> {
* @return true if this BitVector contains all bits that are set to true in
* the specified BitSet, false otherwise
*/
- public boolean contains(BitSet bitset) {
- boolean contains = true;
- for (int i = bitset.nextSetBit(0); i >= 0 && contains; i = bitset.nextSetBit(i + 1)) {
- // noinspection ConstantConditions
- contains &= bits.get(i);
+ public boolean contains(long[] bitset) {
+ for(int i = 0; i < bitset.length; i++) {
+ final long b = bitset[i];
+ if(i >= bits.length && b != 0L) {
+ return false;
+ }
+ if((b & bits[i]) != b) {
+ return false;
+ }
}
- return contains;
+ return true;
}
/**
@@ -164,8 +209,94 @@ public class BitVector extends AbstractNumberVector<Bit> {
*
* @return a copy of the bits currently set in this BitVector
*/
- public BitSet getBits() {
- return (BitSet) bits.clone();
+ public long[] cloneBits() {
+ return bits.clone();
+ }
+
+ /**
+ * Compute the vector cardinality (uncached!)
+ *
+ * @return Vector cardinality
+ */
+ public int cardinality() {
+ return BitsUtil.cardinality(bits);
+ }
+
+ /**
+ * Compute the Jaccard similarity of two bit vectors.
+ *
+ * @param v2 Second bit vector
+ * @return Jaccard similarity (intersection / union)
+ */
+ public double jaccardSimilarity(BitVector v2) {
+ return BitsUtil.intersectionSize(bits, v2.bits) / (double) BitsUtil.unionSize(bits, v2.bits);
+ }
+
+ /**
+ * Compute the Hamming distance of two bit vectors.
+ *
+ * @param v2 Second bit vector
+ * @return Hamming distance (number of bits difference)
+ */
+ public int hammingDistance(BitVector v2) {
+ return BitsUtil.hammingDistance(bits, v2.bits);
+ }
+
+ /**
+ * Compute the vector intersection size.
+ *
+ * @param v2 Second bit vector
+ * @return Intersection size (number of bits in both)
+ */
+ public int intersectionSize(BitVector v2) {
+ return BitsUtil.intersectionSize(bits, v2.bits);
+ }
+
+ /**
+ * Compute the vector union size.
+ *
+ * @param v2 Second bit vector
+ * @return Intersection size (number of bits in both)
+ */
+ public int unionSize(BitVector v2) {
+ return BitsUtil.unionSize(bits, v2.bits);
+ }
+
+ /**
+ * Compute whether two vectors intersect.
+ *
+ * @param v2 Second bit vector
+ * @return {@code true} if they intersect in at least one bit.
+ */
+ public boolean intersect(BitVector v2) {
+ return BitsUtil.intersect(bits, v2.bits);
+ }
+
+ /**
+ * Combine onto v using the AND operation, i.e. {@code v &= this}.
+ *
+ * @param v Existing bit set of same length.
+ */
+ public void andOnto(long[] v) {
+ BitsUtil.andI(v, bits);
+ }
+
+ /**
+ * Combine onto v using the OR operation, i.e. {@code v |= this}.
+ *
+ * @param v Existing bit set of same length.
+ */
+ public void orOnto(long[] v) {
+ BitsUtil.orI(v, bits);
+ }
+
+ /**
+ * Combine onto v using the XOR operation, i.e. {@code v ^= this}.
+ *
+ * @param v Existing bit set of same length.
+ */
+ public void xorOnto(long[] v) {
+ BitsUtil.xorI(v, bits);
}
/**
@@ -178,16 +309,12 @@ public class BitVector extends AbstractNumberVector<Bit> {
*/
@Override
public String toString() {
- Bit[] bitArray = new Bit[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
- bitArray[i] = bits.get(i) ? Bit.TRUE : Bit.FALSE;
- }
StringBuilder representation = new StringBuilder();
- for (Bit bit : bitArray) {
- if (representation.length() > 0) {
+ for(int i = 0; i < dimensionality; i++) {
+ if(i > 0) {
representation.append(ATTRIBUTE_SEPARATOR);
}
- representation.append(bit.toString());
+ representation.append(BitsUtil.get(bits, i) ? '1' : '0');
}
return representation.toString();
}
@@ -201,11 +328,12 @@ public class BitVector extends AbstractNumberVector<Bit> {
*/
@Override
public boolean equals(Object obj) {
- if (obj instanceof BitVector) {
+ if(obj instanceof BitVector) {
BitVector bv = (BitVector) obj;
return this.getDimensionality() == bv.getDimensionality() && this.bits.equals(bv.bits);
- } else {
+ }
+ else {
return false;
}
}
@@ -217,14 +345,15 @@ public class BitVector extends AbstractNumberVector<Bit> {
*
* @apiviz.has BitVector
*/
- public static class Factory extends AbstractNumberVector.Factory<BitVector, Bit> {
+ public static class Factory extends AbstractNumberVector.Factory<BitVector> implements SparseNumberVector.Factory<BitVector> {
@Override
- public <A> BitVector newFeatureVector(A array, ArrayAdapter<Bit, A> adapter) {
+ public <A> BitVector newFeatureVector(A array, ArrayAdapter<? extends Number, A> adapter) {
int dim = adapter.size(array);
- BitSet bits = new BitSet(dim);
- for (int i = 0; i < dim; i++) {
- bits.set(i, adapter.get(array, i).bitValue());
- i++;
+ long[] bits = BitsUtil.zero(dim);
+ for(int i = 0; i < dim; i++) {
+ if(adapter.get(array, i).doubleValue() >= 0.5) {
+ BitsUtil.setI(bits, i);
+ }
}
return new BitVector(bits, dim);
}
@@ -232,20 +361,33 @@ public class BitVector extends AbstractNumberVector<Bit> {
@Override
public <A> BitVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
int dim = adapter.size(array);
- BitSet bits = new BitSet(dim);
- for (int i = 0; i < dim; i++) {
- if (adapter.getDouble(array, i) >= 0.5) {
- bits.set(i);
+ long[] bits = BitsUtil.zero(dim);
+ for(int i = 0; i < dim; i++) {
+ if(adapter.getDouble(array, i) >= 0.5) {
+ BitsUtil.setI(bits, i);
}
}
return new BitVector(bits, dim);
}
@Override
+ public BitVector newNumberVector(TIntDoubleMap values, int maxdim) {
+ long[] bits = BitsUtil.zero(maxdim);
+ // Import and sort the indexes
+ for(TIntDoubleIterator iter = values.iterator(); iter.hasNext();) {
+ iter.advance();
+ if(iter.value() != 0.) {
+ BitsUtil.setI(bits, iter.key());
+ }
+ }
+ return new BitVector(bits, maxdim);
+ }
+
+ @Override
public ByteBufferSerializer<BitVector> getDefaultSerializer() {
return SHORT_SERIALIZER;
}
-
+
@Override
public Class<? super BitVector> getRestrictionClass() {
return BitVector.class;
@@ -280,23 +422,23 @@ public class BitVector extends AbstractNumberVector<Bit> {
public BitVector fromByteBuffer(ByteBuffer buffer) throws IOException {
short dimensionality = buffer.getShort();
final int len = ByteArrayUtil.SIZE_SHORT + (dimensionality + 7) / 8;
- if (buffer.remaining() < len) {
+ if(buffer.remaining() < len) {
throw new IOException("Not enough data for a bit vector!");
}
// read values
- BitSet values = new BitSet(dimensionality);
+ long[] bits = BitsUtil.zero(dimensionality);
byte b = 0;
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
// read the next byte when needed.
- if ((i & 7) == 0) {
+ if((i & 7) == 0) {
b = buffer.get();
}
final byte bit = (byte) (1 << (i & 7));
- if ((b & bit) != 0) {
- values.set(i + 1);
+ if((b & bit) != 0) {
+ BitsUtil.setI(bits, i);
}
}
- return new BitVector(values, dimensionality);
+ return new BitVector(bits, dimensionality);
}
@Override
@@ -304,7 +446,7 @@ public class BitVector extends AbstractNumberVector<Bit> {
final int len = getByteSize(vec);
assert (vec.getDimensionality() <= Short.MAX_VALUE);
final short dim = (short) vec.getDimensionality();
- if (buffer.remaining() < len) {
+ if(buffer.remaining() < len) {
throw new IOException("Not enough space for the bit vector!");
}
// write size
@@ -312,15 +454,16 @@ public class BitVector extends AbstractNumberVector<Bit> {
// write values
// Next byte to write:
byte b = 0;
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
final byte mask = (byte) (1 << (i & 7));
- if (vec.bits.get(i)) {
+ if(BitsUtil.get(vec.bits, i)) {
b |= mask;
- } else {
+ }
+ else {
b &= ~mask;
}
// Write when appropriate
- if ((i & 7) == 7 || i == dim - 1) {
+ if((i & 7) == 7 || i == dim - 1) {
buffer.put(b);
b = 0;
}