package de.lmu.ifi.dbs.elki.data; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2015 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import gnu.trove.iterator.TIntDoubleIterator; import gnu.trove.iterator.TIntFloatIterator; import gnu.trove.map.TIntDoubleMap; import gnu.trove.map.TIntFloatMap; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil; import de.lmu.ifi.dbs.elki.utilities.io.ByteBufferSerializer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Sparse vector type, using {@code float[]} for storing the values, and * {@code int[]} for storing the indexes, approximately 8 bytes per non-zero * value. * * @author Arthur Zimek * @since 0.2 */ public class SparseFloatVector extends AbstractNumberVector implements SparseNumberVector { /** * Static instance. */ public static final SparseFloatVector.Factory FACTORY = new SparseFloatVector.Factory(); /** * Serializer using varint encoding. */ public static final ByteBufferSerializer VARIABLE_SERIALIZER = new VariableSerializer(); /** * Indexes of values. */ private final int[] indexes; /** * Stored values. */ private final float[] values; /** * The dimensionality of this feature vector. */ private int dimensionality; /** * Direct constructor. * * @param indexes Indexes Must be sorted! * @param values Associated value. * @param dimensionality "true" dimensionality */ public SparseFloatVector(int[] indexes, float[] values, int dimensionality) { super(); this.indexes = indexes; this.values = values; this.dimensionality = dimensionality; } /** * Create a SparseFloatVector consisting of double values according to the * specified mapping of indices and values. * * @param values the values to be set as values of the real vector * @param dimensionality the dimensionality of this feature vector * @throws IllegalArgumentException if the given dimensionality is too small * to cover the given values (i.e., the maximum index of any value not * zero is bigger than the given dimensionality) */ public SparseFloatVector(TIntFloatMap values, int dimensionality) throws IllegalArgumentException { if(values.size() > dimensionality) { throw new IllegalArgumentException("values.size() > dimensionality!"); } this.indexes = new int[values.size()]; this.values = new float[values.size()]; // Import and sort the indexes { TIntFloatIterator iter = values.iterator(); for(int i = 0; iter.hasNext(); i++) { iter.advance(); this.indexes[i] = iter.key(); } Arrays.sort(this.indexes); } // Import the values accordingly { for(int i = 0; i < values.size(); i++) { this.values[i] = values.get(this.indexes[i]); } } this.dimensionality = dimensionality; final int maxdim = getMaxDim(); if(maxdim > dimensionality) { throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ")."); } } /** * Get the maximum dimensionality. * * @return the maximum dimensionality seen */ private int getMaxDim() { if(this.indexes.length == 0) { return 0; } else { return this.indexes[this.indexes.length - 1]; } } /** * Create a SparseFloatVector consisting of double values according to the * specified mapping of indices and values. * * @param values the values to be set as values of the real vector * @throws IllegalArgumentException if the given dimensionality is too small * to cover the given values (i.e., the maximum index of any value not * zero is bigger than the given dimensionality) */ public SparseFloatVector(float[] values) throws IllegalArgumentException { this.dimensionality = values.length; // Count the number of non-zero entries int size = 0; { for(int i = 0; i < values.length; i++) { if(values[i] != 0.0f) { size++; } } } this.indexes = new int[size]; this.values = new float[size]; // Copy the values { int pos = 0; for(int i = 0; i < values.length; i++) { float value = values[i]; if(value != 0.0f) { this.indexes[pos] = i; this.values[pos] = value; pos++; } } } } @Override public int getDimensionality() { return dimensionality; } /** * Sets the dimensionality to the new value. * * * @param dimensionality the new dimensionality * @throws IllegalArgumentException if the given dimensionality is too small * to cover the given values (i.e., the maximum index of any value not * zero is bigger than the given dimensionality) */ @Override public void setDimensionality(int dimensionality) throws IllegalArgumentException { final int maxdim = getMaxDim(); if(maxdim > dimensionality) { throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ")."); } this.dimensionality = dimensionality; } @Override @Deprecated public Float getValue(int dimension) { int pos = Arrays.binarySearch(this.indexes, dimension); if(pos >= 0) { return values[pos]; } else { return 0.0f; } } @Override @Deprecated public double doubleValue(int dimension) { int pos = Arrays.binarySearch(this.indexes, dimension); if(pos >= 0) { return values[pos]; } else { return 0.0; } } @Override @Deprecated public long longValue(int dimension) { int pos = Arrays.binarySearch(this.indexes, dimension); if(pos >= 0) { return (long) values[pos]; } else { return 0; } } @Override public Vector getColumnVector() { return new Vector(getValues()); } /** * Create a String representation of this SparseFloatVector as suitable for * {@link de.lmu.ifi.dbs.elki.datasource.parser.SparseNumberVectorLabelParser} * . * * The returned String is a single line with entries separated by * {@link AbstractNumberVector#ATTRIBUTE_SEPARATOR}. The first entry gives the * number of values actually not zero. Following entries are pairs of Integer * and Float where the Integer gives the index of the dimensionality and the * Float gives the corresponding value. * * Example: a vector (0,1.2,1.3,0)T would result in the String
* 2 2 1.2 3 1.3
* * @return a String representation of this SparseFloatVector */ @Override public String toString() { StringBuilder featureLine = new StringBuilder(); featureLine.append(this.indexes.length); for(int i = 0; i < this.indexes.length; i++) { featureLine.append(ATTRIBUTE_SEPARATOR); featureLine.append(this.indexes[i]); featureLine.append(ATTRIBUTE_SEPARATOR); featureLine.append(this.values[i]); } return featureLine.toString(); } /** * Returns an array consisting of the values of this feature vector. * * @return an array consisting of the values of this feature vector */ private double[] getValues() { double[] vals = new double[dimensionality]; for(int i = 0; i < indexes.length; i++) { vals[this.indexes[i]] = this.values[i]; } return vals; } @Override public int iter() { return 0; } @Override public int iterDim(int iter) { return indexes[iter]; } @Override public int iterAdvance(int iter) { return iter + 1; } @Override public boolean iterValid(int iter) { return iter < indexes.length; } @Override public double iterDoubleValue(int iter) { return (double) values[iter]; } @Override public float iterFloatValue(int iter) { return values[iter]; } @Override public int iterIntValue(int iter) { return (int) values[iter]; } @Override public short iterShortValue(int iter) { return (short) values[iter]; } @Override public long iterLongValue(int iter) { return (long) values[iter]; } @Override public byte iterByteValue(int iter) { return (byte) values[iter]; } /** * Factory class. * * @author Erich Schubert * * @apiviz.has SparseFloatVector */ public static class Factory extends AbstractNumberVector.Factory implements SparseNumberVector.Factory { @Override public SparseFloatVector newFeatureVector(A array, ArrayAdapter adapter) { int dim = adapter.size(array); float[] values = new float[dim]; for(int i = 0; i < dim; i++) { values[i] = adapter.get(array, i).floatValue(); } // TODO: inefficient return new SparseFloatVector(values); } @Override public SparseFloatVector newNumberVector(A array, NumberArrayAdapter adapter) { int dim = adapter.size(array); float[] values = new float[dim]; for(int i = 0; i < dim; i++) { values[i] = adapter.getFloat(array, i); } // TODO: inefficient return new SparseFloatVector(values); } @Override public SparseFloatVector newNumberVector(TIntDoubleMap dvalues, int maxdim) { int[] indexes = new int[dvalues.size()]; float[] values = new float[dvalues.size()]; // Import and sort the indexes TIntDoubleIterator iter = dvalues.iterator(); for(int i = 0; iter.hasNext(); i++) { iter.advance(); indexes[i] = iter.key(); } Arrays.sort(indexes); // Import the values accordingly for(int i = 0; i < dvalues.size(); i++) { values[i] = (float) dvalues.get(indexes[i]); } return new SparseFloatVector(indexes, values, maxdim); } @Override public ByteBufferSerializer getDefaultSerializer() { return VARIABLE_SERIALIZER; } @Override public Class getRestrictionClass() { return SparseFloatVector.class; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { @Override protected SparseFloatVector.Factory makeInstance() { return FACTORY; } } } /** * Serialization class using VarInt encodings. * * @author Erich Schubert * * @apiviz.uses SparseFloatVector - - «serializes» */ public static class VariableSerializer implements ByteBufferSerializer { @Override public SparseFloatVector fromByteBuffer(ByteBuffer buffer) throws IOException { final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer); final int nonzero = ByteArrayUtil.readUnsignedVarint(buffer); final int[] dims = new int[nonzero]; final float[] values = new float[nonzero]; for(int i = 0; i < nonzero; i++) { dims[i] = ByteArrayUtil.readUnsignedVarint(buffer); values[i] = buffer.getFloat(); } return new SparseFloatVector(dims, values, dimensionality); } @Override public void toByteBuffer(ByteBuffer buffer, SparseFloatVector vec) throws IOException { ByteArrayUtil.writeUnsignedVarint(buffer, vec.dimensionality); ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length); for(int i = 0; i < vec.values.length; i++) { ByteArrayUtil.writeUnsignedVarint(buffer, vec.indexes[i]); buffer.putFloat(vec.values[i]); } } @Override public int getByteSize(SparseFloatVector vec) { int sum = 0; sum += ByteArrayUtil.getUnsignedVarintSize(vec.dimensionality); sum += ByteArrayUtil.getUnsignedVarintSize(vec.values.length); for(int d : vec.indexes) { sum += ByteArrayUtil.getUnsignedVarintSize(d); } sum += vec.values.length * ByteArrayUtil.SIZE_FLOAT; return sum; } } }