summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java119
1 files changed, 45 insertions, 74 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java
index 994d854d..f39e1fdc 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,63 +23,43 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.map.TLongFloatMap;
+import gnu.trove.map.hash.TLongFloatHashMap;
+
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Map;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.FloatDistance;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction;
import de.lmu.ifi.dbs.elki.utilities.FileUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
- * Provides a DistanceFunction that is based on float distances given by a
- * distance matrix of an external file.
+ * Distance function based on float distances read from a distance matrix
+ * stored in an external ASCII file.
+ *
+ * See {@link AsciiDistanceParser} for the default input format.
*
- * See {@link NumberDistanceParser} for the default input format.
+ * TODO: use a {@code float[]} instead of the hash map.
*
* @author Elke Achtert
+ * @author Erich Schubert
*/
@Title("File based float distance for database objects.")
@Description("Loads float distance values from an external text file.")
-public class FileBasedFloatDistanceFunction extends AbstractDBIDDistanceFunction<FloatDistance> {
- /**
- * Parameter that specifies the name of the distance matrix file.
- * <p>
- * Key: {@code -distance.matrix}
- * </p>
- */
- public static final OptionID MATRIX_ID = new OptionID("distance.matrix", "The name of the file containing the distance matrix.");
-
- /**
- * Optional parameter to specify the parsers to provide a database, must
- * extend {@link DistanceParser}. If this parameter is not set,
- * {@link NumberDistanceParser} is used as parser for all input files.
- * <p>
- * Key: {@code -distance.parser}
- * </p>
- */
- public static final OptionID PARSER_ID = new OptionID("distance.parser", "Parser used to load the distance matrix.");
-
+public class FileBasedFloatDistanceFunction extends AbstractDBIDRangeDistanceFunction {
/**
* The distance cache
*/
- private Map<DBIDPair, FloatDistance> cache;
+ private TLongFloatMap cache;
/**
* Constructor.
@@ -87,7 +67,7 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDDistanceFunction
* @param parser Parser
* @param matrixfile input file
*/
- public FileBasedFloatDistanceFunction(DistanceParser<FloatDistance> parser, File matrixfile) {
+ public FileBasedFloatDistanceFunction(DistanceParser parser, File matrixfile) {
super();
try {
loadCache(parser, matrixfile);
@@ -97,44 +77,38 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDDistanceFunction
}
}
- /**
- * Returns the distance between the two objects specified by their objects
- * ids. If a cache is used, the distance value is looked up in the cache. If
- * the distance does not yet exists in cache, it will be computed an put to
- * cache. If no cache is used, the distance is computed.
- *
- * @param id1 first object id
- * @param id2 second object id
- * @return the distance between the two objects specified by their objects ids
- */
+ /**
+ * Look up the distance between two objects given by their integer ids.
+ *
+ * Equal ids yield 0 without consulting the cache; otherwise the float value
+ * stored under {@code makeKey(i1, i2)} is returned (widened to double).
+ * NOTE(review): the result for a pair that was never loaded is whatever the
+ * map reports for an absent key; no explicit no-entry value is configured
+ * in the visible code - confirm this is the intended behavior.
+ *
+ * @param i1 first object id
+ * @param i2 second object id
+ * @return 0 if both ids are equal, else the cached distance for the pair
+ */
@Override
- public FloatDistance distance(DBIDRef id1, DBIDRef id2) {
- if(id1 == null) {
- return getDistanceFactory().undefinedDistance();
- }
- if(id2 == null) {
- return getDistanceFactory().undefinedDistance();
- }
- // the smaller id is the first key
- if(DBIDUtil.compare(id1, id2) > 0) {
- return distance(id2, id1);
- }
- FloatDistance ret = cache.get(DBIDUtil.newPair(id1, id2));
- if (ret == null && DBIDUtil.equal(id1, id2)) {
- return FloatDistance.ZERO_DISTANCE;
- }
- return ret;
+ public double distance(int i1, int i2) {
+ return (i1 == i2) ? 0. : cache.get(makeKey(i1, i2));
}
- private void loadCache(DistanceParser<FloatDistance> parser, File matrixfile) throws IOException {
+ /**
+ * Fill the distance cache from the given matrix file.
+ *
+ * The file is opened, passed through {@code FileUtil.tryGzipInput} and a
+ * buffer, handed to the parser, and closed again once parsing has finished
+ * or failed. Every parsed pair is stored as a float under
+ * {@code makeKey(id1, id2)}.
+ *
+ * @param parser Parser used to read the distance matrix
+ * @param matrixfile input file
+ * @throws IOException if the file cannot be opened or read
+ */
+ private void loadCache(DistanceParser parser, File matrixfile) throws IOException {
- InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(new FileInputStream(matrixfile)));
- DistanceParsingResult<FloatDistance> res = parser.parse(in);
- cache = res.getDistanceCache();
+ // try-with-resources: the raw file stream is declared separately so that
+ // it is closed even if wrapping it fails.
+ try (InputStream file = new FileInputStream(matrixfile); //
+ InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(file))) {
+ cache = new TLongFloatHashMap();
+ parser.parse(in, new DistanceCacheWriter() {
+ @Override
+ public void put(int id1, int id2, double distance) {
+ cache.put(makeKey(id1, id2), (float) distance);
+ }
+
+ @Override
+ public boolean containsKey(int id1, int id2) {
+ return cache.containsKey(makeKey(id1, id2));
+ }
+ });
+ }
}
- @Override
- public FloatDistance getDistanceFactory() {
- return FloatDistance.FACTORY;
+ /**
+ * Combine two integer ids into a long value.
+ *
+ * The smaller id is placed in the upper 32 bits and the larger one in the
+ * lower 32 bits, so the key does not depend on the argument order. The
+ * lower word is masked before combining: without the mask a negative id
+ * would be sign-extended and overwrite the upper word, making different
+ * pairs collide.
+ *
+ * @param i1 First id
+ * @param i2 Second id
+ * @return Combined value
+ */
+ protected static final long makeKey(int i1, int i2) {
+ return (i1 < i2) //
+ ? ((((long) i1) << 32) | (i2 & 0xFFFFFFFFL))//
+ : ((((long) i2) << 32) | (i1 & 0xFFFFFFFFL));
}
@Override
@@ -159,22 +133,19 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDDistanceFunction
public static class Parameterizer extends AbstractParameterizer {
protected File matrixfile = null;
- protected DistanceParser<FloatDistance> parser = null;
+ protected DistanceParser parser = null;
+ /**
+ * Read the matrix file and the distance parser from the configuration.
+ *
+ * Each field is only assigned when the corresponding parameter could be
+ * grabbed from {@code config}; otherwise it keeps its previous value.
+ *
+ * @param config Parameterization to read the options from
+ */
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final FileParameter MATRIX_PARAM = new FileParameter(MATRIX_ID, FileParameter.FileType.INPUT_FILE);
+ // The option ID is taken from FileBasedDoubleDistanceFunction.Parameterizer
+ // instead of being declared in this class.
+ final FileParameter MATRIX_PARAM = new FileParameter(FileBasedDoubleDistanceFunction.Parameterizer.MATRIX_ID, FileParameter.FileType.INPUT_FILE);
if(config.grab(MATRIX_PARAM)) {
matrixfile = MATRIX_PARAM.getValue();
}
- final ObjectParameter<DistanceParser<FloatDistance>> PARSER_PARAM = new ObjectParameter<>(PARSER_ID, DistanceParser.class, NumberDistanceParser.class);
+
+ // Parser option, likewise using the ID from FileBasedDoubleDistanceFunction;
+ // AsciiDistanceParser is passed as the third argument (presumably the
+ // default implementation - confirm against ObjectParameter).
+ final ObjectParameter<DistanceParser> PARSER_PARAM = new ObjectParameter<>(FileBasedDoubleDistanceFunction.Parameterizer.PARSER_ID, DistanceParser.class, AsciiDistanceParser.class);
if(config.grab(PARSER_PARAM)) {
- ListParameterization parserConfig = new ListParameterization();
- parserConfig.addParameter(DistanceParser.DISTANCE_ID, FloatDistance.class);
- ChainedParameterization combinedConfig = new ChainedParameterization(parserConfig, config);
- combinedConfig.errorsTo(config);
- parser = PARSER_PARAM.instantiateClass(combinedConfig);
+ // The parser is instantiated directly from the given configuration.
+ parser = PARSER_PARAM.instantiateClass(config);
}
}