diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java | 119 |
1 files changed, 45 insertions, 74 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java index 994d854d..f39e1fdc 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,63 +23,43 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import gnu.trove.map.TLongFloatMap; +import gnu.trove.map.hash.TLongFloatHashMap; + import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.util.Map; -import de.lmu.ifi.dbs.elki.database.ids.DBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.FloatDistance; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction; import de.lmu.ifi.dbs.elki.utilities.FileUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; /** - * Provides a DistanceFunction that is based on float distances given by a - * distance matrix of an external file. + * Distance function that is based on float distances given by a + * distance matrix of an external ascii file. + * + * See {@link AsciiDistanceParser} for the default input format. * - * See {@link NumberDistanceParser} for the default input format. + * TODO: use a {@code float[]} instead of the hash map. * * @author Elke Achtert + * @author Erich Schubert */ @Title("File based float distance for database objects.") @Description("Loads float distance values from an external text file.") -public class FileBasedFloatDistanceFunction extends AbstractDBIDDistanceFunction<FloatDistance> { - /** - * Parameter that specifies the name of the distance matrix file. - * <p> - * Key: {@code -distance.matrix} - * </p> - */ - public static final OptionID MATRIX_ID = new OptionID("distance.matrix", "The name of the file containing the distance matrix."); - - /** - * Optional parameter to specify the parsers to provide a database, must - * extend {@link DistanceParser}. If this parameter is not set, - * {@link NumberDistanceParser} is used as parser for all input files. - * <p> - * Key: {@code -distance.parser} - * </p> - */ - public static final OptionID PARSER_ID = new OptionID("distance.parser", "Parser used to load the distance matrix."); - +public class FileBasedFloatDistanceFunction extends AbstractDBIDRangeDistanceFunction { /** * The distance cache */ - private Map<DBIDPair, FloatDistance> cache; + private TLongFloatMap cache; /** * Constructor. @@ -87,7 +67,7 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDDistanceFunction * @param parser Parser * @param matrixfile input file */ - public FileBasedFloatDistanceFunction(DistanceParser<FloatDistance> parser, File matrixfile) { + public FileBasedFloatDistanceFunction(DistanceParser parser, File matrixfile) { super(); try { loadCache(parser, matrixfile); @@ -97,44 +77,38 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDDistanceFunction } } - /** - * Returns the distance between the two objects specified by their objects - * ids. If a cache is used, the distance value is looked up in the cache. If - * the distance does not yet exists in cache, it will be computed an put to - * cache. If no cache is used, the distance is computed. - * - * @param id1 first object id - * @param id2 second object id - * @return the distance between the two objects specified by their objects ids - */ @Override - public FloatDistance distance(DBIDRef id1, DBIDRef id2) { - if(id1 == null) { - return getDistanceFactory().undefinedDistance(); - } - if(id2 == null) { - return getDistanceFactory().undefinedDistance(); - } - // the smaller id is the first key - if(DBIDUtil.compare(id1, id2) > 0) { - return distance(id2, id1); - } - FloatDistance ret = cache.get(DBIDUtil.newPair(id1, id2)); - if (ret == null && DBIDUtil.equal(id1, id2)) { - return FloatDistance.ZERO_DISTANCE; - } - return ret; + public double distance(int i1, int i2) { + return (i1 == i2) ? 0. : cache.get(makeKey(i1, i2)); } - private void loadCache(DistanceParser<FloatDistance> parser, File matrixfile) throws IOException { + private void loadCache(DistanceParser parser, File matrixfile) throws IOException { InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(new FileInputStream(matrixfile))); - DistanceParsingResult<FloatDistance> res = parser.parse(in); - cache = res.getDistanceCache(); + cache = new TLongFloatHashMap(); + parser.parse(in, new DistanceCacheWriter() { + @Override + public void put(int id1, int id2, double distance) { + cache.put(makeKey(id1, id2), (float) distance); + } + + @Override + public boolean containsKey(int id1, int id2) { + return cache.containsKey(makeKey(id1, id2)); + } + }); } - @Override - public FloatDistance getDistanceFactory() { - return FloatDistance.FACTORY; + /** + * Combine two integer ids into a long value. + * + * @param i1 First id + * @param i2 Second id + * @return Combined value + */ + protected static final long makeKey(int i1, int i2) { + return (i1 < i2) // + ? ((((long) i1) << 32) | i2)// + : ((((long) i2) << 32) | i1); } @Override @@ -159,22 +133,19 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDDistanceFunction public static class Parameterizer extends AbstractParameterizer { protected File matrixfile = null; - protected DistanceParser<FloatDistance> parser = null; + protected DistanceParser parser = null; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final FileParameter MATRIX_PARAM = new FileParameter(MATRIX_ID, FileParameter.FileType.INPUT_FILE); + final FileParameter MATRIX_PARAM = new FileParameter(FileBasedDoubleDistanceFunction.Parameterizer.MATRIX_ID, FileParameter.FileType.INPUT_FILE); if(config.grab(MATRIX_PARAM)) { matrixfile = MATRIX_PARAM.getValue(); } - final ObjectParameter<DistanceParser<FloatDistance>> PARSER_PARAM = new ObjectParameter<>(PARSER_ID, DistanceParser.class, NumberDistanceParser.class); + + final ObjectParameter<DistanceParser> PARSER_PARAM = new ObjectParameter<>(FileBasedDoubleDistanceFunction.Parameterizer.PARSER_ID, DistanceParser.class, AsciiDistanceParser.class); if(config.grab(PARSER_PARAM)) { - ListParameterization parserConfig = new ListParameterization(); - parserConfig.addParameter(DistanceParser.DISTANCE_ID, FloatDistance.class); - ChainedParameterization combinedConfig = new ChainedParameterization(parserConfig, config); - combinedConfig.errorsTo(config); - parser = PARSER_PARAM.instantiateClass(combinedConfig); + parser = PARSER_PARAM.instantiateClass(config); } } |