diff options
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external')
7 files changed, 174 insertions, 52 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/AsciiDistanceParser.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/AsciiDistanceParser.java index 2122f43b..61552730 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/AsciiDistanceParser.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/AsciiDistanceParser.java @@ -29,6 +29,7 @@ import java.io.InputStream; import de.lmu.ifi.dbs.elki.datasource.parser.CSVReaderFormat; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -41,11 +42,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * Parser for parsing one distance value per line. * * A line must have the following format: {@code id1 id2 distanceValue}, where - * id1 and id2 are integers representing the two ids belonging to the distance - * value. Lines starting with "#" will be ignored. + * id1 and id2 are integers starting at 0 representing the two ids belonging to + * the distance value. Lines starting with "#" will be ignored. * * @author Elke Achtert * @author Erich Schubert + * @since 0.2 * * @apiviz.uses CSVReaderFormat * @apiviz.composedOf TokenizedReader @@ -54,8 +56,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz */ @Title("Number Distance Parser") @Description("Parser for the following line format:\n" // - + "id1 id2 distanceValue, where id1 and is2 are integers representing the two ids belonging to the distance value.\n" // - + "The ids and the distance value are separated by whitespace. Empty lines and lines beginning with \"#\" will be ignored.") ++ "id1 id2 distanceValue, where id1 and is2 are integers starting at 0 representing the two ids belonging to the distance value.\n" // ++ "The ids and the distance value are separated by whitespace. Empty lines and lines beginning with \"#\" will be ignored.") +@Alias({ "de.lmu.ifi.dbs.elki.datasource.parser.NumberDistanceParser", // +"de.lmu.ifi.dbs.elki.distance.distancefunction.external.NumberDistanceParser", // +"de.lmu.ifi.dbs.elki.parser.NumberDistanceParser" }) public class AsciiDistanceParser implements DistanceParser { /** * The logger for this class. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DiskCacheBasedDoubleDistanceFunction.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DiskCacheBasedDoubleDistanceFunction.java index 6bfd6df0..5ad01b72 100755 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DiskCacheBasedDoubleDistanceFunction.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DiskCacheBasedDoubleDistanceFunction.java @@ -26,7 +26,9 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; import java.io.File; import java.io.IOException; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.persistent.OnDiskUpperTriangleMatrix; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -42,11 +44,15 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; * matrix of an external binary matrix file. * * @author Erich Schubert + * @since 0.2 */ @Title("File based double distance for database objects.") @Description("Loads double distance values from an external matrix.") public class DiskCacheBasedDoubleDistanceFunction extends AbstractDBIDRangeDistanceFunction { - // TODO: constructor with file. + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(DiskCacheBasedDoubleDistanceFunction.class); /** * Magic to identify double cache matrices @@ -68,6 +74,17 @@ public class DiskCacheBasedDoubleDistanceFunction extends AbstractDBIDRangeDista this.cache = cache; } + /** + * Constructor. + * + * @param matrixfile File name + * @throws IOException + */ + public DiskCacheBasedDoubleDistanceFunction(File matrixfile) throws IOException { + super(); + this.cache = new OnDiskUpperTriangleMatrix(matrixfile, DOUBLE_CACHE_MAGIC, 0, ByteArrayUtil.SIZE_DOUBLE, false); + } + @Override public double distance(int i1, int i2) { // the smaller id is the first key @@ -84,6 +101,13 @@ public class DiskCacheBasedDoubleDistanceFunction extends AbstractDBIDRangeDista } @Override + public void checkRange(DBIDRange range) { + if(cache.getMatrixSize() < range.size()) { + LOG.warning("Distance matrix has size " + cache.getMatrixSize() + " but range has size: " + range.size()); + } + } + + @Override public boolean equals(Object obj) { if(obj == null) { return false; diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DiskCacheBasedFloatDistanceFunction.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DiskCacheBasedFloatDistanceFunction.java index de8d96b4..b6731a30 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DiskCacheBasedFloatDistanceFunction.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DiskCacheBasedFloatDistanceFunction.java @@ -26,7 +26,9 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; import java.io.File; import java.io.IOException; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.persistent.OnDiskUpperTriangleMatrix; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -41,11 +43,15 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; * of an external binary matrix file. * * @author Erich Schubert + * @since 0.2 */ @Title("File based float distance for database objects.") @Description("Loads float distance values from an external matrix.") public class DiskCacheBasedFloatDistanceFunction extends AbstractDBIDRangeDistanceFunction { - // TODO: constructor with file. + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(DiskCacheBasedFloatDistanceFunction.class); /** * Magic to identify double cache matrices @@ -67,6 +73,17 @@ public class DiskCacheBasedFloatDistanceFunction extends AbstractDBIDRangeDistan this.cache = cache; } + /** + * Constructor. + * + * @param matrixfile File name + * @throws IOException + */ + public DiskCacheBasedFloatDistanceFunction(File matrixfile) throws IOException { + super(); + this.cache = new OnDiskUpperTriangleMatrix(matrixfile, FLOAT_CACHE_MAGIC, 0, ByteArrayUtil.SIZE_FLOAT, false); + } + @Override public double distance(int i1, int i2) { // the smaller id is the first key @@ -94,6 +111,13 @@ public class DiskCacheBasedFloatDistanceFunction extends AbstractDBIDRangeDistan return this.cache.equals(other.cache); } + @Override + public void checkRange(DBIDRange range) { + if(cache.getMatrixSize() < range.size()) { + LOG.warning("Distance matrix has size " + cache.getMatrixSize() + " but range has size: " + range.size()); + } + } + /** * Parameterization class. * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DistanceCacheWriter.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DistanceCacheWriter.java index cd73b73e..3d83db66 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DistanceCacheWriter.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DistanceCacheWriter.java @@ -27,6 +27,7 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; * Interface to plug in the cache storage into the parser. * * @author Erich Schubert + * @since 0.4.0 */ public interface DistanceCacheWriter { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DistanceParser.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DistanceParser.java index 28234d75..d495427b 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DistanceParser.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/DistanceParser.java @@ -30,6 +30,7 @@ import java.io.InputStream; * InputStream. * * @author Arthur Zimek + * @since 0.2 * * @apiviz.uses DistanceCacheWriter oneway - - «create» */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedDoubleDistanceFunction.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedDoubleDistanceFunction.java index 4f872766..cbbf7fa2 100755 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedDoubleDistanceFunction.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedDoubleDistanceFunction.java @@ -1,5 +1,28 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.FileUtil; +import de.lmu.ifi.dbs.elki.utilities.documentation.Description; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import gnu.trove.impl.Constants; + /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures @@ -26,26 +49,6 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; import gnu.trove.map.TLongDoubleMap; import gnu.trove.map.hash.TLongDoubleHashMap; -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; - -import de.lmu.ifi.dbs.elki.database.ids.DBID; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction; -import de.lmu.ifi.dbs.elki.utilities.FileUtil; -import de.lmu.ifi.dbs.elki.utilities.documentation.Description; -import de.lmu.ifi.dbs.elki.utilities.documentation.Title; -import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; - /** * Distance function that is based on double distances given by a distance * matrix of an external ASCII file. @@ -58,6 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @author Elke Achtert * @author Erich Schubert + * @since 0.2 * * @apiviz.composedOf DistanceCacheWriter */ @@ -65,6 +69,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; @Description("Loads double distance values from an external text file.") public class FileBasedDoubleDistanceFunction extends AbstractDBIDRangeDistanceFunction { /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(FileBasedDoubleDistanceFunction.class); + + /** * The distance cache */ private TLongDoubleMap cache; @@ -80,6 +89,11 @@ public class FileBasedDoubleDistanceFunction extends AbstractDBIDRangeDistanceFu private File matrixfile; /** + * Minimum and maximum IDs seen. + */ + private int min, max; + + /** * Constructor. * * @param parser Parser @@ -106,18 +120,25 @@ public class FileBasedDoubleDistanceFunction extends AbstractDBIDRangeDistanceFu @Override public double distance(int i1, int i2) { - if(i1 == i2) { - return 0.; - } - return cache.get(makeKey(i1, i2)); + return (i1 == i2) ? 0. : cache.get(makeKey(i1 + min, i2 + min)); } private void loadCache(DistanceParser parser, File matrixfile) throws IOException { InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(new FileInputStream(matrixfile))); - cache = new TLongDoubleHashMap(); + cache = new TLongDoubleHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1L, Double.POSITIVE_INFINITY); + min = Integer.MAX_VALUE; + max = Integer.MIN_VALUE; parser.parse(in, new DistanceCacheWriter() { @Override public void put(int id1, int id2, double distance) { + if(id1 < id2) { + min = id1 < min ? id1 : min; + max = id2 > max ? id2 : max; + } + else { + min = id2 < min ? id2 : min; + max = id1 > max ? id1 : max; + } cache.put(makeKey(id1, id2), distance); } @@ -126,6 +147,9 @@ public class FileBasedDoubleDistanceFunction extends AbstractDBIDRangeDistanceFu return cache.containsKey(makeKey(id1, id2)); } }); + if(min != 0) { + LOG.verbose("Distance matrix is supposed to be 0-indexed. Choosing offset " + min + " to compensate."); + } } /** @@ -142,6 +166,14 @@ public class FileBasedDoubleDistanceFunction extends AbstractDBIDRangeDistanceFu } @Override + public void checkRange(DBIDRange range) { + final int size = max + 1 - min; + if(size < range.size()) { + LOG.warning("Distance matrix has size " + size + " but range has size: " + range.size()); + } + } + + @Override public boolean equals(Object obj) { if(obj == null) { return false; diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java index d14afb5d..51ae874e 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/distance/distancefunction/external/FileBasedFloatDistanceFunction.java @@ -1,5 +1,27 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.FileUtil; +import de.lmu.ifi.dbs.elki.utilities.documentation.Description; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import gnu.trove.impl.Constants; + /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures @@ -26,25 +48,6 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.external; import gnu.trove.map.TLongFloatMap; import gnu.trove.map.hash.TLongFloatHashMap; -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; - -import de.lmu.ifi.dbs.elki.database.ids.DBID; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction; -import de.lmu.ifi.dbs.elki.utilities.FileUtil; -import de.lmu.ifi.dbs.elki.utilities.documentation.Description; -import de.lmu.ifi.dbs.elki.utilities.documentation.Title; -import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; - /** * Distance function that is based on float distances given by a distance matrix * of an external ASCII file. @@ -57,6 +60,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @author Elke Achtert * @author Erich Schubert + * @since 0.2 * * @apiviz.composedOf DistanceCacheWriter */ @@ -64,6 +68,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; @Description("Loads float distance values from an external text file.") public class FileBasedFloatDistanceFunction extends AbstractDBIDRangeDistanceFunction { /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(FileBasedFloatDistanceFunction.class); + + /** * The distance cache */ private TLongFloatMap cache; @@ -79,6 +88,11 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDRangeDistanceFun private File matrixfile; /** + * Minimum and maximum IDs seen. + */ + private int min, max; + + /** * Constructor. * * @param parser Parser @@ -105,15 +119,25 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDRangeDistanceFun @Override public double distance(int i1, int i2) { - return (i1 == i2) ? 0. : cache.get(makeKey(i1, i2)); + return (i1 == i2) ? 0. : cache.get(makeKey(i1 + min, i2 + min)); } private void loadCache(DistanceParser parser, File matrixfile) throws IOException { InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(new FileInputStream(matrixfile))); - cache = new TLongFloatHashMap(); + cache = new TLongFloatHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1L, Float.POSITIVE_INFINITY); + min = Integer.MAX_VALUE; + max = Integer.MIN_VALUE; parser.parse(in, new DistanceCacheWriter() { @Override public void put(int id1, int id2, double distance) { + if(id1 < id2) { + min = id1 < min ? id1 : min; + max = id2 > max ? id2 : max; + } + else { + min = id2 < min ? id2 : min; + max = id1 > max ? id1 : max; + } cache.put(makeKey(id1, id2), (float) distance); } @@ -122,6 +146,9 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDRangeDistanceFun return cache.containsKey(makeKey(id1, id2)); } }); + if(min != 0) { + LOG.verbose("Distance matrix is supposed to be 0-indexed. Choosing offset " + min + " to compensate."); + } } /** @@ -138,6 +165,14 @@ public class FileBasedFloatDistanceFunction extends AbstractDBIDRangeDistanceFun } @Override + public void checkRange(DBIDRange range) { + final int size = max + 1 - min; + if(size < range.size()) { + LOG.warning("Distance matrix has size " + size + " but range has size: " + range.size()); + } + } + + @Override public boolean equals(Object obj) { if(obj == null) { return false; |