diff options
author | Erich Schubert <erich@debian.org> | 2014-10-31 03:43:51 +0100 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:40 +0000 |
commit | 596d8876dca5627dd76e8c23bf40a24cc305eeed (patch) | |
tree | d269ddb46561469f6b1fff67b19e0cd2b4608f5b /src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/AsciiDistanceParser.java | |
parent | ee31d687b1a0e2f2f1e6e71375c7cc3b094919b8 (diff) | |
parent | 337087b668d3a54f3afee3a9adb597a32e9f7e94 (diff) |
Import Debian changes 0.6.5~20141030-1
elki (0.6.5~20141030-1) unstable; urgency=medium
* New upstream beta release
* Urgency medium: 0.6.0 suffers from a performance issue with duplicates.
* Repackaged tarball from .jar to .tar.bz2
* Add dependency on libsvm3-java
* Enable line numbers for debugging (ant debuglevel)
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/AsciiDistanceParser.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/AsciiDistanceParser.java | 174 |
1 files changed, 174 insertions, 0 deletions
package de.lmu.ifi.dbs.elki.distance.distancefunction.external;

/*
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures

 Copyright (C) 2014
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.regex.Pattern;

import de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;

/**
 * Parser for parsing one distance value per line.
 *
 * A line must have the following format: {@code id1 id2 distanceValue}, where
 * id1 and id2 are integers representing the two ids belonging to the distance
 * value. Empty lines and comment lines (as recognized by the configured
 * comment pattern) are ignored.
 *
 * After all lines have been read, the parser verifies that a distance is
 * present for every pair of ids between the smallest and the largest id seen.
 *
 * @author Elke Achtert
 * @author Erich Schubert
 */
@Title("Number Distance Parser")
@Description("Parser for the following line format:\n" //
    + "id1 id2 distanceValue, where id1 and id2 are integers representing the two ids belonging to the distance value.\n" //
    + "The ids and the distance value are separated by whitespace. Empty lines and lines beginning with \"#\" will be ignored.")
public class AsciiDistanceParser extends AbstractParser implements DistanceParser {
  /**
   * The logger for this class.
   */
  private static final Logging LOG = Logging.getLogger(AsciiDistanceParser.class);

  /**
   * Constructor.
   *
   * @param colSep Column separator pattern
   * @param quoteChars Quote characters
   * @param comment Comment pattern
   */
  public AsciiDistanceParser(Pattern colSep, String quoteChars, Pattern comment) {
    super(colSep, quoteChars, comment);
  }

  /**
   * Read {@code id1 id2 distance} triples from the stream, one per line, and
   * store each of them in the given cache.
   *
   * The stream is not closed by this method.
   *
   * @param in Input stream to read the distance lines from
   * @param cache Cache that receives every parsed distance
   * @throws IllegalArgumentException if a line does not consist of exactly
   *         three values, if an id or the distance cannot be parsed, if
   *         reading the stream fails, or if a distance is missing for some
   *         pair of ids within the observed id range
   */
  @Override
  public void parse(InputStream in, DistanceCacheWriter cache) {
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    int lineNumber = 1;

    // Smallest and largest id seen so far; the initial values make the
    // completeness check below a no-op when the input has no data lines.
    int min = Integer.MAX_VALUE, max = Integer.MIN_VALUE;
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Parsing distance matrix", LOG) : null;
    try {
      for(String line; (line = reader.readLine()) != null; lineNumber++) {
        LOG.incrementProcessed(prog);
        // Skip empty lines and comments
        if(line.length() <= 0 || isComment(line)) {
          continue;
        }
        tokenizer.initialize(line, 0, lengthWithoutLinefeed(line));

        final int id1 = readId("id1", lineNumber);
        final int id2 = readId("id2", lineNumber);
        if(!tokenizer.valid()) {
          throw new IllegalArgumentException("Less than three values in line " + lineNumber);
        }

        // Track minimum and maximum. The maximum must be compared against
        // the running maximum (not the minimum), otherwise a later line with
        // smaller ids would shrink the range that is verified below.
        min = Math.min(min, Math.min(id1, id2));
        max = Math.max(max, Math.max(id1, id2));

        try {
          double distance = tokenizer.getDouble();
          cache.put(id1, id2, distance);
        }
        catch(IllegalArgumentException e) {
          throw new IllegalArgumentException("Error in line " + lineNumber + ":" + e.getMessage(), e);
        }
        tokenizer.advance();
        if(tokenizer.valid()) {
          throw new IllegalArgumentException("More than three values in line " + lineNumber);
        }
      }
    }
    catch(IOException e) {
      // Keep the I/O failure as cause, so the root problem is not lost.
      throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".", e);
    }

    LOG.setCompleted(prog);

    // check if all distance values are specified
    for(int i1 = min; i1 <= max; i1++) {
      for(int i2 = i1 + 1; i2 <= max; i2++) {
        if(!cache.containsKey(i1, i2)) {
          throw new IllegalArgumentException("Distance value for " + i1 + " to " + i2 + " is missing!");
        }
      }
    }
  }

  /**
   * Read the object id at the current tokenizer position and advance to the
   * next token.
   *
   * @param name Name of the column ("id1" or "id2"), used in error messages
   * @param lineNumber Current line number, used in error messages
   * @return the parsed id
   * @throws IllegalArgumentException if there is no token left, if the token
   *         is not an integer, or if it does not fit into an {@code int}
   */
  private int readId(String name, int lineNumber) {
    if(!tokenizer.valid()) {
      throw new IllegalArgumentException("Less than three values in line " + lineNumber);
    }
    final long id;
    try {
      id = tokenizer.getLongBase10();
      tokenizer.advance();
    }
    catch(NumberFormatException e) {
      throw new IllegalArgumentException("Error in line " + lineNumber + ": " + name + " is not an integer!", e);
    }
    // A plain (int) cast would silently wrap around and store the distance
    // under a wrong id; reject such values instead.
    if(id < Integer.MIN_VALUE || id > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("Error in line " + lineNumber + ": " + name + " is outside the supported integer range!");
    }
    return (int) id;
  }

  @Override
  protected Logging getLogger() {
    return LOG;
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParser.Parameterizer {
    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
    }

    @Override
    protected AsciiDistanceParser makeInstance() {
      return new AsciiDistanceParser(colSep, quoteChars, comment);
    }
  }
}