package de.lmu.ifi.dbs.elki.distance.distancefunction.external;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import gnu.trove.map.TLongDoubleMap;
import gnu.trove.map.hash.TLongDoubleHashMap;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction;
import de.lmu.ifi.dbs.elki.utilities.FileUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
 * Distance function that is based on double distances given by a distance
 * matrix of an external ascii file.
 *
 * See {@link AsciiDistanceParser} for the default input format.
 *
 * The distances are loaded once, in the constructor, into a hash map keyed by
 * the (order-independent) pair of integer ids; see {@link #makeKey(int, int)}.
 *
 * TODO: use a {@code double[]} instead of the hash map.
 *
 * @author Elke Achtert
 * @author Erich Schubert
 */
@Title("File based double distance for database objects.")
@Description("Loads double distance values from an external text file.")
public class FileBasedDoubleDistanceFunction extends AbstractDBIDRangeDistanceFunction {
  /**
   * The distance cache, mapping a combined id pair key to its distance.
   */
  private TLongDoubleMap cache;

  /**
   * Constructor. Loads the complete distance file into memory.
   *
   * @param parser Parser
   * @param matrixfile input file
   * @throws AbortException if the distance file cannot be read
   */
  public FileBasedDoubleDistanceFunction(DistanceParser parser, File matrixfile) {
    super();
    try {
      loadCache(parser, matrixfile);
    }
    catch(IOException e) {
      throw new AbortException("Could not load external distance file: " + matrixfile.toString(), e);
    }
  }

  /**
   * Look up the distance of two objects given by their integer ids.
   *
   * Identical ids always yield {@code 0}. For pairs not present in the cache,
   * the result is whatever the underlying map returns for a missing key
   * (NOTE(review): presumably the Trove default "no entry" value - confirm).
   *
   * @param i1 First id
   * @param i2 Second id
   * @return Stored distance
   */
  @Override
  public double distance(int i1, int i2) {
    if(i1 == i2) {
      return 0.;
    }
    return cache.get(makeKey(i1, i2));
  }

  /**
   * Read the distance file with the given parser and fill the cache.
   *
   * All streams opened here are closed again when this method returns,
   * whether parsing succeeded or failed.
   *
   * @param parser Parser used to read the file
   * @param matrixfile input file (optionally gzip compressed)
   * @throws IOException on I/O errors while opening or reading the file
   */
  private void loadCache(DistanceParser parser, File matrixfile) throws IOException {
    final TLongDoubleMap map = new TLongDoubleHashMap();
    // The raw file stream is a resource of its own, so that it is closed even
    // if wrapping it (gzip detection) fails.
    try (InputStream fin = new FileInputStream(matrixfile); //
        InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(fin))) {
      parser.parse(in, new DistanceCacheWriter() {
        @Override
        public void put(int id1, int id2, double distance) {
          map.put(makeKey(id1, id2), distance);
        }

        @Override
        public boolean containsKey(int id1, int id2) {
          return map.containsKey(makeKey(id1, id2));
        }
      });
    }
    cache = map;
  }

  /**
   * Combine two integer ids into a long value.
   *
   * The smaller id is stored in the upper 32 bits, the larger id in the lower
   * 32 bits, so the key is independent of the argument order. The lower word
   * is masked to prevent sign extension from overwriting the upper word.
   *
   * @param i1 First id
   * @param i2 Second id
   * @return Combined value
   */
  protected static final long makeKey(int i1, int i2) {
    final int lo = (i1 < i2) ? i1 : i2;
    final int hi = (i1 < i2) ? i2 : i1;
    return (((long) lo) << 32) | (hi & 0xFFFFFFFFL);
  }

  /**
   * Two instances are equal if they are of the same class and hold equal
   * distance caches.
   *
   * @param obj Object to compare with
   * @return {@code true} when equal
   */
  @Override
  public boolean equals(Object obj) {
    if(this == obj) {
      return true;
    }
    if(obj == null) {
      return false;
    }
    if(getClass() != obj.getClass()) {
      return false;
    }
    FileBasedDoubleDistanceFunction other = (FileBasedDoubleDistanceFunction) obj;
    return this.cache.equals(other.cache);
  }

  /**
   * Hash code consistent with {@link #equals(Object)}: derived from the
   * distance cache only.
   *
   * @return hash code
   */
  @Override
  public int hashCode() {
    return cache.hashCode();
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParameterizer {
    /**
     * Parameter that specifies the name of the distance matrix file.
     * <p>
     * Key: {@code -distance.matrix}
     * </p>
     */
    public static final OptionID MATRIX_ID = new OptionID("distance.matrix", //
    "The name of the file containing the distance matrix.");

    /**
     * Optional parameter to specify the parsers to provide a database, must
     * extend {@link DistanceParser}. If this parameter is not set,
     * {@link AsciiDistanceParser} is used as parser for all input files.
     * <p>
     * Key: {@code -distance.parser}
     * </p>
     */
    public static final OptionID PARSER_ID = new OptionID("distance.parser", //
    "Parser used to load the distance matrix.");

    /**
     * Input file holding the distance matrix.
     */
    protected File matrixfile = null;

    /**
     * Parser used to read the distance matrix file.
     */
    protected DistanceParser parser = null;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      final FileParameter MATRIX_PARAM = new FileParameter(MATRIX_ID, FileParameter.FileType.INPUT_FILE);
      if(config.grab(MATRIX_PARAM)) {
        matrixfile = MATRIX_PARAM.getValue();
      }

      // Parameterized (not raw) type, so instantiateClass yields a DistanceParser.
      final ObjectParameter<DistanceParser> PARSER_PARAM = new ObjectParameter<>(PARSER_ID, DistanceParser.class, AsciiDistanceParser.class);
      if(config.grab(PARSER_PARAM)) {
        parser = PARSER_PARAM.instantiateClass(config);
      }
    }

    @Override
    protected FileBasedDoubleDistanceFunction makeInstance() {
      return new FileBasedDoubleDistanceFunction(parser, matrixfile);
    }
  }
}