package de.lmu.ifi.dbs.elki.database.lucene;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2015
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.util.Version;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.AbstractDistanceRangeQuery;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
/**
* Perform range similarity search using Lucene.
*
* @author Erich Schubert
*/
public class LuceneDistanceRangeQuery extends AbstractDistanceRangeQuery<DBID> {
  /**
   * Lucene search function.
   */
  MoreLikeThis mlt;

  /**
   * Index searcher.
   */
  IndexSearcher is;

  /**
   * DBID range.
   */
  DBIDRange ids;

  /**
   * Constructor.
   *
   * @param distanceQuery Distance query
   * @param ir Lucene index reader, used for both the "more like this" query
   *        generation and the index searcher
   * @param ids DBID range, used to convert between DBIDs and Lucene document
   *        numbers
   */
  public LuceneDistanceRangeQuery(DistanceQuery<DBID> distanceQuery, IndexReader ir, DBIDRange ids) {
    super(distanceQuery);
    this.ids = ids;
    this.mlt = new MoreLikeThis(ir);
    this.is = new IndexSearcher(ir);
    mlt.setAnalyzer(new StandardAnalyzer(Version.LUCENE_36));
  }

  /**
   * Collect all documents within the given range of the query document.
   *
   * The offset of the query object within the DBID range is used as the Lucene
   * document number. Results are appended to the given list in the order Lucene
   * reports them, i.e. they are <em>not</em> sorted by this method.
   *
   * @param obj Query object (a document, identified by its DBID)
   * @param range Maximum distance (inclusive)
   * @param neighbors Output list that the matching neighbors are added to
   * @throws AbortException when Lucene reports an I/O error
   */
  @Override
  public void getRangeForObject(DBID obj, double range, ModifiableDoubleDBIDList neighbors) {
    try {
      Query query = mlt.like(ids.getOffset(obj));
      is.search(query, new DocumentsCollector(ids, neighbors, range));
    }
    catch(IOException e) {
      throw new AbortException("I/O error in lucene.", e);
    }
  }

  /**
   * Get the neighbors of the given object within the given range as a new list.
   *
   * @param id Query object ID
   * @param range Maximum distance (inclusive)
   * @return Newly allocated list of neighbors
   */
  @Override
  public DoubleDBIDList getRangeForDBID(DBIDRef id, double range) {
    ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    getRangeForDBID(id, range, result);
    // The collector accepts documents out of order and appends them as they
    // arrive, so the list is in arbitrary order at this point.
    // NOTE(review): assumes callers of the list-returning variant expect the
    // result ordered by distance, as with other range queries - confirm.
    result.sort();
    return result;
  }

  /**
   * Get the neighbors of the given object within the given range as a new list.
   *
   * Delegates to {@link #getRangeForDBID(DBIDRef, double)}, as the objects of
   * this query are the DBIDs themselves.
   *
   * @param obj Query object (a document, identified by its DBID)
   * @param range Maximum distance (inclusive)
   * @return Newly allocated list of neighbors
   */
  @Override
  public DoubleDBIDList getRangeForObject(DBID obj, double range) {
    return getRangeForDBID(obj, range);
  }

  /**
   * Class to collect Lucene results.
   *
   * Static nested class: it only works on the values passed to its
   * constructor, and does not need a reference to the enclosing query.
   *
   * @author Erich Schubert
   */
  private static class DocumentsCollector extends Collector {
    /**
     * Iterator to convert docid to DBIDs.
     */
    private final DBIDArrayIter iter;

    /**
     * Offset for docid conversion.
     */
    private int docBase = 0;

    /**
     * Scorer class
     */
    private Scorer scorer = null;

    /**
     * Result collector.
     */
    private final ModifiableDoubleDBIDList result;

    /**
     * Threshold range.
     */
    private final double range;

    /**
     * Constructor.
     *
     * @param ids IDs
     * @param result Result collection
     * @param range Radius
     */
    public DocumentsCollector(DBIDRange ids, ModifiableDoubleDBIDList result, double range) {
      super();
      this.iter = ids.iter();
      this.result = result;
      this.range = range;
    }

    /**
     * This collector does not depend on the order of the documents.
     *
     * @return always {@code true}
     */
    @Override
    public boolean acceptsDocsOutOfOrder() {
      return true;
    }

    /**
     * Convert the score of the current document into a distance, and add the
     * document to the result if it is within the query range.
     *
     * @param docid Document number, relative to the current reader
     * @throws IOException when the scorer fails
     */
    @Override
    public void collect(int docid) throws IOException {
      double score = scorer.score();
      // Inverse score as distance; non-positive scores map to infinity, which
      // also avoids a division by zero.
      double dist = (score > 0.) ? (1. / score) : Double.POSITIVE_INFINITY;
      if(dist <= range) {
        // docid is relative to the current reader, add its base first.
        iter.seek(docBase + docid);
        result.add(dist, iter);
      }
    }

    /**
     * Remember the document number base of the next reader.
     *
     * @param reader Next reader (unused)
     * @param docBase Value to add to the document numbers reported to
     *        {@link #collect(int)}
     */
    @Override
    public void setNextReader(IndexReader reader, int docBase) throws IOException {
      this.docBase = docBase;
    }

    /**
     * Store the scorer to use for the documents collected next.
     *
     * @param scorer Scorer
     */
    @Override
    public void setScorer(Scorer scorer) throws IOException {
      this.scorer = scorer;
    }
  }
}