diff options
Diffstat (limited to 'elki/src/test/java/de/lmu/ifi/dbs/elki/index/preprocessed/MaterializedKNNAndRKNNPreprocessorTest.java')
-rw-r--r-- | elki/src/test/java/de/lmu/ifi/dbs/elki/index/preprocessed/MaterializedKNNAndRKNNPreprocessorTest.java | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/elki/src/test/java/de/lmu/ifi/dbs/elki/index/preprocessed/MaterializedKNNAndRKNNPreprocessorTest.java b/elki/src/test/java/de/lmu/ifi/dbs/elki/index/preprocessed/MaterializedKNNAndRKNNPreprocessorTest.java new file mode 100644 index 00000000..e5deb7fd --- /dev/null +++ b/elki/src/test/java/de/lmu/ifi/dbs/elki/index/preprocessed/MaterializedKNNAndRKNNPreprocessorTest.java @@ -0,0 +1,213 @@ +package de.lmu.ifi.dbs.elki.index.preprocessed; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2015 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.VectorUtil; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.HashmapDatabase; +import de.lmu.ifi.dbs.elki.database.UpdatableDatabase; +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery; +import de.lmu.ifi.dbs.elki.database.query.rknn.LinearScanRKNNQuery; +import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; +import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNAndRKNNPreprocessor; +import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Test case to validate the dynamic updates of materialized kNN and RkNN + * preprocessors. + * + * + * some index structures for accuracy. For a known data set and query point, the + * top 10 nearest neighbors are queried and verified. + * + * + * @author Elke Achtert + * @since 0.4.0 + */ +public class MaterializedKNNAndRKNNPreprocessorTest implements JUnit4Test { + // the following values depend on the data set used! + static String dataset = "data/testdata/unittests/3clusters-and-noise-2d.csv"; + + // number of kNN to query + int k = 10; + + // the size of objects inserted and deleted + int updatesize = 12; + + int seed = 5; + + // size of the data set + int shoulds = 330; + + /** + * Actual test routine. + * + * @throws ParameterException + * @throws UnableToComplyException + */ + @Test + public void testPreprocessor() throws ParameterException, UnableToComplyException { + ListParameterization params = new ListParameterization(); + params.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset); + + // get database + UpdatableDatabase db = ClassGenericsUtil.parameterizeOrAbort(HashmapDatabase.class, params); + db.initialize(); + Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD); + DistanceQuery<DoubleVector> distanceQuery = db.getDistanceQuery(rep, EuclideanDistanceFunction.STATIC); + + // verify data set size. + assertEquals("Data set size doesn't match parameters.", shoulds, rep.size()); + + // get linear queries + LinearScanDistanceKNNQuery<DoubleVector> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery); + LinearScanRKNNQuery<DoubleVector> lin_rknn_query = new LinearScanRKNNQuery<>(distanceQuery, lin_knn_query, k); + + // get preprocessed queries + ListParameterization config = new ListParameterization(); + config.addParameter(MaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, distanceQuery.getDistanceFunction()); + config.addParameter(MaterializeKNNPreprocessor.Factory.K_ID, k); + MaterializeKNNAndRKNNPreprocessor<DoubleVector> preproc = new MaterializeKNNAndRKNNPreprocessor<>(rep, distanceQuery.getDistanceFunction(), k); + KNNQuery<DoubleVector> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k); + RKNNQuery<DoubleVector> preproc_rknn_query = preproc.getRKNNQuery(distanceQuery); + // add as index + db.getHierarchy().add(rep, preproc); + assertTrue("Preprocessor knn query class incorrect.", !(preproc_knn_query instanceof LinearScanDistanceKNNQuery)); + assertTrue("Preprocessor rknn query class incorrect.", !(preproc_rknn_query instanceof LinearScanDistanceKNNQuery)); + + // test queries + testKNNQueries(rep, lin_knn_query, preproc_knn_query, k); + testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k); + // also test partial queries, forward only + testKNNQueries(rep, lin_knn_query, preproc_knn_query, k / 2); + + // insert new objects + List<DoubleVector> insertions = new ArrayList<>(); + NumberVector.Factory<DoubleVector> o = RelationUtil.getNumberVectorFactory(rep); + int dim = RelationUtil.dimensionality(rep); + Random random = new Random(seed); + for(int i = 0; i < updatesize; i++) { + DoubleVector obj = VectorUtil.randomVector(o, dim, random); + insertions.add(obj); + } + System.out.println("Insert " + insertions); + System.out.println(); + DBIDs deletions = db.insert(MultipleObjectsBundle.makeSimple(rep.getDataTypeInformation(), insertions)); + + // test queries + testKNNQueries(rep, lin_knn_query, preproc_knn_query, k); + testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k); + + // delete objects + System.out.println("Delete " + deletions); + db.delete(deletions); + + // test queries + testKNNQueries(rep, lin_knn_query, preproc_knn_query, k); + testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k); + } + + private void testKNNQueries(Relation<DoubleVector> rep, KNNQuery<DoubleVector> lin_knn_query, KNNQuery<DoubleVector> preproc_knn_query, int k) { + ArrayDBIDs sample = DBIDUtil.ensureArray(rep.getDBIDs()); + List<? extends KNNList> lin_knn_ids = lin_knn_query.getKNNForBulkDBIDs(sample, k); + List<? extends KNNList> preproc_knn_ids = preproc_knn_query.getKNNForBulkDBIDs(sample, k); + for(int i = 0; i < rep.size(); i++) { + KNNList lin_knn = lin_knn_ids.get(i); + KNNList pre_knn = preproc_knn_ids.get(i); + DoubleDBIDListIter lin = lin_knn.iter(), pre = pre_knn.iter(); + for(; lin.valid() && pre.valid(); lin.advance(), pre.advance(), i++) { + if(!DBIDUtil.equal(lin, pre) && lin.doubleValue() != pre.doubleValue()) { + System.out.print("LIN kNN #" + i + " " + lin.getPair()); + System.out.print(" <=> "); + System.out.print("PRE kNN #" + i + " " + pre.getPair()); + System.out.println(); + break; + } + } + assertEquals("kNN sizes do not agree.", lin_knn.size(), pre_knn.size()); + for(int j = 0; j < lin_knn.size(); j++) { + assertTrue("kNNs of linear scan and preprocessor do not match!", DBIDUtil.equal(lin_knn.get(j), pre_knn.get(j))); + assertTrue("kNNs of linear scan and preprocessor do not match!", lin_knn.get(j).doubleValue() == pre_knn.get(j).doubleValue()); + } + } + System.out.println("knns ok"); + } + + private void testRKNNQueries(Relation<DoubleVector> rep, RKNNQuery<DoubleVector> lin_rknn_query, RKNNQuery<DoubleVector> preproc_rknn_query, int k) { + ArrayDBIDs sample = DBIDUtil.ensureArray(rep.getDBIDs()); + List<? extends DoubleDBIDList> lin_rknn_ids = lin_rknn_query.getRKNNForBulkDBIDs(sample, k); + List<? extends DoubleDBIDList> preproc_rknn_ids = preproc_rknn_query.getRKNNForBulkDBIDs(sample, k); + for(int i = 0; i < rep.size(); i++) { + DoubleDBIDList lin_rknn = lin_rknn_ids.get(i); + DoubleDBIDList pre_rknn = preproc_rknn_ids.get(i); + + DoubleDBIDListIter lin = lin_rknn.iter(), pre = pre_rknn.iter(); + for(; lin.valid() && pre.valid(); lin.advance(), pre.advance(), i++) { + if(!DBIDUtil.equal(lin, pre) || lin.doubleValue() != pre.doubleValue()) { + System.out.print("LIN RkNN #" + i + " " + lin); + System.out.print(" <=> "); + System.out.print("PRE RkNN #" + i + " " + pre); + System.out.println(); + break; + } + } + assertEquals("rkNN sizes do not agree for k=" + k, lin_rknn.size(), pre_rknn.size()); + for(int j = 0; j < lin_rknn.size(); j++) { + assertTrue("rkNNs of linear scan and preprocessor do not match!", DBIDUtil.equal(lin_rknn.get(j), pre_rknn.get(j))); + assertTrue("rkNNs of linear scan and preprocessor do not match!", lin_rknn.get(j).doubleValue() == pre_rknn.get(j).doubleValue()); + } + } + System.out.println("rknns ok"); + System.out.println(); + } +}
\ No newline at end of file |