package de.lmu.ifi.dbs.elki.index.preprocessed.subspaceproj; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2011 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.ArrayList; import java.util.List; import de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedDBSCAN; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.preprocessed.AbstractPreprocessorIndex; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectionResult; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; /** * Abstract base class for a local PCA based index. * * @author Elke Achtert * @author Erich Schubert * * @apiviz.has DistanceFunction * * @param Vector type */ @Title("Local PCA Preprocessor") @Description("Materializes the local PCA and the locally weighted matrix of objects of a database.") public abstract class AbstractSubspaceProjectionIndex, D extends Distance, P extends ProjectionResult> extends AbstractPreprocessorIndex implements SubspaceProjectionIndex { /** * Contains the value of parameter epsilon; */ protected D epsilon; /** * The distance function for the variance analysis. */ protected DistanceFunction rangeQueryDistanceFunction; /** * Holds the value of parameter minpts. */ protected int minpts; /** * Constructor. * * @param relation Relation * @param epsilon Maximum Epsilon * @param rangeQueryDistanceFunction range query * @param minpts Minpts */ public AbstractSubspaceProjectionIndex(Relation relation, D epsilon, DistanceFunction rangeQueryDistanceFunction, int minpts) { super(relation); this.epsilon = epsilon; this.rangeQueryDistanceFunction = rangeQueryDistanceFunction; this.minpts = minpts; } /** * Preprocessing step. */ protected void preprocess() { if(relation == null || relation.size() <= 0) { throw new IllegalArgumentException(ExceptionMessages.DATABASE_EMPTY); } if(storage != null) { // Preprocessor was already run. return; } storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, ProjectionResult.class); long start = System.currentTimeMillis(); RangeQuery rangeQuery = QueryUtil.getRangeQuery(relation, rangeQueryDistanceFunction); FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), getLogger()) : null; for(DBID id : relation.iterDBIDs()) { List> neighbors = rangeQuery.getRangeForDBID(id, epsilon); final P pres; if(neighbors.size() >= minpts) { pres = computeProjection(id, neighbors, relation); } else { DistanceResultPair firstQR = neighbors.get(0); neighbors = new ArrayList>(); neighbors.add(firstQR); pres = computeProjection(id, neighbors, relation); } storage.put(id, pres); if(progress != null) { progress.incrementProcessed(getLogger()); } } if(progress != null) { progress.ensureCompleted(getLogger()); } long end = System.currentTimeMillis(); // TODO: re-add timing code! if(true) { long elapsedTime = end - start; getLogger().verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); } } @Override public P getLocalProjection(DBID objid) { if(storage == null) { preprocess(); } return storage.get(objid); } /** * This method implements the type of variance analysis to be computed for a * given point. *

* Example1: for 4C, this method should implement a PCA for the given point. * Example2: for PreDeCon, this method should implement a simple axis-parallel * variance analysis. * * @param id the given point * @param neighbors the neighbors as query results of the given point * @param relation the database for which the preprocessing is performed * * @return local subspace projection */ protected abstract P computeProjection(DBID id, List> neighbors, Relation relation); /** * Factory class * * @author Erich Schubert * * @apiviz.stereotype factory * @apiviz.uses AbstractSubspaceProjectionIndex oneway - - «create» */ public static abstract class Factory, D extends Distance, I extends AbstractSubspaceProjectionIndex> implements SubspaceProjectionIndex.Factory, Parameterizable { /** * Contains the value of parameter epsilon; */ protected D epsilon; /** * The distance function for the variance analysis. */ protected DistanceFunction rangeQueryDistanceFunction; /** * Holds the value of parameter minpts. */ protected int minpts; /** * Constructor. * * @param epsilon * @param rangeQueryDistanceFunction * @param minpts */ public Factory(D epsilon, DistanceFunction rangeQueryDistanceFunction, int minpts) { super(); this.epsilon = epsilon; this.rangeQueryDistanceFunction = rangeQueryDistanceFunction; this.minpts = minpts; } @Override public abstract I instantiate(Relation relation); @Override public TypeInformation getInputTypeRestriction() { return TypeUtil.NUMBER_VECTOR_FIELD; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static abstract class Parameterizer, D extends Distance, C> extends AbstractParameterizer { /** * Contains the value of parameter epsilon; */ protected D epsilon = null; /** * The distance function for the variance analysis. */ protected DistanceFunction rangeQueryDistanceFunction = null; /** * Holds the value of parameter minpts. */ protected int minpts = 0; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); configRangeQueryDistanceFunction(config); configEpsilon(config, rangeQueryDistanceFunction); configMinPts(config); } protected void configRangeQueryDistanceFunction(Parameterization config) { ObjectParameter> rangeQueryDistanceP = new ObjectParameter>(AbstractProjectedDBSCAN.INNER_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); if(config.grab(rangeQueryDistanceP)) { rangeQueryDistanceFunction = rangeQueryDistanceP.instantiateClass(config); } } protected void configEpsilon(Parameterization config, DistanceFunction rangeQueryDistanceFunction) { D distanceParser = rangeQueryDistanceFunction != null ? rangeQueryDistanceFunction.getDistanceFactory() : null; DistanceParameter epsilonP = new DistanceParameter(AbstractProjectedDBSCAN.EPSILON_ID, distanceParser); // parameter epsilon if(config.grab(epsilonP)) { epsilon = epsilonP.getValue(); } } protected void configMinPts(Parameterization config) { IntParameter minptsP = new IntParameter(AbstractProjectedDBSCAN.MINPTS_ID, new GreaterConstraint(0)); if(config.grab(minptsP)) { minpts = minptsP.getValue(); } } } } }