diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood')
10 files changed, 1350 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java new file mode 100644 index 00000000..92d20789 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java @@ -0,0 +1,84 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.database.datastore.DataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.logging.Logging; + +/** + * Abstract base class for precomputed neighborhoods. + * + * @author Erich Schubert + */ +public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPredicate { + /** + * The data + */ + protected DataStore<DBIDs> store; + + /** + * Constructor. + * + * @param store the actual data. + */ + public AbstractPrecomputedNeighborhood(DataStore<DBIDs> store) { + super(); + this.store = store; + } + + @Override + public DBIDs getNeighborDBIDs(DBID reference) { + DBIDs neighbors = store.get(reference); + if(neighbors != null) { + return neighbors; + } + else { + // Use just the object itself. + if(getLogger().isDebugging()) { + getLogger().warning("No neighbors for object " + reference); + } + return reference; + } + } + + /** + * The logger to use for error reporting. + * + * @return Logger + */ + abstract protected Logging getLogger(); + + /** + * Factory class. + * + * @author Erich Schubert + * + * @apiviz.stereotype factory + * @apiviz.has AbstractPrecomputedNeighborhood + */ + public abstract static class Factory<O> implements NeighborSetPredicate.Factory<O> { + // Nothing to add. + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java new file mode 100644 index 00000000..a18d8bc4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java @@ -0,0 +1,247 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.database.datastore.DataStore; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.result.Result; +import de.lmu.ifi.dbs.elki.result.ResultHierarchy; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Neighborhood obtained by computing the k-fold closure of an existing + * neighborhood. + * + * @author Erich Schubert + */ +public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood implements Result { + /** + * The logger to use. + */ + private static final Logging logger = Logging.getLogger(ExtendedNeighborhood.class); + + /** + * Constructor. + * + * @param store The materialized data. + */ + public ExtendedNeighborhood(DataStore<DBIDs> store) { + super(store); + } + + @Override + protected Logging getLogger() { + return logger; + } + + @Override + public String getLongName() { + return "Extended Neighborhood"; + } + + @Override + public String getShortName() { + return "extended-neighborhood"; + } + + /** + * Factory class. + * + * @author Erich Schubert + * + * @apiviz.stereotype factory + * @apiviz.has ExtendedNeighborhood oneway - - «produces» + */ + public static class Factory<O> extends AbstractPrecomputedNeighborhood.Factory<O> { + /** + * Logger + */ + private static final Logging logger = Logging.getLogger(ExtendedNeighborhood.class); + + /** + * Inner neighbor set predicate + */ + private NeighborSetPredicate.Factory<O> inner; + + /** + * Number of steps to do + */ + private int steps; + + /** + * Constructor. + * + * @param inner Inner neighbor set predicate + * @param steps Number of steps to do + */ + public Factory(NeighborSetPredicate.Factory<O> inner, int steps) { + super(); + this.inner = inner; + this.steps = steps; + } + + @Override + public NeighborSetPredicate instantiate(Relation<? extends O> database) { + DataStore<DBIDs> store = extendNeighborhood(database); + ExtendedNeighborhood neighborhood = new ExtendedNeighborhood(store); + ResultHierarchy hier = database.getHierarchy(); + if(hier != null) { + hier.add(database, neighborhood); + } + return neighborhood; + } + + @Override + public TypeInformation getInputTypeRestriction() { + return inner.getInputTypeRestriction(); + } + + /** + * Method to load the external neighbors. + */ + private DataStore<DBIDs> extendNeighborhood(Relation<? extends O> database) { + NeighborSetPredicate innerinst = inner.instantiate(database); + + final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class); + + // Expand multiple steps + FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Expanding neighborhoods", database.size(), logger) : null; + for(final DBID id : database.iterDBIDs()) { + ModifiableDBIDs res = DBIDUtil.newHashSet(id); + DBIDs todo = id; + for(int i = 0; i < steps; i++) { + ModifiableDBIDs ntodo = DBIDUtil.newHashSet(); + for(final DBID oid : todo) { + DBIDs add = innerinst.getNeighborDBIDs(oid); + if(add != null) { + for (DBID nid: add) { + if (res.contains(add)) { + continue; + } + ntodo.add(nid); + res.add(nid); + } + } + } + if (ntodo.size() == 0) { + continue; + } + todo = ntodo; + } + store.put(id, res); + if(progress != null) { + progress.incrementProcessed(logger); + } + } + if(progress != null) { + progress.ensureCompleted(logger); + } + + return store; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<O> extends AbstractParameterizer { + /** + * Parameter to specify the neighborhood predicate to use. + */ + public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use."); + + /** + * Parameter to specify the number of steps allowed + */ + public static final OptionID STEPS_ID = OptionID.getOrCreateOptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph."); + + /** + * The number of steps to do. + */ + private int steps; + + /** + * Inner neighbor set predicate + */ + private NeighborSetPredicate.Factory<O> inner; + + /** + * Inner neighborhood parameter. + * + * @param config Parameterization + * @return Inner neighborhood. + */ + protected static <O> NeighborSetPredicate.Factory<O> getParameterInnerNeighborhood(Parameterization config) { + final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<NeighborSetPredicate.Factory<O>>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class); + if(config.grab(param)) { + return param.instantiateClass(config); + } + return null; + } + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + inner = getParameterInnerNeighborhood(config); + steps = getParameterSteps(config); + } + + /** + * Get the number of steps to do in the neighborhood graph. + * + * @param config Parameterization + * @return number of steps, default 1 + */ + public static int getParameterSteps(Parameterization config) { + final IntParameter param = new IntParameter(STEPS_ID, new GreaterEqualConstraint(1)); + if(config.grab(param)) { + return param.getValue(); + } + return 1; + } + + @Override + protected ExtendedNeighborhood.Factory<O> makeInstance() { + return new ExtendedNeighborhood.Factory<O>(inner, steps); + } + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java new file mode 100644 index 00000000..f70c7471 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java @@ -0,0 +1,260 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashMap; +import java.util.Map; + +import de.lmu.ifi.dbs.elki.data.ExternalID; +import de.lmu.ifi.dbs.elki.data.LabelList; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStore; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.result.Result; +import de.lmu.ifi.dbs.elki.result.ResultHierarchy; +import de.lmu.ifi.dbs.elki.utilities.FileUtil; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; + +/** + * A precomputed neighborhood, loaded from an external file. + * + * @author Erich Schubert + */ +public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood implements Result { + /** + * Logger + */ + private static final Logging logger = Logging.getLogger(ExternalNeighborhood.class); + + /** + * Parameter to specify the neighborhood file + */ + public static final OptionID NEIGHBORHOOD_FILE_ID = OptionID.getOrCreateOptionID("externalneighbors.file", "The file listing the neighbors."); + + /** + * Constructor. + * + * @param store Store to access + */ + public ExternalNeighborhood(DataStore<DBIDs> store) { + super(store); + } + + @Override + public String getLongName() { + return "External Neighborhood"; + } + + @Override + public String getShortName() { + return "external-neighborhood"; + } + + @Override + protected Logging getLogger() { + return logger; + } + + /** + * Factory class. + * + * @author Erich Schubert + * + * @apiviz.stereotype factory + * @apiviz.has ExternalNeighborhood oneway - - «produces» + */ + public static class Factory extends AbstractPrecomputedNeighborhood.Factory<Object> { + /** + * Logger + */ + private static final Logging logger = Logging.getLogger(ExternalNeighborhood.class); + + /** + * The input file. + */ + private File file; + + /** + * Constructor. + * + * @param file File to load + */ + public Factory(File file) { + super(); + this.file = file; + } + + @Override + public NeighborSetPredicate instantiate(Relation<?> database) { + DataStore<DBIDs> store = loadNeighbors(database); + ExternalNeighborhood neighborhood = new ExternalNeighborhood(store); + ResultHierarchy hier = database.getHierarchy(); + if(hier != null) { + hier.add(database, neighborhood); + } + return neighborhood; + } + + @Override + public TypeInformation getInputTypeRestriction() { + return TypeUtil.ANY; + } + + /** + * Method to load the external neighbors. + */ + private DataStore<DBIDs> loadNeighbors(Relation<?> database) { + final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class); + + if(logger.isVerbose()) { + logger.verbose("Loading external neighborhoods."); + } + + if(logger.isDebugging()) { + logger.verbose("Building reverse label index..."); + } + // Build a map label/ExternalId -> DBID + // (i.e. a reverse index!) + // TODO: move this into the database layer to share? + Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() * 2); + { + Relation<LabelList> olq = database.getDatabase().getRelation(TypeUtil.LABELLIST); + Relation<ExternalID> eidq = database.getDatabase().getRelation(TypeUtil.EXTERNALID); + for(DBID id : database.iterDBIDs()) { + if(eidq != null) { + ExternalID eid = eidq.get(id); + if(eid != null) { + lblmap.put(eid.toString(), id); + } + } + if(olq != null) { + LabelList label = olq.get(id); + if(label != null) { + for(String lbl : label) { + lblmap.put(lbl, id); + } + } + } + } + } + + try { + if(logger.isDebugging()) { + logger.verbose("Loading neighborhood file."); + } + InputStream in = new FileInputStream(file); + in = FileUtil.tryGzipInput(in); + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + for(String line; (line = br.readLine()) != null;) { + ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(); + String[] entries = line.split(" "); + DBID id = lblmap.get(entries[0]); + if(id != null) { + for(int i = 0; i < entries.length; i++) { + final DBID neigh = lblmap.get(entries[i]); + if(neigh != null) { + neighbours.add(neigh); + } + else { + if(logger.isDebugging()) { + logger.debug("No object found for label " + entries[i]); + } + } + } + store.put(id, neighbours); + } + else { + if(logger.isDebugging()) { + logger.warning("No object found for label " + entries[0]); + } + } + } + br.close(); + in.close(); + + return store; + } + catch(IOException e) { + throw new AbortException("Loading of external neighborhood failed.", e); + } + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * The input file. + */ + File file; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + file = getParameterNeighborhoodFile(config); + } + + /** + * Get the neighborhood parameter. + * + * @param config Parameterization + * @return Instance or null + */ + protected static File getParameterNeighborhoodFile(Parameterization config) { + final FileParameter param = new FileParameter(NEIGHBORHOOD_FILE_ID, FileParameter.FileType.INPUT_FILE); + if(config.grab(param)) { + return param.getValue(); + } + return null; + } + + @Override + protected ExternalNeighborhood.Factory makeInstance() { + return new ExternalNeighborhood.Factory(file); + } + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java new file mode 100644 index 00000000..3d565767 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java @@ -0,0 +1,72 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; + +/** + * Predicate to obtain the neighbors of a reference object as set. + * + * @author Erich Schubert + */ +public interface NeighborSetPredicate { + /** + * Get the neighbors of a reference object for DBSCAN. + * + * @param reference Reference object + * @return Neighborhood + */ + public DBIDs getNeighborDBIDs(DBID reference); + + /** + * Factory interface to produce instances. + * + * @author Erich Schubert + * + * @apiviz.stereotype factory + * @apiviz.has NeighborSetPredicate + * + * @param <O> Input relation object type restriction + */ + public static interface Factory<O> extends Parameterizable { + /** + * Instantiation method. + * + * @param relation Relation to instantiate for. + * + * @return instance + */ + public NeighborSetPredicate instantiate(Relation<? extends O> relation); + + /** + * Get the input type information + * + * @return input type + */ + public TypeInformation getInputTypeRestriction(); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java new file mode 100644 index 00000000..d820a318 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java @@ -0,0 +1,190 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
+/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Neighborhoods based on k nearest neighbors.
+ *
+ * @author Ahmed Hettab
+ *
+ * @param <D> Distance to use
+ */
+public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> extends AbstractPrecomputedNeighborhood implements Result {
+ /**
+ * Logger
+ */
+ private static final Logging logger = Logging.getLogger(PrecomputedKNearestNeighborNeighborhood.class);
+
+ /**
+ * Constructor.
+ *
+ * @param store Store to access
+ */
+ public PrecomputedKNearestNeighborNeighborhood(DataStore<DBIDs> store) {
+ super(store);
+ }
+
+ @Override
+ public String getLongName() {
+ return "K Nearest Neighbors Neighborhood";
+ }
+
+ @Override
+ public String getShortName() {
+ return "knn-neighborhood";
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * Factory class to instantiate for a particular relation.
+ *
+ * @author Ahmed Hettab
+ *
+ * @apiviz.stereotype factory
+ * @apiviz.has PrecomputedKNearestNeighborNeighborhood
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Factory<O, D extends Distance<D>> implements NeighborSetPredicate.Factory<O> {
+ /**
+ * parameter k
+ */
+ private int k;
+
+ /**
+ * distance function to use
+ */
+ private DistanceFunction<? super O, D> distFunc;
+
+ /**
+ * Factory Constructor
+ */
+ public Factory(int k, DistanceFunction<? super O, D> distFunc) {
+ super();
+ this.k = k;
+ this.distFunc = distFunc;
+ }
+
+ @Override
+ public NeighborSetPredicate instantiate(Relation<? extends O> relation) {
+ KNNQuery<?, D> knnQuery = QueryUtil.getKNNQuery(relation, distFunc);
+
+ // TODO: use bulk?
+ WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
+ for(DBID id : relation.iterDBIDs()) {
+ List<DistanceResultPair<D>> neighbors = knnQuery.getKNNForDBID(id, k);
+ ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
+ for(DistanceResultPair<D> dpair : neighbors) {
+ neighbours.add(dpair.getDBID());
+ }
+ s.put(id, neighbours);
+ }
+ return new PrecomputedKNearestNeighborNeighborhood<D>(s);
+ }
+
+ @Override
+ public TypeInformation getInputTypeRestriction() {
+ return distFunc.getInputTypeRestriction();
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer {
+ /**
+ * Parameter k
+ */
+ public static final OptionID K_ID = OptionID.getOrCreateOptionID("neighborhood.k", "the number of neighbors");
+
+ /**
+ * Parameter to specify the distance function to use
+ */
+ public static final OptionID DISTANCEFUNCTION_ID = OptionID.getOrCreateOptionID("neighborhood.distancefunction", "the distance function to use");
+
+ /**
+ * Parameter k
+ */
+ int k;
+
+ /**
+ * Distance function
+ */
+ DistanceFunction<? super O, D> distFunc;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter kP = new IntParameter(K_ID);
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ final ObjectParameter<DistanceFunction<? super O, D>> distP = new ObjectParameter<DistanceFunction<? super O, D>>(DISTANCEFUNCTION_ID, DistanceFunction.class);
+ if(config.grab(distP)) {
+ distFunc = distP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected PrecomputedKNearestNeighborNeighborhood.Factory<O, D> makeInstance() {
+ return new PrecomputedKNearestNeighborNeighborhood.Factory<O, D>(k, distFunc);
+ }
+ }
+ }
+}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java new file mode 100644 index 00000000..eb490642 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Spatial outlier neighborhood classes</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java new file mode 100644 index 00000000..1e1debdf --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java @@ -0,0 +1,229 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; + +/** + * Neighborhood obtained by computing the k-fold closure of an existing + * neighborhood. Objects are weighted linearly by their distance: the object + * itself has a weight of 1 and this decreases linearly to 1/(n+1) for the + * nth-step neighbors. + * + * TODO: make actual weighting parameterizable? + * + * @author Erich Schubert + */ +public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPredicate { + /** + * The data store to use + */ + private NeighborSetPredicate inner; + + /** + * The number of steps to extend to. + */ + private int steps; + + /** + * Constructor. + * + * @param inner Inner neighborhood + * @param steps Number of steps to expand + */ + public LinearWeightedExtendedNeighborhood(NeighborSetPredicate inner, int steps) { + super(); + this.inner = inner; + this.steps = steps; + } + + /** + * Compute the weight from the number of steps needed. + * + * @param tsteps steps to target + * @return weight + */ + private double computeWeight(int tsteps) { + return 1.0 - (tsteps / (float) (steps + 1)); + } + + @Override + public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) { + ModifiableDBIDs seen = DBIDUtil.newHashSet(); + List<DoubleObjPair<DBID>> result = new ArrayList<DoubleObjPair<DBID>>(); + + // Add starting object + result.add(new DoubleObjPair<DBID>(computeWeight(0), reference)); + seen.add(reference); + // Extend. + DBIDs cur = reference; + for(int i = 1; i <= steps; i++) { + final double weight = computeWeight(i); + // Collect newly discovered IDs + ModifiableDBIDs add = DBIDUtil.newHashSet(); + for(DBID id : cur) { + for(DBID nid : inner.getNeighborDBIDs(id)) { + // Seen before? + if(seen.contains(nid)) { + continue; + } + add.add(nid); + result.add(new DoubleObjPair<DBID>(weight, nid)); + } + } + if(add.size() == 0) { + break; + } + cur = add; + } + return result; + } + + /** + * Factory class. + * + * @author Erich Schubert + * + * @apiviz.stereotype factory + * @apiviz.has LinearWeightedExtendedNeighborhood oneway - - «produces» + */ + public static class Factory<O> implements WeightedNeighborSetPredicate.Factory<O> { + /** + * Inner neighbor set predicate + */ + private NeighborSetPredicate.Factory<O> inner; + + /** + * Number of steps to do + */ + private int steps; + + /** + * Constructor. + * + * @param inner Inner neighbor set predicate + * @param steps Number of steps to do + */ + public Factory(NeighborSetPredicate.Factory<O> inner, int steps) { + super(); + this.inner = inner; + this.steps = steps; + } + + @Override + public LinearWeightedExtendedNeighborhood instantiate(Relation<? extends O> database) { + return new LinearWeightedExtendedNeighborhood(inner.instantiate(database), steps); + } + + @Override + public TypeInformation getInputTypeRestriction() { + return inner.getInputTypeRestriction(); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<O> extends AbstractParameterizer { + /** + * Parameter to specify the neighborhood predicate to use. + */ + public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use."); + + /** + * Parameter to specify the number of steps allowed + */ + public static final OptionID STEPS_ID = OptionID.getOrCreateOptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph."); + + /** + * The number of steps to do. + */ + private int steps; + + /** + * Inner neighbor set predicate + */ + private NeighborSetPredicate.Factory<O> inner; + + /** + * Inner neighborhood parameter. + * + * @param config Parameterization + * @return Inner neighborhood. + */ + protected static <O> NeighborSetPredicate.Factory<O> getParameterInnerNeighborhood(Parameterization config) { + final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<NeighborSetPredicate.Factory<O>>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class); + if(config.grab(param)) { + return param.instantiateClass(config); + } + return null; + } + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + inner = getParameterInnerNeighborhood(config); + steps = getParameterSteps(config); + } + + /** + * Get the number of steps to do in the neighborhood graph. + * + * @param config Parameterization + * @return number of steps, default 1 + */ + public static int getParameterSteps(Parameterization config) { + final IntParameter param = new IntParameter(STEPS_ID, new GreaterEqualConstraint(1)); + if(config.grab(param)) { + return param.getValue(); + } + return 1; + } + + @Override + protected LinearWeightedExtendedNeighborhood.Factory<O> makeInstance() { + return new LinearWeightedExtendedNeighborhood.Factory<O>(inner, steps); + } + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java new file mode 100644 index 00000000..98daac11 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java @@ -0,0 +1,142 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.ArrayList; +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; + +/** + * Adapter to use unweighted neighborhoods in an algorithm that requires + * weighted neighborhoods. + * + * @author Erich Schubert + */ +public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredicate { + /** + * Actual predicate + */ + NeighborSetPredicate inner; + + /** + * Constructor. + * + * @param inner Actual neighborhood + */ + public UnweightedNeighborhoodAdapter(NeighborSetPredicate inner) { + super(); + this.inner = inner; + } + + @Override + public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) { + DBIDs neighbors = inner.getNeighborDBIDs(reference); + ArrayList<DoubleObjPair<DBID>> adapted = new ArrayList<DoubleObjPair<DBID>>(neighbors.size()); + for(DBID id : neighbors) { + adapted.add(new DoubleObjPair<DBID>(1.0, id)); + } + return adapted; + } + + /** + * Factory class + * + * @author Erich Schubert + * + * @apiviz.stereotype factory + * @apiviz.has UnweightedNeighborhoodAdapter + * + * @param <O> Input object type + */ + public static class Factory<O> implements WeightedNeighborSetPredicate.Factory<O> { + /** + * The inner predicate factory + */ + NeighborSetPredicate.Factory<O> inner; + + /** + * Constructor. + * + * @param inner Actual (unweighted) predicate + */ + public Factory(NeighborSetPredicate.Factory<O> inner) { + super(); + this.inner = inner; + } + + @Override + public UnweightedNeighborhoodAdapter instantiate(Relation<? extends O> relation) { + return new UnweightedNeighborhoodAdapter(inner.instantiate(relation)); + } + + @Override + public TypeInformation getInputTypeRestriction() { + return inner.getInputTypeRestriction(); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Input object type + */ + public static class Parameterizer<O> extends AbstractParameterizer { + /** + * The parameter to give the non-weighted neighborhood to use. + */ + public static final OptionID INNER_ID = OptionID.getOrCreateOptionID("neighborhood.inner", "Parameter for the non-weighted neighborhood to use."); + + /** + * The actual predicate. + */ + NeighborSetPredicate.Factory<O> inner; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<NeighborSetPredicate.Factory<O>> innerP = new ObjectParameter<NeighborSetPredicate.Factory<O>>(INNER_ID, NeighborSetPredicate.Factory.class); + if(config.grab(innerP)) { + inner = innerP.instantiateClass(config); + } + } + + @Override + protected UnweightedNeighborhoodAdapter.Factory<O> makeInstance() { + return new UnweightedNeighborhoodAdapter.Factory<O>(inner); + } + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java new file mode 100644 index 00000000..a063185a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java @@ -0,0 +1,74 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; + +/** + * Neighbor predicate with weight support. + * + * @author Erich Schubert + */ +public interface WeightedNeighborSetPredicate { + /** + * Get the neighbors of a reference object for DBSCAN. + * + * @param reference Reference object + * @return Weighted Neighborhood + */ + public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference); + + /** + * Factory interface to produce instances. + * + * @author Erich Schubert + * + * @apiviz.stereotype factory + * @apiviz.has WeightedNeighborSetPredicate + * + * @param <O> Input relation object type restriction + */ + public static interface Factory<O> extends Parameterizable { + /** + * Instantiation method. + * + * @param relation Relation to instantiate for. + * + * @return instance + */ + public WeightedNeighborSetPredicate instantiate(Relation<? extends O> relation); + + /** + * Get the input type information + * + * @return input type + */ + public TypeInformation getInputTypeRestriction(); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java new file mode 100644 index 00000000..ff82dbee --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Weighted Neighborhood definitions.</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
\ No newline at end of file |