Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java | 619
1 files changed, 619 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java b/src/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java
new file mode 100644
index 00000000..31279b9b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java
@@ -0,0 +1,619 @@
+package de.lmu.ifi.dbs.elki.algorithm.itemsetmining;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.iterator.TLongIntIterator;
+import gnu.trove.map.hash.TLongIntHashMap;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.data.BitVector;
+import de.lmu.ifi.dbs.elki.data.SparseFeatureVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.statistics.Duration;
+import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
+import de.lmu.ifi.dbs.elki.result.AprioriResult;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * The APRIORI algorithm for Mining Association Rules.
+ *
+ * Reference:
+ * <p>
+ * R. Agrawal, R. Srikant:<br />
+ * Fast Algorithms for Mining Association Rules<br />
+ * In Proc. 20th Int. Conf. on Very Large Data Bases (VLDB '94), Santiago de
+ * Chile, Chile 1994.
+ * </p>
+ *
+ * This implementation uses some simple optimizations for 1- and 2-itemsets.
+ *
+ * Note: this algorithm scales well to a large number of transactions, but not
+ * so well to a large number of frequent itemsets (items). For best results, use
+ * domain-specific preprocessing to aggregate items into groups. Use statistics
+ * logging to keep track of candidate set sizes.
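+ *
+ * A minimal usage sketch (assuming {@code relation} is an existing
+ * {@code Relation<BitVector>} of transactions):
+ *
+ * <pre>
+ * // Itemsets supported by at least 10% of the transactions:
+ * APRIORI apriori = new APRIORI(0.1);
+ * AprioriResult result = apriori.run(relation);
+ * </pre>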
+ *
+ * @author Arthur Zimek
+ * @author Erich Schubert
+ *
+ * @apiviz.has Itemset
+ * @apiviz.uses BitVector
+ */
+@Title("APRIORI: Algorithm for Mining Association Rules")
+@Description("Searches for frequent itemsets")
+@Reference(authors = "R. Agrawal, R. Srikant", //
+title = "Fast Algorithms for Mining Association Rules", //
+booktitle = "Proc. 20th Int. Conf. on Very Large Data Bases (VLDB '94), Santiago de Chile, Chile 1994", //
+url = "http://www.vldb.org/conf/1994/P487.PDF")
+public class APRIORI extends AbstractAlgorithm<AprioriResult> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(APRIORI.class);
+
+ /**
+ * Statistics logging prefix.
+ */
+ private final String STAT = this.getClass().getName() + ".";
+
+ /**
+ * Minimum support. If less than 1, considered to be a relative frequency,
+ * otherwise an absolute count.
+ */
+ private double minfreq;
+
+ /**
+ * Constructor with minimum frequency.
+ *
+ * @param minfreq Minimum frequency
+ */
+ public APRIORI(double minfreq) {
+ super();
+ this.minfreq = minfreq;
+ }
+
+ /**
+ * Performs the APRIORI algorithm on the given database.
+ *
+ * @param relation the Relation to process
+ * @return the AprioriResult learned by this APRIORI
+ */
+ public AprioriResult run(Relation<BitVector> relation) {
+ DBIDs ids = relation.getDBIDs();
+ List<Itemset> solution = new ArrayList<>();
+ final int size = ids.size();
+ final int needed = (int) ((minfreq < 1.) ? Math.ceil(minfreq * size) : minfreq);
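+    // E.g. minfreq = 0.1 on 1000 transactions yields needed = ceil(100.) = 100;
+    // minfreq = 50 is used directly as an absolute count.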
+
+ // TODO: we don't strictly require a vector field.
+ // We could work with knowing just the maximum dimensionality beforehand.
+ VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
+ if(size > 0) {
+ final int dim = meta.getDimensionality();
+ Duration timeone = LOG.newDuration(STAT + "1-items.time").begin();
+ List<OneItemset> oneitems = buildFrequentOneItemsets(relation, dim, needed);
+ LOG.statistics(timeone.end());
+ if(LOG.isStatistics()) {
+ LOG.statistics(new LongStatistic(STAT + "1-items.frequent", oneitems.size()));
+ LOG.statistics(new LongStatistic(STAT + "1-items.transactions", ids.size()));
+ }
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine(debugDumpCandidates(new StringBuilder(), oneitems, meta));
+ }
+ solution.addAll(oneitems);
+ if(oneitems.size() >= 2) {
+ Duration timetwo = LOG.newDuration(STAT + "2-items.time").begin();
+ ArrayModifiableDBIDs survivors = DBIDUtil.newArray(ids.size());
+ List<? extends Itemset> candidates = buildFrequentTwoItemsets(oneitems, relation, dim, needed, ids, survivors);
+ ids = survivors; // Continue with reduced set of transactions.
+ LOG.statistics(timetwo.end());
+ if(LOG.isStatistics()) {
+ LOG.statistics(new LongStatistic(STAT + "2-items.frequent", candidates.size()));
+ LOG.statistics(new LongStatistic(STAT + "2-items.transactions", ids.size()));
+ }
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
+ }
+ solution.addAll(candidates);
+ for(int length = 3; candidates.size() >= length; length++) {
+ Duration timel = LOG.newDuration(STAT + length + "-items.time").begin();
+ // Join to get the new candidates
+ candidates = aprioriGenerate(candidates, length, dim);
+ if(LOG.isDebuggingFinest()) {
+ LOG.debugFinest(debugDumpCandidates(new StringBuilder().append("Before pruning: "), candidates, meta));
+ }
+ survivors = DBIDUtil.newArray(ids.size());
+ candidates = frequentItemsets(candidates, relation, needed, ids, survivors, length);
+ ids = survivors; // Continue with reduced set of transactions.
+ LOG.statistics(timel.end());
+ if(LOG.isStatistics()) {
+ LOG.statistics(new LongStatistic(STAT + length + "-items.frequent", candidates.size()));
+ LOG.statistics(new LongStatistic(STAT + length + "-items.transactions", ids.size()));
+ }
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
+ }
+ solution.addAll(candidates);
+ }
+ }
+ }
+ return new AprioriResult("APRIORI", "apriori", solution, meta);
+ }
+
+ /**
+ * Build the 1-itemsets.
+ *
+ * @param relation Data relation
+ * @param dim Maximum dimensionality
+ * @param needed Minimum support needed
+ * @return 1-itemsets
+ */
+ protected List<OneItemset> buildFrequentOneItemsets(final Relation<? extends SparseFeatureVector<?>> relation, final int dim, final int needed) {
+ // TODO: use TIntList and prefill appropriately to avoid knowing "dim"
+ // beforehand?
+ int[] counts = new int[dim];
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ SparseFeatureVector<?> bv = relation.get(iditer);
+ for(int it = bv.iter(); bv.iterValid(it); it = bv.iterAdvance(it)) {
+ counts[bv.iterDim(it)]++;
+ }
+ }
+ if(LOG.isStatistics()) {
+ LOG.statistics(new LongStatistic(STAT + "1-items.candidates", dim));
+ }
+ // Generate initial candidates of length 1.
+ List<OneItemset> frequent = new ArrayList<>(dim);
+ for(int i = 0; i < dim; i++) {
+ if(counts[i] >= needed) {
+ frequent.add(new OneItemset(i, counts[i]));
+ }
+ }
+ return frequent;
+ }
+
+ /**
+ * Build the 2-itemsets.
+ *
+ * @param oneitems Frequent 1-itemsets
+ * @param relation Data relation
+ * @param dim Maximum dimensionality
+ * @param needed Minimum support needed
+ * @param ids Objects to process
+   * @param survivors Output: objects with at least three 1-frequent items (those that can still support a 3-itemset).
+ * @return Frequent 2-itemsets
+ */
+ protected List<SparseItemset> buildFrequentTwoItemsets(List<OneItemset> oneitems, final Relation<BitVector> relation, final int dim, final int needed, DBIDs ids, ArrayModifiableDBIDs survivors) {
+ int f1 = 0;
+ long[] mask = BitsUtil.zero(dim);
+ for(OneItemset supported : oneitems) {
+ BitsUtil.setI(mask, supported.item);
+ f1++;
+ }
+ if(LOG.isStatistics()) {
+ LOG.statistics(new LongStatistic(STAT + "2-items.candidates", f1 * (long) (f1 - 1)));
+ }
+ // We quite aggressively size the map, assuming that almost each combination
+ // is present somewhere. If this won't fit into memory, we're likely running
+ // OOM somewhere later anyway!
+ TLongIntHashMap map = new TLongIntHashMap((f1 * (f1 - 1)) >>> 1);
+ final long[] scratch = BitsUtil.zero(dim);
+ for(DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
+ BitsUtil.setI(scratch, mask);
+ relation.get(iditer).andOnto(scratch);
+ int lives = 0;
+ for(int i = BitsUtil.nextSetBit(scratch, 0); i >= 0; i = BitsUtil.nextSetBit(scratch, i + 1)) {
+ for(int j = BitsUtil.nextSetBit(scratch, i + 1); j >= 0; j = BitsUtil.nextSetBit(scratch, j + 1)) {
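+          // Pack the item pair (i < j) into one long: i in the upper, j in the lower 32 bits.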
+ long key = (((long) i) << 32) | j;
+ map.put(key, 1 + map.get(key));
+ ++lives;
+ }
+ }
+ if(lives > 2) {
+ survivors.add(iditer);
+ }
+ }
+ // Generate candidates of length 2.
+ List<SparseItemset> frequent = new ArrayList<>(f1 * (int) Math.sqrt(f1));
+ for(TLongIntIterator iter = map.iterator(); iter.hasNext();) {
+ iter.advance(); // Trove style iterator - advance first.
+ if(iter.value() >= needed) {
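+        // Unpack the two item indices from the composite key.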
+ int ii = (int) (iter.key() >>> 32);
+        int ij = (int) (iter.key() & 0xFFFFFFFFL);
+ frequent.add(new SparseItemset(new int[] { ii, ij }, iter.value()));
+ }
+ }
+ // The hashmap may produce them out of order.
+ Collections.sort(frequent);
+ if(LOG.isStatistics()) {
+ LOG.statistics(new LongStatistic(STAT + "2-items.frequent", frequent.size()));
+ }
+ return frequent;
+ }
+
+ /**
+ * Prunes a given set of candidates to keep only those BitSets where all
+ * subsets of bits flipping one bit are frequent already.
+ *
+ * @param supported Support map
+ * @param length Itemset length
+ * @param dim Dimensionality
+ * @return itemsets that cannot be pruned by apriori
+ */
+ protected List<Itemset> aprioriGenerate(List<? extends Itemset> supported, int length, int dim) {
+ if(supported.size() < length) {
+ return Collections.emptyList();
+ }
+ long joined = 0L;
+ final int ssize = supported.size();
+ List<Itemset> candidateList = new ArrayList<>();
+
+ Itemset ref = supported.get(0);
+ if(ref instanceof SparseItemset) {
+ // TODO: we currently never switch to DenseItemSet. This may however be
+ // beneficial when we have few dimensions and many candidates.
+ // E.g. when length > 32 and dim < 100. But this needs benchmarking!
+ // For length < 5 and dim > 3000, SparseItemset unsurprisingly was faster
+
+ // Scratch item to use for searching.
+ SparseItemset scratch = new SparseItemset(new int[length - 1]);
+
+ for(int i = 0; i < ssize; i++) {
+ SparseItemset ii = (SparseItemset) supported.get(i);
+ prefix: for(int j = i + 1; j < ssize; j++) {
+ SparseItemset ij = (SparseItemset) supported.get(j);
+ if(!ii.prefixTest(ij)) {
+ break prefix; // Prefix doesn't match
+ }
+ joined++;
+          // Test subsets, (re-)using the scratch object.
+          // Start from the candidate minus its first item:
+          System.arraycopy(ii.indices, 1, scratch.indices, 0, length - 2);
+          scratch.indices[length - 2] = ij.indices[length - 2];
+          for(int k = 0; k < length - 2; k++) {
+            int pos = Collections.binarySearch(supported, scratch);
+            if(pos < 0) {
+              // Prefix was okay, but one other subset was not frequent
+              continue prefix;
+            }
+            // Restore item k, so scratch becomes the candidate minus item k + 1:
+            scratch.indices[k] = ii.indices[k];
+          }
+ int[] items = new int[length];
+ System.arraycopy(ii.indices, 0, items, 0, length - 1);
+ items[length - 1] = ij.indices[length - 2];
+ candidateList.add(new SparseItemset(items));
+ }
+ }
+ }
+ else if(ref instanceof DenseItemset) {
+ // Scratch item to use for searching.
+ DenseItemset scratch = new DenseItemset(BitsUtil.zero(dim), length - 1);
+
+ for(int i = 0; i < ssize; i++) {
+ DenseItemset ii = (DenseItemset) supported.get(i);
+ prefix: for(int j = i + 1; j < ssize; j++) {
+ DenseItemset ij = (DenseItemset) supported.get(j);
+ // Prefix test via "|i1 ^ i2| = 2"
+ System.arraycopy(ii.items, 0, scratch.items, 0, ii.items.length);
+ BitsUtil.xorI(scratch.items, ij.items);
+ if(BitsUtil.cardinality(scratch.items) != 2) {
+ break prefix; // No prefix match; since sorted, no more can follow!
+ }
+ ++joined;
+ // Ensure that the first difference is the last item in ii:
+ int first = BitsUtil.nextSetBit(scratch.items, 0);
+ if(BitsUtil.nextSetBit(ii.items, first + 1) > -1) {
+            break prefix; // The first difference is not the last item of ii: no valid join.
+ }
+ BitsUtil.orI(scratch.items, ij.items);
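+          // scratch.items now holds the joined candidate: ii plus the last item of ij.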
+
+ // Test subsets.
+ for(int l = length, b = BitsUtil.nextSetBit(scratch.items, 0); l > 2; l--, b = BitsUtil.nextSetBit(scratch.items, b + 1)) {
+ BitsUtil.clearI(scratch.items, b);
+ int pos = Collections.binarySearch(supported, scratch);
+ if(pos < 0) {
+ continue prefix;
+ }
+ BitsUtil.setI(scratch.items, b);
+ }
+ candidateList.add(new DenseItemset(scratch.items.clone(), length));
+ }
+ }
+ }
+ else {
+ throw new AbortException("Unexpected itemset type " + ref.getClass());
+ }
+ if(LOG.isStatistics()) {
+ // Naive pairwise approach
+ LOG.statistics(new LongStatistic(STAT + length + "-items.pairwise", (ssize * ((long) ssize - 1))));
+ LOG.statistics(new LongStatistic(STAT + length + "-items.joined", joined));
+ LOG.statistics(new LongStatistic(STAT + length + "-items.candidates", candidateList.size()));
+ }
+    // Note: candidates were generated in strictly ascending order,
+    // so we do not need to sort here.
+ return candidateList;
+ }
+
+ /**
+ * Returns the frequent BitSets out of the given BitSets with respect to the
+ * given database.
+ *
+ * @param candidates the candidates to be evaluated
+ * @param relation the database to evaluate the candidates on
+ * @param needed Minimum support needed
+ * @param ids Objects to process
+   * @param survivors Output: objects that matched more than {@code length} candidates and thus may support longer itemsets.
+ * @param length Itemset length
+ * @return Itemsets with sufficient support
+ */
+ protected List<? extends Itemset> frequentItemsets(List<? extends Itemset> candidates, Relation<BitVector> relation, int needed, DBIDs ids, ArrayModifiableDBIDs survivors, int length) {
+ if(candidates.size() < 1) {
+ return Collections.emptyList();
+ }
+ Itemset first = candidates.get(0);
+ // We have an optimized codepath for large and sparse itemsets.
+ // It probably pays off when #cands >> (avlen choose length) but we do not
+ // currently have the average number of items. These thresholds yield
+ // 2700, 6400, 12500, ... and thus will almost always be met until the
+ // number of frequent itemsets is about to break down to 0.
+ if(candidates.size() > length * length * length * 100 && first instanceof SparseItemset) {
+ // Assume that all itemsets are sparse itemsets!
+ @SuppressWarnings("unchecked")
+ List<SparseItemset> sparsecand = (List<SparseItemset>) candidates;
+ return frequentItemsetsSparse(sparsecand, relation, needed, ids, survivors, length);
+ }
+ for(DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
+ BitVector bv = relation.get(iditer);
+      // TODO: exploit that the candidate set is sorted?
+ int lives = 0;
+ for(Itemset candidate : candidates) {
+ if(candidate.containedIn(bv)) {
+ candidate.increaseSupport();
+ ++lives;
+ }
+ }
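+      // A (length + 1)-itemset has length + 1 subsets of the current length,
+      // so only transactions matching more than length candidates still matter.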
+ if(lives > length) {
+ survivors.add(iditer);
+ }
+ }
+ // Retain only those with minimum support:
+ List<Itemset> frequent = new ArrayList<>(candidates.size());
+ for(Iterator<? extends Itemset> iter = candidates.iterator(); iter.hasNext();) {
+ final Itemset candidate = iter.next();
+ if(candidate.getSupport() >= needed) {
+ frequent.add(candidate);
+ }
+ }
+ return frequent;
+ }
+
+ /**
+ * Returns the frequent BitSets out of the given BitSets with respect to the
+ * given database. Optimized implementation for SparseItemset.
+ *
+ * @param candidates the candidates to be evaluated
+ * @param relation the database to evaluate the candidates on
+ * @param needed Minimum support needed
+ * @param ids Objects to process
+   * @param survivors Output: objects that matched more than {@code length} candidates and thus may support longer itemsets.
+ * @param length Itemset length
+ * @return Itemsets with sufficient support
+ */
+ protected List<SparseItemset> frequentItemsetsSparse(List<SparseItemset> candidates, Relation<BitVector> relation, int needed, DBIDs ids, ArrayModifiableDBIDs survivors, int length) {
+ // Current search interval:
+ int begin = 0, end = candidates.size();
+ int[] scratchi = new int[length], iters = new int[length];
+ SparseItemset scratch = new SparseItemset(scratchi);
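+    // Merge join: enumerate each transaction's length-subsets in ascending
+    // order and binary-search them in the sorted candidate list.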
+ for(DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
+ BitVector bv = relation.get(iditer);
+ if(!initializeSearchItemset(bv, scratchi, iters)) {
+ continue;
+ }
+ int lives = 0;
+      begin = 0; // Reset the search interval for each transaction.
+      while(begin < end) {
+ begin = binarySearch(candidates, scratch, begin, end);
+        if(begin >= 0) { // Found a matching candidate (possibly at position 0).
+ candidates.get(begin).increaseSupport();
+ ++lives;
+ }
+ else {
+ begin = (-begin) - 1;
+ }
+ if(begin >= end || !nextSearchItemset(bv, scratchi, iters)) {
+ break;
+ }
+ }
+ if(lives > length) {
+ survivors.add(iditer);
+ }
+ }
+ // Retain only those with minimum support:
+ List<SparseItemset> frequent = new ArrayList<>(candidates.size());
+ for(Iterator<SparseItemset> iter = candidates.iterator(); iter.hasNext();) {
+ final SparseItemset candidate = iter.next();
+ if(candidate.getSupport() >= needed) {
+ frequent.add(candidate);
+ }
+ }
+ return frequent;
+ }
+
+ /**
+ * Initialize the scratch itemset.
+ *
+ * @param bv Bit vector data source
+ * @param scratchi Scratch itemset
+ * @param iters Iterator array
+   * @return {@code true} if the transaction contained at least {@code scratchi.length} items
+ */
+ private boolean initializeSearchItemset(BitVector bv, int[] scratchi, int[] iters) {
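+    // Point the iterators at the first scratchi.length items of the transaction.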
+ for(int i = 0; i < scratchi.length; i++) {
+ iters[i] = (i == 0) ? bv.iter() : bv.iterAdvance(iters[i - 1]);
+ if(iters[i] < 0) {
+ return false;
+ }
+ scratchi[i] = bv.iterDim(iters[i]);
+ }
+ return true;
+ }
+
+ /**
+ * Advance scratch itemset to the next.
+ *
+ * @param bv Bit vector data source
+ * @param scratchi Scratch itemset
+ * @param iters Iterator array
+   * @return {@code true} if there was another subset to enumerate
+ */
+  private boolean nextSearchItemset(BitVector bv, int[] scratchi, int[] iters) {
+    final int last = scratchi.length - 1;
+    for(int j = last; j >= 0; j--) {
+      int n = bv.iterAdvance(iters[j]);
+      if(n < 0) {
+        continue; // Position j is exhausted; carry to the left.
+      }
+      iters[j] = n;
+      scratchi[j] = bv.iterDim(n);
+      // Refill the positions right of j with the immediately following items:
+      int k = j + 1;
+      while(k <= last && (n = bv.iterAdvance(n)) >= 0) {
+        iters[k] = n;
+        scratchi[k] = bv.iterDim(n);
+        k++;
+      }
+      if(k > last) {
+        return true; // Success
+      }
+      // Not enough items remained; carry further to the left.
+    }
+    return false;
+  }
+
+  /**
+   * Binary search for the scratch itemset in a sorted range of candidates.
+   *
+   * @param candidates Candidates to search in
+   * @param scratch Scratch space
+   * @param begin Search interval begin
+   * @param end Search interval end
+   * @return Position of the element if found, otherwise
+   *         {@code -(insertionPoint) - 1}
+   */
+ private int binarySearch(List<SparseItemset> candidates, SparseItemset scratch, int begin, int end) {
+ --end;
+    while(begin <= end) {
+ final int mid = (begin + end) >>> 1;
+ SparseItemset midVal = candidates.get(mid);
+ int cmp = midVal.compareTo(scratch);
+
+ if(cmp < 0) {
+ begin = mid + 1;
+ }
+ else if(cmp > 0) {
+ end = mid - 1;
+ }
+ else {
+ return mid; // key found
+ }
+ }
+ return -(begin + 1); // key not found, return next
+ }
+
+ /**
+ * Debug method: output all itemsets.
+ *
+ * @param msg Output buffer
+ * @param candidates Itemsets to dump
+ * @param meta Metadata for item labels
+ * @return Output buffer
+ */
+ private StringBuilder debugDumpCandidates(StringBuilder msg, List<? extends Itemset> candidates, VectorFieldTypeInformation<BitVector> meta) {
+ msg.append(':');
+ for(Itemset itemset : candidates) {
+ msg.append(" [");
+ itemset.appendTo(msg, meta);
+ msg.append(']');
+ }
+ return msg;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.BIT_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter to specify the minimum support, in absolute or relative terms.
+ */
+ public static final OptionID MINSUPP_ID = new OptionID("apriori.minsupp", //
+ "Threshold for minimum support as minimally required number of transactions (if > 1) " //
+ + "or the minimum frequency (if <= 1).");
+
+ /**
+ * Parameter for minimum support.
+ */
+ protected double minsupp;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ DoubleParameter minsuppP = new DoubleParameter(MINSUPP_ID);
+ minsuppP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(minsuppP)) {
+ minsupp = minsuppP.getValue();
+ }
+ }
+
+ @Override
+ protected APRIORI makeInstance() {
+ return new APRIORI(minsupp);
+ }
+ }
+}
\ No newline at end of file