From 5bb7578c91807292b6046c1168bd9531f45fa342 Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Sat, 2 Mar 2013 17:44:17 +0100 Subject: More fun with subcircuit mining --- libs/subcircuit/README | 15 +++++++----- libs/subcircuit/subcircuit.cc | 56 +++++++++++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 24 deletions(-) diff --git a/libs/subcircuit/README b/libs/subcircuit/README index a2467f6a..f304d2a5 100644 --- a/libs/subcircuit/README +++ b/libs/subcircuit/README @@ -333,16 +333,19 @@ at most maxNodes nodes that occurs at least minMatches times: std::vector results; mySolver.mine(results, minNodes, maxNodes, minMatches); -The mine() method has an optional fifth parameter that limits the number -of matches counted in one graph. This can be useful when mining for circuits -that are found in at least a number of graphs. E.g. the following call -would find all subcircuits with 5 nodes that are found in at least 7 of -the registered graphs: +The mine() method has an optional fifth parameter that limits the number of +matches counted in one graph. This can be useful when mining for circuits that +are found in at least a number of graphs. E.g. the following call would find +all subcircuits with 5 nodes that are found in at least 7 of the registered +graphs: mySolver.mine(results, 5, 5, 7, 1); Note that this miner is not very efficient and therefore its use is not -recommended for large circuits. +recommended for large circuits. Also note that the miner is working under the +assumption that subgraph isomorphism is bidirectional. This is not the case in +circuits with gates with shorted pins. This can result in undetected frequent +subcircuits in some corner cases. Debugging diff --git a/libs/subcircuit/subcircuit.cc b/libs/subcircuit/subcircuit.cc index bfabd473..5a3d6132 100644 --- a/libs/subcircuit/subcircuit.cc +++ b/libs/subcircuit/subcircuit.cc @@ -1176,7 +1176,7 @@ class SubCircuit::SolverWorker verbose = backupVerbose; } - int testForMining(std::vector &results, std::set &usedSets, std::vector> &nextPool, NodeSet &testSet, + int testForMining(std::vector &results, std::set &usedSets, std::set &nextPool, NodeSet &testSet, const std::string &graphId, const Graph &graph, int minNodes, int minMatches, int limitMatchesPerGraph) { // printf("test: %s\n", testSet.to_string().c_str()); @@ -1205,10 +1205,21 @@ class SubCircuit::SolverWorker // printf("match: %s%s\n", resultSet.to_string().c_str(), usedSets.count(resultSet) > 0 ? " (dup)" : ""); +#if 0 if (usedSets.count(resultSet) > 0) { - // FIXME: assert(thisNodeSetSet.count(resultSet) > 0); + // Because of shorted pins isomorphisim is not always bidirectional! + // + // This means that the following assert is not true in all cases and subgraph A might + // show up in the matches for subgraph B but not vice versa... This also means that the + // order in which subgraphs are processed has an impact on the results set. + // + assert(thisNodeSetSet.count(resultSet) > 0); continue; } +#else + if (thisNodeSetSet.count(resultSet) > 0) + continue; +#endif usedSets.insert(resultSet); thisNodeSetSet.insert(resultSet); @@ -1236,16 +1247,18 @@ class SubCircuit::SolverWorker results.push_back(result); } - nextPool.push_back(thisNodeSetSet); + nextPool.insert(thisNodeSetSet.begin(), thisNodeSetSet.end()); return matches; } - void findNodePairs(std::vector &results, std::vector> &nodePairs, int minNodes, int minMatches, int limitMatchesPerGraph) + void findNodePairs(std::vector &results, std::set &nodePairs, int minNodes, int minMatches, int limitMatchesPerGraph) { + int groupCounter = 0; std::set usedPairs; + nodePairs.clear(); if (verbose) - printf("\nFind frequent node pairs:\n"); + printf("\nMining for frequent node pairs:\n"); for (auto &graph_it : graphData) for (int node1 = 0; node1 < int(graph_it.second.graph.nodes.size()); node1++) @@ -1263,25 +1276,28 @@ class SubCircuit::SolverWorker if (verbose) printf("Pair %s[%s,%s] -> %d%s\n", graphId.c_str(), graph.nodes[node1].nodeId.c_str(), - graph.nodes[node2].nodeId.c_str(), matches, matches < minMatches ? " *min*" : ""); + graph.nodes[node2].nodeId.c_str(), matches, matches < minMatches ? " *purge*" : ""); + + if (minMatches <= matches) + groupCounter++; } if (verbose) - printf("found %d.\n", int(nodePairs.size())); + printf("Found a total of %d subgraphs in %d groups.\n", int(nodePairs.size()), groupCounter); } - void findNextPool(std::vector &results, std::vector> &pool, + void findNextPool(std::vector &results, std::set &pool, int oldSetSize, int increment, int minNodes, int minMatches, int limitMatchesPerGraph) { - std::vector> nextPool; + int groupCounter = 0; std::map> poolPerGraph; + std::set nextPool; - for (auto &i1 : pool) - for (auto &i2 : i1) - poolPerGraph[i2.graphId].push_back(&i2); + for (auto &it : pool) + poolPerGraph[it.graphId].push_back(&it); if (verbose) - printf("\nFind frequent subcircuits of size %d using increment %d:\n", oldSetSize+increment, increment); + printf("\nMining for frequent subcircuits of size %d using increment %d:\n", oldSetSize+increment, increment); std::set usedSets; for (auto &it : poolPerGraph) @@ -1309,16 +1325,20 @@ class SubCircuit::SolverWorker printf("%s%s", first ? "" : ",", graph.nodes[nodeIdx].nodeId.c_str()); first = false; } - printf("] -> %d%s\n", matches, matches < minMatches ? " *min*" : ""); + printf("] -> %d%s\n", matches, matches < minMatches ? " *purge*" : ""); } + + if (minMatches <= matches) + groupCounter++; } - if (verbose) - printf("found %d.\n", int(nextPool.size())); pool.swap(nextPool); + + if (verbose) + printf("Found a total of %d subgraphs in %d groups.\n", int(pool.size()), groupCounter); } - // interface to the public Solver class + // interface to the public solver class protected: SolverWorker(Solver *userSolver) : userSolver(userSolver), verbose(false) @@ -1400,7 +1420,7 @@ protected: void mine(std::vector &results, int minNodes, int maxNodes, int minMatches, int limitMatchesPerGraph) { int nodeSetSize = 2; - std::vector> pool; + std::set pool; findNodePairs(results, pool, minNodes, minMatches, limitMatchesPerGraph); while ((maxNodes < 0 || nodeSetSize < maxNodes) && pool.size() > 0) -- cgit v1.2.3