#!/usr/bin/env python3 # simple check if two build or source graphs are equal # # two graphs are equal if they contain the same set of nodes and edges from __future__ import print_function import sys sys.path.append('/usr/share/botch') from util import read_graph def graph_difference(g, h, verbose=False): if g.number_of_edges() != h.number_of_edges(): print("g has %d edges and h as %d edges" % (g.number_of_edges(), h.number_of_edges())) return False if g.number_of_nodes() != h.number_of_nodes(): print("g has %d nodes and h as %d nodes" % (g.number_of_nodes(), h.number_of_nodes())) return False def normalize_node(attr): if not attr.get('kind'): raise Exception("need kind node attribute") # We delete the cudfversion attribute because vertices should be unique # by their name and version and not by the cudfversion which may have # been differently assigned if attr.get('version') is not None: del attr['cudfversion'] if attr['kind'] == "SCC": if not attr.get('binaries'): raise Exception("nodes of kind SCC need the sources attribute") attr['sources'] = frozenset( [s for s in attr['sources'].split(',')]) elif attr['kind'] == "InstSet": if not attr.get('binaries'): raise Exception( "nodes of kind InstSet need the binaries attribute") attr['binaries'] = frozenset( [p for p in attr['binaries'].split(',')]) elif attr['kind'] == "SrcPkg": pass else: raise Exception("unknown node type %s" % attr['kind']) return frozenset(attr.items()) def normalize_edge(attr): # we delete the id attribute as it has no meaning. Edges are unique by # the vertices they connect because there can be no multi-edges del attr['id'] # only buildgraphs have the kind attribute if attr.get('kind'): if attr['kind'] == "buildsfrom": if not attr.get('binaries'): raise Exception( "edges of kind buildsfrom need the binaries attribute") attr['binaries'] = frozenset( [s for s in attr['binaries'].split(',')]) elif attr['kind'] == "builddep": pass else: raise Exception("unknown edge type %s" % attr['kind']) else: # this is a srcgraph if attr.get('binaries'): attr['binaries'] = frozenset( [s for s in attr['binaries'].split(',')]) else: raise Exception( "edge must have binaries attribute in srcgraph") if attr.get('strong'): attr['strong'] = frozenset( [s for s in attr['strong'].split(',')]) if attr.get('strong_direct'): attr['strong_direct'] = frozenset( [s for s in attr['strong_direct'].split(',')]) if attr.get("annotation"): attr['annotation'] = frozenset( [s for s in attr['annotation'].split(',')]) return frozenset(attr.items()) g_nodes = set([(n, normalize_node(attr)) for n, attr in g.nodes(data=True)]) h_nodes = set([(n, normalize_node(attr)) for n, attr in h.nodes(data=True)]) g_edges = set([(n1, n2, normalize_edge(attr)) for n1, n2, attr in g.edges(data=True)]) h_edges = set([(n1, n2, normalize_edge(attr)) for n1, n2, attr in h.edges(data=True)]) if h_nodes == g_nodes and g_edges == h_edges: return True print("the graphs disagree on either the node naming, their values or " + "the graph structure", file=sys.stderr) # if this test did not succeed, then maybe just the node names are # different but their content actually matches: g_node_vals = set([a for _, a in g_nodes]) h_node_vals = set([a for _, a in h_nodes]) if g_node_vals != h_node_vals: print("the set of node attributes of g and h differ") if g_node_vals - h_node_vals: print("nodes in g but not in h", g_node_vals - h_node_vals) if h_node_vals - g_node_vals: print("nodes in h but not in g", h_node_vals - g_node_vals) return False if (len(g_nodes) != len(g_node_vals) or len(h_nodes) != len(h_node_vals)): print("there exist duplicate node attributes - this should not " + "happen for either build or source graphs", file=sys.stderr) return False print("the graphs disagree on either the node naming or the graph " + "structure", file=sys.stderr) # we now know that the set of node attributes is equal and that there are # no duplicates # thus we can now create a mapping between node ids of the two graphs # this dictionary stores for each attribute its node id in h node_mapping_h = {attr: n for n, attr in h_nodes} # this dictionary stores for each node id in g the node id in h node_mapping = {n: node_mapping_h[attr] for n, attr in g_nodes} # now go over all edges in g and check if they are in h as well # also check whether the attributes are equal # we do not check the other way round because we know that the number of # edges in both graphs is equal for n1, n2, attr in g_edges: n3 = node_mapping[n1] n4 = node_mapping[n2] if not h.has_edge(n3, n4): print("edge in g but not in h", file=sys.stderr) print(g.adj[n1][n2]) return False if attr != frozenset(h.adj[n3][n4].items()): print("edge attributes do not agree", file=sys.stderr) print(attr) print(h.adj[n3][n4]) return False return True if __name__ == '__main__': import argparse parser = argparse.ArgumentParser( description=("Check if two graphs are the same")) parser.add_argument( "g", type=read_graph, help="graph g in GraphML or dot format") parser.add_argument( "h", type=read_graph, help="graph h in GraphML or dot format") parser.add_argument( '-v', '--verbose', action='store_true', help='be verbose') args = parser.parse_args() # TODO: also support graphs that are neither buildgraph nor srcgraph ret = graph_difference(args.g, args.h, args.verbose) exit(not ret)