summaryrefslogtreecommitdiff
path: root/tools/graph-difference.py
blob: 1e6e7516cc4c8a5dd34c60ab739089c4e27209c8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python3
# simple check if two build or source graphs are equal
#
# two graphs are equal if they contain the same set of nodes and edges

from __future__ import print_function
import sys
sys.path.append('/usr/share/botch')
from util import read_graph


def graph_difference(g, h, verbose=False):
    if g.number_of_edges() != h.number_of_edges():
        print("g has %d edges and h as %d edges" %
              (g.number_of_edges(), h.number_of_edges()))
        return False

    if g.number_of_nodes() != h.number_of_nodes():
        print("g has %d nodes and h as %d nodes" %
              (g.number_of_nodes(), h.number_of_nodes()))
        return False

    def normalize_node(attr):
        if not attr.get('kind'):
            raise Exception("need kind node attribute")
        # We delete the cudfversion attribute because vertices should be unique
        # by their name and version and not by the cudfversion which may have
        # been differently assigned
        if attr.get('version') is not None:
            del attr['cudfversion']
        if attr['kind'] == "SCC":
            if not attr.get('binaries'):
                raise Exception("nodes of kind SCC need the sources attribute")
            attr['sources'] = frozenset(
                [s for s in attr['sources'].split(',')])
        elif attr['kind'] == "InstSet":
            if not attr.get('binaries'):
                raise Exception(
                    "nodes of kind InstSet need the binaries attribute")
            attr['binaries'] = frozenset(
                [p for p in attr['binaries'].split(',')])
        elif attr['kind'] == "SrcPkg":
            pass
        else:
            raise Exception("unknown node type %s" % attr['kind'])
        return frozenset(attr.items())

    def normalize_edge(attr):
        # we delete the id attribute as it has no meaning. Edges are unique by
        # the vertices they connect because there can be no multi-edges
        del attr['id']
        # only buildgraphs have the kind attribute
        if attr.get('kind'):
            if attr['kind'] == "buildsfrom":
                if not attr.get('binaries'):
                    raise Exception(
                        "edges of kind buildsfrom need the binaries attribute")
                attr['binaries'] = frozenset(
                    [s for s in attr['binaries'].split(',')])
            elif attr['kind'] == "builddep":
                pass
            else:
                raise Exception("unknown edge type %s" % attr['kind'])
        else:
            # this is a srcgraph
            if attr.get('binaries'):
                attr['binaries'] = frozenset(
                    [s for s in attr['binaries'].split(',')])
            else:
                raise Exception(
                    "edge must have binaries attribute in srcgraph")
            if attr.get('strong'):
                attr['strong'] = frozenset(
                    [s for s in attr['strong'].split(',')])
            if attr.get('strong_direct'):
                attr['strong_direct'] = frozenset(
                    [s for s in attr['strong_direct'].split(',')])
        if attr.get("annotation"):
            attr['annotation'] = frozenset(
                [s for s in attr['annotation'].split(',')])
        return frozenset(attr.items())

    g_nodes = set([(n, normalize_node(attr))
                  for n, attr in g.nodes(data=True)])
    h_nodes = set([(n, normalize_node(attr))
                  for n, attr in h.nodes(data=True)])
    g_edges = set([(n1, n2, normalize_edge(attr))
                  for n1, n2, attr in g.edges(data=True)])
    h_edges = set([(n1, n2, normalize_edge(attr))
                  for n1, n2, attr in h.edges(data=True)])

    if h_nodes == g_nodes and g_edges == h_edges:
        return True

    print("the graphs disagree on either the node naming, their values or " +
          "the graph structure", file=sys.stderr)

    # if this test did not succeed, then maybe just the node names are
    # different but their content actually matches:

    g_node_vals = set([a for _, a in g_nodes])
    h_node_vals = set([a for _, a in h_nodes])

    if g_node_vals != h_node_vals:
        print("the set of node attributes of g and h differ")
        if g_node_vals - h_node_vals:
            print("nodes in g but not in h", g_node_vals - h_node_vals)
        if h_node_vals - g_node_vals:
            print("nodes in h but not in g", h_node_vals - g_node_vals)
        return False

    if (len(g_nodes) != len(g_node_vals) or len(h_nodes) != len(h_node_vals)):
        print("there exist duplicate node attributes - this should not " +
              "happen for either build or source graphs", file=sys.stderr)
        return False

    print("the graphs disagree on either the node naming or the graph " +
          "structure", file=sys.stderr)

    # we now know that the set of node attributes is equal and that there are
    # no duplicates
    # thus we can now create a mapping between node ids of the two graphs

    # this dictionary stores for each attribute its node id in h
    node_mapping_h = {attr: n for n, attr in h_nodes}

    # this dictionary stores for each node id in g the node id in h
    node_mapping = {n: node_mapping_h[attr] for n, attr in g_nodes}

    # now go over all edges in g and check if they are in h as well
    # also check whether the attributes are equal
    # we do not check the other way round because we know that the number of
    # edges in both graphs is equal
    for n1, n2, attr in g_edges:
        n3 = node_mapping[n1]
        n4 = node_mapping[n2]
        if not h.has_edge(n3, n4):
            print("edge in g but not in h", file=sys.stderr)
            print(g.adj[n1][n2])
            return False
        if attr != frozenset(h.adj[n3][n4].items()):
            print("edge attributes do not agree", file=sys.stderr)
            print(attr)
            print(h.adj[n3][n4])
            return False

    return True


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description=("Check if two graphs are the same"))
    parser.add_argument(
        "g", type=read_graph, help="graph g in GraphML or dot format")
    parser.add_argument(
        "h", type=read_graph, help="graph h in GraphML or dot format")
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='be verbose')
    args = parser.parse_args()
    # TODO: also support graphs that are neither buildgraph nor srcgraph
    ret = graph_difference(args.g, args.h, args.verbose)
    exit(not ret)