#!/usr/bin/env python # # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # # # # Generate a report of each area each committer has touched over all time. # # $ svn log -v ^/ > svnlogdata # $ ./analyze-svnlogs.py < svnlogdata > report.txt # # NOTE: ./logdata.py is written with a cached version of the data extracted # from 'svnlogdata'. That data can be analyzed in many ways, beyond # what this script is reporting. # import sys import re RE_LOG_HEADER = re.compile('^(r[0-9]+) ' '\| ([^|]+) ' '\| ([^|]+) ' '\| ([0-9]+) line') RE_PATH = re.compile(r' [MARD] (.*?)( \(from .*\))?$') SEPARATOR = '-' * 72 def parse_one_commit(logfile): line = logfile.readline().strip() if line != SEPARATOR: raise ParseError('missing separator: %s' % line) line = logfile.readline() if not line: # end of file! return None, None m = RE_LOG_HEADER.match(line) if not m: raise ParseError('could not match log header') revision = m.group(1) author = m.group(2) num_lines = int(m.group(4)) paths = set() # skip "Changed paths:" line = logfile.readline().strip() if not line: # there were no paths. just a blank before the log message. continue on. sys.stderr.write('Funny revision: %s\n' % revision) else: if not line.startswith('Changed'): raise ParseError('log not run with -v. paths missing in %s' % revision) # gather all the affected paths while 1: line = logfile.readline().rstrip() if not line: # just hit end of the changed paths break m = RE_PATH.match(line) if not m: raise ParseError('bad path in %s: %s' % (revision, line)) paths.add(m.group(1)) # suck up the log message for i in range(num_lines): logfile.readline() return author, paths def parse_file(logfile): authors = { } while True: author, paths = parse_one_commit(logfile) if author is None: return authors if author in authors: authors[author] = authors[author].union(paths) else: authors[author] = paths def write_logdata(authors): out = open('logdata.py', 'w') out.write('authors = {\n') for author, paths in authors.items(): out.write(" '%s': set([\n" % author) for path in paths: out.write(' %s,\n' % repr(path)) out.write(' ]),\n') out.write('}\n') def get_key(sectionroots, path): key = None for section in sectionroots: if path.startswith(section): # add one path element below top section to the key. elmts = len(section.split('/')) + 1 # strip first element (always empty because path starts with '/') key = tuple(path.split('/', elmts)[1:elmts]) break if key == None: # strip first element (always empty because path starts with '/') key = tuple(path.split('/', 3)[1:3]) return key def print_report(authors, sectionroots=[ ]): for author, paths in sorted(authors.items()): topdirs = { } for path in paths: key = get_key(sectionroots, path) if key in topdirs: topdirs[key] += 1 else: topdirs[key] = 1 print(author) tags = [ ] branches = [ ] for topdir in sorted(topdirs): if len(topdir) == 1: assert topdirs[topdir] == 1 print(' %s (ROOT)' % topdir[0]) else: if topdir[0] == 'tags': if not topdir[1] in tags: tags.append(topdir[1]) elif topdir[0] == 'branches': if not topdir[1] in branches: branches.append(topdir[1]) else: print(' %s (%d items)' % ('/'.join(topdir), topdirs[topdir])) if tags: print(' TAGS: %s' % ', '.join(tags)) if branches: print(' BRANCHES: %s' % ', '.join(branches)) print('') def run(logfile): try: import logdata authors = logdata.authors except ImportError: authors = parse_file(logfile) write_logdata(authors) sectionroots = [ '/trunk/subversion/include/private', '/trunk/subversion/include', '/trunk/subversion/tests', '/trunk/subversion', '/trunk/tools', '/trunk/contrib', '/trunk/doc', ]; print_report(authors, sectionroots) class ParseError(Exception): pass if __name__ == '__main__': if len(sys.argv) > 1: logfile = open(sys.argv[1]) else: logfile = sys.stdin run(logfile)