diff options
author | James McCoy <jamessan@debian.org> | 2018-07-31 22:26:52 -0400 |
---|---|---|
committer | James McCoy <jamessan@debian.org> | 2018-07-31 22:26:52 -0400 |
commit | e20a507113ff1126aeb4a97b806390ea377fe292 (patch) | |
tree | 0260b3a40387d7f994fbadaf22f1e9d3c080b09f /tools/dev/trails.py | |
parent | c64debffb81d2fa17e9a72af7199ccf88b3cc556 (diff) |
New upstream version 1.10.2
Diffstat (limited to 'tools/dev/trails.py')
-rwxr-xr-x | tools/dev/trails.py | 229 |
1 files changed, 229 insertions, 0 deletions
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
"""Analyze Subversion trail usage statistics.

Reads a trail debug log (one trail per line) and prints a summary, the
distribution of trail lengths, the frequency of each distinct trail and
the frequency of each txn_body.
"""

## See the usage() function for operating instructions. ##

import re
try:
    # Python >=2.6
    from functools import reduce
except ImportError:
    # Python <2.6
    pass
import sys
import operator

# NOTE: `reduce` and `operator` are no longer used below; the imports are
# kept so that nothing importing them from this module breaks.

# One log line: "(txn_body, filename, lineno, txn): ops..."
_re_trail = re.compile(
    r'\((?P<txn_body>[a-z_]*), (?P<filename>[a-z_\-./]*), '
    r'(?P<lineno>[0-9]*), (?P<txn>0|1)\): (?P<ops>.*)')
# One "(table, op)" pair inside the ops part of a log line.
_re_table_op = re.compile(r'\(([a-z]*), ([a-z]*)\)')

_separator = '------------------------------------------------------------\n'


def parse_trails_log(infile):
    """Parse a trail log and return the trails that used a txn.

    INFILE is an iterable of text lines.  Every line must match the
    trail format; on the first line that does not, an error is written
    to stderr and the process exits with status 1.

    Returns a list of (txn_body, trail) pairs, where txn_body is a
    (name, filename, lineno) tuple and trail is the list of (table, op)
    pairs in the reverse of the order they appear on the log line.
    Lines whose txn flag is 0 are skipped.
    """
    trails = []
    for lineno, line in enumerate(infile, 1):
        m = _re_trail.match(line)

        if not m:
            sys.stderr.write('Invalid input, line %u:\n%s\n' % (lineno, line))
            sys.exit(1)

        if not int(m.group('txn')):
            ### We're not interested in trails that don't use txns at
            ### this point.
            continue

        txn_body = (m.group('txn_body'), m.group('filename'),
                    int(m.group('lineno')))
        trail = _re_table_op.findall(m.group('ops'))
        trail.reverse()

        if not trail:
            # Empty trails are reported but still recorded.
            sys.stderr.write('Warning! Empty trail at line %u:\n%s'
                             % (lineno, line))

        trails.append((txn_body, trail))

    return trails


def output_summary(trails, outfile):
    """Write overall statistics about TRAILS to OUTFILE.

    Reports the number of trails, the total number of ops, and the
    maximum, median and average number of ops per trail.  With no
    trails at all, every figure is reported as zero.
    """
    ops = sorted(len(trail) for (_txn_body, trail) in trails)

    total_trails = len(ops)
    total_ops = sum(ops)
    if ops:
        max_ops = ops[-1]
        # Floor division: a float is not a valid list index on Python 3.
        median_ops = ops[total_trails // 2]
        average_ops = float(total_ops) / total_trails
    else:
        # Nothing to index or divide by.
        max_ops = 0
        median_ops = 0
        average_ops = 0.0

    outfile.write(_separator)
    outfile.write('Summary\n')
    outfile.write(_separator)
    outfile.write('Total number of trails: %10i\n' % total_trails)
    outfile.write('Total number of ops: %10i\n' % total_ops)
    outfile.write('max ops/trail: %10i\n' % max_ops)
    outfile.write('median ops/trail: %10i\n' % median_ops)
    outfile.write('average ops/trail: %10.2f\n' % average_ops)
    outfile.write('\n')


def _freqtable_key(item_freq):
    """Sort key for (item, frequency): highest frequency first, then item."""
    (item, freq) = item_freq
    return (-freq, item)


# custom compare function
def _freqtable_cmp(a_b, c_d):
    """Three-way comparison of two (item, frequency) pairs.

    Orders by descending frequency, then by ascending item.  Returns a
    negative number, zero or a positive number.
    """
    left = _freqtable_key(a_b)
    right = _freqtable_key(c_d)
    # Portable replacement for the cmp() builtin removed in Python 3.
    return (left > right) - (left < right)


def list_frequencies(list):
    """
    Given a list, return a list composed of (item, frequency)
    in sorted order

    The result is ordered by descending frequency; items with the same
    frequency are ordered by ascending item value.  Items must be
    hashable and mutually comparable.
    """
    # The parameter name shadows the list() builtin and is kept only for
    # interface compatibility, so list() must not be called in here.
    counter = {}
    for item in list:
        counter[item] = counter.get(item, 0) + 1

    return sorted(counter.items(), key=_freqtable_key)


def output_trail_length_frequencies(trails, outfile):
    """Write a table of how often each trail length occurs in TRAILS."""
    ops = [len(trail) for (_txn_body, trail) in trails]

    total_trails = len(ops)
    frequencies = list_frequencies(ops)

    outfile.write(_separator)
    outfile.write('Trail length frequencies\n')
    outfile.write(_separator)
    outfile.write('ops/trail frequency percentage\n')
    for (r, f) in frequencies:
        p = float(f) * 100 / total_trails
        outfile.write('%4i %6i %5.2f\n' % (r, f, p))
    outfile.write('\n')


def output_trail(outfile, trail, column=0):
    """Write TRAIL to OUTFILE as a comma-separated, wrapped list.

    COLUMN is the column at which the caller has already positioned the
    output: continuation lines are indented by that many spaces and the
    wrap width is reduced by it.  An empty trail is written as
    '<empty>'.  Otherwise a blank line follows the trail.
    """
    ### Output the trail itself, in its own column

    if len(trail) == 0:
        outfile.write('<empty>\n')
        return

    line = str(trail[0])
    for op in trail[1:]:
        op_str = str(op)
        if len(line) + len(op_str) > 75 - column:
            # Flush the full line and start a new, indented one.
            outfile.write('%s,\n' % line)
            outfile.write(' ' * column)
            line = op_str
        else:
            line = line + ', ' + op_str
    outfile.write('%s\n' % line)

    outfile.write('\n')


def output_trail_frequencies(trails, outfile):
    """Write each distinct (txn_body, trail) pair with its frequency."""
    total_trails = len(trails)

    # Trails are lists; turn them into tuples so they can be dict keys.
    ttrails = [(txn_body, tuple(trail)) for (txn_body, trail) in trails]

    frequencies = list_frequencies(ttrails)

    outfile.write(_separator)
    outfile.write('Trail frequencies\n')
    outfile.write(_separator)
    outfile.write('frequency percentage ops/trail trail\n')
    for (((txn_body, filename, lineno), trail), f) in frequencies:
        p = float(f) * 100 / total_trails
        outfile.write('-- %s - %s:%u --\n' % (txn_body, filename, lineno))
        outfile.write('%6i %5.2f %4i ' % (f, p, len(trail)))
        output_trail(outfile, trail, 37)


def output_txn_body_frequencies(trails, outfile):
    """Write how often each txn_body (name, file, line) occurs in TRAILS."""
    bodies = [txn_body for (txn_body, _trail) in trails]

    total_trails = len(trails)
    frequencies = list_frequencies(bodies)

    outfile.write(_separator)
    outfile.write('txn_body frequencies\n')
    outfile.write(_separator)
    outfile.write('frequency percentage txn_body\n')
    for ((txn_body, filename, lineno), f) in frequencies:
        p = float(f) * 100 / total_trails
        outfile.write('%6i %5.2f %s - %s:%u\n'
                      % (f, p, txn_body, filename, lineno))


def usage(pgm):
    """Write operating instructions for program name PGM to stderr."""
    w = sys.stderr.write
    w("%s: a program for analyzing Subversion trail usage statistics.\n" % pgm)
    w("\n")
    w("Usage:\n")
    w("\n")
    w(" Compile Subversion with -DSVN_FS__TRAIL_DEBUG, which will cause\n")
    w(" it to print trail statistics to stderr. Save the stats to a file,\n")
    w(" invoke %s on the file, and ponder the output.\n" % pgm)
    w("\n")


def main(argv):
    """Run the analysis for command line ARGV and return an exit status.

    With no argument the log is read from stdin; with one argument it is
    read from the named file.  Returns 1 on a usage error or when the
    file cannot be opened, 0 otherwise.
    """
    if len(argv) > 2:
        sys.stderr.write("Error: too many arguments\n\n")
        usage(argv[0])
        return 1

    if len(argv) == 1:
        trails = parse_trails_log(sys.stdin)
    else:
        try:
            infile = open(argv[1])
        except IOError:
            sys.stderr.write("Error: unable to open '%s'\n\n" % argv[1])
            usage(argv[0])
            return 1
        try:
            trails = parse_trails_log(infile)
        finally:
            # Close the log even if parsing exits early.
            infile.close()

    output_summary(trails, sys.stdout)
    output_trail_length_frequencies(trails, sys.stdout)
    output_trail_frequencies(trails, sys.stdout)
    output_txn_body_frequencies(trails, sys.stdout)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))