summaryrefslogtreecommitdiff
path: root/tools/dev/trails.py
diff options
context:
space:
mode:
authorJames McCoy <jamessan@debian.org>2018-07-31 22:26:52 -0400
committerJames McCoy <jamessan@debian.org>2018-07-31 22:26:52 -0400
commite20a507113ff1126aeb4a97b806390ea377fe292 (patch)
tree0260b3a40387d7f994fbadaf22f1e9d3c080b09f /tools/dev/trails.py
parentc64debffb81d2fa17e9a72af7199ccf88b3cc556 (diff)
New upstream version 1.10.2
Diffstat (limited to 'tools/dev/trails.py')
-rwxr-xr-xtools/dev/trails.py229
1 files changed, 229 insertions, 0 deletions
diff --git a/tools/dev/trails.py b/tools/dev/trails.py
new file mode 100755
index 0000000..917d234
--- /dev/null
+++ b/tools/dev/trails.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+## See the usage() function for operating instructions. ##
+
+import re
+try:
+ # Python >=2.6
+ from functools import reduce
+except ImportError:
+ # Python <2.6
+ pass
+import sys
+import operator
+
# Matches one line of trail debug output:
#   (<txn_body>, <filename>, <lineno>, <0|1>): <ops>
# Raw strings are used so that the backslash escapes reach the regex engine
# untouched instead of being parsed as (invalid) string escape sequences.
_re_trail = re.compile(r'\((?P<txn_body>[a-z_]*), (?P<filename>[a-z_\-./]*), (?P<lineno>[0-9]*), (?P<txn>0|1)\): (?P<ops>.*)')

# Matches one "(word, word)" pair inside the <ops> part of a trail line.
_re_table_op = re.compile(r'\(([a-z]*), ([a-z]*)\)')

# Horizontal rule written above and below each report heading.
_separator = '------------------------------------------------------------\n'
+
def parse_trails_log(infile):
    """Parse trail debug output read from INFILE.

    Every line must match `_re_trail`; the first line that does not is
    reported on stderr and the process exits with status 1.  Lines whose
    txn flag is 0 are skipped.

    Returns a list of (txn_body, trail) tuples, where txn_body is a
    (name, filename, lineno) tuple and trail is the list of pairs found by
    `_re_table_op` in the ops part of the line, in reverse order of
    appearance.
    """
    parsed = []

    for number, text in enumerate(infile.readlines(), 1):
        match = _re_trail.match(text)
        if match is None:
            sys.stderr.write('Invalid input, line %u:\n%s\n' % (number, text))
            sys.exit(1)

        ### We're not interested in trails that don't use txns at this point.
        if int(match.group('txn')) == 0:
            continue

        body_key = (match.group('txn_body'),
                    match.group('filename'),
                    int(match.group('lineno')))

        # The ops are stored last-to-first.
        ops = _re_table_op.findall(match.group('ops'))[::-1]
        if not ops:
            # An empty trail is only warned about; it is still recorded.
            sys.stderr.write('Warning! Empty trail at line %u:\n%s' % (number, text))

        parsed.append((body_key, ops))

    return parsed
+
+
def output_summary(trails, outfile):
    """Write overall trail statistics to OUTFILE.

    TRAILS is a list of (txn_body, trail) tuples.  The report contains the
    number of trails, the total number of ops, and the maximum, median and
    average number of ops per trail.  When TRAILS is empty all figures are
    reported as zero instead of raising.
    """
    ops = sorted(len(trail) for (_txn_body, trail) in trails)

    total_trails = len(ops)
    total_ops = sum(ops)
    if total_trails:
        max_ops = ops[-1]
        # Floor division: a list index must be an integer on every
        # Python version.
        median_ops = ops[total_trails // 2]
        average_ops = float(total_ops) / total_trails
    else:
        # Nothing to summarize; avoid indexing an empty list and dividing
        # by zero.
        max_ops = 0
        median_ops = 0
        average_ops = 0.0

    outfile.write(_separator)
    outfile.write('Summary\n')
    outfile.write(_separator)
    outfile.write('Total number of trails: %10i\n' % total_trails)
    outfile.write('Total number of ops: %10i\n' % total_ops)
    outfile.write('max ops/trail: %10i\n' % max_ops)
    outfile.write('median ops/trail: %10i\n' % median_ops)
    outfile.write('average ops/trail: %10.2f\n' % average_ops)
    outfile.write('\n')
+
+
+# custom compare function
def _freqtable_cmp(a_b, c_d):
    """Old-style comparison of two (item, frequency) pairs.

    Returns a negative, zero or positive integer.  Pairs with a higher
    frequency order first; pairs with equal frequency are ordered by
    ascending item.
    """
    (a, b) = a_b
    (c, d) = c_d
    # Operands are swapped (d against b) so that larger frequencies sort
    # first.  The (x > y) - (x < y) form stands in for cmp(), which is not
    # available on Python 3.
    result = (d > b) - (d < b)
    if not result:
        # Tie on frequency: fall back to the items themselves.  The result
        # is kept in its own variable so that `c` is still the other item.
        result = (a > c) - (a < c)
    return result
+
def list_frequencies(list):
    """
    Given a list, return a list composed of (item, frequency)
    in sorted order

    The result is ordered by descending frequency; items with the same
    frequency are ordered by ascending item.  Items must be hashable.
    """
    # NOTE: the parameter name shadows the builtin `list` and is kept only
    # for interface compatibility, so the builtin must not be called here.
    counter = {}
    for item in list:
        counter[item] = counter.get(item, 0) + 1

    # A key function works on both Python 2 and 3, unlike a positional
    # comparison function passed to sort().
    return sorted(counter.items(), key=lambda pair: (-pair[1], pair[0]))
+
+
def output_trail_length_frequencies(trails, outfile):
    """Write a table of how often each trail length occurs to OUTFILE.

    TRAILS is a list of (txn_body, trail) tuples.  One row is written per
    distinct trail length, giving the length, its frequency and that
    frequency as a percentage of all trails, in the order returned by
    list_frequencies().
    """
    lengths = [len(trail) for (_txn_body, trail) in trails]
    trail_count = len(lengths)
    table = list_frequencies(lengths)

    for heading in (_separator, 'Trail length frequencies\n', _separator):
        outfile.write(heading)
    outfile.write('ops/trail frequency percentage\n')

    for (length, count) in table:
        percent = float(count) * 100 / trail_count
        outfile.write('%4i %6i %5.2f\n' % (length, count, percent))

    outfile.write('\n')
+
+
def output_trail(outfile, trail, column = 0):
    """Write TRAIL to OUTFILE as a comma-separated, line-wrapped list.

    Each op is written using str().  A new output line is started, indented
    by COLUMN spaces, whenever appending the next op would push the current
    line past 75 - COLUMN characters (separator not counted).  The list is
    followed by a blank line.  An empty TRAIL is written as '<empty>' with
    no blank line after it.
    """
    if not trail:
        outfile.write('<empty>\n')
        return

    budget = 75 - column
    indent = ' ' * column
    pieces = [str(op) for op in trail]

    current = pieces[0]
    for piece in pieces[1:]:
        if len(current) + len(piece) <= budget:
            # Still fits: keep accumulating on the same output line.
            current = current + ', ' + piece
            continue
        # Flush the full line and continue in the trail's own column.
        outfile.write('%s,\n' % current)
        outfile.write(indent)
        current = piece

    outfile.write('%s\n' % current)
    outfile.write('\n')
+
+
def output_trail_frequencies(trails, outfile):
    """Write a table of how often each distinct trail occurs to OUTFILE.

    TRAILS is a list of (txn_body, trail) tuples, where txn_body is a
    (name, filename, lineno) tuple.  Two entries count as the same trail
    when both the txn_body and the sequence of ops are equal.  For each
    distinct trail a heading line is written, then the frequency, the
    percentage of all trails, the number of ops and the trail itself.
    """
    trail_count = len(trails)

    # Trails are lists; convert them to tuples so they can be counted.
    hashable = [(txn_body, tuple(trail)) for (txn_body, trail) in trails]
    table = list_frequencies(hashable)

    for heading in (_separator, 'Trail frequencies\n', _separator):
        outfile.write(heading)
    outfile.write('frequency percentage ops/trail trail\n')

    for (key, count) in table:
        ((name, path, lineno), ops) = key
        percent = float(count) * 100 / trail_count
        outfile.write('-- %s - %s:%u --\n' % (name, path, lineno))
        outfile.write('%6i %5.2f %4i ' % (count, percent, len(ops)))
        output_trail(outfile, ops, 37)
+
+
def output_txn_body_frequencies(trails, outfile):
    """Write a table of how often each txn_body occurs to OUTFILE.

    TRAILS is a list of (txn_body, trail) tuples, where txn_body is a
    (name, filename, lineno) tuple.  One row is written per distinct
    txn_body, giving its frequency, the percentage of all trails, and the
    name with its file and line.
    """
    trail_count = len(trails)
    table = list_frequencies([txn_body for (txn_body, _trail) in trails])

    for heading in (_separator, 'txn_body frequencies\n', _separator):
        outfile.write(heading)
    outfile.write('frequency percentage txn_body\n')

    for (key, count) in table:
        (name, path, lineno) = key
        percent = float(count) * 100 / trail_count
        row = '%6i %5.2f %s - %s:%u\n' % (count, percent, name, path, lineno)
        outfile.write(row)
+
+
def usage(pgm):
    """Write operating instructions to stderr.

    PGM is the program name shown in the message.
    """
    w = sys.stderr.write
    w("%s: a program for analyzing Subversion trail usage statistics.\n" % pgm)
    w("\n")
    w("Usage:\n")
    w("\n")
    w(" Compile Subversion with -DSVN_FS__TRAIL_DEBUG, which will cause it\n")
    # The continuation line used to start with a second "it".
    w(" to print trail statistics to stderr. Save the stats to a file,\n")
    w(" invoke %s on the file, and ponder the output.\n" % pgm)
    w("\n")
+
+
if __name__ == '__main__':
    # At most one argument, the statistics file, is accepted.
    if len(sys.argv) > 2:
        sys.stderr.write("Error: too many arguments\n\n")
        usage(sys.argv[0])
        sys.exit(1)

    if len(sys.argv) == 1:
        # No file given: read the statistics from standard input.
        infile = sys.stdin
    else:
        try:
            infile = open(sys.argv[1])
        except IOError:
            sys.stderr.write("Error: unable to open '%s'\n\n" % sys.argv[1])
            usage(sys.argv[0])
            sys.exit(1)

    try:
        trails = parse_trails_log(infile)
    finally:
        # Close the file we opened ourselves; leave stdin alone.
        if infile is not sys.stdin:
            infile.close()

    output_summary(trails, sys.stdout)
    output_trail_length_frequencies(trails, sys.stdout)
    output_trail_frequencies(trails, sys.stdout)
    output_txn_body_frequencies(trails, sys.stdout)