#!/usr/bin/python

# ====================================================================
#    Licensed to the Apache Software Foundation (ASF) under one
#    or more contributor license agreements.  See the NOTICE file
#    distributed with this work for additional information
#    regarding copyright ownership.  The ASF licenses this file
#    to you under the Apache License, Version 2.0 (the
#    "License"); you may not use this file except in compliance
#    with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing,
#    software distributed under the License is distributed on an
#    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#    KIND, either express or implied.  See the License for the
#    specific language governing permissions and limitations
#    under the License.
# ====================================================================

# TODO: Teach parse_open about capabilities, rather than allowing any
# words at all.

"""Parse subversion server operational logs.

SVN-ACTION strings
------------------

Angle brackets denote a variable, e.g. 'commit r<N>' means you'll see
lines like 'commit r17' for this action.

<N> and <M> are revision numbers.

<PATH>, <FROM-PATH>, and <TO-PATH> mean a URI-encoded path relative to the
repository root, including a leading '/'.

<REVPROP> means a revision property, e.g. 'svn:log'.

<I> represents a svn_mergeinfo_inheritance_t value and is one of these
words: explicit inherited nearest-ancestor.

<D> represents a svn_depth_t value and is one of these words: empty
files immediates infinity.  If the depth value for the operation was
svn_depth_unknown, the depth= portion is absent entirely.

The get-mergeinfo and log actions use lists for paths and revprops.
The lists are enclosed in parentheses and each item is separated by a
space (spaces in paths are encoded as %20).

The words will *always* be in this order, though some may be absent.

General::

    change-rev-prop r<N> <REVPROP>
    commit r<N>
    get-dir <PATH> r<N> text? props?
    get-file <PATH> r<N> text? props?
    lock (<PATH> ...) steal?
    rev-proplist r<N>
    unlock (<PATH> ...) break?

Reports::

    get-file-revs <PATH> r<N>:<M> include-merged-revisions?
    get-mergeinfo (<PATH> ...) <I> include-descendants?
    log (<PATH> ...) r<N>:<M> limit=<N>? discover-changed-paths? strict? include-merged-revisions? revprops=all|(<REVPROP> ...)?
    replay <PATH> r<N>

The update report::

    checkout-or-export <PATH> r<N> depth=<D>?
    diff <FROM-PATH>@<N> <TO-PATH>@<M> depth=<D>? ignore-ancestry?
    diff <PATH> r<N>:<M> depth=<D>? ignore-ancestry?
    status <PATH> r<N> depth=<D>?
    switch <FROM-PATH> <TO-PATH>@<N> depth=<D>?
    update <PATH> r<N> depth=<D>? send-copyfrom-args?
"""


import re
try:
    # Python >=3.0
    from urllib.parse import unquote as urllib_parse_unquote
except ImportError:
    # Python <3.0
    from urllib import unquote as urllib_parse_unquote

import svn.core

#
# Valid words for _parse_depth and _parse_mergeinfo_inheritance
#

DEPTH_WORDS = ['empty', 'files', 'immediates', 'infinity']
INHERITANCE_WORDS = {
    'explicit': svn.core.svn_mergeinfo_explicit,
    'inherited': svn.core.svn_mergeinfo_inherited,
    'nearest-ancestor': svn.core.svn_mergeinfo_nearest_ancestor,
}

#
# Patterns for _match
#

# <PATH>
pPATH = r'(/\S*)'
# (<PATH> ...)
pPATHS = r'\(([^)]*)\)'
# r<N>
pREVNUM = r'r(\d+)'
# (<N> ...)
pREVNUMS = r'\(((\d+\s*)*)\)'
# r<N>:<M>
pREVRANGE = r'r(-?\d+):(-?\d+)'
# <PATH>@<N>
pPATHREV = pPATH + r'@(\d+)'
pWORD = r'(\S+)'
pPROPERTY = pWORD
# depth=<D>?
pDEPTH = 'depth=' + pWORD

#
# Exceptions
#


class Error(Exception):
    """Base class for every error raised by this module."""
    pass


class BadDepthError(Error):
    """Raised when a depth= word is not one of DEPTH_WORDS."""

    def __init__(self, value):
        Error.__init__(self, 'bad svn_depth_t value ' + value)


class BadMergeinfoInheritanceError(Error):
    """Raised when an inheritance word is not one of INHERITANCE_WORDS."""

    def __init__(self, value):
        Error.__init__(self, 'bad svn_mergeinfo_inheritance_t value ' + value)


class MatchError(Error):
    """Raised by _match when the assembled pattern does not match the line."""

    def __init__(self, pattern, line):
        Error.__init__(self, '/%s/ does not match log line:\n%s'
                             % (pattern, line))


#
# Helper functions
#

# TODO: Move to kitchensink.c like svn_depth_from_word?
try:
    from svn.core import svn_inheritance_from_word
except ImportError:
    def svn_inheritance_from_word(word):
        """Map an inheritance word to its INHERITANCE_WORDS value.

        Fallback used only when svn.core does not export
        svn_inheritance_from_word.  Unknown words map to
        svn.core.svn_mergeinfo_explicit.
        """
        try:
            return INHERITANCE_WORDS[word]
        except KeyError:
            # XXX svn_inheritance_to_word uses explicit as default so...
            return svn.core.svn_mergeinfo_explicit


def _parse_depth(word):
    """Convert the word captured by pDEPTH into a depth value.

    Returns svn.core.svn_depth_unknown when word is None (i.e. the optional
    depth= portion did not match), otherwise the result of
    svn.core.svn_depth_from_word(word).

    Raises:
      BadDepthError -- if word is not None and not in DEPTH_WORDS
    """
    if word is None:
        return svn.core.svn_depth_unknown
    if word not in DEPTH_WORDS:
        raise BadDepthError(word)
    return svn.core.svn_depth_from_word(word)


def _parse_mergeinfo_inheritance(word):
    """Convert an inheritance word via svn_inheritance_from_word.

    Raises:
      BadMergeinfoInheritanceError -- if word is not in INHERITANCE_WORDS
    """
    if word not in INHERITANCE_WORDS:
        raise BadMergeinfoInheritanceError(word)
    return svn_inheritance_from_word(word)


def _match(line, *patterns):
    """Return a re.match object from matching patterns against line.

    All optional arguments must be strings suitable for ''.join()ing
    into a single pattern string for re.match.  The last optional
    argument may instead be a list of such strings, which will be
    joined into the final pattern as *optional* matches.

    The mandatory patterns are joined with r'\\s+'.  Each optional pattern x
    is appended as r'(\\s+x)?', so every optional item contributes one extra
    (outer) capture group in addition to any groups inside x.

    Raises:
      Error -- if re.match returns None (i.e. no match)
    """
    # Guard against being called with no patterns at all, where
    # patterns[-1] would raise IndexError.
    if patterns and isinstance(patterns[-1], list):
        optional = patterns[-1]
        patterns = patterns[:-1]
    else:
        optional = []
    pattern = r'\s+'.join(patterns)
    pattern += ''.join([r'(\s+' + x + ')?' for x in optional])
    m = re.match(pattern, line)
    if m is None:
        raise MatchError(pattern, line)
    return m


class Parser(object):
    """Subclass this and define the handle_<svn-action> methods according to
    the "SVN-ACTION strings" section of this module's documentation.  For
    example, "lock (<PATH> ...) steal?" => def handle_lock(self, paths, steal)
    where paths is the list of decoded paths and steal will be True if
    "steal" was present.

    Every _parse_<action> method receives the text following the action
    word, invokes the corresponding handle_<action> method and returns
    whatever part of that text was left unparsed.

    See the end of test_svn_server_log_parse.py for a complete example.
    """

    # These names carry the '_parse_' prefix but are internal helpers of
    # _parse_diff that take an extra match argument.  parse() must never
    # dispatch to them: calling them with a single argument would raise
    # TypeError for a log line such as 'diff-1path ...'.
    _NON_ACTION_PARSERS = frozenset(['_parse_diff_1path',
                                     '_parse_diff_2paths'])

    def parse(self, line):
        """Parse line and call appropriate handle_ method.

        The first space-separated word of line selects the action; '-' in
        it is replaced by '_' to form the _parse_<action> method name.  The
        line and its split form are stored on self.line and self.split_line.

        Returns one of:
          - line remaining after the svn action, if one was parsed
          - whatever your handle_unknown implementation returns

        Raises:
          BadDepthError -- for bad svn_depth_t values
          BadMergeinfoInheritanceError -- for bad svn_mergeinfo_inheritance_t
            values
          Error -- any other parse error
        """
        self.line = line
        words = self.split_line = line.split(' ')
        name = '_parse_' + words[0].replace('-', '_')
        if name in self._NON_ACTION_PARSERS:
            return self.handle_unknown(self.line)
        try:
            method = getattr(self, name)
        except AttributeError:
            return self.handle_unknown(self.line)
        return method(' '.join(words[1:]))

    def _parse_commit(self, line):
        """commit r<N> => handle_commit(revision)"""
        m = _match(line, pREVNUM)
        self.handle_commit(int(m.group(1)))
        return line[m.end():]

    def _parse_open(self, line):
        """open <PROTOCOL> cap=(<CAP> ...) <PATH> <RA-CLIENT> <CLIENT>
        => handle_open(protocol, capabilities, path, ra_client, client)

        The path is passed through as matched; only the two client words
        are URI-decoded.
        """
        pINT = r'(\d+)'
        pCAP = r'cap=\(([^)]*)\)'
        pCLIENT = pWORD
        m = _match(line, pINT, pCAP, pPATH, pCLIENT, pCLIENT)
        protocol = int(m.group(1))
        # The cap group is mandatory, so it is never None; an empty
        # 'cap=()' yields an empty list from split().
        capabilities = m.group(2).split()
        path = m.group(3)
        ra_client = urllib_parse_unquote(m.group(4))
        client = urllib_parse_unquote(m.group(5))
        self.handle_open(protocol, capabilities, path, ra_client, client)
        return line[m.end():]

    def _parse_reparent(self, line):
        """reparent <PATH> => handle_reparent(path)"""
        m = _match(line, pPATH)
        self.handle_reparent(urllib_parse_unquote(m.group(1)))
        return line[m.end():]

    def _parse_get_latest_rev(self, line):
        """get-latest-rev => handle_get_latest_rev(); line is returned as is."""
        self.handle_get_latest_rev()
        return line

    def _parse_get_dated_rev(self, line):
        """get-dated-rev <WORD> => handle_get_dated_rev(word)"""
        m = _match(line, pWORD)
        self.handle_get_dated_rev(m.group(1))
        return line[m.end():]

    def _parse_get_dir(self, line):
        """get-dir <PATH> r<N> text? props?
        => handle_get_dir(path, revision, text, props)
        """
        m = _match(line, pPATH, pREVNUM, ['text', 'props'])
        self.handle_get_dir(urllib_parse_unquote(m.group(1)),
                            int(m.group(2)),
                            m.group(3) is not None,
                            m.group(4) is not None)
        return line[m.end():]

    def _parse_get_file(self, line):
        """get-file <PATH> r<N> text? props?
        => handle_get_file(path, revision, text, props)
        """
        m = _match(line, pPATH, pREVNUM, ['text', 'props'])
        self.handle_get_file(urllib_parse_unquote(m.group(1)),
                             int(m.group(2)),
                             m.group(3) is not None,
                             m.group(4) is not None)
        return line[m.end():]

    def _parse_lock(self, line):
        """lock (<PATH> ...) steal? => handle_lock(paths, steal)"""
        m = _match(line, pPATHS, ['steal'])
        paths = [urllib_parse_unquote(x) for x in m.group(1).split()]
        self.handle_lock(paths, m.group(2) is not None)
        return line[m.end():]

    def _parse_change_rev_prop(self, line):
        """change-rev-prop r<N> <REVPROP>
        => handle_change_rev_prop(revision, revprop)
        """
        m = _match(line, pREVNUM, pPROPERTY)
        self.handle_change_rev_prop(int(m.group(1)),
                                    urllib_parse_unquote(m.group(2)))
        return line[m.end():]

    def _parse_rev_proplist(self, line):
        """rev-proplist r<N> => handle_rev_proplist(revision)"""
        m = _match(line, pREVNUM)
        self.handle_rev_proplist(int(m.group(1)))
        return line[m.end():]

    def _parse_rev_prop(self, line):
        """rev-prop r<N> <REVPROP> => handle_rev_prop(revision, revprop)"""
        m = _match(line, pREVNUM, pPROPERTY)
        self.handle_rev_prop(int(m.group(1)),
                             urllib_parse_unquote(m.group(2)))
        return line[m.end():]

    def _parse_unlock(self, line):
        """unlock (<PATH> ...) break? => handle_unlock(paths, break_lock)"""
        m = _match(line, pPATHS, ['break'])
        paths = [urllib_parse_unquote(x) for x in m.group(1).split()]
        self.handle_unlock(paths, m.group(2) is not None)
        return line[m.end():]

    def _parse_get_lock(self, line):
        """get-lock <PATH> => handle_get_lock(path)"""
        m = _match(line, pPATH)
        self.handle_get_lock(urllib_parse_unquote(m.group(1)))
        return line[m.end():]

    def _parse_get_locks(self, line):
        """get-locks <PATH> => handle_get_locks(path)"""
        m = _match(line, pPATH)
        self.handle_get_locks(urllib_parse_unquote(m.group(1)))
        return line[m.end():]

    def _parse_get_locations(self, line):
        """get-locations <PATH> (<N> ...)
        => handle_get_locations(path, revnums)
        """
        m = _match(line, pPATH, pREVNUMS)
        path = urllib_parse_unquote(m.group(1))
        revnums = [int(x) for x in m.group(2).split()]
        self.handle_get_locations(path, revnums)
        return line[m.end():]

    def _parse_get_location_segments(self, line):
        """get-location-segments <PATH>@<PEG> r<N>:<M>
        => handle_get_location_segments(path, peg, left, right)
        """
        m = _match(line, pPATHREV, pREVRANGE)
        path = urllib_parse_unquote(m.group(1))
        peg = int(m.group(2))
        left = int(m.group(3))
        right = int(m.group(4))
        self.handle_get_location_segments(path, peg, left, right)
        return line[m.end():]

    def _parse_get_file_revs(self, line):
        """get-file-revs <PATH> r<N>:<M> include-merged-revisions?
        => handle_get_file_revs(path, left, right, include_merged_revisions)
        """
        m = _match(line, pPATH, pREVRANGE, ['include-merged-revisions'])
        path = urllib_parse_unquote(m.group(1))
        left = int(m.group(2))
        right = int(m.group(3))
        include_merged_revisions = m.group(4) is not None
        self.handle_get_file_revs(path, left, right, include_merged_revisions)
        return line[m.end():]

    def _parse_get_mergeinfo(self, line):
        """get-mergeinfo (<PATH> ...) <I> include-descendants?
        => handle_get_mergeinfo(paths, inheritance, include_descendants)

        Raises:
          BadMergeinfoInheritanceError -- if <I> is not a known word
        """
        # <I>
        pMERGEINFO_INHERITANCE = pWORD
        m = _match(line,
                   pPATHS, pMERGEINFO_INHERITANCE, ['include-descendants'])
        paths = [urllib_parse_unquote(x) for x in m.group(1).split()]
        inheritance = _parse_mergeinfo_inheritance(m.group(2))
        include_descendants = m.group(3) is not None
        self.handle_get_mergeinfo(paths, inheritance, include_descendants)
        return line[m.end():]

    def _parse_log(self, line):
        """log (<PATH> ...) r<N>:<M> limit=<N>? discover-changed-paths?
        strict? include-merged-revisions? revprops=all|(<REVPROP> ...)?
        => handle_log(paths, left, right, limit, discover_changed_paths,
                      strict, include_merged_revisions, revprops)

        limit is 0 when absent.  revprops is None for 'revprops=all', an
        empty list when the revprops= portion is absent, and otherwise the
        list of decoded property names.
        """
        # limit=<N>?
        pLIMIT = r'limit=(\d+)'
        # revprops=all|(<REVPROP> ...)?
        pREVPROPS = r'revprops=(all|\(([^)]+)\))'
        m = _match(line, pPATHS, pREVRANGE,
                   [pLIMIT, 'discover-changed-paths', 'strict',
                    'include-merged-revisions', pREVPROPS])
        # Group layout: 1 paths, 2-3 rev range, 4/5 limit (outer/inner),
        # 6 discover-changed-paths, 7 strict, 8 include-merged-revisions,
        # 9/10/11 revprops (outer / all-or-list / list contents).
        paths = [urllib_parse_unquote(x) for x in m.group(1).split()]
        left = int(m.group(2))
        right = int(m.group(3))
        if m.group(5) is None:
            limit = 0
        else:
            limit = int(m.group(5))
        discover_changed_paths = m.group(6) is not None
        strict = m.group(7) is not None
        include_merged_revisions = m.group(8) is not None
        if m.group(10) == 'all':
            revprops = None
        else:
            if m.group(11) is None:
                revprops = []
            else:
                revprops = [urllib_parse_unquote(x)
                            for x in m.group(11).split()]
        self.handle_log(paths, left, right, limit, discover_changed_paths,
                        strict, include_merged_revisions, revprops)
        return line[m.end():]

    def _parse_check_path(self, line):
        """check-path <PATH>@<N> => handle_check_path(path, revision)"""
        m = _match(line, pPATHREV)
        path = urllib_parse_unquote(m.group(1))
        revnum = int(m.group(2))
        self.handle_check_path(path, revnum)
        return line[m.end():]

    def _parse_stat(self, line):
        """stat <PATH>@<N> => handle_stat(path, revision)"""
        m = _match(line, pPATHREV)
        path = urllib_parse_unquote(m.group(1))
        revnum = int(m.group(2))
        self.handle_stat(path, revnum)
        return line[m.end():]

    def _parse_replay(self, line):
        """replay <PATH> r<N> => handle_replay(path, revision)"""
        m = _match(line, pPATH, pREVNUM)
        path = urllib_parse_unquote(m.group(1))
        revision = int(m.group(2))
        self.handle_replay(path, revision)
        return line[m.end():]

    # the update report

    def _parse_checkout_or_export(self, line):
        """checkout-or-export <PATH> r<N> depth=<D>?
        => handle_checkout_or_export(path, revision, depth)

        Raises:
          BadDepthError -- if <D> is not a known depth word
        """
        m = _match(line, pPATH, pREVNUM, [pDEPTH])
        path = urllib_parse_unquote(m.group(1))
        revision = int(m.group(2))
        depth = _parse_depth(m.group(4))
        self.handle_checkout_or_export(path, revision, depth)
        return line[m.end():]

    def _parse_diff(self, line):
        """Parse either form of the diff action.

        The 1-path form 'diff <PATH> r<N>:<M> ...' is tried first; if it
        does not match, the 2-path form
        'diff <FROM-PATH>@<N> <TO-PATH>@<M> ...' is tried.  An Error from
        the second attempt propagates to the caller.
        """
        # First, try 1-path form.
        try:
            m = _match(line, pPATH, pREVRANGE, [pDEPTH, 'ignore-ancestry'])
            f = self._parse_diff_1path
        except Error:
            # OK, how about 2-path form?
            m = _match(line, pPATHREV, pPATHREV, [pDEPTH, 'ignore-ancestry'])
            f = self._parse_diff_2paths
        return f(line, m)

    def _parse_diff_1path(self, line, m):
        """Helper of _parse_diff for an already matched 1-path diff.

        => handle_diff_1path(path, left, right, depth, ignore_ancestry)
        """
        path = urllib_parse_unquote(m.group(1))
        left = int(m.group(2))
        right = int(m.group(3))
        depth = _parse_depth(m.group(5))
        ignore_ancestry = m.group(6) is not None
        self.handle_diff_1path(path, left, right,
                               depth, ignore_ancestry)
        return line[m.end():]

    def _parse_diff_2paths(self, line, m):
        """Helper of _parse_diff for an already matched 2-path diff.

        => handle_diff_2paths(from_path, from_rev, to_path, to_rev,
                              depth, ignore_ancestry)
        """
        from_path = urllib_parse_unquote(m.group(1))
        from_rev = int(m.group(2))
        to_path = urllib_parse_unquote(m.group(3))
        to_rev = int(m.group(4))
        depth = _parse_depth(m.group(6))
        ignore_ancestry = m.group(7) is not None
        self.handle_diff_2paths(from_path, from_rev, to_path, to_rev,
                                depth, ignore_ancestry)
        return line[m.end():]

    def _parse_status(self, line):
        """status <PATH> r<N> depth=<D>?
        => handle_status(path, revision, depth)
        """
        m = _match(line, pPATH, pREVNUM, [pDEPTH])
        path = urllib_parse_unquote(m.group(1))
        revision = int(m.group(2))
        depth = _parse_depth(m.group(4))
        self.handle_status(path, revision, depth)
        return line[m.end():]

    def _parse_switch(self, line):
        """switch <FROM-PATH> <TO-PATH>@<N> depth=<D>?
        => handle_switch(from_path, to_path, to_rev, depth)
        """
        m = _match(line, pPATH, pPATHREV, [pDEPTH])
        from_path = urllib_parse_unquote(m.group(1))
        to_path = urllib_parse_unquote(m.group(2))
        to_rev = int(m.group(3))
        depth = _parse_depth(m.group(5))
        self.handle_switch(from_path, to_path, to_rev, depth)
        return line[m.end():]

    def _parse_update(self, line):
        """update <PATH> r<N> depth=<D>? send-copyfrom-args?
        => handle_update(path, revision, depth, send_copyfrom_args)
        """
        m = _match(line, pPATH, pREVNUM, [pDEPTH, 'send-copyfrom-args'])
        path = urllib_parse_unquote(m.group(1))
        revision = int(m.group(2))
        depth = _parse_depth(m.group(4))
        send_copyfrom_args = m.group(5) is not None
        self.handle_update(path, revision, depth, send_copyfrom_args)
        return line[m.end():]