diff options
Diffstat (limited to 'tools/server-side')
32 files changed, 6921 insertions, 0 deletions
diff --git a/tools/server-side/fsfs-reshard.py b/tools/server-side/fsfs-reshard.py new file mode 100755 index 0000000..d9937ad --- /dev/null +++ b/tools/server-side/fsfs-reshard.py @@ -0,0 +1,399 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# fsfs-reshard.py REPOS_PATH MAX_FILES_PER_SHARD +# +# Perform an offline conversion of an FSFS repository between linear (format +# 2, usable by Subversion 1.4+) and sharded (format 3, usable by Subversion +# 1.5+) layouts. +# +# The MAX_FILES_PER_SHARD argument specifies the maximum number of files +# that will be stored in each shard (directory), or zero to specify a linear +# layout. Subversion 1.5 uses a default value of 1000 files per shard. +# +# As the repository will not be valid while the conversion is in progress, +# the repository administrator must ensure that access to the repository is +# blocked for the duration of the conversion. +# +# In the event that the conversion is interrupted, the repository will be in +# an inconsistent state. The repository administrator should then re-run +# this tool to completion. +# +# +# Note that, currently, resharding from one sharded layout to another is +# likely to be an extremely slow process. To reshard, we convert from a +# sharded to linear layout and then to the new sharded layout. The problem +# is that the initial conversion to the linear layout triggers exactly the +# same 'large number of files in a directory' problem that sharding is +# intended to solve. +# +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== +# +# $HeadURL: https://svn.apache.org/repos/asf/subversion/branches/1.10.x/tools/server-side/fsfs-reshard.py $ +# $LastChangedDate: 2016-04-30 08:16:53 +0000 (Sat, 30 Apr 2016) $ +# $LastChangedBy: stefan2 $ +# $LastChangedRevision: 1741723 $ + +import os, stat, sys + +from errno import EEXIST + +def usage(): + """Print a usage message and exit.""" + print("""usage: %s REPOS_PATH MAX_FILES_PER_SHARD [START END] + +Perform an offline conversion of an FSFS repository between linear +(readable by Subversion 1.4 or later) and sharded (readable by +Subversion 1.5 or later) layouts. + +The MAX_FILES_PER_SHARD argument specifies the maximum number of +files that will be stored in each shard (directory), or zero to +specify a linear layout. Subversion 1.5 uses a default value of +1000 files per shard. + +Convert revisions START through END inclusive if specified, or all +revisions if unspecified. +""" % sys.argv[0]) + sys.exit(1) + +def incompatible_repos_format(repos_path, format): + """Print an error saying that REPOS_PATH is a repository with an + incompatible repository format FORMAT, then exit.""" + sys.stderr.write("""error: unable to convert repository '%s'. + +This repository is not compatible with this tool. Valid +repository formats are '3' or '5'; this repository is +format '%s'. 
+ +""" % (repos_path, format)) + sys.stderr.flush() + sys.exit(1) + +def incompatible_fs_format(repos_path, format): + """Print an error saying that REPOS_PATH is a repository with an + incompatible filesystem format FORMAT, then exit.""" + sys.stderr.write("""error: unable to convert repository '%s'. + +This repository contains a filesystem that is not compatible with +this tool. Valid filesystem formats are '1', '2', or '3'; this +repository contains a filesystem with format '%s'. + +""" % (repos_path, format)) + sys.stderr.flush() + sys.exit(1) + +def unexpected_fs_format_options(repos_path): + """Print an error saying that REPOS_PATH is a repository with + unexpected filesystem format options, then exit.""" + sys.stderr.write("""error: unable to convert repository '%s'. + +This repository contains a filesystem that appears to be invalid - +there is unexpected data after the filesystem format number. + +""" % repos_path) + sys.stderr.flush() + sys.exit(1) + +def incompatible_fs_format_option(repos_path, option): + """Print an error saying that REPOS_PATH is a repository with an + incompatible filesystem format option OPTION, then exit.""" + sys.stderr.write("""error: unable to convert repository '%s'. + +This repository contains a filesystem that is not compatible with +this tool. This tool recognises the 'layout' option but the +filesystem uses the '%s' option. + +""" % (repos_path, option)) + sys.stderr.flush() + sys.exit(1) + +def warn_about_fs_format_1(repos_path, format_path): + """Print a warning saying that REPOS_PATH contains a format 1 FSFS + filesystem that we can't reconstruct, then exit.""" + sys.stderr.write("""warning: conversion of '%s' will be one-way. + +This repository is currently readable by Subversion 1.1 or later. 
+This tool can convert this repository to one that is readable by +either Subversion 1.4 (or later) or Subversion 1.5 (or later), +but it is not able to convert it back to the original format - a +separate dump/load step would be required. + +If you would like to upgrade this repository anyway, delete the +file '%s' and re-run this tool. + +""" % (repos_path, format_path)) + sys.stderr.flush() + sys.exit(1) + +def check_repos_format(repos_path): + """Check that REPOS_PATH contains a repository with a suitable format; + print a message and exit if not.""" + format_path = os.path.join(repos_path, 'format') + try: + format_file = open(format_path) + format = format_file.readline() + if not format.endswith('\n'): + incompatible_repos_format(repos_path, format + ' <missing newline>') + format = format.rstrip('\n') + if format == '3' or format == '5': + pass + else: + incompatible_repos_format(repos_path, format) + except IOError: + # In all likelihood, the file doesn't exist. + incompatible_repos_format(repos_path, '<unreadable>') + +def check_fs_format(repos_path): + """Check that REPOS_PATH contains a filesystem with a suitable format, + or that it contains no format file; print a message and exit if neither + is true. Return bool whether the filesystem is sharded.""" + sharded = False + db_path = os.path.join(repos_path, 'db') + format_path = os.path.join(db_path, 'format') + try: + format_file = open(format_path) + format = format_file.readline() + if not format.endswith('\n'): + incompatible_fs_format(repos_path, format + ' <missing newline>') + format = format.rstrip('\n') + if format == '1': + # This is a format 1 (svndiff0 only) filesystem. We can upgrade it, + # but we can't downgrade again (since we can't uncompress any of the + # svndiff1 deltas that may have been written). Warn the user and exit. 
+ warn_about_fs_format_1(repos_path, format_path) + if format == '2': + pass + elif format == '3': + pass + else: + incompatible_fs_format(repos_path, format) + + for line in format_file: + if format == '2': + unexpected_fs_format_options(repos_path) + + line = line.rstrip('\n') + if line == 'layout linear': + pass + elif line.startswith('layout sharded '): + sharded = True + else: + incompatible_fs_format_option(repos_path, line) + + format_file.close() + except IOError: + # The format file might not exist if we've previously been interrupted, + # or if the user is following our advice about upgrading a format 1 + # repository. In both cases, we'll just assume the format was + # compatible. + pass + + return sharded + +def current_file(repos_path): + """Return triple of (revision, next_node_id, next_copy_id) from + REPOS_PATH/db/current .""" + return open(os.path.join(repos_path, 'db', 'current')).readline().split() + +def remove_fs_format(repos_path): + """Remove the filesystem format file for repository REPOS_PATH. + Do not raise an error if the file is already missing.""" + format_path = os.path.join(repos_path, 'db', 'format') + try: + statinfo = os.stat(format_path) + except OSError: + # The file probably doesn't exist. + return + + # On Windows, we need to ensure the file is writable before we can + # remove it. + os.chmod(format_path, statinfo.st_mode | stat.S_IWUSR) + os.remove(format_path) + +def write_fs_format(repos_path, contents): + """Write a new filesystem format file for repository REPOS_PATH containing + CONTENTS.""" + format_path = os.path.join(repos_path, 'db', 'format') + f = open(format_path, 'wb') + f.write(contents) + f.close() + os.chmod(format_path, stat.S_IRUSR | stat.S_IRGRP) + +def linearise(path): + """Move all the files in subdirectories of PATH into PATH, and remove the + subdirectories. 
Handle conflicts between subdirectory names and files + contained in subdirectories by ensuring subdirectories have a '.shard' + suffix prior to moving (the files are assumed not to have this suffix. + Abort if a subdirectory is found to contain another subdirectory.""" + # First enumerate all subdirectories of DIR and rename where necessary + # to include a .shard suffix. + for name in os.listdir(path): + if name.endswith('.shard'): + continue + subdir_path = os.path.join(path, name) + if not os.path.isdir(subdir_path): + continue + os.rename(subdir_path, subdir_path + '.shard') + + # Now move all the subdirectory contents into the parent and remove + # the subdirectories. + for root_path, dirnames, filenames in os.walk(path): + if root_path == path: + continue + if len(dirnames) > 0: + sys.stderr.write("error: directory '%s' contains other unexpected directories.\n" \ + % root_path) + sys.stderr.flush() + sys.exit(1) + for name in filenames: + from_path = os.path.join(root_path, name) + to_path = os.path.join(path, name) + os.rename(from_path, to_path) + os.rmdir(root_path) + +def shard(path, max_files_per_shard, start, end): + """Move the files for revisions START to END inclusive in PATH into + subdirectories of PATH named such that subdirectory '0' contains at most + MAX_FILES_PER_SHARD files, those named [0, MAX_FILES_PER_SHARD). Abort if + PATH is found to contain any entries with non-numeric names.""" + + tmp = path + '.reshard' + try: + os.mkdir(tmp) + except OSError as e: + if e.errno != EEXIST: + raise + + # Move all entries into shards named N.shard. + for rev in range(start, end + 1): + name = str(rev) + shard = rev // max_files_per_shard + shard_name = str(shard) + '.shard' + + from_path = os.path.join(path, name) + to_path = os.path.join(tmp, shard_name, name) + try: + os.rename(from_path, to_path) + except OSError: + # The most likely explanation is that the shard directory doesn't + # exist. Let's create it and retry the rename. 
+ os.mkdir(os.path.join(tmp, shard_name)) + os.rename(from_path, to_path) + + # Now rename all the shards to remove the suffix. + skipped = 0 + for name in os.listdir(tmp): + if not name.endswith('.shard'): + sys.stderr.write("warning: ignoring unexpected subdirectory '%s'.\n" \ + % os.path.join(tmp, name)) + sys.stderr.flush() + skipped += 1 + continue + from_path = os.path.join(tmp, name) + to_path = os.path.join(path, os.path.basename(from_path)[:-6]) + os.rename(from_path, to_path) + skipped == 0 and os.rmdir(tmp) + +def main(): + if len(sys.argv) < 3: + usage() + + repos_path = sys.argv[1] + max_files_per_shard = sys.argv[2] + try: + start = int(sys.argv[3]) + end = int(sys.argv[4]) + except IndexError: + start = 0 + end = int(current_file(repos_path)[0]) + + # Validate the command-line arguments. + db_path = os.path.join(repos_path, 'db') + current_path = os.path.join(db_path, 'current') + if not os.path.exists(current_path): + sys.stderr.write("error: '%s' doesn't appear to be a Subversion FSFS repository.\n" \ + % repos_path) + sys.stderr.flush() + sys.exit(1) + + try: + max_files_per_shard = int(max_files_per_shard) + except ValueError as OverflowError: + sys.stderr.write("error: maximum files per shard ('%s') is not a valid number.\n" \ + % max_files_per_shard) + sys.stderr.flush() + sys.exit(1) + + if max_files_per_shard < 0: + sys.stderr.write("error: maximum files per shard ('%d') must not be negative.\n" \ + % max_files_per_shard) + sys.stderr.flush() + sys.exit(1) + + # Check the format of the repository. + check_repos_format(repos_path) + sharded = check_fs_format(repos_path) + + # Let the user know what's going on. 
+ if max_files_per_shard > 0: + print("Converting '%s' to a sharded structure with %d files per directory" \ + % (repos_path, max_files_per_shard)) + if sharded: + print('(will convert to a linear structure first)') + else: + print("Converting '%s' to a linear structure" % repos_path) + + # Prevent access to the repository for the duration of the conversion. + # There's no clean way to do this, but since the format of the repository + # is indeterminate, let's remove the format file while we're converting. + print('- marking the repository as invalid') + remove_fs_format(repos_path) + + # First, convert to a linear scheme (this makes recovery easier because + # it's easier to reason about the behaviour on restart). + if sharded: + print('- linearising db/revs') + linearise(os.path.join(repos_path, 'db', 'revs')) + print('- linearising db/revprops') + linearise(os.path.join(repos_path, 'db', 'revprops')) + + if max_files_per_shard == 0: + # We're done. Stamp the filesystem with a format 2 db/format file. + print('- marking the repository as a valid linear repository') + write_fs_format(repos_path, '2\n') + else: + print('- sharding db/revs') + shard(os.path.join(repos_path, 'db', 'revs'), max_files_per_shard, + start, end) + print('- sharding db/revprops') + shard(os.path.join(repos_path, 'db', 'revprops'), max_files_per_shard, + start, end) + + # We're done. Stamp the filesystem with a format 3 db/format file. + print('- marking the repository as a valid sharded repository') + write_fs_format(repos_path, '3\nlayout sharded %d\n' % max_files_per_shard) + + print('- done.') + sys.exit(0) + +if __name__ == '__main__': + raise Exception("""This script is unfinished and not ready to be used on live data. 
+ Trust us.""") + main() diff --git a/tools/server-side/mod_dontdothat/README b/tools/server-side/mod_dontdothat/README new file mode 100644 index 0000000..7d4fe36 --- /dev/null +++ b/tools/server-side/mod_dontdothat/README @@ -0,0 +1,53 @@ +mod_dontdothat is an Apache module that allows you to block specific types +of Subversion requests. Specifically, it's designed to keep users from doing +things that are particularly hard on the server, like checking out the root +of the tree, or the tags or branches directories. It works by sticking an +input filter in front of all REPORT requests and looking for dangerous types +of requests. If it finds any, it returns a 403 Forbidden error. + +You can compile and install it via apxs: + +$ apxs -c \ + -I$PREFIX/include/subversion-1 \ + -L$PREFIX/lib -lsvn_subr-1 + mod_dontdothat.c + +$ apxs -i -n dontdothat mod_dontdothat.la + +It is enabled via single httpd.conf directive, DontDoThatConfigFile: + +<Location /svn> + DAV svn + SVNParentPath /path/to/repositories + DontDoThatConfigFile /path/to/config.file + DontDoThatDisallowReplay off +</Location> + +The file you give to DontDoThatConfigFile is a Subversion configuration file +that contains the following sections. + +[recursive-actions] +/*/trunk = allow +/ = deny +/* = deny +/*/tags = deny +/*/branches = deny +/*/* = deny +/*/*/tags = deny +/*/*/branches = deny + +As you might guess, this defines a set of patterns that control what the +user is not allowed to do. Anything with a 'deny' after it is denied, and +as a fallback mechanism anything with an 'allow' after it is special cased +to be allowed, even if it matches something that is denied. + +Note that the wildcard portions of a rule only swallow a single directory, +so /* will match /foo, but not /foo/bar. They also must be at the end of +a directory segment, so /foo* or /* are valid, but /*foo is not. 
+ +These rules are applied to any recursive action, which basically means any +Subversion command that goes through the update-report, like update, diff, +checkout, merge, etc. + +The DontDoThatDisallowReplay option makes mod_dontdothat disallow +replay requests, which is on by default. diff --git a/tools/server-side/mod_dontdothat/mod_dontdothat.c b/tools/server-side/mod_dontdothat/mod_dontdothat.c new file mode 100644 index 0000000..dc63c0a --- /dev/null +++ b/tools/server-side/mod_dontdothat/mod_dontdothat.c @@ -0,0 +1,711 @@ +/* + * mod_dontdothat.c: an Apache filter that allows you to return arbitrary + * errors for various types of Subversion requests. + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + +#include <httpd.h> +#include <http_config.h> +#include <http_protocol.h> +#include <http_request.h> +#include <http_log.h> +#include <util_filter.h> +#include <ap_config.h> +#include <apr_strings.h> +#include <apr_uri.h> + +#include "mod_dav_svn.h" +#include "svn_string.h" +#include "svn_config.h" +#include "svn_path.h" +#include "svn_xml.h" +#include "private/svn_fspath.h" + +extern module AP_MODULE_DECLARE_DATA dontdothat_module; + +typedef struct dontdothat_config_rec { + const char *config_file; + const char *base_path; + int no_replay; +} dontdothat_config_rec; + +static void *create_dontdothat_dir_config(apr_pool_t *pool, char *dir) +{ + dontdothat_config_rec *cfg = apr_pcalloc(pool, sizeof(*cfg)); + + cfg->base_path = dir; + cfg->no_replay = 1; + + return cfg; +} + +static const command_rec dontdothat_cmds[] = +{ + AP_INIT_TAKE1("DontDoThatConfigFile", ap_set_file_slot, + (void *) APR_OFFSETOF(dontdothat_config_rec, config_file), + OR_ALL, + "Text file containing actions to take for specific requests"), + AP_INIT_FLAG("DontDoThatDisallowReplay", ap_set_flag_slot, + (void *) APR_OFFSETOF(dontdothat_config_rec, no_replay), + OR_ALL, "Disallow replay requests as if they are other recursive requests."), + { NULL } +}; + +typedef enum parse_state_t { + STATE_BEGINNING, + STATE_IN_UPDATE, + STATE_IN_SRC_PATH, + STATE_IN_DST_PATH, + STATE_IN_RECURSIVE +} parse_state_t; + +typedef struct dontdothat_filter_ctx { + /* Set to TRUE when we determine that the request is safe and should be + * allowed to continue. */ + svn_boolean_t let_it_go; + + /* Set to TRUE when we determine that the request is unsafe and should be + * stopped in its tracks. */ + svn_boolean_t no_soup_for_you; + + svn_xml_parser_t *xmlp; + + /* The current location in the REPORT body. */ + parse_state_t state; + + /* A buffer to hold CDATA we encounter. 
*/ + svn_stringbuf_t *buffer; + + dontdothat_config_rec *cfg; + + /* An array of wildcards that are special cased to be allowed. */ + apr_array_header_t *allow_recursive_ops; + + /* An array of wildcards where recursive operations are not allowed. */ + apr_array_header_t *no_recursive_ops; + + /* TRUE if a path has failed a test already. */ + svn_boolean_t path_failed; + + /* An error for when we're using this as a baton while parsing config + * files. */ + svn_error_t *err; + + /* The current request. */ + request_rec *r; +} dontdothat_filter_ctx; + +/* Return TRUE if wildcard WC matches path P, FALSE otherwise. */ +static svn_boolean_t +matches(const char *wc, const char *p) +{ + for (;;) + { + switch (*wc) + { + case '*': + if (wc[1] != '/' && wc[1] != '\0') + abort(); /* This was checked for during parsing of the config. */ + + /* It's a wild card, so eat up until the next / in p. */ + while (*p && p[1] != '/') + ++p; + + /* If we ran out of p and we're out of wc then it matched. */ + if (! *p) + { + if (wc[1] == '\0') + return TRUE; + else + return FALSE; + } + break; + + case '\0': + if (*p != '\0') + /* This means we hit the end of wc without running out of p. */ + return FALSE; + else + /* Or they were exactly the same length, so it's not lower. */ + return TRUE; + + default: + if (*wc != *p) + return FALSE; /* If we don't match, then move on to the next + * case. */ + else + break; + } + + ++wc; + ++p; + + if (! 
*p && *wc) + return FALSE; + } +} + +/* duplicate of dav_svn__log_err() from mod_dav_svn/util.c */ +static void +log_dav_err(request_rec *r, + dav_error *err, + int level) +{ + dav_error *errscan; + + /* Log the errors */ + /* ### should have a directive to log the first or all */ + for (errscan = err; errscan != NULL; errscan = errscan->prev) { + apr_status_t status; + + if (errscan->desc == NULL) + continue; + +#if AP_MODULE_MAGIC_AT_LEAST(20091119,0) + status = errscan->aprerr; +#else + status = errscan->save_errno; +#endif + + ap_log_rerror(APLOG_MARK, level, status, r, + "%s [%d, #%d]", + errscan->desc, errscan->status, errscan->error_id); + } +} + +static svn_boolean_t +is_this_legal(dontdothat_filter_ctx *ctx, const char *uri) +{ + const char *relative_path; + const char *cleaned_uri; + const char *repos_name; + const char *uri_path; + int trailing_slash; + dav_error *derr; + + /* uri can be an absolute uri or just a path, we only want the path to match + * against */ + if (uri && svn_path_is_url(uri)) + { + apr_uri_t parsed_uri; + apr_status_t rv = apr_uri_parse(ctx->r->pool, uri, &parsed_uri); + if (APR_SUCCESS != rv) + { + /* Error parsing the URI, log and reject request. */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, ctx->r, + "mod_dontdothat: blocked request after failing " + "to parse uri: '%s'", uri); + return FALSE; + } + uri_path = parsed_uri.path; + } + else + { + uri_path = uri; + } + + if (uri_path) + { + const char *repos_path; + + derr = dav_svn_split_uri(ctx->r, + uri_path, + ctx->cfg->base_path, + &cleaned_uri, + &trailing_slash, + &repos_name, + &relative_path, + &repos_path); + if (! derr) + { + int idx; + + if (! repos_path) + repos_path = ""; + + repos_path = svn_fspath__canonicalize(repos_path, ctx->r->pool); + + /* First check the special cases that are always legal... 
*/ + for (idx = 0; idx < ctx->allow_recursive_ops->nelts; ++idx) + { + const char *wc = APR_ARRAY_IDX(ctx->allow_recursive_ops, + idx, + const char *); + + if (matches(wc, repos_path)) + { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r, + "mod_dontdothat: rule %s allows %s", + wc, repos_path); + return TRUE; + } + } + + /* Then look for stuff we explicitly don't allow. */ + for (idx = 0; idx < ctx->no_recursive_ops->nelts; ++idx) + { + const char *wc = APR_ARRAY_IDX(ctx->no_recursive_ops, + idx, + const char *); + + if (matches(wc, repos_path)) + { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r, + "mod_dontdothat: rule %s forbids %s", + wc, repos_path); + return FALSE; + } + } + } + else + { + log_dav_err(ctx->r, derr, APLOG_ERR); + return FALSE; + } + + } + else + { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->r, + "mod_dontdothat: empty uri passed to is_this_legal(), " + "module bug?"); + return FALSE; + } + + return TRUE; +} + +static apr_status_t +dontdothat_filter(ap_filter_t *f, + apr_bucket_brigade *bb, + ap_input_mode_t mode, + apr_read_type_e block, + apr_off_t readbytes) +{ + dontdothat_filter_ctx *ctx = f->ctx; + apr_status_t rv; + apr_bucket *e; + + if (mode != AP_MODE_READBYTES) + return ap_get_brigade(f->next, bb, mode, block, readbytes); + + rv = ap_get_brigade(f->next, bb, mode, block, readbytes); + if (rv) + return rv; + + for (e = APR_BRIGADE_FIRST(bb); + e != APR_BRIGADE_SENTINEL(bb); + e = APR_BUCKET_NEXT(e)) + { + svn_boolean_t last = APR_BUCKET_IS_EOS(e); + const char *str; + apr_size_t len; + svn_error_t *err; + + if (last) + { + str = ""; + len = 0; + } + else + { + rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ); + if (rv) + return rv; + } + + err = svn_xml_parse(ctx->xmlp, str, len, last); + if (err) + { + /* let_it_go so we clean up our parser, no_soup_for_you so that we + * bail out before bothering to parse this stuff a second time. 
*/ + ctx->let_it_go = TRUE; + ctx->no_soup_for_you = TRUE; + svn_error_clear(err); + } + + /* If we found something that isn't allowed, set the correct status + * and return an error so it'll bail out before it gets anywhere it + * can do real damage. */ + if (ctx->no_soup_for_you) + { + /* XXX maybe set up the SVN-ACTION env var so that it'll show up + * in the Subversion operational logs? */ + + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, + "mod_dontdothat: client broke the rules, " + "returning error"); + + /* Ok, pass an error bucket and an eos bucket back to the client. + * + * NOTE: The custom error string passed here doesn't seem to be + * used anywhere by httpd. This is quite possibly a bug. + * + * TODO: Try and pass back a custom document body containing a + * serialized svn_error_t so the client displays a better + * error message. */ + bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); + e = ap_bucket_error_create(403, "No Soup For You!", + f->r->pool, f->c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + e = apr_bucket_eos_create(f->c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + + /* Don't forget to remove us, otherwise recursion blows the stack. */ + ap_remove_input_filter(f); + + return ap_pass_brigade(f->r->output_filters, bb); + } + else if (ctx->let_it_go || last) + { + ap_remove_input_filter(f); + + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, + "mod_dontdothat: letting request go through"); + + return rv; + } + } + + return rv; +} + +/* Implements svn_xml_char_data callback */ +static void +cdata(void *baton, const char *data, apr_size_t len) +{ + dontdothat_filter_ctx *ctx = baton; + + if (ctx->no_soup_for_you || ctx->let_it_go) + return; + + switch (ctx->state) + { + case STATE_IN_SRC_PATH: + /* FALLTHROUGH */ + + case STATE_IN_DST_PATH: + /* FALLTHROUGH */ + + case STATE_IN_RECURSIVE: + if (! 
ctx->buffer) + ctx->buffer = svn_stringbuf_ncreate(data, len, ctx->r->pool); + else + svn_stringbuf_appendbytes(ctx->buffer, data, len); + break; + + default: + break; + } +} + +/* Implements svn_xml_start_elem callback */ +static void +start_element(void *baton, const char *name, const char **attrs) +{ + dontdothat_filter_ctx *ctx = baton; + const char *sep; + + if (ctx->no_soup_for_you || ctx->let_it_go) + return; + + /* XXX Hack. We should be doing real namespace support, but for now we + * just skip ahead of any namespace prefix. If someone's sending us + * an update-report element outside of the SVN namespace they'll get + * what they deserve... */ + sep = ap_strchr_c(name, ':'); + if (sep) + name = sep + 1; + + switch (ctx->state) + { + case STATE_BEGINNING: + if (strcmp(name, "update-report") == 0) + ctx->state = STATE_IN_UPDATE; + else if (strcmp(name, "replay-report") == 0 && ctx->cfg->no_replay) + { + /* XXX it would be useful if there was a way to override this + * on a per-user basis... */ + if (! is_this_legal(ctx, ctx->r->unparsed_uri)) + ctx->no_soup_for_you = TRUE; + else + ctx->let_it_go = TRUE; + } + else + ctx->let_it_go = TRUE; + break; + + case STATE_IN_UPDATE: + if (strcmp(name, "src-path") == 0) + { + ctx->state = STATE_IN_SRC_PATH; + if (ctx->buffer) + ctx->buffer->len = 0; + } + else if (strcmp(name, "dst-path") == 0) + { + ctx->state = STATE_IN_DST_PATH; + if (ctx->buffer) + ctx->buffer->len = 0; + } + else if (strcmp(name, "recursive") == 0) + { + ctx->state = STATE_IN_RECURSIVE; + if (ctx->buffer) + ctx->buffer->len = 0; + } + else + ; /* XXX Figure out what else we need to deal with... Switch + * has that link-path thing we probably need to look out + * for... */ + break; + + default: + break; + } +} + +/* Implements svn_xml_end_elem callback */ +static void +end_element(void *baton, const char *name) +{ + dontdothat_filter_ctx *ctx = baton; + const char *sep; + + if (ctx->no_soup_for_you || ctx->let_it_go) + return; + + /* XXX Hack. 
We should be doing real namespace support, but for now we + * just skip ahead of any namespace prefix. If someone's sending us + * an update-report element outside of the SVN namespace they'll get + * what they deserve... */ + sep = ap_strchr_c(name, ':'); + if (sep) + name = sep + 1; + + switch (ctx->state) + { + case STATE_IN_SRC_PATH: + ctx->state = STATE_IN_UPDATE; + + svn_stringbuf_strip_whitespace(ctx->buffer); + + if (! ctx->path_failed && ! is_this_legal(ctx, ctx->buffer->data)) + ctx->path_failed = TRUE; + break; + + case STATE_IN_DST_PATH: + ctx->state = STATE_IN_UPDATE; + + svn_stringbuf_strip_whitespace(ctx->buffer); + + if (! ctx->path_failed && ! is_this_legal(ctx, ctx->buffer->data)) + ctx->path_failed = TRUE; + break; + + case STATE_IN_RECURSIVE: + ctx->state = STATE_IN_UPDATE; + + svn_stringbuf_strip_whitespace(ctx->buffer); + + /* If this isn't recursive we let it go. */ + if (strcmp(ctx->buffer->data, "no") == 0) + { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r, + "mod_dontdothat: letting nonrecursive request go"); + ctx->let_it_go = TRUE; + } + break; + + case STATE_IN_UPDATE: + if (strcmp(name, "update-report") == 0) + { + /* If we made it here without figuring out that this is + * nonrecursive, then the path check is our final word + * on the subject. */ + + if (ctx->path_failed) + ctx->no_soup_for_you = TRUE; + else + ctx->let_it_go = TRUE; + } + else + ; /* XXX Is there other stuff we care about? 
*/ + break; + + default: + abort(); + } +} + +static svn_boolean_t +is_valid_wildcard(const char *wc) +{ + while (*wc) + { + if (*wc == '*') + { + if (wc[1] && wc[1] != '/') + return FALSE; + } + + ++wc; + } + + return TRUE; +} + +static svn_boolean_t +config_enumerator(const char *wildcard, + const char *action, + void *baton, + apr_pool_t *pool) +{ + dontdothat_filter_ctx *ctx = baton; + + if (strcmp(action, "deny") == 0) + { + if (is_valid_wildcard(wildcard)) + APR_ARRAY_PUSH(ctx->no_recursive_ops, const char *) = wildcard; + else + ctx->err = svn_error_createf(APR_EINVAL, + NULL, + "'%s' is an invalid wildcard", + wildcard); + } + else if (strcmp(action, "allow") == 0) + { + if (is_valid_wildcard(wildcard)) + APR_ARRAY_PUSH(ctx->allow_recursive_ops, const char *) = wildcard; + else + ctx->err = svn_error_createf(APR_EINVAL, + NULL, + "'%s' is an invalid wildcard", + wildcard); + } + else + { + ctx->err = svn_error_createf(APR_EINVAL, + NULL, + "'%s' is not a valid action", + action); + } + + if (ctx->err) + return FALSE; + else + return TRUE; +} + +static void +dontdothat_insert_filters(request_rec *r) +{ + dontdothat_config_rec *cfg = ap_get_module_config(r->per_dir_config, + &dontdothat_module); + + if (! cfg->config_file) + return; + + if (strcmp("REPORT", r->method) == 0) + { + dontdothat_filter_ctx *ctx = apr_pcalloc(r->pool, sizeof(*ctx)); + svn_config_t *config; + svn_error_t *err; + + ctx->r = r; + + ctx->cfg = cfg; + + ctx->allow_recursive_ops = apr_array_make(r->pool, 5, sizeof(char *)); + + ctx->no_recursive_ops = apr_array_make(r->pool, 5, sizeof(char *)); + + /* XXX is there a way to error out from this point? Would be nice... */ + + err = svn_config_read3(&config, cfg->config_file, TRUE, + FALSE, TRUE, r->pool); + if (err) + { + char buff[256]; + + ap_log_rerror(APLOG_MARK, APLOG_ERR, + ((err->apr_err >= APR_OS_START_USERERR && + err->apr_err < APR_OS_START_CANONERR) ? 
+ 0 : err->apr_err), + r, "Failed to load DontDoThatConfigFile: %s", + svn_err_best_message(err, buff, sizeof(buff))); + + svn_error_clear(err); + + return; + } + + svn_config_enumerate2(config, + "recursive-actions", + config_enumerator, + ctx, + r->pool); + if (ctx->err) + { + char buff[256]; + + ap_log_rerror(APLOG_MARK, APLOG_ERR, + ((ctx->err->apr_err >= APR_OS_START_USERERR && + ctx->err->apr_err < APR_OS_START_CANONERR) ? + 0 : ctx->err->apr_err), + r, "Failed to parse DontDoThatConfigFile: %s", + svn_err_best_message(ctx->err, buff, sizeof(buff))); + + svn_error_clear(ctx->err); + + return; + } + + ctx->state = STATE_BEGINNING; + + ctx->xmlp = svn_xml_make_parser(ctx, start_element, end_element, + cdata, r->pool); + + ap_add_input_filter("DONTDOTHAT_FILTER", ctx, r, r->connection); + } +} + +static void +dontdothat_register_hooks(apr_pool_t *pool) +{ + ap_hook_insert_filter(dontdothat_insert_filters, NULL, NULL, APR_HOOK_FIRST); + + ap_register_input_filter("DONTDOTHAT_FILTER", + dontdothat_filter, + NULL, + AP_FTYPE_RESOURCE); +} + +module AP_MODULE_DECLARE_DATA dontdothat_module = +{ + STANDARD20_MODULE_STUFF, + create_dontdothat_dir_config, + NULL, + NULL, + NULL, + dontdothat_cmds, + dontdothat_register_hooks +}; diff --git a/tools/server-side/svn-backup-dumps.py b/tools/server-side/svn-backup-dumps.py new file mode 100755 index 0000000..2f3a231 --- /dev/null +++ b/tools/server-side/svn-backup-dumps.py @@ -0,0 +1,692 @@ +#!/usr/bin/env python +# +# svn-backup-dumps.py -- Create dumpfiles to backup a subversion repository. +# +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== +# +# This script creates dump files from a subversion repository. +# It is intended for use in cron jobs and post-commit hooks. +# +# The basic operation modes are: +# 1. Create a full dump (revisions 0 to HEAD). +# 2. Create incremental dumps containing at most N revisions. +# 3. Create incremental single revision dumps (for use in post-commit). +# 4. Create incremental dumps containing everything since last dump. +# +# All dump files are prefixed with the basename of the repository. All +# examples below assume that the repository '/srv/svn/repos/src' is +# dumped so all dumpfiles start with 'src'. +# +# Optional functionality: +# 5. Create gzipped dump files. +# 6. Create bzipped dump files. +# 7. Transfer the dumpfile to another host using ftp. +# 8. Transfer the dumpfile to another host using smb. +# +# See also 'svn-backup-dumps.py -h'. +# +# +# 1. Create a full dump (revisions 0 to HEAD). +# +# svn-backup-dumps.py <repos> <dumpdir> +# +# <repos> Path to the repository. +# <dumpdir> Directory for storing the dump file. +# +# This creates a dump file named 'src.000000-NNNNNN.svndmp.gz' +# where NNNNNN is the revision number of HEAD. +# +# +# 2. Create incremental dumps containing at most N revisions. +# +# svn-backup-dumps.py -c <count> <repos> <dumpdir> +# +# <count> Count of revisions per dump file. 
+# <repos> Path to the repository. +# <dumpdir> Directory for storing the dump file. +# +# When started the first time with a count of 1000 and if HEAD is +# at 2923 it creates the following files: +# +# src.000000-000999.svndmp.gz +# src.001000-001999.svndmp.gz +# src.002000-002923.svndmp.gz +# +# Say the next time HEAD is at 3045 it creates these two files: +# +# src.002000-002999.svndmp.gz +# src.003000-003045.svndmp.gz +# +# +# 3. Create incremental single revision dumps (for use in post-commit). +# +# svn-backup-dumps.py -r <revnr> <repos> <dumpdir> +# +# <revnr> A revision number. +# <repos> Path to the repository. +# <dumpdir> Directory for storing the dump file. +# +# This creates a dump file named 'src.NNNNNN.svndmp.gz' where +# NNNNNN is the given revision number. +# +# +# 4. Create incremental dumps relative to last dump. +# +# svn-backup-dumps.py -i <repos> <dumpdir> +# +# <repos> Path to the repository. +# <dumpdir> Directory for storing the dump file. +# +# If dumps are performed when HEAD is 2923, +# then when HEAD is 3045, it creates these files: +# +# src.000000-002923.svndmp.gz +# src.002924-003045.svndmp.gz +# +# +# 5. Create gzipped dump files. +# +# svn-backup-dumps.py -z ... +# +# ... More options, see 1-4, 7, 8. +# +# +# 6. Create bzipped dump files. +# +# svn-backup-dumps.py -b ... +# +# ... More options, see 1-4, 7, 8. +# +# +# 7. Transfer the dumpfile to another host using ftp. +# +# svn-backup-dumps.py -t ftp:<host>:<user>:<password>:<path> ... +# +# <host> Name of the FTP host. +# <user> Username on the remote host. +# <password> Password for the user. +# <path> Subdirectory on the remote host. +# ... More options, see 1-6. +# +# If <path> contains the string '%r' it is replaced by the +# repository name (basename of the repository path). +# +# +# 8. Transfer the dumpfile to another host using smb. +# +# svn-backup-dumps.py -t smb:<share>:<user>:<password>:<path> ... +# +# <share> Name of an SMB share in the form '//host/share'.
+# <user> Username on the remote host. +# <password> Password for the user. +# <path> Subdirectory of the share. +# ... More options, see 1-6. +# +# If <path> contains the string '%r' it is replaced by the +# repository name (basename of the repository path). +# +# +# +# TODO: +# - find out how to report smbclient errors +# - improve documentation +# + +__version = "0.6" + +import sys +import os +if os.name != "nt": + import fcntl + import select +import gzip +import os.path +import re +from optparse import OptionParser +from ftplib import FTP +from subprocess import Popen, PIPE + +try: + import bz2 + have_bz2 = True +except ImportError: + have_bz2 = False + + +class SvnBackupOutput: + + def __init__(self, abspath, filename): + self.__filename = filename + self.__absfilename = os.path.join(abspath, filename) + + def open(self): + pass + + def write(self, data): + pass + + def close(self): + pass + + def get_filename(self): + return self.__filename + + def get_absfilename(self): + return self.__absfilename + + +class SvnBackupOutputPlain(SvnBackupOutput): + + def __init__(self, abspath, filename): + SvnBackupOutput.__init__(self, abspath, filename) + + def open(self): + self.__ofd = open(self.get_absfilename(), "wb") + + def write(self, data): + self.__ofd.write(data) + + def close(self): + self.__ofd.close() + + +class SvnBackupOutputGzip(SvnBackupOutput): + + def __init__(self, abspath, filename): + SvnBackupOutput.__init__(self, abspath, filename + ".gz") + + def open(self): + self.__compressor = gzip.GzipFile(filename=self.get_absfilename(), + mode="wb") + + def write(self, data): + self.__compressor.write(data) + + def close(self): + self.__compressor.flush() + self.__compressor.close() + + +class SvnBackupOutputBzip2(SvnBackupOutput): + + def __init__(self, abspath, filename): + SvnBackupOutput.__init__(self, abspath, filename + ".bz2") + + def open(self): + self.__compressor = bz2.BZ2Compressor() + self.__ofd = open(self.get_absfilename(), "wb") + + def 
write(self, data): + self.__ofd.write(self.__compressor.compress(data)) + + def close(self): + self.__ofd.write(self.__compressor.flush()) + self.__ofd.close() + +class SvnBackupOutputCommand(SvnBackupOutput): + + def __init__(self, abspath, filename, file_extension, cmd_path, + cmd_options): + SvnBackupOutput.__init__(self, abspath, filename + file_extension) + self.__cmd_path = cmd_path + self.__cmd_options = cmd_options + + def open(self): + cmd = [ self.__cmd_path, self.__cmd_options ] + + self.__ofd = open(self.get_absfilename(), "wb") + try: + proc = Popen(cmd, stdin=PIPE, stdout=self.__ofd, shell=False) + except: + print((256, "", "Popen failed (%s ...):\n %s" % (cmd[0], + str(sys.exc_info()[1])))) + sys.exit(256) + self.__proc = proc + self.__stdin = proc.stdin + + def write(self, data): + self.__stdin.write(data) + + def close(self): + self.__stdin.close() + rc = self.__proc.wait() + self.__ofd.close() + +class SvnBackupException(Exception): + + def __init__(self, errortext): + self.errortext = errortext + + def __str__(self): + return self.errortext + +class SvnBackup: + + def __init__(self, options, args): + # need 3 args: progname, reposname, dumpdir + if len(args) != 3: + if len(args) < 3: + raise SvnBackupException("too few arguments, specify" + " repospath and dumpdir.\nuse -h or" + " --help option to see help.") + else: + raise SvnBackupException("too many arguments, specify" + " repospath and dumpdir only.\nuse" + " -h or --help option to see help.") + self.__repospath = args[1] + self.__dumpdir = args[2] + # check repospath + rpathparts = os.path.split(self.__repospath) + if len(rpathparts[1]) == 0: + # repospath without trailing slash + self.__repospath = rpathparts[0] + if not os.path.exists(self.__repospath): + raise SvnBackupException("repos '%s' does not exist." % self.__repospath) + if not os.path.isdir(self.__repospath): + raise SvnBackupException("repos '%s' is not a directory." 
% self.__repospath) + for subdir in [ "db", "conf", "hooks" ]: + dir = os.path.join(self.__repospath, subdir) + if not os.path.isdir(dir): + raise SvnBackupException("repos '%s' is not a repository." % self.__repospath) + rpathparts = os.path.split(self.__repospath) + self.__reposname = rpathparts[1] + if self.__reposname in [ "", ".", ".." ]: + raise SvnBackupException("couldn't extract repos name from '%s'." % self.__repospath) + # check dumpdir + if not os.path.exists(self.__dumpdir): + raise SvnBackupException("dumpdir '%s' does not exist." % self.__dumpdir) + elif not os.path.isdir(self.__dumpdir): + raise SvnBackupException("dumpdir '%s' is not a directory." % self.__dumpdir) + # set options + self.__rev_nr = options.rev + self.__count = options.cnt + self.__quiet = options.quiet + self.__deltas = options.deltas + self.__relative_incremental = options.relative_incremental + + # svnadmin/svnlook path + self.__svnadmin_path = "svnadmin" + if options.svnadmin_path: + self.__svnadmin_path = options.svnadmin_path + self.__svnlook_path = "svnlook" + if options.svnlook_path: + self.__svnlook_path = options.svnlook_path + + # check compress option + self.__gzip_path = options.gzip_path + self.__bzip2_path = options.bzip2_path + self.__zip = None + compress_options = 0 + if options.gzip_path != None: + compress_options = compress_options + 1 + if options.bzip2_path != None: + compress_options = compress_options + 1 + if options.bzip2: + compress_options = compress_options + 1 + self.__zip = "bzip2" + if options.gzip: + compress_options = compress_options + 1 + self.__zip = "gzip" + if compress_options > 1: + raise SvnBackupException("--bzip2-path, --gzip-path, -b, -z are " + "mutually exclusive.") + + self.__overwrite = False + self.__overwrite_all = False + if options.overwrite > 0: + self.__overwrite = True + if options.overwrite > 1: + self.__overwrite_all = True + self.__transfer = None + if options.transfer != None: + self.__transfer = options.transfer.split(":") 
+ if len(self.__transfer) != 5: + if len(self.__transfer) < 5: + raise SvnBackupException("too few fields for transfer '%s'." % self.__transfer) + else: + raise SvnBackupException("too many fields for transfer '%s'." % self.__transfer) + if self.__transfer[0] not in [ "ftp", "smb" ]: + raise SvnBackupException("unknown transfer method '%s'." % self.__transfer[0]) + + def set_nonblock(self, fileobj): + fd = fileobj.fileno() + n = fcntl.fcntl(fd, fcntl.F_GETFL) + fcntl.fcntl(fd, fcntl.F_SETFL, n|os.O_NONBLOCK) + + def exec_cmd(self, cmd, output=None, printerr=False): + if os.name == "nt": + return self.exec_cmd_nt(cmd, output, printerr) + else: + return self.exec_cmd_unix(cmd, output, printerr) + + def exec_cmd_unix(self, cmd, output=None, printerr=False): + try: + proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False) + except: + return (256, "", "Popen failed (%s ...):\n %s" % (cmd[0], + str(sys.exc_info()[1]))) + stdout = proc.stdout + stderr = proc.stderr + self.set_nonblock(stdout) + self.set_nonblock(stderr) + readfds = [ stdout, stderr ] + selres = select.select(readfds, [], []) + bufout = "" + buferr = "" + while len(selres[0]) > 0: + for fd in selres[0]: + buf = fd.read(16384) + if len(buf) == 0: + readfds.remove(fd) + elif fd == stdout: + if output: + output.write(buf) + else: + bufout += buf + else: + if printerr: + sys.stdout.write("%s " % buf) + else: + buferr += buf + if len(readfds) == 0: + break + selres = select.select(readfds, [], []) + rc = proc.wait() + if printerr: + print("") + return (rc, bufout, buferr) + + def exec_cmd_nt(self, cmd, output=None, printerr=False): + try: + proc = Popen(cmd, stdout=PIPE, stderr=None, shell=False) + except: + return (256, "", "Popen failed (%s ...):\n %s" % (cmd[0], + str(sys.exc_info()[1]))) + stdout = proc.stdout + bufout = "" + buferr = "" + buf = stdout.read(16384) + while len(buf) > 0: + if output: + output.write(buf) + else: + bufout += buf + buf = stdout.read(16384) + rc = proc.wait() + return (rc, 
bufout, buferr) + + def get_head_rev(self): + cmd = [ self.__svnlook_path, "youngest", self.__repospath ] + r = self.exec_cmd(cmd) + if r[0] == 0 and len(r[2]) == 0: + return int(r[1].strip()) + else: + print(r[2]) + return -1 + + def get_last_dumped_rev(self): + filename_regex = re.compile("(.+)\.\d+-(\d+)\.svndmp.*") + # start with -1 so the next one will be rev 0 + highest_rev = -1 + + for filename in os.listdir(self.__dumpdir): + m = filename_regex.match( filename ) + if m and (m.group(1) == self.__reposname): + rev_end = int(m.group(2)) + + if rev_end > highest_rev: + # determine the latest revision dumped + highest_rev = rev_end + + return highest_rev + + def transfer_ftp(self, absfilename, filename): + rc = False + try: + host = self.__transfer[1] + user = self.__transfer[2] + passwd = self.__transfer[3] + destdir = self.__transfer[4].replace("%r", self.__reposname) + ftp = FTP(host, user, passwd) + ftp.cwd(destdir) + ifd = open(absfilename, "rb") + ftp.storbinary("STOR %s" % filename, ifd) + ftp.quit() + rc = len(ifd.read(1)) == 0 + ifd.close() + except Exception as e: + raise SvnBackupException("ftp transfer failed:\n file: '%s'\n error: %s" % \ + (absfilename, str(e))) + return rc + + def transfer_smb(self, absfilename, filename): + share = self.__transfer[1] + user = self.__transfer[2] + passwd = self.__transfer[3] + if passwd == "": + passwd = "-N" + destdir = self.__transfer[4].replace("%r", self.__reposname) + cmd = ("smbclient", share, "-U", user, passwd, "-D", destdir, + "-c", "put %s %s" % (absfilename, filename)) + r = self.exec_cmd(cmd) + rc = r[0] == 0 + if not rc: + print(r[2]) + return rc + + def transfer(self, absfilename, filename): + if self.__transfer == None: + return + elif self.__transfer[0] == "ftp": + self.transfer_ftp(absfilename, filename) + elif self.__transfer[0] == "smb": + self.transfer_smb(absfilename, filename) + else: + print("unknown transfer method '%s'." 
% self.__transfer[0]) + + def create_dump(self, checkonly, overwrite, fromrev, torev=None): + revparam = "%d" % fromrev + r = "%06d" % fromrev + if torev != None: + revparam += ":%d" % torev + r += "-%06d" % torev + filename = "%s.%s.svndmp" % (self.__reposname, r) + output = None + if self.__bzip2_path: + output = SvnBackupOutputCommand(self.__dumpdir, filename, ".bz2", + self.__bzip2_path, "-cz" ) + elif self.__gzip_path: + output = SvnBackupOutputCommand(self.__dumpdir, filename, ".gz", + self.__gzip_path, "-cf" ) + elif self.__zip: + if self.__zip == "gzip": + output = SvnBackupOutputGzip(self.__dumpdir, filename) + else: + output = SvnBackupOutputBzip2(self.__dumpdir, filename) + else: + output = SvnBackupOutputPlain(self.__dumpdir, filename) + absfilename = output.get_absfilename() + realfilename = output.get_filename() + if checkonly: + return os.path.exists(absfilename) + elif os.path.exists(absfilename): + if overwrite: + print("overwriting " + absfilename) + else: + print("%s already exists." 
% absfilename) + return True + else: + print("writing " + absfilename) + cmd = [ self.__svnadmin_path, "dump", + "--incremental", "-r", revparam, self.__repospath ] + if self.__quiet: + cmd[2:2] = [ "-q" ] + if self.__deltas: + cmd[2:2] = [ "--deltas" ] + output.open() + r = self.exec_cmd(cmd, output, True) + output.close() + rc = r[0] == 0 + if rc: + self.transfer(absfilename, realfilename) + return rc + + def export_single_rev(self): + return self.create_dump(False, self.__overwrite, self.__rev_nr) + + def export(self): + headrev = self.get_head_rev() + if headrev == -1: + return False + if self.__count is None: + return self.create_dump(False, self.__overwrite, 0, headrev) + baserev = headrev - (headrev % self.__count) + rc = True + cnt = self.__count + fromrev = baserev - cnt + torev = baserev - 1 + while fromrev >= 0 and rc: + if self.__overwrite_all or \ + not self.create_dump(True, False, fromrev, torev): + rc = self.create_dump(False, self.__overwrite_all, + fromrev, torev) + fromrev -= cnt + torev -= cnt + else: + fromrev = -1 + if rc: + rc = self.create_dump(False, self.__overwrite, baserev, headrev) + return rc + + def export_relative_incremental(self): + headrev = self.get_head_rev() + if headrev == -1: + return False + + last_dumped_rev = self.get_last_dumped_rev(); + if headrev < last_dumped_rev: + # that should not happen... 
+ return False + + if headrev == last_dumped_rev: + # already up-to-date + return True + + return self.create_dump(False, False, last_dumped_rev + 1, headrev) + + def execute(self): + if self.__rev_nr != None: + return self.export_single_rev() + elif self.__relative_incremental: + return self.export_relative_incremental() + else: + return self.export() + + +if __name__ == "__main__": + usage = "usage: svn-backup-dumps.py [options] repospath dumpdir" + parser = OptionParser(usage=usage, version="%prog "+__version) + if have_bz2: + parser.add_option("-b", + action="store_true", + dest="bzip2", default=False, + help="compress the dump using python bzip2 library.") + parser.add_option("-i", + action="store_true", + dest="relative_incremental", default=False, + help="perform incremental relative to last dump.") + parser.add_option("--deltas", + action="store_true", + dest="deltas", default=False, + help="pass --deltas to svnadmin dump.") + parser.add_option("-c", + action="store", type="int", + dest="cnt", default=None, + help="count of revisions per dumpfile.") + parser.add_option("-o", + action="store_const", const=1, + dest="overwrite", default=0, + help="overwrite files.") + parser.add_option("-O", + action="store_const", const=2, + dest="overwrite", default=0, + help="overwrite all files.") + parser.add_option("-q", + action="store_true", + dest="quiet", default=False, + help="quiet.") + parser.add_option("-r", + action="store", type="int", + dest="rev", default=None, + help="revision number for single rev dump.") + parser.add_option("-t", + action="store", type="string", + dest="transfer", default=None, + help="transfer dumps to another machine "+ + "(s.a. 
--help-transfer).") + parser.add_option("-z", + action="store_true", + dest="gzip", default=False, + help="compress the dump using python gzip library.") + parser.add_option("--bzip2-path", + action="store", type="string", + dest="bzip2_path", default=None, + help="compress the dump using bzip2 custom command.") + parser.add_option("--gzip-path", + action="store", type="string", + dest="gzip_path", default=None, + help="compress the dump using gzip custom command.") + parser.add_option("--svnadmin-path", + action="store", type="string", + dest="svnadmin_path", default=None, + help="svnadmin command path.") + parser.add_option("--svnlook-path", + action="store", type="string", + dest="svnlook_path", default=None, + help="svnlook command path.") + parser.add_option("--help-transfer", + action="store_true", + dest="help_transfer", default=False, + help="shows detailed help for the transfer option.") + (options, args) = parser.parse_args(sys.argv) + if options.help_transfer: + print("Transfer help:") + print("") + print(" FTP:") + print(" -t ftp:<host>:<user>:<password>:<dest-path>") + print("") + print(" SMB (using smbclient):") + print(" -t smb:<share>:<user>:<password>:<dest-path>") + print("") + sys.exit(0) + rc = False + try: + backup = SvnBackup(options, args) + rc = backup.execute() + except SvnBackupException as e: + print("svn-backup-dumps.py: %s" % e) + if rc: + print("Everything OK.") + sys.exit(0) + else: + print("An error occurred!") + sys.exit(1) + +# vim:et:ts=4:sw=4 diff --git a/tools/server-side/svn-populate-node-origins-index.c b/tools/server-side/svn-populate-node-origins-index.c new file mode 100644 index 0000000..18514b8 --- /dev/null +++ b/tools/server-side/svn-populate-node-origins-index.c @@ -0,0 +1,187 @@ +/* + * svn-populate-node-origins-index.c : Populate the repository's node + * origins index. 
+ * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include "svn_cmdline.h" +#include "svn_error.h" +#include "svn_fs.h" +#include "svn_path.h" +#include "svn_pools.h" +#include "svn_repos.h" +#include "svn_utf.h" + +/* Used to terminate lines in large multi-line string literals. */ +#define NL APR_EOL_STR + +static const char *usage_summary = + "Crawl the Subversion repository located at REPOS-PATH in an attempt to" NL + "populate that repository's index of node origins. " NL + "" NL + "The node origins index is new as of Subversion 1.5, and behaves as a" NL + "cache to vastly speed up certain history-querying operations. For" NL + "compatibility with repositories created with pre-1.5 versions of" NL + "Subversion, Subversion will gracefully handle cache misses by doing a" NL + "brute-force calculation of the query answer and lazily populating the" NL + "index with answers it calculates. Unfortunately, calculating that" NL + "information using the brute-force method (instead of having the" NL + "information appear in the index organically) can be very costly." 
NL + "" NL + "This tool triggers the lazy index population logic built into" NL + "Subversion in a fashion far more efficient than is likely to happen" NL + "during typical repository usage. It can be run while the repository" NL + "is online, too, without interrupting normal Subversion activities." NL; + +/* Print a usage message for this program (PROGNAME), possibly with an + error message ERR_MSG, if not NULL. */ +static void +usage_maybe_with_err(const char *progname, const char *err_msg) +{ + FILE *out; + + out = err_msg ? stderr : stdout; + fprintf(out, "Usage: %s REPOS-PATH\n\n%s", progname, usage_summary); + if (err_msg) + fprintf(out, "\nERROR: %s\n", err_msg); +} + +/* Build the node-origins index any newly added items introduced in + REVISION in FS. Set *COUNT to the number of new items found. */ +static svn_error_t * +index_revision_adds(int *count, svn_fs_t *fs, + svn_revnum_t revision, apr_pool_t *pool) +{ + svn_fs_root_t *root; + apr_pool_t *subpool = svn_pool_create(pool); + + svn_fs_path_change_iterator_t *iterator; + svn_fs_path_change3_t *change; + + *count = 0; + SVN_ERR(svn_fs_revision_root(&root, fs, revision, pool)); + SVN_ERR(svn_fs_paths_changed3(&iterator, root, pool, subpool)); + SVN_ERR(svn_fs_path_change_get(&change, iterator)); + + while (change) + { + svn_pool_clear(subpool); + if ((change->change_kind == svn_fs_path_change_add) + || (change->change_kind == svn_fs_path_change_replace)) + { + if (! (change->copyfrom_path + && SVN_IS_VALID_REVNUM(change->copyfrom_rev))) + { + svn_revnum_t origin; + SVN_ERR(svn_fs_node_origin_rev(&origin, root, + change->path.data, subpool)); + (*count)++; + } + } + + SVN_ERR(svn_fs_path_change_get(&change, iterator)); + } + svn_pool_destroy(subpool); + + return SVN_NO_ERROR; +} + +/* Build the node-origins index for the repository located at REPOS_PATH. 
*/ +static svn_error_t * +build_index(const char *repos_path, apr_pool_t *pool) +{ + svn_repos_t *repos; + svn_fs_t *fs; + svn_revnum_t youngest_rev, i; + size_t slotsize; + const char *progress_fmt; + apr_pool_t *subpool; + + /* Open the repository. */ + SVN_ERR(svn_repos_open3(&repos, repos_path, NULL, pool, pool)); + + /* Get a filesystem object. */ + fs = svn_repos_fs(repos); + + /* Fetch the youngest revision of the repository. */ + SVN_ERR(svn_fs_youngest_rev(&youngest_rev, fs, pool)); + slotsize = strlen(apr_ltoa(pool, youngest_rev)); + progress_fmt = apr_psprintf + (pool, + "[%%%" APR_SIZE_T_FMT "ld" + "/%%%" APR_SIZE_T_FMT "ld] " + "Found %%d new lines of history." + "\n", slotsize, slotsize); + + /* Now, iterate over all the revisions, calling index_revision_adds(). */ + subpool = svn_pool_create(pool); + for (i = 0; i < youngest_rev; i++) + { + int count; + svn_pool_clear(subpool); + SVN_ERR(index_revision_adds(&count, fs, i + 1, subpool)); + printf(progress_fmt, i + 1, youngest_rev, count); + } + svn_pool_destroy(subpool); + + return SVN_NO_ERROR; +} + + +int +main(int argc, const char **argv) +{ + apr_pool_t *pool; + svn_error_t *err = SVN_NO_ERROR; + const char *repos_path; + + /* Initialize the app. Send all error messages to 'stderr'. */ + if (svn_cmdline_init(argv[0], stderr) == EXIT_FAILURE) + return EXIT_FAILURE; + + pool = svn_pool_create(NULL); + + if (argc <= 1) + { + usage_maybe_with_err(argv[0], "Not enough arguments."); + goto cleanup; + } + + /* Convert argv[1] into a UTF8, internal-format, canonicalized path. 
*/ + if ((err = svn_utf_cstring_to_utf8(&repos_path, argv[1], pool))) + goto cleanup; + repos_path = svn_dirent_internal_style(repos_path, pool); + repos_path = svn_dirent_canonicalize(repos_path, pool); + + if ((err = build_index(repos_path, pool))) + goto cleanup; + + cleanup: + svn_pool_destroy(pool); + + if (err) + { + svn_handle_error2(err, stderr, FALSE, + "svn-populate-node-origins-index: "); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/tools/server-side/svn_server_log_parse.py b/tools/server-side/svn_server_log_parse.py new file mode 100755 index 0000000..5ecb104 --- /dev/null +++ b/tools/server-side/svn_server_log_parse.py @@ -0,0 +1,460 @@ +#!/usr/bin/python + +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== + +# TODO: Teach parse_open about capabilities, rather than allowing any +# words at all. + +"""Parse subversion server operational logs. + +SVN-ACTION strings +------------------ + +Angle brackets denote a variable, e.g. 'commit r<N>' means you'll see +lines like 'commit r17' for this action. + +<N> and <M> are revision numbers. 
+ +<PATH>, <FROM-PATH>, and <TO-PATH> mean a URI-encoded path relative to +the repository root, including a leading '/'. + +<REVPROP> means a revision property, e.g. 'svn:log'. + +<I> represents a svn_mergeinfo_inheritance_t value and is one of these +words: explicit inherited nearest-ancestor. + +<D> represents a svn_depth_t value and is one of these words: empty +files immediates infinity. If the depth value for the operation was +svn_depth_unknown, the depth= portion is absent entirely. + +The get-mergeinfo and log actions use lists for paths and revprops. +The lists are enclosed in parentheses and each item is separated by a +space (spaces in paths are encoded as %20). + +The words will *always* be in this order, though some may be absent. + +General:: + + change-rev-prop r<N> <REVPROP> + commit r<N> + get-dir <PATH> r<N> text? props? + get-file <PATH> r<N> text? props? + lock (<PATH> ...) steal? + rev-proplist r<N> + unlock (<PATH> ...) break? + +Reports:: + + get-file-revs <PATH> r<N>:<M> include-merged-revisions? + get-mergeinfo (<PATH> ...) <I> include-descendants? + log (<PATH> ...) r<N>:<M> limit=<N>? discover-changed-paths? strict? include-merged-revisions? revprops=all|(<REVPROP> ...)? + replay <PATH> r<N> + +The update report:: + + checkout-or-export <PATH> r<N> depth=<D>? + diff <FROM-PATH>@<N> <TO-PATH>@<M> depth=<D>? ignore-ancestry? + diff <PATH> r<N>:<M> depth=<D>? ignore-ancestry? + status <PATH> r<N> depth=<D>? + switch <FROM-PATH> <TO-PATH>@<N> depth=<D>? + update <PATH> r<N> depth=<D>? send-copyfrom-args? 
+""" + + +import re +try: + # Python >=3.0 + from urllib.parse import unquote as urllib_parse_unquote +except ImportError: + # Python <3.0 + from urllib import unquote as urllib_parse_unquote + +import svn.core + +# +# Valid words for _parse_depth and _parse_mergeinfo_inheritance +# + +DEPTH_WORDS = ['empty', 'files', 'immediates', 'infinity'] +INHERITANCE_WORDS = { + 'explicit': svn.core.svn_mergeinfo_explicit, + 'inherited': svn.core.svn_mergeinfo_inherited, + 'nearest-ancestor': svn.core.svn_mergeinfo_nearest_ancestor, +} + +# +# Patterns for _match +# + +# <PATH> +pPATH = r'(/\S*)' +# (<PATH> ...) +pPATHS = r'\(([^)]*)\)' +# r<N> +pREVNUM = r'r(\d+)' +# (<N> ...) +pREVNUMS = r'\(((\d+\s*)*)\)' +# r<N>:<M> +pREVRANGE = r'r(-?\d+):(-?\d+)' +# <PATH>@<N> +pPATHREV = pPATH + r'@(\d+)' +pWORD = r'(\S+)' +pPROPERTY = pWORD +# depth=<D>? +pDEPTH = 'depth=' + pWORD + +# +# Exceptions +# + +class Error(Exception): pass +class BadDepthError(Error): + def __init__(self, value): + Error.__init__(self, 'bad svn_depth_t value ' + value) +class BadMergeinfoInheritanceError(Error): + def __init__(self, value): + Error.__init__(self, 'bad svn_mergeinfo_inheritance_t value ' + value) +class MatchError(Error): + def __init__(self, pattern, line): + Error.__init__(self, '/%s/ does not match log line:\n%s' + % (pattern, line)) + + +# +# Helper functions +# + +# TODO: Move to kitchensink.c like svn_depth_from_word? +try: + from svn.core import svn_inheritance_from_word +except ImportError: + def svn_inheritance_from_word(word): + try: + return INHERITANCE_WORDS[word] + except KeyError: + # XXX svn_inheritance_to_word uses explicit as default so... 
+ return svn.core.svn_mergeinfo_explicit + +def _parse_depth(word): + if word is None: + return svn.core.svn_depth_unknown + if word not in DEPTH_WORDS: + raise BadDepthError(word) + return svn.core.svn_depth_from_word(word) + +def _parse_mergeinfo_inheritance(word): + if word not in INHERITANCE_WORDS: + raise BadMergeinfoInheritanceError(word) + return svn_inheritance_from_word(word) + +def _match(line, *patterns): + """Return a re.match object from matching patterns against line. + + All optional arguments must be strings suitable for ''.join()ing + into a single pattern string for re.match. The last optional + argument may instead be a list of such strings, which will be + joined into the final pattern as *optional* matches. + + Raises: + Error -- if re.match returns None (i.e. no match) + """ + if isinstance(patterns[-1], list): + optional = patterns[-1] + patterns = patterns[:-1] + else: + optional = [] + pattern = r'\s+'.join(patterns) + pattern += ''.join([r'(\s+' + x + ')?' for x in optional]) + m = re.match(pattern, line) + if m is None: + raise MatchError(pattern, line) + return m + + +class Parser(object): + """Subclass this and define the handle_ methods according to the + "SVN-ACTION strings" section of this module's documentation. For + example, "lock <PATH> steal?" => def handle_lock(self, path, steal) + where steal will be True if "steal" was present. + + See the end of test_svn_server_log_parse.py for a complete example. + """ + def parse(self, line): + """Parse line and call appropriate handle_ method. 
+ + Returns one of: + - line remaining after the svn action, if one was parsed + - whatever your handle_unknown implementation returns + + Raises: + BadDepthError -- for bad svn_depth_t values + BadMergeinfoInheritanceError -- for bad svn_mergeinfo_inheritance_t + values + Error -- any other parse error + """ + self.line = line + words = self.split_line = line.split(' ') + try: + method = getattr(self, '_parse_' + words[0].replace('-', '_')) + except AttributeError: + return self.handle_unknown(self.line) + return method(' '.join(words[1:])) + + def _parse_commit(self, line): + m = _match(line, pREVNUM) + self.handle_commit(int(m.group(1))) + return line[m.end():] + + def _parse_open(self, line): + pINT = r'(\d+)' + pCAP = r'cap=\(([^)]*)\)' + pCLIENT = pWORD + m = _match(line, pINT, pCAP, pPATH, pCLIENT, pCLIENT) + protocol = int(m.group(1)) + if m.group(2) is None: + capabilities = [] + else: + capabilities = m.group(2).split() + path = m.group(3) + ra_client = urllib_parse_unquote(m.group(4)) + client = urllib_parse_unquote(m.group(5)) + self.handle_open(protocol, capabilities, path, ra_client, client) + return line[m.end():] + + def _parse_reparent(self, line): + m = _match(line, pPATH) + self.handle_reparent(urllib_parse_unquote(m.group(1))) + return line[m.end():] + + def _parse_get_latest_rev(self, line): + self.handle_get_latest_rev() + return line + + def _parse_get_dated_rev(self, line): + m = _match(line, pWORD) + self.handle_get_dated_rev(m.group(1)) + return line[m.end():] + + def _parse_get_dir(self, line): + m = _match(line, pPATH, pREVNUM, ['text', 'props']) + self.handle_get_dir(urllib_parse_unquote(m.group(1)), int(m.group(2)), + m.group(3) is not None, + m.group(4) is not None) + return line[m.end():] + + def _parse_get_file(self, line): + m = _match(line, pPATH, pREVNUM, ['text', 'props']) + self.handle_get_file(urllib_parse_unquote(m.group(1)), int(m.group(2)), + m.group(3) is not None, + m.group(4) is not None) + return line[m.end():] + + def 
_parse_lock(self, line): + m = _match(line, pPATHS, ['steal']) + paths = [urllib_parse_unquote(x) for x in m.group(1).split()] + self.handle_lock(paths, m.group(2) is not None) + return line[m.end():] + + def _parse_change_rev_prop(self, line): + m = _match(line, pREVNUM, pPROPERTY) + self.handle_change_rev_prop(int(m.group(1)), + urllib_parse_unquote(m.group(2))) + return line[m.end():] + + def _parse_rev_proplist(self, line): + m = _match(line, pREVNUM) + self.handle_rev_proplist(int(m.group(1))) + return line[m.end():] + + def _parse_rev_prop(self, line): + m = _match(line, pREVNUM, pPROPERTY) + self.handle_rev_prop(int(m.group(1)), urllib_parse_unquote(m.group(2))) + return line[m.end():] + + def _parse_unlock(self, line): + m = _match(line, pPATHS, ['break']) + paths = [urllib_parse_unquote(x) for x in m.group(1).split()] + self.handle_unlock(paths, m.group(2) is not None) + return line[m.end():] + + def _parse_get_lock(self, line): + m = _match(line, pPATH) + self.handle_get_lock(urllib_parse_unquote(m.group(1))) + return line[m.end():] + + def _parse_get_locks(self, line): + m = _match(line, pPATH) + self.handle_get_locks(urllib_parse_unquote(m.group(1))) + return line[m.end():] + + def _parse_get_locations(self, line): + m = _match(line, pPATH, pREVNUMS) + path = urllib_parse_unquote(m.group(1)) + revnums = [int(x) for x in m.group(2).split()] + self.handle_get_locations(path, revnums) + return line[m.end():] + + def _parse_get_location_segments(self, line): + m = _match(line, pPATHREV, pREVRANGE) + path = urllib_parse_unquote(m.group(1)) + peg = int(m.group(2)) + left = int(m.group(3)) + right = int(m.group(4)) + self.handle_get_location_segments(path, peg, left, right) + return line[m.end():] + + def _parse_get_file_revs(self, line): + m = _match(line, pPATH, pREVRANGE, ['include-merged-revisions']) + path = urllib_parse_unquote(m.group(1)) + left = int(m.group(2)) + right = int(m.group(3)) + include_merged_revisions = m.group(4) is not None + 
self.handle_get_file_revs(path, left, right, include_merged_revisions) + return line[m.end():] + + def _parse_get_mergeinfo(self, line): + # <I> + pMERGEINFO_INHERITANCE = pWORD + pINCLUDE_DESCENDANTS = pWORD + m = _match(line, + pPATHS, pMERGEINFO_INHERITANCE, ['include-descendants']) + paths = [urllib_parse_unquote(x) for x in m.group(1).split()] + inheritance = _parse_mergeinfo_inheritance(m.group(2)) + include_descendants = m.group(3) is not None + self.handle_get_mergeinfo(paths, inheritance, include_descendants) + return line[m.end():] + + def _parse_log(self, line): + # limit=<N>? + pLIMIT = r'limit=(\d+)' + # revprops=all|(<REVPROP> ...)? + pREVPROPS = r'revprops=(all|\(([^)]+)\))' + m = _match(line, pPATHS, pREVRANGE, + [pLIMIT, 'discover-changed-paths', 'strict', + 'include-merged-revisions', pREVPROPS]) + paths = [urllib_parse_unquote(x) for x in m.group(1).split()] + left = int(m.group(2)) + right = int(m.group(3)) + if m.group(5) is None: + limit = 0 + else: + limit = int(m.group(5)) + discover_changed_paths = m.group(6) is not None + strict = m.group(7) is not None + include_merged_revisions = m.group(8) is not None + if m.group(10) == 'all': + revprops = None + else: + if m.group(11) is None: + revprops = [] + else: + revprops = [urllib_parse_unquote(x) for x in m.group(11).split()] + self.handle_log(paths, left, right, limit, discover_changed_paths, + strict, include_merged_revisions, revprops) + return line[m.end():] + + def _parse_check_path(self, line): + m = _match(line, pPATHREV) + path = urllib_parse_unquote(m.group(1)) + revnum = int(m.group(2)) + self.handle_check_path(path, revnum) + return line[m.end():] + + def _parse_stat(self, line): + m = _match(line, pPATHREV) + path = urllib_parse_unquote(m.group(1)) + revnum = int(m.group(2)) + self.handle_stat(path, revnum) + return line[m.end():] + + def _parse_replay(self, line): + m = _match(line, pPATH, pREVNUM) + path = urllib_parse_unquote(m.group(1)) + revision = int(m.group(2)) + 
self.handle_replay(path, revision) + return line[m.end():] + + # the update report + + def _parse_checkout_or_export(self, line): + m = _match(line, pPATH, pREVNUM, [pDEPTH]) + path = urllib_parse_unquote(m.group(1)) + revision = int(m.group(2)) + depth = _parse_depth(m.group(4)) + self.handle_checkout_or_export(path, revision, depth) + return line[m.end():] + + def _parse_diff(self, line): + # First, try 1-path form. + try: + m = _match(line, pPATH, pREVRANGE, [pDEPTH, 'ignore-ancestry']) + f = self._parse_diff_1path + except Error: + # OK, how about 2-path form? + m = _match(line, pPATHREV, pPATHREV, [pDEPTH, 'ignore-ancestry']) + f = self._parse_diff_2paths + return f(line, m) + + def _parse_diff_1path(self, line, m): + path = urllib_parse_unquote(m.group(1)) + left = int(m.group(2)) + right = int(m.group(3)) + depth = _parse_depth(m.group(5)) + ignore_ancestry = m.group(6) is not None + self.handle_diff_1path(path, left, right, + depth, ignore_ancestry) + return line[m.end():] + + def _parse_diff_2paths(self, line, m): + from_path = urllib_parse_unquote(m.group(1)) + from_rev = int(m.group(2)) + to_path = urllib_parse_unquote(m.group(3)) + to_rev = int(m.group(4)) + depth = _parse_depth(m.group(6)) + ignore_ancestry = m.group(7) is not None + self.handle_diff_2paths(from_path, from_rev, to_path, to_rev, + depth, ignore_ancestry) + return line[m.end():] + + def _parse_status(self, line): + m = _match(line, pPATH, pREVNUM, [pDEPTH]) + path = urllib_parse_unquote(m.group(1)) + revision = int(m.group(2)) + depth = _parse_depth(m.group(4)) + self.handle_status(path, revision, depth) + return line[m.end():] + + def _parse_switch(self, line): + m = _match(line, pPATH, pPATHREV, [pDEPTH]) + from_path = urllib_parse_unquote(m.group(1)) + to_path = urllib_parse_unquote(m.group(2)) + to_rev = int(m.group(3)) + depth = _parse_depth(m.group(5)) + self.handle_switch(from_path, to_path, to_rev, depth) + return line[m.end():] + + def _parse_update(self, line): + m = 
_match(line, pPATH, pREVNUM, [pDEPTH, 'send-copyfrom-args']) + path = urllib_parse_unquote(m.group(1)) + revision = int(m.group(2)) + depth = _parse_depth(m.group(4)) + send_copyfrom_args = m.group(5) is not None + self.handle_update(path, revision, depth, send_copyfrom_args) + return line[m.end():] diff --git a/tools/server-side/svnauthz.c b/tools/server-side/svnauthz.c new file mode 100644 index 0000000..fc6cd89 --- /dev/null +++ b/tools/server-side/svnauthz.c @@ -0,0 +1,745 @@ +/* + * svnauthz.c : Tool for working with authz files. + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include "svn_cmdline.h" +#include "svn_dirent_uri.h" +#include "svn_opt.h" +#include "svn_pools.h" +#include "svn_repos.h" +#include "svn_utf.h" +#include "svn_path.h" + +#include "private/svn_fspath.h" +#include "private/svn_cmdline_private.h" + + +/*** Option Processing. 
***/ + +enum svnauthz__cmdline_options_t +{ + svnauthz__version = SVN_OPT_FIRST_LONGOPT_ID, + svnauthz__username, + svnauthz__path, + svnauthz__repos, + svnauthz__is, + svnauthz__groups_file +}; + +/* Option codes and descriptions. + * + * The entire list must be terminated with an entry of nulls. + */ +static const apr_getopt_option_t options_table[] = +{ + {"help", 'h', 0, ("show help on a subcommand")}, + {NULL, '?', 0, ("show help on a subcommand")}, + {"version", svnauthz__version, 0, ("show program version information")}, + {"username", svnauthz__username, 1, ("username to check access of")}, + {"path", svnauthz__path, 1, ("path within repository to check access of")}, + {"repository", svnauthz__repos, 1, ("repository authz name")}, + {"transaction", 't', 1, ("transaction id")}, + {"is", svnauthz__is, 1, + ("instead of outputting, test if the access is\n" + " " + "exactly ARG\n" + " " + "ARG can be one of the following values:\n" + " " + " rw write access (which also implies read)\n" + " " + " r read-only access\n" + " " + " no no access") + }, + {"groups-file", svnauthz__groups_file, 1, + ("use the groups from file ARG")}, + {"recursive", 'R', 0, + ("determine recursive access to PATH")}, + {0, 0, 0, 0} +}; + +struct svnauthz_opt_state +{ + svn_boolean_t help; + svn_boolean_t version; + svn_boolean_t recursive; + const char *authz_file; + const char *groups_file; + const char *username; + const char *fspath; + const char *repos_name; + const char *txn; + const char *repos_path; + const char *is; +}; + +/* The name of this binary in 1.7 and earlier. */ +#define SVNAUTHZ_COMPAT_NAME "svnauthz-validate" + +/* Libtool command prefix */ +#define SVNAUTHZ_LT_PREFIX "lt-" + + +/*** Subcommands. */ + +static svn_opt_subcommand_t + subcommand_help, + subcommand_validate, + subcommand_accessof; + +/* Array of available subcommands. + * The entire list must be terminated with an entry of nulls. 
+ */ +static const svn_opt_subcommand_desc2_t cmd_table[] = +{ + {"help", subcommand_help, {"?", "h"}, + ("usage: svnauthz help [SUBCOMMAND...]\n\n" + "Describe the usage of this program or its subcommands.\n"), + {0} }, + {"validate", subcommand_validate, {0} /* no aliases */, + ("Checks the syntax of an authz file.\n" + "usage: 1. svnauthz validate TARGET\n" + " 2. svnauthz validate --transaction TXN REPOS_PATH FILE_PATH\n\n" + " 1. Loads and validates the syntax of the authz file at TARGET.\n" + " TARGET can be a path to a file or an absolute file:// URL to an authz\n" + " file in a repository, but cannot be a repository relative URL (^/).\n\n" + " 2. Loads and validates the syntax of the authz file at FILE_PATH in the\n" + " transaction TXN in the repository at REPOS_PATH.\n\n" + "Returns:\n" + " 0 when syntax is OK.\n" + " 1 when syntax is invalid.\n" + " 2 operational error\n" + ), + {'t'} }, + {"accessof", subcommand_accessof, {0} /* no aliases */, + ("Print or test the permissions set by an authz file.\n" + "usage: 1. svnauthz accessof TARGET\n" + " 2. svnauthz accessof -t TXN REPOS_PATH FILE_PATH\n" + "\n" + " 1. Prints the access of USER to PATH based on authorization file at TARGET.\n" + " TARGET can be a path to a file or an absolute file:// URL to an authz\n" + " file in a repository, but cannot be a repository relative URL (^/).\n" + "\n" + " 2. 
Prints the access of USER to PATH based on authz file at FILE_PATH in the\n" + " transaction TXN in the repository at REPOS_PATH.\n" + "\n" + " USER is the argument to the --username option; if that option is not\n" + " provided, then access of an anonymous user will be printed or tested.\n" + "\n" + " PATH is the argument to the --path option; if that option is not provided,\n" + " the maximal access to any path in the repository will be considered.\n" + "\n" + "Outputs one of the following:\n" + " rw write access (which also implies read)\n" + " r read access\n" + " no no access\n" + "\n" + "Returns:\n" + " 0 when syntax is OK and '--is' argument (if any) matches.\n" + " 1 when syntax is invalid.\n" + " 2 operational error\n" + " 3 when '--is' argument doesn't match\n" + ), + {'t', svnauthz__username, svnauthz__path, svnauthz__repos, svnauthz__is, + svnauthz__groups_file, 'R'} }, + { NULL, NULL, {0}, NULL, {0} } +}; + +static svn_error_t * +subcommand_help(apr_getopt_t *os, void *baton, apr_pool_t *pool) +{ + struct svnauthz_opt_state *opt_state = baton; + const char *header = + ("general usage: svnauthz SUBCOMMAND TARGET [ARGS & OPTIONS ...]\n" + " " SVNAUTHZ_COMPAT_NAME " TARGET\n\n" + "If the command name starts with '" SVNAUTHZ_COMPAT_NAME "', runs in\n" + "pre-1.8 compatibility mode: run the 'validate' subcommand on TARGET.\n\n" + "Type 'svnauthz help <subcommand>' for help on a specific subcommand.\n" + "Type 'svnauthz --version' to see the program version.\n\n" + "Available subcommands:\n"); + + const char *fs_desc_start + = ("The following repository back-end (FS) modules are available:\n\n"); + + svn_stringbuf_t *version_footer; + + version_footer = svn_stringbuf_create(fs_desc_start, pool); + SVN_ERR(svn_fs_print_modules(version_footer, pool)); + + SVN_ERR(svn_opt_print_help4(os, "svnauthz", + opt_state ? 
opt_state->version : FALSE, + FALSE, /* quiet */ + FALSE, /* verbose */ + version_footer->data, + header, cmd_table, options_table, NULL, NULL, + pool)); + + return SVN_NO_ERROR; +} + +/* Loads the fs FILENAME contents into *CONTENTS ensuring that the + corresponding node is a file. Using POOL for allocations. */ +static svn_error_t * +read_file_contents(svn_stream_t **contents, const char *filename, + svn_fs_root_t *root, apr_pool_t *pool) +{ + svn_node_kind_t node_kind; + + /* Make sure the path is a file */ + SVN_ERR(svn_fs_check_path(&node_kind, root, filename, pool)); + if (node_kind != svn_node_file) + return svn_error_createf(SVN_ERR_FS_NOT_FILE, NULL, + "Path '%s' is not a file", filename); + + SVN_ERR(svn_fs_file_contents(contents, root, filename, pool)); + + return SVN_NO_ERROR; +} + +/* Loads the authz config into *AUTHZ from the file at AUTHZ_FILE + in repository at REPOS_PATH from the transaction TXN_NAME. If GROUPS_FILE + is set, the resulting *AUTHZ will be constructed from AUTHZ_FILE with + global groups taken from GROUPS_FILE. Using POOL for allocations. */ +static svn_error_t * +get_authz_from_txn(svn_authz_t **authz, const char *repos_path, + const char *authz_file, const char *groups_file, + const char *txn_name, apr_pool_t *pool) +{ + svn_repos_t *repos; + svn_fs_t *fs; + svn_fs_txn_t *txn; + svn_fs_root_t *root; + svn_stream_t *authz_contents; + svn_stream_t *groups_contents; + svn_error_t *err; + + /* Open up the repository and find the transaction root */ + SVN_ERR(svn_repos_open3(&repos, repos_path, NULL, pool, pool)); + fs = svn_repos_fs(repos); + SVN_ERR(svn_fs_open_txn(&txn, fs, txn_name, pool)); + SVN_ERR(svn_fs_txn_root(&root, txn, pool)); + + /* Get the authz file contents. */ + SVN_ERR(read_file_contents(&authz_contents, authz_file, root, pool)); + + /* Get the groups file contents if needed. 
   */
  if (groups_file)
    SVN_ERR(read_file_contents(&groups_contents, groups_file, root, pool));
  else
    groups_contents = NULL;  /* no separate global-groups file */

  err = svn_repos_authz_parse(authz, authz_contents, groups_contents, pool);

  /* Add the filename to the error stack since the parser doesn't have it. */
  if (err != SVN_NO_ERROR)
    return svn_error_createf(err->apr_err, err,
                             "Error parsing authz file: '%s':", authz_file);

  return SVN_NO_ERROR;
}

/* Loads the authz config into *AUTHZ from OPT_STATE->AUTHZ_FILE. If
   OPT_STATE->GROUPS_FILE is set, loads the global groups from it.
   If OPT_STATE->TXN is set then OPT_STATE->AUTHZ_FILE and
   OPT_STATE->GROUPS_FILE are treated as fspaths in repository at
   OPT_STATE->REPOS_PATH. */
static svn_error_t *
get_authz(svn_authz_t **authz, struct svnauthz_opt_state *opt_state,
          apr_pool_t *pool)
{
  /* Read the access file and validate it. */
  if (opt_state->txn)
    return get_authz_from_txn(authz, opt_state->repos_path,
                              opt_state->authz_file,
                              opt_state->groups_file,
                              opt_state->txn, pool);

  /* Else: no transaction, so read from disk (or file:// URL). */
  return svn_repos_authz_read3(authz, opt_state->authz_file,
                               opt_state->groups_file,
                               TRUE, NULL, pool, pool);
}

/* Implements svn_opt_subcommand_t for the 'validate' subcommand. */
static svn_error_t *
subcommand_validate(apr_getopt_t *os, void *baton, apr_pool_t *pool)
{
  struct svnauthz_opt_state *opt_state = baton;
  svn_authz_t *authz;

  /* Not much to do here since just loading the authz file also validates.
   */
  return get_authz(&authz, opt_state, pool);
}

/* Implements svn_opt_subcommand_t for the 'accessof' subcommand.
   Prints the access (rw/r/no) or, with --is, tests it and returns an
   error on mismatch. */
static svn_error_t *
subcommand_accessof(apr_getopt_t *os, void *baton, apr_pool_t *pool)
{
  svn_authz_t *authz;
  svn_boolean_t read_access = FALSE, write_access = FALSE;
  svn_boolean_t check_r = FALSE, check_rw = FALSE, check_no = FALSE;
  svn_error_t *err;
  struct svnauthz_opt_state *opt_state = baton;
  const char *user = opt_state->username;
  const char *path = opt_state->fspath;
  const char *repos = opt_state->repos_name;
  const char *is = opt_state->is;
  svn_repos_authz_access_t request;

  if (opt_state->recursive && !path)
    return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                            ("--recursive not valid without --path"));

  /* Handle is argument parsing/allowed values */
  if (is) {
      if (0 == strcmp(is, "rw"))
        check_rw = TRUE;
      else if (0 == strcmp(is, "r"))
        check_r = TRUE;
      else if (0 == strcmp(is, "no"))
        check_no = TRUE;
      else
        return svn_error_createf(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                                 ("'%s' is not a valid argument for --is"), is);
    }

  SVN_ERR(get_authz(&authz, opt_state, pool));

  /* Check for write access first; write implies read, so the read
     check below only runs when write access was denied. */
  request = svn_authz_write;
  if (opt_state->recursive)
    request |= svn_authz_recursive;
  err = svn_repos_authz_check_access(authz, repos, path, user,
                                     request, &write_access,
                                     pool);

  if (!write_access && !err)
    {
      request = svn_authz_read;
      if (opt_state->recursive)
        request |= svn_authz_recursive;
      err = svn_repos_authz_check_access(authz, repos, path, user,
                                         request, &read_access,
                                         pool);
    }

  if (!err)
    {
      const char *access_str = write_access ? "rw" : read_access ? "r" : "no";

      if (is)
        {
          /* Check that --is argument matches.
           * The errors returned here are not strictly correct, but
           * none of the other code paths will generate them and they
           * roughly mean what we're saying here. */
          if (check_rw && !write_access)
            err = svn_error_createf(SVN_ERR_AUTHZ_UNWRITABLE, NULL,
                                    ("%s is '%s', not writable"),
                                    path ? path : ("Repository"), access_str);
          else if (check_r && !read_access)
            err = svn_error_createf(SVN_ERR_AUTHZ_UNREADABLE, NULL,
                                    ("%s is '%s', not read only"),
                                    path ? path : ("Repository"), access_str);
          else if (check_no && (read_access || write_access))
            err = svn_error_createf(SVN_ERR_AUTHZ_PARTIALLY_READABLE,
                                    NULL, ("%s is '%s', not no access"),
                                    path ? path : ("Repository"), access_str);
        }
      else
        {
          /* No --is: just print the computed access level. */
          err = svn_cmdline_printf(pool, "%s\n", access_str);
        }
    }

  return err;
}



/*** Main. ***/

/* A redefinition of EXIT_FAILURE since our contract demands that we
   exit with 2 for internal failures. */
#undef EXIT_FAILURE
#define EXIT_FAILURE 2

/* Return TRUE if the UI of 'svnauthz-validate' (svn 1.7 and earlier)
   should be emulated, given argv[0]. */
static svn_boolean_t
use_compat_mode(const char *cmd, apr_pool_t *pool)
{
  cmd = svn_dirent_internal_style(cmd, pool);
  cmd = svn_dirent_basename(cmd, NULL);

  /* Skip over the Libtool command prefix if it exists on the command. */
  if (0 == strncmp(SVNAUTHZ_LT_PREFIX, cmd, sizeof(SVNAUTHZ_LT_PREFIX)-1))
    cmd += sizeof(SVNAUTHZ_LT_PREFIX) - 1;

  /* Deliberately look only for the start of the name to deal with
     the executable extension on some platforms. */
  return 0 == strncmp(SVNAUTHZ_COMPAT_NAME, cmd,
                      sizeof(SVNAUTHZ_COMPAT_NAME)-1);
}

/* Canonicalize ACCESS_FILE into *CANONICALIZED_ACCESS_FILE based on the type
   of argument. Error out on unsupported path types. If WITHIN_TXN is set,
   ACCESS_FILE has to be a fspath in the repo. Use POOL for allocations. */
static svn_error_t *
canonicalize_access_file(const char **canonicalized_access_file,
                         const char *access_file,
                         svn_boolean_t within_txn,
                         apr_pool_t *pool)
{
  if (svn_path_is_repos_relative_url(access_file))
    {
      /* Can't accept repos relative urls since we don't have the path to
       * the repository.
       */
      return svn_error_createf(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                               ("'%s' is a repository relative URL when it "
                                "should be a local path or file:// URL"),
                               access_file);
    }
  else if (svn_path_is_url(access_file))
    {
      if (within_txn)
        {
          /* Don't allow urls with transaction argument. */
          return svn_error_createf(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                                   ("'%s' is a URL when it should be a "
                                    "repository-relative path"),
                                   access_file);
        }

      *canonicalized_access_file = svn_uri_canonicalize(access_file, pool);
    }
  else if (within_txn)
    {
      /* Transaction flag means this has to be a fspath to the access file
       * in the repo. */
      *canonicalized_access_file =
          svn_fspath__canonicalize(access_file, pool);
    }
  else
    {
      /* If it isn't a URL and there's no transaction flag then it's a
       * dirent to the access file on local disk. */
      *canonicalized_access_file =
          svn_dirent_internal_style(access_file, pool);
    }

  return SVN_NO_ERROR;
}

/*
 * On success, leave *EXIT_CODE untouched and return SVN_NO_ERROR. On error,
 * either return an error to be displayed, or set *EXIT_CODE to non-zero and
 * return SVN_NO_ERROR.
 */
static svn_error_t *
sub_main(int *exit_code, int argc, const char *argv[], apr_pool_t *pool)
{
  svn_error_t *err;

  const svn_opt_subcommand_desc2_t *subcommand = NULL;
  struct svnauthz_opt_state opt_state = { 0 };
  apr_getopt_t *os;
  apr_array_header_t *received_opts;
  int i;

  /* Initialize the FS library. */
  SVN_ERR(svn_fs_initialize(pool));

  /* Received option codes are collected here so they can be checked
     against the chosen subcommand's accepted options later on. */
  received_opts = apr_array_make(pool, SVN_OPT_MAX_OPTIONS, sizeof(int));

  /* Initialize opt_state */
  opt_state.username = opt_state.fspath = opt_state.repos_name = NULL;
  opt_state.txn = opt_state.repos_path = opt_state.groups_file = NULL;

  /* Parse options.
*/ + SVN_ERR(svn_cmdline__getopt_init(&os, argc, argv, pool)); + os->interleave = 1; + + if (!use_compat_mode(argv[0], pool)) + { + while (1) + { + int opt; + const char *arg; + apr_status_t status = apr_getopt_long(os, options_table, &opt, &arg); + + if (APR_STATUS_IS_EOF(status)) + break; + if (status != APR_SUCCESS) + { + SVN_ERR(subcommand_help(NULL, NULL, pool)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; + } + + /* Stash the option code in an array before parsing it. */ + APR_ARRAY_PUSH(received_opts, int) = opt; + + switch (opt) + { + case 'h': + case '?': + opt_state.help = TRUE; + break; + case 't': + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.txn, arg, pool)); + break; + case 'R': + opt_state.recursive = TRUE; + break; + case svnauthz__version: + opt_state.version = TRUE; + break; + case svnauthz__username: + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.username, arg, pool)); + break; + case svnauthz__path: + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.fspath, arg, pool)); + opt_state.fspath = svn_fspath__canonicalize(opt_state.fspath, + pool); + break; + case svnauthz__repos: + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_name, arg, pool)); + break; + case svnauthz__is: + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.is, arg, pool)); + break; + case svnauthz__groups_file: + SVN_ERR( + svn_utf_cstring_to_utf8(&opt_state.groups_file, + arg, pool)); + break; + default: + { + SVN_ERR(subcommand_help(NULL, NULL, pool)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; + } + } + } + } + else + { + /* Pre 1.8 compatibility mode. */ + if (argc == 1) /* No path argument */ + subcommand = svn_opt_get_canonical_subcommand2(cmd_table, "help"); + else + subcommand = svn_opt_get_canonical_subcommand2(cmd_table, "validate"); + } + + /* If the user asked for help, then the rest of the arguments are + the names of subcommands to get help on (if any), or else they're + just typos/mistakes. Whatever the case, the subcommand to + actually run is subcommand_help(). 
*/ + if (opt_state.help) + subcommand = svn_opt_get_canonical_subcommand2(cmd_table, "help"); + + if (subcommand == NULL) + { + if (os->ind >= os->argc) + { + if (opt_state.version) + { + /* Use the "help" subcommand to handle the "--version" option. */ + static const svn_opt_subcommand_desc2_t pseudo_cmd = + { "--version", subcommand_help, {0}, "", + {svnauthz__version /* must accept its own option */ } }; + + subcommand = &pseudo_cmd; + } + else + { + svn_error_clear(svn_cmdline_fprintf(stderr, pool, + ("subcommand argument required\n"))); + SVN_ERR(subcommand_help(NULL, NULL, pool)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; + } + } + else + { + const char *first_arg; + + SVN_ERR(svn_utf_cstring_to_utf8(&first_arg, os->argv[os->ind++], + pool)); + subcommand = svn_opt_get_canonical_subcommand2(cmd_table, first_arg); + if (subcommand == NULL) + { + os->ind++; + svn_error_clear( + svn_cmdline_fprintf(stderr, pool, + ("Unknown subcommand: '%s'\n"), + first_arg)); + SVN_ERR(subcommand_help(NULL, NULL, pool)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; + } + } + } + + /* Every subcommand except `help' requires one or two non-option arguments. + Parse them and store them in opt_state.*/ + if (subcommand->cmd_func != subcommand_help) + { + /* Consume a non-option argument (repos_path) if --transaction */ + if (opt_state.txn) + { + if (os->ind +2 != argc) + { + return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL, + ("Repository and authz file arguments " + "required")); + } + + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_path, os->argv[os->ind], + pool)); + os->ind++; + + opt_state.repos_path = svn_dirent_internal_style(opt_state.repos_path, pool); + } + + /* Exactly 1 non-option argument */ + if (os->ind + 1 != argc) + { + return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL, + ("Authz file argument required")); + } + + /* Grab AUTHZ_FILE from argv. 
*/ + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.authz_file, os->argv[os->ind], + pool)); + + /* Canonicalize opt_state.authz_file appropriately. */ + SVN_ERR(canonicalize_access_file(&opt_state.authz_file, + opt_state.authz_file, + opt_state.txn != NULL, pool)); + + /* Same for opt_state.groups_file if it is present. */ + if (opt_state.groups_file) + { + SVN_ERR(canonicalize_access_file(&opt_state.groups_file, + opt_state.groups_file, + opt_state.txn != NULL, pool)); + } + } + + /* Check that the subcommand wasn't passed any inappropriate options. */ + for (i = 0; i < received_opts->nelts; i++) + { + int opt_id = APR_ARRAY_IDX(received_opts, i, int); + + /* All commands implicitly accept --help, so just skip over this + when we see it. Note that we don't want to include this option + in their "accepted options" list because it would be awfully + redundant to display it in every commands' help text. */ + if (opt_id == 'h' || opt_id == '?') + continue; + + if (! svn_opt_subcommand_takes_option3(subcommand, opt_id, NULL)) + { + const char *optstr; + const apr_getopt_option_t *badopt = + svn_opt_get_option_from_code2(opt_id, options_table, subcommand, + pool); + svn_opt_format_option(&optstr, badopt, FALSE, pool); + if (subcommand->name[0] == '-') + SVN_ERR(subcommand_help(NULL, NULL, pool)); + else + svn_error_clear(svn_cmdline_fprintf(stderr, pool, + ("Subcommand '%s' doesn't accept option '%s'\n" + "Type 'svnauthz help %s' for usage.\n"), + subcommand->name, optstr, subcommand->name)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; + } + } + + /* Run the subcommand. */ + err = (*subcommand->cmd_func)(os, &opt_state, pool); + + if (err) + { + if (err->apr_err == SVN_ERR_CL_INSUFFICIENT_ARGS + || err->apr_err == SVN_ERR_CL_ARG_PARSING_ERROR) + { + /* For argument-related problems, suggest using the 'help' + subcommand. 
*/ + err = svn_error_quick_wrap(err, + ("Try 'svnauthz help' for more info")); + } + else if (err->apr_err == SVN_ERR_AUTHZ_INVALID_CONFIG + || err->apr_err == SVN_ERR_MALFORMED_FILE) + { + /* Follow our contract that says we exit with 1 if the file does not + validate. */ + *exit_code = 1; + return err; + } + else if (err->apr_err == SVN_ERR_AUTHZ_UNREADABLE + || err->apr_err == SVN_ERR_AUTHZ_UNWRITABLE + || err->apr_err == SVN_ERR_AUTHZ_PARTIALLY_READABLE) + { + /* Follow our contract that says we exit with 3 if --is does not + * match. */ + *exit_code = 3; + return err; + } + + return err; + } + + return SVN_NO_ERROR; +} + +int +main(int argc, const char *argv[]) +{ + apr_pool_t *pool; + int exit_code = EXIT_SUCCESS; + svn_error_t *err; + + /* Initialize the app. Send all error messages to 'stderr'. */ + if (svn_cmdline_init(argv[0], stderr) != EXIT_SUCCESS) + return EXIT_FAILURE; + + pool = svn_pool_create(NULL); + + err = sub_main(&exit_code, argc, argv, pool); + + /* Flush stdout and report if it fails. It would be flushed on exit anyway + but this makes sure that output is not silently lost if it fails. */ + err = svn_error_compose_create(err, svn_cmdline_fflush(stdout)); + + if (err) + { + if (exit_code == 0) + exit_code = EXIT_FAILURE; + svn_cmdline_handle_exit_error(err, NULL, "svnauthz: "); + } + + svn_pool_destroy(pool); + return exit_code; +} diff --git a/tools/server-side/svnpredumpfilter.py b/tools/server-side/svnpredumpfilter.py new file mode 100755 index 0000000..04190c1 --- /dev/null +++ b/tools/server-side/svnpredumpfilter.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python + +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== +"""\ +Usage: 1. {PROGRAM} [OPTIONS] include INCLUDE-PATH ... + 2. {PROGRAM} [OPTIONS] exclude EXCLUDE-PATH ... + +Read a Subversion revision log output stream from stdin, analyzing its +revision log history to see what paths would need to be additionally +provided as part of the list of included/excluded paths if trying to +use Subversion's 'svndumpfilter' program to include/exclude paths from +a full dump of a repository's history. + +The revision log stream should be the result of 'svn log -v' or 'svn +log -vq' when run against the root of the repository whose history +will be filtered by a user with universal read access to the +repository's data. Do not use the --use-merge-history (-g) or +--stop-on-copy when generating this revision log stream. +Use the default ordering of revisions (that is, '-r HEAD:0'). + +Return errorcode 0 if there are no additional dependencies found, 1 if +there were; any other errorcode indicates a fatal error. + +Paths in mergeinfo are not considered as additional dependencies so the +--skip-missing-merge-sources option of 'svndumpfilter' may be required +for successful filtering with the resulting path list. + +Options: + + --help (-h) Show this usage message and exit. + + --targets FILE Read INCLUDE-PATHs and EXCLUDE-PATHs from FILE, + one path per line. 
+ + --verbose (-v) Provide more information. May be used multiple + times for additional levels of information (-vv). +""" +import sys +import os +import getopt +import string + +verbosity = 0 + +class LogStreamError(Exception): pass +class EOFError(Exception): pass + +EXIT_SUCCESS = 0 +EXIT_MOREDEPS = 1 +EXIT_FAILURE = 2 + +def sanitize_path(path): + return '/'.join(filter(None, path.split('/'))) + +def subsumes(path, maybe_child): + if path == maybe_child: + return True + if maybe_child.startswith(path + '/'): + return True + return False + +def compare_paths(path1, path2): + # Are the paths exactly the same? + if path1 == path2: + return 0 + + # Skip past common prefix + path1_len = len(path1); + path2_len = len(path2); + min_len = min(path1_len, path2_len) + i = 0 + while (i < min_len) and (path1[i] == path2[i]): + i = i + 1 + + # Children of paths are greater than their parents, but less than + # greater siblings of their parents + char1 = '\0' + char2 = '\0' + if (i < path1_len): + char1 = path1[i] + if (i < path2_len): + char2 = path2[i] + + if (char1 == '/') and (i == path2_len): + return 1 + if (char2 == '/') and (i == path1_len): + return -1 + if (i < path1_len) and (char1 == '/'): + return -1 + if (i < path2_len) and (char2 == '/'): + return 1 + + # Common prefix was skipped above, next character is compared to + # determine order + return cmp(char1, char2) + +def log(msg, min_verbosity): + if verbosity >= min_verbosity: + if min_verbosity == 1: + sys.stderr.write("[* ] ") + elif min_verbosity == 2: + sys.stderr.write("[**] ") + sys.stderr.write(msg + "\n") + +class DependencyTracker: + def __init__(self, include_paths): + self.include_paths = set(include_paths) + self.dependent_paths = set() + + def path_included(self, path): + for include_path in self.include_paths | self.dependent_paths: + if subsumes(include_path, path): + return True + return False + + def include_missing_copies(self, path_copies): + while True: + log("Cross-checking %d included 
paths with %d copies " + "for missing path dependencies..." % ( + len(self.include_paths) + len(self.dependent_paths), + len(path_copies)), + 1) + included_copies = [] + for path, copyfrom_path in path_copies: + if self.path_included(path): + log("Adding copy '%s' -> '%s'" % (copyfrom_path, path), 1) + self.dependent_paths.add(copyfrom_path) + included_copies.append((path, copyfrom_path)) + if not included_copies: + log("Found all missing path dependencies", 1) + break + for path, copyfrom_path in included_copies: + path_copies.remove((path, copyfrom_path)) + log("Found %d new copy dependencies, need to re-check for more" + % len(included_copies), 1) + +def readline(stream): + line = stream.readline() + if not line: + raise EOFError("Unexpected end of stream") + line = line.rstrip('\n\r') + log(line, 2) + return line + +def svn_log_stream_get_dependencies(stream, included_paths): + import re + + dt = DependencyTracker(included_paths) + + header_re = re.compile(r'^r([0-9]+) \|.*$') + action_re = re.compile(r'^ [ADMR] /(.*)$') + copy_action_re = re.compile(r'^ [AR] /(.*) \(from /(.*):[0-9]+\)$') + line_buf = None + last_revision = 0 + eof = False + path_copies = set() + found_changed_path = False + + while not eof: + try: + line = line_buf is not None and line_buf or readline(stream) + except EOFError: + break + + # We should be sitting at a log divider line. + if line != '-' * 72: + raise LogStreamError("Expected log divider line; not found.") + + # Next up is a log header line. + try: + line = readline(stream) + except EOFError: + break + match = header_re.search(line) + if not match: + raise LogStreamError("Expected log header line; not found.") + pieces = map(string.strip, line.split('|')) + revision = int(pieces[0][1:]) + if last_revision and revision >= last_revision: + raise LogStreamError("Revisions are misordered. 
Make sure log stream " + "is from 'svn log' with the youngest revisions " + "before the oldest ones (the default ordering).") + log("Parsing revision %d" % (revision), 1) + last_revision = revision + idx = pieces[-1].find(' line') + if idx != -1: + log_lines = int(pieces[-1][:idx]) + else: + log_lines = 0 + + # Now see if there are any changed paths. If so, parse and process them. + line = readline(stream) + if line == 'Changed paths:': + while 1: + try: + line = readline(stream) + except EOFError: + eof = True + break + match = copy_action_re.search(line) + if match: + found_changed_path = True + path_copies.add((sanitize_path(match.group(1)), + sanitize_path(match.group(2)))) + elif action_re.search(line): + found_changed_path = True + else: + break + + # Finally, skip any log message lines. (If there are none, + # remember the last line we read, because it probably has + # something important in it.) + if log_lines: + for i in range(log_lines): + readline(stream) + line_buf = None + else: + line_buf = line + + if not found_changed_path: + raise LogStreamError("No changed paths found; did you remember to run " + "'svn log' with the --verbose (-v) option when " + "generating the input to this script?") + + dt.include_missing_copies(path_copies) + return dt + +def analyze_logs(included_paths): + print("Initial include paths:") + for path in included_paths: + print(" + /%s" % (path)) + + dt = svn_log_stream_get_dependencies(sys.stdin, included_paths) + + if dt.dependent_paths: + found_new_deps = True + print("Dependent include paths found:") + for path in dt.dependent_paths: + print(" + /%s" % (path)) + print("You need to also include them (or one of their parents).") + else: + found_new_deps = False + print("No new dependencies found!") + parents = {} + for path in dt.include_paths: + while 1: + parent = os.path.dirname(path) + if not parent: + break + parents[parent] = 1 + path = parent + parents = parents.keys() + if parents: + print("You might still need to 
manually create parent directories " \ + "for the included paths before loading a filtered dump:") + parents.sort(compare_paths) + for parent in parents: + print(" /%s" % (parent)) + + return found_new_deps and EXIT_MOREDEPS or EXIT_SUCCESS + +def usage_and_exit(errmsg=None): + program = os.path.basename(sys.argv[0]) + stream = errmsg and sys.stderr or sys.stdout + stream.write(__doc__.replace("{PROGRAM}", program)) + if errmsg: + stream.write("\nERROR: %s\n" % (errmsg)) + sys.exit(errmsg and EXIT_FAILURE or EXIT_SUCCESS) + +def main(): + config_dir = None + targets_file = None + + try: + opts, args = getopt.getopt(sys.argv[1:], "hv", + ["help", "verbose", "targets="]) + except getopt.GetoptError as e: + usage_and_exit(str(e)) + + for option, value in opts: + if option in ['-h', '--help']: + usage_and_exit() + elif option in ['-v', '--verbose']: + global verbosity + verbosity = verbosity + 1 + elif option in ['--targets']: + targets_file = value + + if len(args) == 0: + usage_and_exit("Not enough arguments") + + if targets_file is None: + targets = args[1:] + else: + targets = map(lambda x: x.rstrip('\n\r'), + open(targets_file, 'r').readlines()) + if not targets: + usage_and_exit("No target paths specified") + + try: + if args[0] == 'include': + sys.exit(analyze_logs(map(sanitize_path, targets))) + elif args[0] == 'exclude': + usage_and_exit("Feature not implemented") + else: + usage_and_exit("Valid subcommands are 'include' and 'exclude'") + except SystemExit: + raise + except (LogStreamError, EOFError) as e: + log("ERROR: " + str(e), 0) + sys.exit(EXIT_FAILURE) + except: + import traceback + exc_type, exc, exc_tb = sys.exc_info() + tb = traceback.format_exception(exc_type, exc, exc_tb) + sys.stderr.write(''.join(tb)) + sys.exit(EXIT_FAILURE) + + +if __name__ == "__main__": + main() diff --git a/tools/server-side/svnpubsub/README.txt b/tools/server-side/svnpubsub/README.txt new file mode 100644 index 0000000..ad4975e --- /dev/null +++ 
b/tools/server-side/svnpubsub/README.txt @@ -0,0 +1,24 @@ +Installation instructions: +
+1. Set up an svnpubsub service. +
+ This directory should be checked out to /usr/local/svnpubsub (or /opt/svnpubsub
+ on Debian). +
+ There are init scripts for several OSes in the rc.d/ directory; add them
+ to your OS boot process in the usual way for your OS. (For example, via
+ rc.conf(5) or update-rc.d(8).) +
+2. Run "commit-hook.py $REPOS $REV" from your post-commit hook. +
+ (As of 1.7, these are the same ordered arguments the post-commit hook
+ itself receives, so you can just symlink commit-hook.py as hooks/post-commit
+ hook if you don't need any other hooks to run in the server process. (This
+ isn't as insane as it sounds --- post-commit email hooks could also feed off
+ svnpubsub, and thus not be run within the committing server thread, but on
+ any other process or box that listens to the svnpubsub stream!)) +
+3. Set up svnpubsub clients. +
+ (eg svnwcsub.py, svnpubsub/client.py,
+ 'curl -sN http://${hostname}:2069/commits') diff --git a/tools/server-side/svnpubsub/commit-hook.py b/tools/server-side/svnpubsub/commit-hook.py new file mode 100755 index 0000000..4e6a1cc --- /dev/null +++ b/tools/server-side/svnpubsub/commit-hook.py @@ -0,0 +1,92 @@ +#!/usr/local/bin/python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +SVNLOOK="/usr/local/svn-install/current/bin/svnlook" +#SVNLOOK="/usr/local/bin/svnlook" + +HOST="127.0.0.1" +PORT=2069 + +import sys +try: + import simplejson as json +except ImportError: + import json + +import urllib2 + +import svnpubsub.util + +def svnlook(cmd, **kwargs): + args = [SVNLOOK] + cmd + return svnpubsub.util.check_output(args, **kwargs) + +def svnlook_uuid(repo): + cmd = ["uuid", "--", repo] + return svnlook(cmd).strip() + +def svnlook_info(repo, revision): + cmd = ["info", "-r", revision, "--", repo] + data = svnlook(cmd, universal_newlines=True).split("\n") + #print data + return {'author': data[0].strip(), + 'date': data[1].strip(), + 'log': "\n".join(data[3:]).strip()} + +def svnlook_changed(repo, revision): + cmd = ["changed", "-r", revision, "--", repo] + lines = svnlook(cmd, universal_newlines=True).split("\n") + changed = {} + for line in lines: + line = line.strip() + if not line: + continue + (flags, filename) = (line[0:3], line[4:]) + changed[filename] = {'flags': flags} + return changed + +def do_put(body): + opener = urllib2.build_opener(urllib2.HTTPHandler) + request = urllib2.Request("http://%s:%d/commits" %(HOST, PORT), data=body) + request.add_header('Content-Type', 'application/json') + request.get_method = lambda: 'PUT' + url = opener.open(request) + + +def main(repo, revision): + revision = revision.lstrip('r') + i = svnlook_info(repo, revision) + data = {'type': 'svn', + 'format': 1, + 'id': int(revision), + 'changed': {}, + 'repository': svnlook_uuid(repo), + 'committer': i['author'], + 'log': i['log'], + 'date': i['date'], + } + data['changed'].update(svnlook_changed(repo, revision)) + body = json.dumps(data) + do_put(body) + +if __name__ == "__main__": + if len(sys.argv) not in (3, 4): + sys.stderr.write("invalid args\n") + sys.exit(1) + + main(*sys.argv[1:3]) diff --git a/tools/server-side/svnpubsub/daemonize.py 
b/tools/server-side/svnpubsub/daemonize.py new file mode 100644 index 0000000..9f30e59 --- /dev/null +++ b/tools/server-side/svnpubsub/daemonize.py @@ -0,0 +1,339 @@ +# --------------------------------------------------------------------------- +# +# Copyright (c) 2005, Greg Stein +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# --------------------------------------------------------------------------- +# +# This software lives at: +# http://gstein.googlecode.com/svn/trunk/python/daemonize.py +# + +import os +import signal +import sys +import time +import stat +import multiprocessing # requires Python 2.6 + + +# possible return values from Daemon.daemonize() +DAEMON_RUNNING = 'The daemon is running' +DAEMON_NOT_RUNNING = 'The daemon is not running' +DAEMON_COMPLETE = 'The daemon has completed its operations' +DAEMON_STARTED = 'The daemon has been started' + + +class Daemon(object): + + def __init__(self, logfile, pidfile): + self.logfile = logfile + self.pidfile = pidfile + + def foreground(self): + "Run in the foreground." + ### we should probably create a pidfile. other systems may try to detect + ### the pidfile to see if this "daemon" is running. 
+ self.setup() + self.run() + ### remove the pidfile + + def daemonize_exit(self): + try: + result = self.daemonize() + except (ChildFailed, DaemonFailed) as e: + # duplicate the exit code + sys.exit(e.code) + except (ChildTerminatedAbnormally, ChildForkFailed, + DaemonTerminatedAbnormally, DaemonForkFailed), e: + sys.stderr.write('ERROR: %s\n' % e) + sys.exit(1) + except ChildResumedIncorrectly: + sys.stderr.write('ERROR: continued after receiving unknown signal.\n') + sys.exit(1) + + if result == DAEMON_STARTED or result == DAEMON_COMPLETE: + sys.exit(0) + elif result == DAEMON_NOT_RUNNING: + sys.stderr.write('ERROR: the daemon exited with a success code ' + 'without signalling its startup.\n') + sys.exit(1) + + # in original process. daemon is up and running. we're done. + + def daemonize(self): + ### review error situations. map to backwards compat. ?? + ### be mindful of daemonize_exit(). + ### we should try and raise ChildFailed / ChildTerminatedAbnormally. + ### ref: older revisions. OR: remove exceptions. + + child_is_ready = multiprocessing.Event() + child_completed = multiprocessing.Event() + + p = multiprocessing.Process(target=self._first_child, + args=(child_is_ready, child_completed)) + p.start() + + # Wait for the child to finish setting things up (in case we need + # to communicate with it). It will only exit when ready. + ### use a timeout here! (parameterized, of course) + p.join() + + ### need to propagate errors, to adjust the return codes + if child_completed.is_set(): + ### what was the exit status? + return DAEMON_COMPLETE + if child_is_ready.is_set(): + return DAEMON_RUNNING + + ### how did we get here?! the immediate child should not exit without + ### signalling ready/complete. some kind of error. + return DAEMON_STARTED + + def _first_child(self, child_is_ready, child_completed): + # we're in the child. + + ### NOTE: the original design was a bit bunk. Exceptions raised from + ### this point are within the child processes. 
We need to signal the + ### errors to the parent in other ways. + + # decouple from the parent process + os.chdir('/') + os.umask(0) + os.setsid() + + # remember this pid so the second child can signal it. + thispid = os.getpid() + + # if the daemon process exits before signalling readiness, then we + # need to see the problem. trap SIGCHLD with a SignalCatcher. + daemon_exit = SignalCatcher(signal.SIGCHLD) + + # perform the second fork + try: + pid = os.fork() + except OSError as e: + ### this won't make it to the parent process + raise DaemonForkFailed(e.errno, e.strerror) + + if pid > 0: + # in the parent. + + + # Wait for the child to be ready for operation. + while True: + # The readiness event will invariably be signalled early/first. + # If it *doesn't* get signalled because the child has prematurely + # exited, then we will pause 10ms before noticing the exit. The + # pause is acceptable since that is aberrant/unexpected behavior. + ### is there a way to break this wait() on a signal such as SIGCHLD? + ### parameterize this wait, in case the app knows children may + ### fail quickly? + if child_is_ready.wait(timeout=0.010): + # The child signalled readiness. Yay! + break + if daemon_exit.signalled: + # Whoops. The child exited without signalling :-( + break + # Python 2.6 compat: .wait() may exit when set, but return None + if child_is_ready.is_set(): + break + # A simple timeout. The child is taking a while to prepare. Go + # back and wait for readiness. + + if daemon_exit.signalled: + # Tell the parent that the child has exited. + ### we need to communicate the exit status, if possible. + child_completed.set() + + # reap the daemon process, getting its exit code. bubble it up. 
+ cpid, status = os.waitpid(pid, 0) + assert pid == cpid + if os.WIFEXITED(status): + code = os.WEXITSTATUS(status) + if code: + ### this won't make it to the parent process + raise DaemonFailed(code) + ### this return value is ignored + return DAEMON_NOT_RUNNING + + # the daemon did not exit cleanly. + ### this won't make it to the parent process + raise DaemonTerminatedAbnormally(status) + + # child_is_ready got asserted. the daemon is up and running, so + # save the pid and return success. + if self.pidfile: + # Be wary of symlink attacks + try: + os.remove(self.pidfile) + except OSError: + pass + fd = os.open(self.pidfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL, + stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) + os.write(fd, '%d\n' % pid) + os.close(fd) + + ### this return value is ignored + return DAEMON_STARTED + + ### old code. what to do with this? throw ChildResumedIncorrectly + ### or just toss this and the exception. + # some other signal popped us out of the pause. the daemon might not + # be running. + ### this won't make it to the parent process + raise ChildResumedIncorrectly() + + # we're a daemon now. get rid of the final remnants of the parent: + # restore the signal handlers and switch std* to the proper files. + signal.signal(signal.SIGUSR1, signal.SIG_DFL) + signal.signal(signal.SIGCHLD, signal.SIG_DFL) + sys.stdout.flush() + sys.stderr.flush() + si = open('/dev/null', 'r') + so = open(self.logfile, 'a+') + se = open(self.logfile, 'a+', 0) # unbuffered + os.dup2(si.fileno(), sys.stdin.fileno()) + os.dup2(so.fileno(), sys.stdout.fileno()) + os.dup2(se.fileno(), sys.stderr.fileno()) + # note: we could not inline the open() calls. after the fileno() completed, + # the file would be closed, making the fileno invalid. gotta hold them + # open until now: + si.close() + so.close() + se.close() + + ### TEST: don't release the parent immediately. the whole parent stack + ### should pause along with this sleep. + #time.sleep(10) + + # everything is set up. 
call the initialization function. + self.setup() + + ### TEST: exit before signalling. + #sys.exit(0) + #sys.exit(1) + + # the child is now ready for parent/anyone to communicate with it. + child_is_ready.set() + + # start the daemon now. + self.run() + + # The daemon is shutting down, so toss the pidfile. + if self.pidfile: + try: + os.remove(self.pidfile) + except OSError: + pass + + ### this return value is ignored + return DAEMON_COMPLETE + + def setup(self): + raise NotImplementedError + + def run(self): + raise NotImplementedError + + +class _Detacher(Daemon): + def __init__(self, target, logfile='/dev/null', pidfile=None, + args=(), kwargs={}): + Daemon.__init__(self, logfile, pidfile) + self.target = target + self.args = args + self.kwargs = kwargs + + def setup(self): + pass + + def run(self): + self.target(*self.args, **self.kwargs) + + +def run_detached(target, *args, **kwargs): + """Simple function to run TARGET as a detached daemon. + + The additional arguments/keywords will be passed along. This function + does not return -- sys.exit() will be called as appropriate. + + (capture SystemExit if logging/reporting is necessary) + ### if needed, a variant of this func could be written to not exit + """ + d = _Detacher(target, args=args, kwargs=kwargs) + d.daemonize_exit() + + +class SignalCatcher(object): + def __init__(self, signum): + self.signalled = False + signal.signal(signum, self.sig_handler) + + def sig_handler(self, signum, frame): + self.signalled = True + + +class ChildTerminatedAbnormally(Exception): + "The child process terminated abnormally." + def __init__(self, status): + Exception.__init__(self, status) + self.status = status + def __str__(self): + return 'child terminated abnormally (0x%04x)' % self.status + +class ChildFailed(Exception): + "The child process exited with a failure code." 
+ def __init__(self, code): + Exception.__init__(self, code) + self.code = code + def __str__(self): + return 'child failed with exit code %d' % self.code + +class ChildForkFailed(Exception): + "The child process could not be forked." + def __init__(self, errno, strerror): + Exception.__init__(self, errno, strerror) + self.errno = errno + self.strerror = strerror + def __str__(self): + return 'child fork failed with error %d (%s)' % self.args + +class ChildResumedIncorrectly(Exception): + "The child resumed its operation incorrectly." + +class DaemonTerminatedAbnormally(Exception): + "The daemon process terminated abnormally." + def __init__(self, status): + Exception.__init__(self, status) + self.status = status + def __str__(self): + return 'daemon terminated abnormally (0x%04x)' % self.status + +class DaemonFailed(Exception): + "The daemon process exited with a failure code." + def __init__(self, code): + Exception.__init__(self, code) + self.code = code + def __str__(self): + return 'daemon failed with exit code %d' % self.code + +class DaemonForkFailed(Exception): + "The daemon process could not be forked." + def __init__(self, errno, strerror): + Exception.__init__(self, errno, strerror) + self.errno = errno + self.strerror = strerror + def __str__(self): + return 'daemon fork failed with error %d (%s)' % self.args diff --git a/tools/server-side/svnpubsub/irkerbridge.py b/tools/server-side/svnpubsub/irkerbridge.py new file mode 100755 index 0000000..ba61c99 --- /dev/null +++ b/tools/server-side/svnpubsub/irkerbridge.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python +# +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# IrkerBridge - Bridge an SvnPubSub stream to Irker. + +# Example: +# irkerbridge.py --daemon --pidfile pid --logfile log config +# +# For detailed option help use: +# irkerbridge.py --help + +# It expects a config file that has the following parameters: +# streams=url +# Space separated list of URLs to streams. +# This option should only be in the DEFAULT section, is ignored in +# all other sections. +# irker=hostname:port +# The hostname/port combination of the irker daemon. If port is +# omitted it defaults to 6659. Irker is connected to over UDP. +# match=What to use to decide if the commit should be sent to irker. +# It consists of the repository UUID followed by a slash and a glob pattern. +# The UUID may be replaced by a * to match all UUIDs. The glob pattern will +# be matched against all of the dirs_changed. Both the UUID and the glob +# pattern must match to send the message to irker. +# to=url +# Space separated list of URLs (any URL that Irker will accept) to +# send the resulting message to. At current Irker only supports IRC. +# template=string +# A string to use to format the output. The string is a Python +# string Template. The following variables are available: +# $committer, $id, $date, $repository, $log, $log_firstline, +# $log_firstparagraph, $dirs_changed, $dirs_count, $dirs_count_s, +# $subdirs_count, $subdirs_count_s, $dirs_root +# Most of them should be self explanatory. 
$dirs_count is the number of +# entries in $dirs_changed, $dirs_count_s is a friendly string version, +# $dirs_root is the common root of all the $dirs_changed, $subdirs_count +# is the number of subdirs under the $dirs_root that changed, +# $subdirs_count_s is a friendly string version. $log_firstparagraph cuts +# the log message at the first blank line and replaces newlines with spaces. +# +# Within the config file you have sections. Any configuration option +# missing from a given section is found in the [DEFAULT] section. +# +# Section names are arbitrary names that mean nothing to the bridge. Each +# section other than the [DEFAULT] section consists of a configuration that +# may match and send a message to irker to deliver. All matching sections +# will generate a message. +# +# Interpolation of values within the config file is allowed by including +# %(name)s within a value. For example I can reference the UUID of a repo +# repeatedly by doing: +# [DEFAULT] +# ASF_REPO=13f79535-47bb-0310-9956-ffa450edef68 +# +# [#commits] +# match=%(ASF_REPO)s/ +# +# You can HUP the process to reload the config file without restarting the +# process. However, you cannot change the streams it is listening to without +# restarting the process. +# +# TODO: Logging in a better way. 

# Messages longer than this will be truncated and ... 
added to the end such +# that the resulting message is no longer than this: +MAX_PRIVMSG = 400 + +import os +import sys +import posixpath +import socket +import json +import optparse +import ConfigParser +import traceback +import signal +import re +import fnmatch +from string import Template + +try: + # Python >=3.0 + from urllib.parse import urlparse +except ImportError: + # Python <3.0 + from urlparse import urlparse + + +# Packages that come with svnpubsub +import svnpubsub.client +import daemonize + +class Daemon(daemonize.Daemon): + def __init__(self, logfile, pidfile, bdec): + daemonize.Daemon.__init__(self, logfile, pidfile) + + self.bdec = bdec + + def setup(self): + # There is no setup which the parent needs to wait for. + pass + + def run(self): + print('irkerbridge started, pid=%d' % (os.getpid())) + + mc = svnpubsub.client.MultiClient(self.bdec.urls, + self.bdec.commit, + self.bdec.event) + mc.run_forever() + + +class BigDoEverythingClass(object): + def __init__(self, config, options): + self.config = config + self.options = options + self.urls = config.get_value('streams').split() + + def locate_matching_configs(self, commit): + result = [ ] + for section in self.config.sections(): + match = self.config.get(section, "match").split('/', 1) + if len(match) < 2: + # No slash so assume all paths + match.append('*') + match_uuid, match_path = match + if commit.repository == match_uuid or match_uuid == "*": + for path in commit.changed: + if fnmatch.fnmatch(path, match_path): + result.append(section) + break + return result + + def _generate_dirs_changed(self, commit): + if hasattr(commit, 'dirs_changed') or not hasattr(commit, 'changed'): + return + + dirs_changed = set() + for p in commit.changed: + if p[-1] == '/' and commit.changed[p]['flags'][1] == 'U': + # directory with property changes add the directory itself. 
+ dirs_changed.add(p) + else: + # everything else add the parent of the path + # directories have a trailing slash so if it's present remove + # it before finding the parent. The result will be a directory + # so it needs a trailing slash + dirs_changed.add(posixpath.dirname(p.rstrip('/')) + '/') + + commit.dirs_changed = dirs_changed + return + + def fill_in_extra_args(self, commit): + # Set any empty members to the string "<null>" + v = vars(commit) + for k in v.keys(): + if not v[k]: + v[k] = '<null>' + + self._generate_dirs_changed(commit) + # Add entries to the commit object that are useful for + # formatting. + commit.log_firstline = commit.log.split("\n",1)[0] + commit.log_firstparagraph = re.split("\r?\n\r?\n",commit.log,1)[0] + commit.log_firstparagraph = re.sub("\r?\n"," ",commit.log_firstparagraph) + if commit.dirs_changed: + commit.dirs_root = posixpath.commonprefix(commit.dirs_changed) + if commit.dirs_root == '': + commit.dirs_root = '/' + commit.dirs_count = len(commit.dirs_changed) + if commit.dirs_count > 1: + commit.dirs_count_s = " (%d dirs)" %(commit.dirs_count) + else: + commit.dirs_count_s = "" + + commit.subdirs_count = commit.dirs_count + if commit.dirs_root in commit.dirs_changed: + commit.subdirs_count -= 1 + if commit.subdirs_count >= 1: + commit.subdirs_count_s = " + %d subdirs" % (commit.subdirs_count) + else: + commit.subdirs_count_s = "" + + def _send(self, irker, msg): + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + irker_list = irker.split(':') + if len(irker_list) < 2: + irker_list.append(6659) + json_msg = json.dumps(msg) + sock.sendto(json_msg, (irker_list[0],int(irker_list[1]))) + if self.options.verbose: + print("SENT: %s to %s" % (json_msg, irker)) + + def join_all(self): + # Like self.commit(), but ignores self.config.get(section, "template"). 
+ for section in self.config.sections(): + irker = self.config.get(section, "irker") + to_list = self.config.get(section, "to").split() + if not irker or not to_list: + continue + for to in to_list: + msg = {'to': to, 'privmsg': ''} + self._send(irker, msg) + + def commit(self, url, commit): + if self.options.verbose: + print("RECV: from %s" % url) + print(json.dumps(vars(commit), indent=2)) + + try: + config_sections = self.locate_matching_configs(commit) + if len(config_sections) > 0: + self.fill_in_extra_args(commit) + for section in config_sections: + irker = self.config.get(section, "irker") + to_list = self.config.get(section, "to").split() + template = self.config.get(section, "template") + if not irker or not to_list or not template: + continue + privmsg = Template(template).safe_substitute(vars(commit)) + if len(privmsg) > MAX_PRIVMSG: + privmsg = privmsg[:MAX_PRIVMSG-3] + '...' + for to in to_list: + msg = {'to': to, 'privmsg': privmsg} + self._send(irker, msg) + + except: + print("Unexpected error:") + traceback.print_exc() + sys.stdout.flush() + raise + + def event(self, url, event_name, event_arg): + if self.options.verbose or event_name != "ping": + print('EVENT: %s from %s' % (event_name, url)) + sys.stdout.flush() + + + +class ReloadableConfig(ConfigParser.SafeConfigParser): + def __init__(self, fname): + ConfigParser.SafeConfigParser.__init__(self) + + self.fname = fname + self.read(fname) + + signal.signal(signal.SIGHUP, self.hangup) + + def hangup(self, signalnum, frame): + self.reload() + + def reload(self): + print("RELOAD: config file: %s" % self.fname) + sys.stdout.flush() + + # Delete everything. Just re-reading would overlay, and would not + # remove sections/options. Note that [DEFAULT] will not be removed. + for section in self.sections(): + self.remove_section(section) + + # Get rid of [DEFAULT] + self.remove_section(ConfigParser.DEFAULTSECT) + + # Now re-read the configuration file. 
+        self.read(self.fname)
+
+    def get_value(self, which):
+        return self.get(ConfigParser.DEFAULTSECT, which)
+
+
+def main(args):
+    parser = optparse.OptionParser(
+        description='An SvnPubSub client that bridges the data to irker.',
+        usage='Usage: %prog [options] CONFIG_FILE',
+        )
+    parser.add_option('--logfile',
+                      help='filename for logging')
+    parser.add_option('--verbose', action='store_true',
+                      help="enable verbose logging")
+    parser.add_option('--pidfile',
+                      help="the process' PID will be written to this file")
+    parser.add_option('--daemon', action='store_true',
+                      help='run as a background daemon')
+
+    options, extra = parser.parse_args(args)
+
+    if len(extra) != 1:
+        parser.error('CONFIG_FILE is required')
+    config_file = os.path.abspath(extra[0])
+
+    logfile, pidfile = None, None
+    if options.daemon:
+        if options.logfile:
+            logfile = os.path.abspath(options.logfile)
+        else:
+            parser.error('LOGFILE is required when running as a daemon')
+
+        if options.pidfile:
+            pidfile = os.path.abspath(options.pidfile)
+        else:
+            parser.error('PIDFILE is required when running as a daemon')
+
+
+    config = ReloadableConfig(config_file)
+    bdec = BigDoEverythingClass(config, options)
+
+    d = Daemon(logfile, pidfile, bdec)
+    if options.daemon:
+        d.daemonize_exit()
+    else:
+        d.foreground()
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/tools/server-side/svnpubsub/rc.d/svnpubsub b/tools/server-side/svnpubsub/rc.d/svnpubsub
new file mode 120000
index 0000000..b05e35e
--- /dev/null
+++ b/tools/server-side/svnpubsub/rc.d/svnpubsub
@@ -0,0 +1 @@
+svnpubsub.freebsd
\ No newline at end of file diff --git a/tools/server-side/svnpubsub/rc.d/svnpubsub.debian b/tools/server-side/svnpubsub/rc.d/svnpubsub.debian new file mode 100755 index 0000000..c61057d --- /dev/null +++ b/tools/server-side/svnpubsub/rc.d/svnpubsub.debian @@ -0,0 +1,62 @@ +#!/bin/bash +### BEGIN INIT INFO +# Provides: svnpubsub +# Required-Start: $remote_fs +# Required-Stop: $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: SvnPubSub +# Description: start SvnPubSub daemon +#### END INIT INFO + +. /lib/init/vars.sh +. /lib/lsb/init-functions + +svnpubsub_user=${svnpubsub_user-"daemon"} +svnpubsub_group=${svnpubsub_group-"daemon"} +svnpubsub_reactor=${svnpubsub_reactor-"poll"} +svnpubsub_pidfile=${svnpubsub_pidfile-"/var/run/svnpubsub.pid"} +pidfile="${svnpubsub_pidfile}" + +TWSITD_CMD="/usr/bin/twistd -y /opt/svnpubsub/svnpubsub.tac \ + --logfile=/var/log/svnpubsub/svnpubsub.log \ + --pidfile=${pidfile} \ + --uid=${svnpubsub_user} --gid=${svnpubsub_user} \ + -r${svnpubsub_reactor}" + +RETVAL=0 + +start() { + echo "Starting SvnPubSub Server: " + $TWSITD_CMD + RETVAL=$? + [ $RETVAL -eq 0 ] && echo "ok" || echo "failed" + return $RETVAL +} + +stop() { + echo "Stopping SvnPubSub Server: " + THE_PID=`cat ${pidfile}` + kill $THE_PID + RETVAL=$? + [ $RETVAL -eq 0 ] && echo "ok" || echo "failed" + return $RETVAL +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + restart) + stop + start + ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 +esac + +exit $RETVAL diff --git a/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd b/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd new file mode 100755 index 0000000..79b5901 --- /dev/null +++ b/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd @@ -0,0 +1,37 @@ +#!/bin/sh +# +# PROVIDE: svnpubsub +# REQUIRE: DAEMON +# KEYWORD: shutdown + +. 
/etc/rc.subr + +name="svnpubsub" +rcvar=`set_rcvar` + +load_rc_config $name + +# +# DO NOT CHANGE THESE DEFAULT VALUES HERE +# SET THEM IN THE /etc/rc.conf FILE +# +svnpubsub_enable=${svnpubsub_enable-"NO"} +svnpubsub_user=${svnpubsub_user-"svn"} +svnpubsub_group=${svnpubsub_group-"svn"} +svnpubsub_reactor=${svnpubsub_reactor-"poll"} +svnpubsub_pidfile=${svnpubsub_pidfile-"/var/run/svnpubsub/svnpubsub.pid"} +svnpubsub_cmd_int=${svnpubsub_cmd_int-"python"} +pidfile="${svnpubsub_pidfile}" + +export PYTHON_EGG_CACHE="/home/svn/.python-eggs" + +command="/usr/local/bin/twistd" +command_interpreter="/usr/local/bin/${svnpubsub_cmd_int}" +command_args="-y /usr/local/svnpubsub/svnpubsub.tac \ + --logfile=/var/log/vc/svnpubsub.log \ + --pidfile=${pidfile} \ + --uid=${svnpubsub_user} --gid=${svnpubsub_user} \ + -r${svnpubsub_reactor}" + + +run_rc_command "$1" diff --git a/tools/server-side/svnpubsub/rc.d/svnpubsub.solaris b/tools/server-side/svnpubsub/rc.d/svnpubsub.solaris new file mode 100755 index 0000000..3a9cf9f --- /dev/null +++ b/tools/server-side/svnpubsub/rc.d/svnpubsub.solaris @@ -0,0 +1,53 @@ +#!/usr/bin/bash +# +# a dumb init script for twistd on solaris. cus like, writing XML for SMF is f'ing lame. +# + +svnpubsub_user=${svnpubsub_user-"daemon"} +svnpubsub_group=${svnpubsub_group-"daemon"} +svnpubsub_reactor=${svnpubsub_reactor-"poll"} +svnpubsub_pidfile=${svnpubsub_pidfile-"/var/run/svnpubsub/svnpubsub.pid"} +pidfile="${svnpubsub_pidfile}" + +TWSITD_CMD="/opt/local/bin//twistd -y /usr/local/svnpubsub/svnpubsub.tac \ + --logfile=/x1/log/svnpubsub.log \ + --pidfile=${pidfile} \ + --uid=${svnpubsub_user} --gid=${svnpubsub_user} \ + -r${svnpubsub_reactor}" + +RETVAL=0 + +start() { + echo "Starting SvnPubSub Server: " + $TWSITD_CMD + RETVAL=$? + [ $RETVAL -eq 0 ] && echo "ok" || echo "failed" + return $RETVAL +} + +stop() { + echo "Stopping SvnPubSub Server: " + THE_PID=`cat ${pidfile}` + kill $THE_PID + RETVAL=$? 
+ [ $RETVAL -eq 0 ] && echo "ok" || echo "failed" + return $RETVAL +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + restart) + stop + start + ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 +esac + +exit $RETVAL diff --git a/tools/server-side/svnpubsub/rc.d/svnwcsub b/tools/server-side/svnpubsub/rc.d/svnwcsub new file mode 120000 index 0000000..310fcbe --- /dev/null +++ b/tools/server-side/svnpubsub/rc.d/svnwcsub @@ -0,0 +1 @@ +svnwcsub.freebsd
\ No newline at end of file diff --git a/tools/server-side/svnpubsub/rc.d/svnwcsub.debian b/tools/server-side/svnpubsub/rc.d/svnwcsub.debian new file mode 100755 index 0000000..caf5511 --- /dev/null +++ b/tools/server-side/svnpubsub/rc.d/svnwcsub.debian @@ -0,0 +1,65 @@ +#!/bin/bash +### BEGIN INIT INFO +# Provides: svnwcsub +# Required-Start: $remote_fs +# Required-Stop: $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: SvnWcSub +# Description: start SvnWcSub daemon +#### END INIT INFO + +. /lib/init/vars.sh +. /lib/lsb/init-functions + +svnwcsub_user=${svnwcsub_user-"svnwc"} +svnwcsub_group=${svnwcsub_group-"svnwc"} +svnwcsub_pidfile=${svnwcsub_pidfile-"/var/run/svnwcsub.pid"} +svnwcsub_config=${svnwcsub_config-"/etc/svnwcsub.conf"} +svnwcsub_logfile=${svnwcsub_logfile-"/var/log/svnwcsub/svnwcsub.log"} +pidfile="${svnwcsub_pidfile}" + +SVNWCSUB_CMD="/opt/svnpubsub/svnwcsub.py \ + --daemon \ + --logfile=${svnwcsub_logfile} \ + --pidfile=${pidfile} \ + --uid=${svnwcsub_user} --gid=${svnwcsub_group} \ + --umask=002 \ + ${svnwcsub_config} " + +RETVAL=0 + +start() { + echo "Starting SvnWcSub Server: " + $SVNWCSUB_CMD + RETVAL=$? + [ $RETVAL -eq 0 ] && echo "ok" || echo "failed" + return $RETVAL +} + +stop() { + echo "Stopping SvnWcSub Server: " + THE_PID=`cat ${pidfile}` + kill $THE_PID + RETVAL=$? + [ $RETVAL -eq 0 ] && echo "ok" || echo "failed" + return $RETVAL +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + restart) + stop + start + ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 +esac + +exit $RETVAL diff --git a/tools/server-side/svnpubsub/rc.d/svnwcsub.freebsd b/tools/server-side/svnpubsub/rc.d/svnwcsub.freebsd new file mode 100755 index 0000000..58ad386 --- /dev/null +++ b/tools/server-side/svnpubsub/rc.d/svnwcsub.freebsd @@ -0,0 +1,39 @@ +#!/bin/sh +# +# PROVIDE: svnwcsub +# REQUIRE: DAEMON +# KEYWORD: shutdown + +. 
/etc/rc.subr + +name="svnwcsub" +rcvar=`set_rcvar` + +load_rc_config $name + +# +# DO NOT CHANGE THESE DEFAULT VALUES HERE +# SET THEM IN THE /etc/rc.conf FILE +# +svnwcsub_enable=${svnwcsub_enable-"NO"} +svnwcsub_user=${svnwcsub_user-"svnwc"} +svnwcsub_group=${svnwcsub_group-"svnwc"} +svnwcsub_pidfile=${svnwcsub_pidfile-"/var/run/svnwcsub/svnwcsub.pub"} +svnwcsub_env="PYTHON_EGG_CACHE" +svnwcsub_cmd_int=${svnwcsub_cmd_int-"python"} +svnwcsub_config=${svnwcsub_config-"/etc/svnwcsub.conf"} +svnwcsub_logfile=${svnwcsub_logfile-"/var/log/svnwcsub/svnwcsub.log"} +pidfile="${svnwcsub_pidfile}" + +export PYTHON_EGG_CACHE="/var/run/svnwcsub" + +command="/usr/local/svnpubsub/svnwcsub.py" +command_interpreter="/usr/local/bin/${svnwcsub_cmd_int}" +command_args="--daemon \ + --logfile=${svnwcsub_logfile} \ + --pidfile=${pidfile} \ + --uid=${svnwcsub_user} --gid=${svnwcsub_group} \ + --umask=002 \ + ${svnwcsub_config}" + +run_rc_command "$1" diff --git a/tools/server-side/svnpubsub/rc.d/svnwcsub.solaris b/tools/server-side/svnpubsub/rc.d/svnwcsub.solaris new file mode 100755 index 0000000..bd0c2bd --- /dev/null +++ b/tools/server-side/svnpubsub/rc.d/svnwcsub.solaris @@ -0,0 +1,56 @@ +#!/usr/bin/bash +# +# a dumb init script for twistd on solaris. cus like, writing XML for SMF is f'ing lame. +# + +svnwcsub_user=${svnwcsub_user-"svnwc"} +svnwcsub_group=${svnwcsub_group-"other"} +svnwcsub_pidfile=${svnwcsub_pidfile-"/var/run/svnwcsub/svnwcsub.pid"} +svnwcsub_config=${svnwcsub_config-"/etc/svnwcsub.conf"} +svnwcsub_logfile=${svnwcsub_logfile-"/x1/log/svnwcsub/svnwcsub.log"} +pidfile="${svnwcsub_pidfile}" + +SVNWCSUB_CMD="/usr/local/svnpubsub/svnwcsub.py \ + --daemon \ + --logfile=${svnwcsub_logfile} \ + --pidfile=${pidfile} \ + --uid=${svnwcsub_user} --gid=${svnwcsub_group} \ + --umask=002 \ + ${svnwcsub_config}" + +RETVAL=0 + +start() { + echo "Starting SvnWcSub Server: " + $SVNWCSUB_CMD + RETVAL=$? 
+ [ $RETVAL -eq 0 ] && echo "ok" || echo "failed" + return $RETVAL +} + +stop() { + echo "Stopping SvnWcSub Server: " + THE_PID=`cat ${pidfile}` + kill $THE_PID + RETVAL=$? + [ $RETVAL -eq 0 ] && echo "ok" || echo "failed" + return $RETVAL +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + restart) + stop + start + ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 +esac + +exit $RETVAL diff --git a/tools/server-side/svnpubsub/revprop-change-hook.py b/tools/server-side/svnpubsub/revprop-change-hook.py new file mode 100755 index 0000000..3aa857b --- /dev/null +++ b/tools/server-side/svnpubsub/revprop-change-hook.py @@ -0,0 +1,90 @@ +#!/usr/local/bin/python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +SVNLOOK="/usr/local/svn-install/current/bin/svnlook" +#SVNLOOK="/usr/local/bin/svnlook" + +HOST="127.0.0.1" +PORT=2069 + +import sys +try: + import simplejson as json +except ImportError: + import json + +import urllib2 + + +import svnpubsub.util + +def svnlook(cmd, **kwargs): + args = [SVNLOOK] + cmd + return svnpubsub.util.check_output(args, **kwargs) + +def svnlook_uuid(repo): + cmd = ["uuid", "--", repo] + return svnlook(cmd).strip() + +def svnlook_revprop(repo, revision, propname): + cmd = ["propget", "-r", revision, "--revprop", "--", repo, propname] + data = svnlook(cmd) + #print data + return data + +def do_put(body): + opener = urllib2.build_opener(urllib2.HTTPHandler) + request = urllib2.Request("http://%s:%d/metadata" %(HOST, PORT), data=body) + request.add_header('Content-Type', 'application/json') + request.get_method = lambda: 'PUT' + url = opener.open(request) + + +def main(repo, revision, author, propname, action): + revision = revision.lstrip('r') + if action in ('A', 'M'): + new_value = svnlook_revprop(repo, revision, propname) + elif action == 'D': + new_value = None + else: + sys.stderr.write('Unknown revprop change action "%s"\n' % action) + sys.exit(1) + if action in ('D', 'M'): + old_value = sys.stdin.read() + else: + old_value = None + data = {'type': 'svn', + 'format': 1, + 'id': int(revision), + 'repository': svnlook_uuid(repo), + 'revprop': { + 'name': propname, + 'committer': author, + 'value': new_value, + 'old_value': old_value, + } + } + body = json.dumps(data) + do_put(body) + +if __name__ == "__main__": + if len(sys.argv) != 6: + sys.stderr.write("invalid args\n") + sys.exit(1) + + main(*sys.argv[1:6]) diff --git a/tools/server-side/svnpubsub/svnpubsub.tac b/tools/server-side/svnpubsub/svnpubsub.tac new file mode 100644 index 0000000..574ad24 --- /dev/null +++ b/tools/server-side/svnpubsub/svnpubsub.tac @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# 
contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +import os +from twisted.application import service, internet + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from svnpubsub.server import svnpubsub_server + +application = service.Application("SvnPubSub") + +def get_service(): + return internet.TCPServer(2069, svnpubsub_server()) + +service = get_service() +service.setServiceParent(application) diff --git a/tools/server-side/svnpubsub/svnpubsub/__init__.py b/tools/server-side/svnpubsub/svnpubsub/__init__.py new file mode 100644 index 0000000..f50e195 --- /dev/null +++ b/tools/server-side/svnpubsub/svnpubsub/__init__.py @@ -0,0 +1 @@ +# Turn svnpubsub/ into a package. diff --git a/tools/server-side/svnpubsub/svnpubsub/client.py b/tools/server-side/svnpubsub/svnpubsub/client.py new file mode 100644 index 0000000..871a5e9 --- /dev/null +++ b/tools/server-side/svnpubsub/svnpubsub/client.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Generic client for SvnPubSub +# +# ### usage... +# +# +# EVENTS +# +# connected: a connection to the server has been opened (though not +# necessarily established) +# closed: the connection was closed. reconnect will be attempted. +# error: an error closed the connection. reconnect will be attempted. +# ping: the server has sent a keepalive +# stale: no activity has been seen, so the connection will be closed +# and reopened +# + +import asyncore +import asynchat +import socket +import functools +import time +import json +try: + import urlparse +except ImportError: + import urllib.parse as urlparse + +# How long the polling loop should wait for activity before returning. +TIMEOUT = 30.0 + +# Always delay a bit when trying to reconnect. This is not precise, but sets +# a minimum amount of delay. At the moment, there is no further backoff. +RECONNECT_DELAY = 25.0 + +# If we don't see anything from the server for this amount time, then we +# will drop and reconnect. The TCP connection may have gone down without +# us noticing it somehow. 
+STALE_DELAY = 60.0 + + +class SvnpubsubClientException(Exception): + pass + +class Client(asynchat.async_chat): + + def __init__(self, url, commit_callback, event_callback, + metadata_callback = None): + asynchat.async_chat.__init__(self) + + self.last_activity = time.time() + self.ibuffer = [] + + self.url = url + parsed_url = urlparse.urlsplit(url) + if parsed_url.scheme != 'http': + raise ValueError("URL scheme must be http: '%s'" % url) + host = parsed_url.hostname + port = parsed_url.port + resource = parsed_url.path + if parsed_url.query: + resource += "?%s" % parsed_url.query + if parsed_url.fragment: + resource += "#%s" % parsed_url.fragment + + self.event_callback = event_callback + + self.parser = JSONRecordHandler(commit_callback, event_callback, + metadata_callback) + + # Wait for the end of headers. Then we start parsing JSON. + self.set_terminator(b'\r\n\r\n') + self.skipping_headers = True + + self.create_socket(socket.AF_INET, socket.SOCK_STREAM) + try: + self.connect((host, port)) + except: + self.handle_error() + return + + self.push(('GET %s HTTP/1.0\r\n\r\n' % resource).encode('ascii')) + + def handle_connect(self): + self.event_callback('connected', None) + + def handle_close(self): + self.event_callback('closed', None) + self.close() + + def handle_error(self): + self.event_callback('error', None) + self.close() + + def found_terminator(self): + if self.skipping_headers: + self.skipping_headers = False + # Each JSON record is terminated by a null character + self.set_terminator(b'\0') + else: + record = b"".join(self.ibuffer) + self.ibuffer = [] + self.parser.feed(record.decode()) + + def collect_incoming_data(self, data): + # Remember the last time we saw activity + self.last_activity = time.time() + + if not self.skipping_headers: + self.ibuffer.append(data) + + +class Notification(object): + def __init__(self, data): + self.__dict__.update(data) + +class Commit(Notification): + KIND = 'COMMIT' + +class Metadata(Notification): + KIND = 
'METADATA' + + +class JSONRecordHandler: + def __init__(self, commit_callback, event_callback, metadata_callback): + self.commit_callback = commit_callback + self.event_callback = event_callback + self.metadata_callback = metadata_callback + + EXPECTED_VERSION = 1 + + def feed(self, record): + obj = json.loads(record) + if 'svnpubsub' in obj: + actual_version = obj['svnpubsub'].get('version') + if actual_version != self.EXPECTED_VERSION: + raise SvnpubsubClientException( + "Unknown svnpubsub format: %r != %d" + % (actual_version, self.EXPECTED_VERSION)) + self.event_callback('version', obj['svnpubsub']['version']) + elif 'commit' in obj: + commit = Commit(obj['commit']) + self.commit_callback(commit) + elif 'stillalive' in obj: + self.event_callback('ping', obj['stillalive']) + elif 'metadata' in obj and self.metadata_callback: + metadata = Metadata(obj['metadata']) + self.metadata_callback(metadata) + + +class MultiClient(object): + def __init__(self, urls, commit_callback, event_callback, + metadata_callback = None): + self.commit_callback = commit_callback + self.event_callback = event_callback + self.metadata_callback = metadata_callback + + # No target time, as no work to do + self.target_time = 0 + self.work_items = [ ] + + for url in urls: + self._add_channel(url) + + def _reconnect(self, url, event_name, event_arg): + if event_name == 'closed' or event_name == 'error': + # Stupid connection closed for some reason. Set up a reconnect. Note + # that it should have been removed from asyncore.socket_map already. + self._reconnect_later(url) + + # Call the user's callback now. + self.event_callback(url, event_name, event_arg) + + def _reconnect_later(self, url): + # Set up a work item to reconnect in a little while. + self.work_items.append(url) + + # Only set a target if one has not been set yet. 
Otherwise, we could + # create a race condition of continually moving out towards the future + if not self.target_time: + self.target_time = time.time() + RECONNECT_DELAY + + def _add_channel(self, url): + # Simply instantiating the client will install it into the global map + # for processing in the main event loop. + if self.metadata_callback: + Client(url, + functools.partial(self.commit_callback, url), + functools.partial(self._reconnect, url), + functools.partial(self.metadata_callback, url)) + else: + Client(url, + functools.partial(self.commit_callback, url), + functools.partial(self._reconnect, url)) + + def _check_stale(self): + now = time.time() + for client in asyncore.socket_map.values(): + if client.last_activity + STALE_DELAY < now: + # Whoops. No activity in a while. Signal this fact, Close the + # Client, then have it reconnected later on. + self.event_callback(client.url, 'stale', client.last_activity) + + # This should remove it from .socket_map. + client.close() + + self._reconnect_later(client.url) + + def _maybe_work(self): + # If we haven't reach the targetted time, or have no work to do, + # then fast-path exit + if time.time() < self.target_time or not self.work_items: + return + + # We'll take care of all the work items, so no target for future work + self.target_time = 0 + + # Play a little dance just in case work gets added while we're + # currently working on stuff + work = self.work_items + self.work_items = [ ] + + for url in work: + self._add_channel(url) + + def run_forever(self): + while True: + if asyncore.socket_map: + asyncore.loop(timeout=TIMEOUT, count=1) + else: + time.sleep(TIMEOUT) + + self._check_stale() + self._maybe_work() diff --git a/tools/server-side/svnpubsub/svnpubsub/server.py b/tools/server-side/svnpubsub/svnpubsub/server.py new file mode 100644 index 0000000..d0cdff9 --- /dev/null +++ b/tools/server-side/svnpubsub/svnpubsub/server.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software 
Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# SvnPubSub - Simple Push Notification of Subversion commits +# +# Based on the theory behind the Live Journal Atom Streaming Service: +# <http://atom.services.livejournal.com/> +# +# Instead of using a complicated XMPP/AMPQ/JMS/super messaging service, +# we have simple HTTP GETs and PUTs to get data in and out. +# +# Currently supports JSON serialization. +# +# Example Sub clients: +# curl -sN http://127.0.0.1:2069/commits +# curl -sN 'http://127.0.0.1:2069/commits/svn/*' +# curl -sN http://127.0.0.1:2069/commits/svn +# curl -sN 'http://127.0.0.1:2069/commits/*/13f79535-47bb-0310-9956-ffa450edef68' +# curl -sN http://127.0.0.1:2069/commits/svn/13f79535-47bb-0310-9956-ffa450edef68 +# +# curl -sN http://127.0.0.1:2069/metadata +# curl -sN 'http://127.0.0.1:2069/metadata/svn/*' +# curl -sN http://127.0.0.1:2069/metadata/svn +# curl -sN 'http://127.0.0.1:2069/metadata/*/13f79535-47bb-0310-9956-ffa450edef68' +# curl -sN http://127.0.0.1:2069/metadata/svn/13f79535-47bb-0310-9956-ffa450edef68 +# +# URLs are constructed from 3 parts: +# /${notification}/${optional_type}/${optional_repository} +# +# Notifications can be sent for commits or metadata (e.g., revprop) changes. 
+# If the type is included in the URL, you will only get notifications of that type. +# The type can be * and then you will receive notifications of any type. +# +# If the repository is included in the URL, you will only receive +# messages about that repository. The repository can be * and then you +# will receive messages about all repositories. +# +# Example Pub clients: +# curl -T revinfo.json -i http://127.0.0.1:2069/commits +# +# TODO: +# - Add Real access controls (not just 127.0.0.1) +# - Document PUT format +# - Convert to twisted.python.log + + + + +try: + import simplejson as json +except ImportError: + import json + +import sys + +import twisted +from twisted.internet import reactor +from twisted.internet import defer +from twisted.web import server +from twisted.web import resource +from twisted.python import log + +import time + +class Notification(object): + def __init__(self, r): + self.__dict__.update(r) + if not self.check_value('repository'): + raise ValueError('Invalid Repository Value') + if not self.check_value('type'): + raise ValueError('Invalid Type Value') + if not self.check_value('format'): + raise ValueError('Invalid Format Value') + if not self.check_value('id'): + raise ValueError('Invalid ID Value') + + def check_value(self, k): + return hasattr(self, k) and self.__dict__[k] + + def render(self): + raise NotImplementedError + + def render_log(self): + raise NotImplementedError + +class Commit(Notification): + KIND = 'COMMIT' + + def render(self): + obj = {'commit': {}} + obj['commit'].update(self.__dict__) + return json.dumps(obj) + + def render_log(self): + try: + paths_changed = " %d paths changed" % len(self.changed) + except: + paths_changed = "" + return "commit %s:%s repo '%s' id '%s'%s" % ( + self.type, self.format, self.repository, self.id, + paths_changed) + +class Metadata(Notification): + KIND = 'METADATA' + + def render(self): + obj = {'metadata': {}} + obj['metadata'].update(self.__dict__) + return json.dumps(obj) + + 
def render_log(self): + return "metadata %s:%s repo '%s' id '%s' revprop '%s'" % ( + self.type, self.format, self.repository, self.id, + self.revprop['name']) + + +HEARTBEAT_TIME = 15 + +class Client(object): + def __init__(self, pubsub, r, kind, type, repository): + self.pubsub = pubsub + r.notifyFinish().addErrback(self.finished) + self.r = r + self.kind = kind + self.type = type + self.repository = repository + self.alive = True + log.msg("OPEN: %s:%d (%d clients online)"% (r.getClientIP(), r.client.port, pubsub.cc()+1)) + + def finished(self, reason): + self.alive = False + log.msg("CLOSE: %s:%d (%d clients online)"% (self.r.getClientIP(), self.r.client.port, self.pubsub.cc())) + try: + self.pubsub.remove(self) + except ValueError: + pass + + def interested_in(self, notification): + if self.kind != notification.KIND: + return False + + if self.type and self.type != notification.type: + return False + + if self.repository and self.repository != notification.repository: + return False + + return True + + def notify(self, data): + self.write(data) + + def start(self): + self.write_start() + reactor.callLater(HEARTBEAT_TIME, self.heartbeat, None) + + def heartbeat(self, args): + if self.alive: + self.write_heartbeat() + reactor.callLater(HEARTBEAT_TIME, self.heartbeat, None) + + def write_data(self, data): + self.write(data + "\n\0") + + """ "Data must not be unicode" is what the interfaces.ITransport says... grr. 
""" + def write(self, input): + self.r.write(str(input)) + + def write_start(self): + self.r.setHeader('X-SVNPubSub-Version', '1') + self.r.setHeader('content-type', 'application/vnd.apache.vc-notify+json') + self.write('{"svnpubsub": {"version": 1}}\n\0') + + def write_heartbeat(self): + self.write(json.dumps({"stillalive": time.time()}) + "\n\0") + + +class SvnPubSub(resource.Resource): + isLeaf = True + clients = [] + + __notification_uri_map = {'commits': Commit.KIND, + 'metadata': Metadata.KIND} + + def __init__(self, notification_class): + resource.Resource.__init__(self) + self.__notification_class = notification_class + + def cc(self): + return len(self.clients) + + def remove(self, c): + self.clients.remove(c) + + def render_GET(self, request): + log.msg("REQUEST: %s" % (request.uri)) + request.setHeader('content-type', 'text/plain') + + repository = None + type = None + + uri = request.uri.split('/') + uri_len = len(uri) + if uri_len < 2 or uri_len > 4: + request.setResponseCode(400) + return "Invalid path\n" + + kind = self.__notification_uri_map.get(uri[1], None) + if kind is None: + request.setResponseCode(400) + return "Invalid path\n" + + if uri_len >= 3: + type = uri[2] + + if uri_len == 4: + repository = uri[3] + + # Convert wild card to None. 
+        if type == '*':
+            type = None
+        if repository == '*':
+            repository = None
+
+        c = Client(self, request, kind, type, repository)
+        self.clients.append(c)
+        c.start()
+        return twisted.web.server.NOT_DONE_YET
+
+    def notifyAll(self, notification):
+        data = notification.render()
+
+        log.msg("%s: %s (%d clients)"
+                % (notification.KIND, notification.render_log(), self.cc()))
+        for client in self.clients:
+            if client.interested_in(notification):
+                client.write_data(data)
+
+    def render_PUT(self, request):
+        request.setHeader('content-type', 'text/plain')
+        ip = request.getClientIP()
+        if ip != "127.0.0.1":
+            request.setResponseCode(401)
+            return "Access Denied"
+        input = request.content.read()
+        #import pdb;pdb.set_trace()
+        #print "input: %s" % (input)
+        try:
+            data = json.loads(input)
+            notification = self.__notification_class(data)
+        except ValueError as e:
+            request.setResponseCode(400)
+            errstr = str(e)
+            log.msg("%s: failed due to: %s" % (self.__notification_class.KIND, errstr))
+            return errstr
+        self.notifyAll(notification)
+        return "Ok"
+
+
+def svnpubsub_server():
+    root = resource.Resource()
+    c = SvnPubSub(Commit)
+    m = SvnPubSub(Metadata)
+    root.putChild('commits', c)
+    root.putChild('metadata', m)
+    return server.Site(root)
+
+if __name__ == "__main__":
+    log.startLogging(sys.stdout)
+    # Port 2069 "HTTP Event Port", whatever, sounds good to me
+    reactor.listenTCP(2069, svnpubsub_server())
+    reactor.run()
+
diff --git a/tools/server-side/svnpubsub/svnpubsub/util.py b/tools/server-side/svnpubsub/svnpubsub/util.py
new file mode 100644
index 0000000..e254f8b
--- /dev/null
+++ b/tools/server-side/svnpubsub/svnpubsub/util.py
@@ -0,0 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import subprocess as __subprocess
+
+# check_output() is only available in Python 2.7. Allow us to run with
+# earlier versions
+try:
+    __check_output = __subprocess.check_output
+    def check_output(args, env=None, universal_newlines=False):
+        return __check_output(args, shell=False, env=env,
+                              universal_newlines=universal_newlines)
+except AttributeError:
+    def check_output(args, env=None, universal_newlines=False):
+        # note: we only use these three args
+        pipe = __subprocess.Popen(args, shell=False, env=env,
+                                  stdout=__subprocess.PIPE,
+                                  universal_newlines=universal_newlines)
+        output, _ = pipe.communicate()
+        if pipe.returncode:
+            raise __subprocess.CalledProcessError(pipe.returncode, args)
+        return output
diff --git a/tools/server-side/svnpubsub/svntweet.py b/tools/server-side/svnpubsub/svntweet.py
new file mode 100755
index 0000000..eae8e9a
--- /dev/null
+++ b/tools/server-side/svnpubsub/svntweet.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# SvnTweet - Subscribe to a SvnPubSub stream, and Twitter about it!
#
# Example:
#  svntweet.py my-config.json
#
# With my-config.json containing stream paths and the twitter auth info:
#    {"stream": "http://svn.apache.org:2069/commits",
#     "username": "asfcommits",
#     "password": "MyLuggageComboIs1234"}
#

import threading
import sys
import os
try:
    import simplejson as json
except ImportError:
    import json

try:
    # Python >=3.0
    from urllib.parse import urlparse
except ImportError:
    # Python <3.0
    from urlparse import urlparse

import time
import posixpath

# NOTE(review): the original file additionally imported
# twisted.internet/twisted.python/twisted.web.client names here and
# appended twitty-twister/lib to sys.path before importing twitter
# (exiting with a hint when absent); those third-party imports are kept
# at the stream-handling classes that use them.

class Config(object):
    """Parsed JSON config; re-reads the file only when its mtime changes."""

    def __init__(self, path):
        self.path = path
        # mtime of the last successful load; 0 forces the initial read
        self.mtime_path = 0
        self.config = {}
        self._load_config()

    def _load_config(self):
        mtime = os.path.getmtime(self.path)
        if mtime != self.mtime_path:
            # BUG FIX: the file handle was opened and never closed; use a
            # context manager so it is always released.
            with open(self.path, "rb") as fp:
                self.mtime_path = mtime
                self.config = json.loads(fp.read())
data): + pass + + def pageEnd(self): + pass + +class Commit(object): + def __init__(self, commit): + self.__dict__.update(commit) + +class JSONRecordHandler: + def __init__(self, bdec): + self.bdec = bdec + + def feed(self, record): + obj = json.loads(record) + if 'svnpubsub' in obj: + actual_version = obj['svnpubsub'].get('version') + EXPECTED_VERSION = 1 + if actual_version != EXPECTED_VERSION: + raise ValueException("Unknown svnpubsub format: %r != %d" + % (actual_format, expected_format)) + elif 'commit' in obj: + commit = Commit(obj['commit']) + if not hasattr(commit, 'type'): + raise ValueException("Commit object is missing type field.") + if not hasattr(commit, 'format'): + raise ValueException("Commit object is missing format field.") + if commit.type != 'svn' and commit.format != 1: + raise ValueException("Unexpected type and/or format: %s:%s" + % (commit.type, commit.format)) + self.bdec.commit(commit) + elif 'stillalive' in obj: + self.bdec.stillalive() + +class JSONHTTPStream(HTTPStream): + def __init__(self, url, bdec): + HTTPStream.__init__(self, url) + self.bdec = bdec + self.ibuffer = [] + self.parser = JSONRecordHandler(bdec) + + def pageStart(self, partial): + self.bdec.pageStart() + + def pagePart(self, data): + eor = data.find("\0") + if eor >= 0: + self.ibuffer.append(data[0:eor]) + self.parser.feed(''.join(self.ibuffer)) + self.ibuffer = [data[eor+1:]] + else: + self.ibuffer.append(data) + +def connectTo(url, bdec): + u = urlparse(url) + port = u.port + if not port: + port = 80 + s = JSONHTTPStream(url, bdec) + conn = reactor.connectTCP(u.hostname, u.port, s) + return [s, conn] + + +CHECKBEAT_TIME = 90 + +class BigDoEverythingClasss(object): + def __init__(self, config): + self.c = config + self.c._load_config() + self.url = str(self.c.config.get('stream')) + self.failures = 0 + self.alive = time.time() + self.checker = task.LoopingCall(self._checkalive) + self.transport = None + self.stream = None + self._restartStream() + self.watch = [] + 
self.twit = twitter.Twitter(self.c.config.get('username'), self.c.config.get('password')) + + def pageStart(self): + log.msg("Stream Connection Established") + self.failures = 0 + + def _restartStream(self): + (self.stream, self.transport) = connectTo(self.url, self) + self.stream.deferred.addBoth(self.streamDead) + self.alive = time.time() + self.checker.start(CHECKBEAT_TIME) + + def _checkalive(self): + n = time.time() + if n - self.alive > CHECKBEAT_TIME: + log.msg("Stream is dead, reconnecting") + self.transport.disconnect() + + def stillalive(self): + self.alive = time.time() + + def streamDead(self, v): + BACKOFF_SECS = 5 + BACKOFF_MAX = 60 + self.checker.stop() + + self.stream = None + self.failures += 1 + backoff = min(self.failures * BACKOFF_SECS, BACKOFF_MAX) + log.msg("Stream disconnected, trying again in %d seconds.... %s" % (backoff, self.url)) + reactor.callLater(backoff, self._restartStream) + + def _normalize_path(self, path): + if path[0] != '/': + return "/" + path + return posixpath.abspath(path) + + def tweet(self, msg): + log.msg("SEND TWEET: %s" % (msg)) + self.twit.update(msg).addCallback(self.tweet_done).addErrback(log.msg) + + def tweet_done(self, x): + log.msg("TWEET: Success!") + + def build_tweet(self, commit): + maxlen = 144 + left = maxlen + paths = map(self._normalize_path, commit.changed) + if not len(paths): + return None + path = posixpath.commonprefix(paths) + if path[0:1] == '/' and len(path) > 1: + path = path[1:] + + #TODO: allow URL to be configurable. 
+ link = " - http://svn.apache.org/r%d" % (commit.id) + left -= len(link) + msg = "r%d in %s by %s: " % (commit.id, path, commit.committer) + left -= len(msg) + if left > 3: + msg += commit.log[0:left] + msg += link + return msg + + def commit(self, commit): + log.msg("COMMIT r%d (%d paths)" % (commit.id, len(commit.changed))) + msg = self.build_tweet(commit) + if msg: + self.tweet(msg) + #print "Common Prefix: %s" % (pre) + +def main(config_file): + c = Config(config_file) + big = BigDoEverythingClasss(c) + reactor.run() + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("invalid args, read source code") + sys.exit(0) + log.startLogging(sys.stdout) + main(sys.argv[1]) diff --git a/tools/server-side/svnpubsub/svnwcsub.conf.example b/tools/server-side/svnpubsub/svnwcsub.conf.example new file mode 100644 index 0000000..644a3b7 --- /dev/null +++ b/tools/server-side/svnpubsub/svnwcsub.conf.example @@ -0,0 +1,16 @@ +[DEFAULT] +svnbin: /usr/local/bin/svn +streams: http://svn.example.org:2069/commits/svn +# hook: /usr/bin/true + +## The values below are used by ConfigParser's interpolation syntax. +## See http://docs.python.org/library/configparser +SOME_REPOS: svn://svn.example.org/repos/chaos + +[env] +HOME: /home/svn +LANG: en_US.UTF-8 + +[track] +/usr/local/foo/prod: %(SOME_REPOS)s/foo/production +/usr/local/foo/dev: %(SOME_REPOS)s/foo/trunk diff --git a/tools/server-side/svnpubsub/svnwcsub.py b/tools/server-side/svnpubsub/svnwcsub.py new file mode 100755 index 0000000..1a65b37 --- /dev/null +++ b/tools/server-side/svnpubsub/svnwcsub.py @@ -0,0 +1,559 @@ +#!/usr/bin/env python +# encoding: UTF-8 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# SvnWcSub - Subscribe to a SvnPubSub stream, and keep a set of working copy +# paths in sync +# +# Example: +# svnwcsub.py svnwcsub.conf +# +# On startup svnwcsub checks the working copy's path, runs a single svn update +# and then watches for changes to that path. +# +# See svnwcsub.conf for more information on its contents. +# + +# TODO: +# - bulk update at startup time to avoid backlog warnings +# - fold BDEC into Daemon +# - fold WorkingCopy._get_match() into __init__ +# - remove wc_ready(). assume all WorkingCopy instances are usable. +# place the instances into .watch at creation. the .update_applies() +# just returns if the wc is disabled (eg. 
could not find wc dir) +# - figure out way to avoid the ASF-specific PRODUCTION_RE_FILTER +# (a base path exclusion list should work for the ASF) +# - add support for SIGHUP to reread the config and reinitialize working copies +# - joes will write documentation for svnpubsub as these items become fulfilled +# - make LOGLEVEL configurable + +import errno +import subprocess +import threading +import sys +import stat +import os +import re +import posixpath +try: + import ConfigParser +except ImportError: + import configparser as ConfigParser +import time +import logging.handlers +try: + import Queue +except ImportError: + import queue as Queue +import optparse +import functools +try: + import urlparse +except ImportError: + import urllib.parse as urlparse + +import daemonize +import svnpubsub.client +import svnpubsub.util + +assert hasattr(subprocess, 'check_call') +def check_call(*args, **kwds): + """Wrapper around subprocess.check_call() that logs stderr upon failure, + with an optional list of exit codes to consider non-failure.""" + assert 'stderr' not in kwds + if '__okayexits' in kwds: + __okayexits = kwds['__okayexits'] + del kwds['__okayexits'] + else: + __okayexits = set([0]) # EXIT_SUCCESS + kwds.update(stderr=subprocess.PIPE) + pipe = subprocess.Popen(*args, **kwds) + output, errput = pipe.communicate() + if pipe.returncode not in __okayexits: + cmd = args[0] if len(args) else kwds.get('args', '(no command)') + # TODO: log stdout too? + logging.error('Command failed: returncode=%d command=%r stderr=%r', + pipe.returncode, cmd, errput) + raise subprocess.CalledProcessError(pipe.returncode, args) + return pipe.returncode # is EXIT_OK + +### note: this runs synchronously. within the current Twisted environment, +### it is called from ._get_match() which is run on a thread so it won't +### block the Twisted main loop. +def svn_info(svnbin, env, path): + "Run 'svn info' on the target path, returning a dict of info data." 
+ args = [svnbin, "info", "--non-interactive", "--", path] + output = svnpubsub.util.check_output(args, env=env).strip() + info = { } + for line in output.split('\n'): + idx = line.index(':') + info[line[:idx]] = line[idx+1:].strip() + return info + +try: + import glob + glob.iglob + def is_emptydir(path): + # ### If the directory contains only dotfile children, this will readdir() + # ### the entire directory. But os.readdir() is not exposed to us... + for x in glob.iglob('%s/*' % path): + return False + for x in glob.iglob('%s/.*' % path): + return False + return True +except (ImportError, AttributeError): + # Python ≤2.4 + def is_emptydir(path): + # This will read the entire directory list to memory. + return not os.listdir(path) + +class WorkingCopy(object): + def __init__(self, bdec, path, url): + self.path = path + self.url = url + + try: + self.match, self.uuid = self._get_match(bdec.svnbin, bdec.env) + bdec.wc_ready(self) + except: + logging.exception('problem with working copy: %s', path) + + def update_applies(self, uuid, path): + if self.uuid != uuid: + return False + + path = str(path) + if path == self.match: + #print "ua: Simple match" + # easy case. woo. + return True + if len(path) < len(self.match): + # path is potentially a parent directory of match? 
+ #print "ua: parent check" + if self.match[0:len(path)] == path: + return True + if len(path) > len(self.match): + # path is potentially a sub directory of match + #print "ua: sub dir check" + if path[0:len(self.match)] == self.match: + return True + return False + + def _get_match(self, svnbin, env): + ### quick little hack to auto-checkout missing working copies + dotsvn = os.path.join(self.path, ".svn") + if not os.path.isdir(dotsvn) or is_emptydir(dotsvn): + logging.info("autopopulate %s from %s" % (self.path, self.url)) + check_call([svnbin, 'co', '-q', + '--force', + '--non-interactive', + '--config-option', + 'config:miscellany:use-commit-times=on', + '--', self.url, self.path], + env=env) + + # Fetch the info for matching dirs_changed against this WC + info = svn_info(svnbin, env, self.path) + root = info['Repository Root'] + url = info['URL'] + relpath = url[len(root):] # also has leading '/' + uuid = info['Repository UUID'] + return str(relpath), uuid + + +PRODUCTION_RE_FILTER = re.compile("/websites/production/[^/]+/") + +class BigDoEverythingClasss(object): + def __init__(self, config): + self.svnbin = config.get_value('svnbin') + self.env = config.get_env() + self.tracking = config.get_track() + self.hook = config.get_optional_value('hook') + self.streams = config.get_value('streams').split() + self.worker = BackgroundWorker(self.svnbin, self.env, self.hook) + self.watch = [ ] + + def start(self): + for path, url in self.tracking.items(): + # working copies auto-register with the BDEC when they are ready. + WorkingCopy(self, path, url) + + def wc_ready(self, wc): + # called when a working copy object has its basic info/url, + # Add it to our watchers, and trigger an svn update. 
+ logging.info("Watching WC at %s <-> %s" % (wc.path, wc.url)) + self.watch.append(wc) + self.worker.add_work(OP_BOOT, wc) + + def _normalize_path(self, path): + if path[0] != '/': + return "/" + path + return posixpath.abspath(path) + + def commit(self, url, commit): + if commit.type != 'svn' or commit.format != 1: + logging.info("SKIP unknown commit format (%s.%d)", + commit.type, commit.format) + return + logging.info("COMMIT r%d (%d paths) from %s" + % (commit.id, len(commit.changed), url)) + + paths = map(self._normalize_path, commit.changed) + if len(paths): + pre = posixpath.commonprefix(paths) + if pre == "/websites/": + # special case for svnmucc "dynamic content" buildbot commits + # just take the first production path to avoid updating all cms working copies + for p in paths: + m = PRODUCTION_RE_FILTER.match(p) + if m: + pre = m.group(0) + break + + #print "Common Prefix: %s" % (pre) + wcs = [wc for wc in self.watch if wc.update_applies(commit.repository, pre)] + logging.info("Updating %d WC for r%d" % (len(wcs), commit.id)) + for wc in wcs: + self.worker.add_work(OP_UPDATE, wc) + + +# Start logging warnings if the work backlog reaches this many items +BACKLOG_TOO_HIGH = 20 +OP_BOOT = 'boot' +OP_UPDATE = 'update' +OP_CLEANUP = 'cleanup' + +class BackgroundWorker(threading.Thread): + def __init__(self, svnbin, env, hook): + threading.Thread.__init__(self) + + # The main thread/process should not wait for this thread to exit. + ### compat with Python 2.5 + self.setDaemon(True) + + self.svnbin = svnbin + self.env = env + self.hook = hook + self.q = Queue.Queue() + + self.has_started = False + + def run(self): + while True: + # This will block until something arrives + operation, wc = self.q.get() + + # Warn if the queue is too long. + # (Note: the other thread might have added entries to self.q + # after the .get() and before the .qsize().) 
+ qsize = self.q.qsize()+1 + if operation != OP_BOOT and qsize > BACKLOG_TOO_HIGH: + logging.warn('worker backlog is at %d', qsize) + + try: + if operation == OP_UPDATE: + self._update(wc) + elif operation == OP_BOOT: + self._update(wc, boot=True) + elif operation == OP_CLEANUP: + self._cleanup(wc) + else: + logging.critical('unknown operation: %s', operation) + except: + logging.exception('exception in worker') + + # In case we ever want to .join() against the work queue + self.q.task_done() + + def add_work(self, operation, wc): + # Start the thread when work first arrives. Thread-start needs to + # be delayed in case the process forks itself to become a daemon. + if not self.has_started: + self.start() + self.has_started = True + + self.q.put((operation, wc)) + + def _update(self, wc, boot=False): + "Update the specified working copy." + + # For giggles, let's clean up the working copy in case something + # happened earlier. + self._cleanup(wc) + + logging.info("updating: %s", wc.path) + + ## Run the hook + HEAD = svn_info(self.svnbin, self.env, wc.url)['Revision'] + if self.hook: + hook_mode = ['pre-update', 'pre-boot'][boot] + logging.info('running hook: %s at %s', + wc.path, hook_mode) + args = [self.hook, hook_mode, wc.path, HEAD, wc.url] + rc = check_call(args, env=self.env, __okayexits=[0, 1]) + if rc == 1: + # TODO: log stderr + logging.warn('hook denied update of %s at %s', + wc.path, hook_mode) + return + del rc + + ### we need to move some of these args into the config. these are + ### still specific to the ASF setup. + args = [self.svnbin, 'switch', + '--quiet', + '--non-interactive', + '--trust-server-cert', + '--ignore-externals', + '--config-option', + 'config:miscellany:use-commit-times=on', + '--', + wc.url + '@' + HEAD, + wc.path] + check_call(args, env=self.env) + + ### check the loglevel before running 'svn info'? 
+ info = svn_info(self.svnbin, self.env, wc.path) + assert info['Revision'] == HEAD + logging.info("updated: %s now at r%s", wc.path, info['Revision']) + + ## Run the hook + if self.hook: + hook_mode = ['post-update', 'boot'][boot] + logging.info('running hook: %s at revision %s due to %s', + wc.path, info['Revision'], hook_mode) + args = [self.hook, hook_mode, + wc.path, info['Revision'], wc.url] + check_call(args, env=self.env) + + def _cleanup(self, wc): + "Run a cleanup on the specified working copy." + + ### we need to move some of these args into the config. these are + ### still specific to the ASF setup. + args = [self.svnbin, 'cleanup', + '--non-interactive', + '--trust-server-cert', + '--config-option', + 'config:miscellany:use-commit-times=on', + wc.path] + check_call(args, env=self.env) + + +class ReloadableConfig(ConfigParser.SafeConfigParser): + def __init__(self, fname): + ConfigParser.SafeConfigParser.__init__(self) + + self.fname = fname + self.read(fname) + + ### install a signal handler to set SHOULD_RELOAD. BDEC should + ### poll this flag, and then adjust its internal structures after + ### the reload. + self.should_reload = False + + def reload(self): + # Delete everything. Just re-reading would overlay, and would not + # remove sections/options. Note that [DEFAULT] will not be removed. + for section in self.sections(): + self.remove_section(section) + + # Now re-read the configuration file. 
+ self.read(fname) + + def get_value(self, which): + return self.get(ConfigParser.DEFAULTSECT, which) + + def get_optional_value(self, which, default=None): + if self.has_option(ConfigParser.DEFAULTSECT, which): + return self.get(ConfigParser.DEFAULTSECT, which) + else: + return default + + def get_env(self): + env = os.environ.copy() + default_options = self.defaults().keys() + for name, value in self.items('env'): + if name not in default_options: + env[name] = value + return env + + def get_track(self): + "Return the {PATH: URL} dictionary of working copies to track." + track = dict(self.items('track')) + for name in self.defaults().keys(): + del track[name] + return track + + def optionxform(self, option): + # Do not lowercase the option name. + return str(option) + + +class Daemon(daemonize.Daemon): + def __init__(self, logfile, pidfile, umask, bdec): + daemonize.Daemon.__init__(self, logfile, pidfile) + + self.umask = umask + self.bdec = bdec + + def setup(self): + # There is no setup which the parent needs to wait for. + pass + + def run(self): + logging.info('svnwcsub started, pid=%d', os.getpid()) + + # Set the umask in the daemon process. Defaults to 000 for + # daemonized processes. Foreground processes simply inherit + # the value from the parent process. + if self.umask is not None: + umask = int(self.umask, 8) + os.umask(umask) + logging.info('umask set to %03o', umask) + + # Start the BDEC (on the main thread), then start the client + self.bdec.start() + + mc = svnpubsub.client.MultiClient(self.bdec.streams, + self.bdec.commit, + self._event) + mc.run_forever() + + def _event(self, url, event_name, event_arg): + if event_name == 'error': + logging.exception('from %s', url) + elif event_name == 'ping': + logging.debug('ping from %s', url) + else: + logging.info('"%s" from %s', event_name, url) + + +def prepare_logging(logfile): + "Log to the specified file, or to stdout if None." + + if logfile: + # Rotate logs daily, keeping 7 days worth. 
+ handler = logging.handlers.TimedRotatingFileHandler( + logfile, when='midnight', backupCount=7, + ) + else: + handler = logging.StreamHandler(sys.stdout) + + # Add a timestamp to the log records + formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s', + '%Y-%m-%d %H:%M:%S') + handler.setFormatter(formatter) + + # Apply the handler to the root logger + root = logging.getLogger() + root.addHandler(handler) + + ### use logging.INFO for now. switch to cmdline option or a config? + root.setLevel(logging.INFO) + + +def handle_options(options): + # Set up the logging, then process the rest of the options. + prepare_logging(options.logfile) + + # In daemon mode, we let the daemonize module handle the pidfile. + # Otherwise, we should write this (foreground) PID into the file. + if options.pidfile and not options.daemon: + pid = os.getpid() + # Be wary of symlink attacks + try: + os.remove(options.pidfile) + except OSError: + pass + fd = os.open(options.pidfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL, + stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) + os.write(fd, '%d\n' % pid) + os.close(fd) + logging.info('pid %d written to %s', pid, options.pidfile) + + if options.gid: + try: + gid = int(options.gid) + except ValueError: + import grp + gid = grp.getgrnam(options.gid)[2] + logging.info('setting gid %d', gid) + os.setgid(gid) + + if options.uid: + try: + uid = int(options.uid) + except ValueError: + import pwd + uid = pwd.getpwnam(options.uid)[2] + logging.info('setting uid %d', uid) + os.setuid(uid) + + +def main(args): + parser = optparse.OptionParser( + description='An SvnPubSub client to keep working copies synchronized ' + 'with a repository.', + usage='Usage: %prog [options] CONFIG_FILE', + ) + parser.add_option('--logfile', + help='filename for logging') + parser.add_option('--pidfile', + help="the process' PID will be written to this file") + parser.add_option('--uid', + help='switch to this UID before running') + parser.add_option('--gid', + 
help='switch to this GID before running') + parser.add_option('--umask', + help='set this (octal) umask before running') + parser.add_option('--daemon', action='store_true', + help='run as a background daemon') + + options, extra = parser.parse_args(args) + + if len(extra) != 1: + parser.error('CONFIG_FILE is required') + config_file = extra[0] + + if options.daemon and not options.logfile: + parser.error('LOGFILE is required when running as a daemon') + if options.daemon and not options.pidfile: + parser.error('PIDFILE is required when running as a daemon') + + # Process any provided options. + handle_options(options) + + c = ReloadableConfig(config_file) + bdec = BigDoEverythingClasss(c) + + # We manage the logfile ourselves (along with possible rotation). The + # daemon process can just drop stdout/stderr into /dev/null. + d = Daemon('/dev/null', os.path.abspath(options.pidfile), + options.umask, bdec) + if options.daemon: + # Daemonize the process and call sys.exit() with appropriate code + d.daemonize_exit() + else: + # Just run in the foreground (the default) + d.foreground() + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/tools/server-side/svnpubsub/testserver.py b/tools/server-side/svnpubsub/testserver.py new file mode 100755 index 0000000..8966a95 --- /dev/null +++ b/tools/server-side/svnpubsub/testserver.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# A simple test server for responding in different ways to SvnPubSub clients. +# This avoids the complexity of the Twisted framework in order to direct +# various (abnormal) conditions at the client. +# +# ### usage... +# + +import sys +import BaseHTTPServer + + +PORT = 2069 + +TEST_BODY = '{"svnpubsub": {"version": 1}}\n\0{"commit": {"type": "svn", "format": 1, "repository": "12345678-1234-1234-1234-123456789012", "id": "1234", "committer": "johndoe", "date": "2012-01-01 01:01:01 +0000 (Sun, 01 Jan 2012)", "log": "Frob the ganoozle with the snookish", "changed": {"one/path/alpha": {"flags": "U "}, "some/other/directory/": {"flags": "_U "}}}}\n\0' + +SEND_KEEPALIVE = True + + +class TestHandler(BaseHTTPServer.BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + self.send_header('Content-Length', str(len(TEST_BODY))) + self.send_header('Connection', 'keep-alive') + self.end_headers() + self.wfile.write(TEST_BODY) + + +if __name__ == '__main__': + server = BaseHTTPServer.HTTPServer(('', PORT), TestHandler) + sys.stderr.write('Now listening on port %d...\n' % (PORT,)) + server.serve_forever() diff --git a/tools/server-side/svnpubsub/watcher.py b/tools/server-side/svnpubsub/watcher.py new file mode 100755 index 0000000..11bf066 --- /dev/null +++ b/tools/server-side/svnpubsub/watcher.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Watch for events from SvnPubSub and print them to stdout +# +# + +import sys +import pprint +try: + import urlparse +except ImportError: + import urllib.parse as urlparse + +import svnpubsub.client + + +def _commit(url, commit): + print('COMMIT: from %s' % url) + pprint.pprint(vars(commit), indent=2) + +def _metadata(url, metadata): + print('METADATA: from %s' % url) + pprint.pprint(vars(metadata), indent=2) + +def _event(url, event_name, event_arg): + if event_arg: + print('EVENT: from %s "%s" "%s"' % (url, event_name, event_arg)) + else: + print('EVENT: from %s "%s"' % (url, event_name)) + + +def main(urls): + mc = svnpubsub.client.MultiClient(urls, _commit, _event, _metadata) + mc.run_forever() + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("usage: watcher.py URL [URL...]") + sys.exit(0) + main(sys.argv[1:]) diff --git a/tools/server-side/test_svn_server_log_parse.py b/tools/server-side/test_svn_server_log_parse.py new file mode 100755 index 0000000..efc642c --- /dev/null +++ b/tools/server-side/test_svn_server_log_parse.py @@ -0,0 +1,611 @@ +#!/usr/bin/python + +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== + +# Run this without arguments to run unit tests. +# Run with a path to a davautocheck ops log to test that it can parse that. + +import os +import re +import sys +import tempfile +try: + # Python >=3.0 + from urllib.parse import quote as urllib_parse_quote +except ImportError: + # Python <3.0 + from urllib import quote as urllib_parse_quote +import unittest + +import svn.core + +import svn_server_log_parse + +class TestCase(unittest.TestCase): + def setUp(self): + # Define a class to stuff everything passed to any handle_ + # method into self.result. + class cls(svn_server_log_parse.Parser): + def __getattr__(cls_self, attr): + if attr.startswith('handle_'): + return lambda *a: setattr(self, 'result', a) + raise AttributeError + self.parse = cls().parse + + def test_unknown(self): + line = 'unknown log line' + self.parse(line) + self.assertEqual(self.result, (line,)) + + def test_open(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'open') + self.assertRaises(svn_server_log_parse.Error, self.parse, 'open 2 cap / SVN/1.60. fooclient') + self.assertRaises(svn_server_log_parse.Error, self.parse, 'open a cap=() / SVN/1.60. 
fooclient') + self.assertEqual(self.parse('open 2 cap=() / SVN fooclient'), '') + self.assertEqual(self.result, (2, [], '/', 'SVN', 'fooclient')) + # TODO: Teach it about the capabilities, rather than allowing + # any words at all. + self.assertEqual(self.parse('open 2 cap=(foo) / SVN foo%20client'), '') + self.assertEqual(self.result, (2, ['foo'], '/', 'SVN', 'foo client')) + + def test_reparent(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'reparent') + self.assertEqual(self.parse('reparent /'), '') + self.assertEqual(self.result, ('/',)) + + def test_get_latest_rev(self): + self.assertEqual(self.parse('get-latest-rev'), '') + self.assertEqual(self.result, ()) + self.assertEqual(self.parse('get-latest-rev r3'), 'r3') + self.assertEqual(self.result, ()) + + def test_get_dated_rev(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, + 'get-dated-rev') + self.assertEqual(self.parse('get-dated-rev 2008-04-15T20:41:24.000000Z'), '') + self.assertEqual(self.result, ('2008-04-15T20:41:24.000000Z',)) + + def test_commit(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'commit') + self.assertRaises(svn_server_log_parse.Error, self.parse, 'commit 3') + self.assertEqual(self.parse('commit r3'), '') + self.assertEqual(self.result, (3,)) + self.assertEqual(self.parse('commit r3 leftover'), ' leftover') + self.assertEqual(self.result, (3,)) + + def test_get_dir(self): + self.get_dir_or_file('get-dir') + + def test_get_file(self): + self.get_dir_or_file('get-file') + + def get_dir_or_file(self, c): + self.assertRaises(svn_server_log_parse.Error, self.parse, c) + self.assertRaises(svn_server_log_parse.Error, self.parse, c + ' foo') + self.assertRaises(svn_server_log_parse.Error, self.parse, c + ' foo 3') + self.assertEqual(self.parse(c + ' /a/b/c r3 ...'), ' ...') + self.assertEqual(self.result, ('/a/b/c', 3, False, False)) + self.assertEqual(self.parse(c + ' / r3'), '') + self.assertEqual(self.result, ('/', 3, False, False)) 
+ # path must be absolute + self.assertRaises(svn_server_log_parse.Error, + self.parse, c + ' a/b/c r3') + self.assertEqual(self.parse(c + ' /k r27 text'), '') + self.assertEqual(self.result, ('/k', 27, True, False)) + self.assertEqual(self.parse(c + ' /k r27 props'), '') + self.assertEqual(self.result, ('/k', 27, False, True)) + self.assertEqual(self.parse(c + ' /k r27 text props'), '') + self.assertEqual(self.result, ('/k', 27, True, True)) + # out of order not accepted + self.assertEqual(self.parse(c + ' /k r27 props text'), ' text') + self.assertEqual(self.result, ('/k', 27, False, True)) + + def test_lock(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'lock') + self.parse('lock (/foo)') + self.assertEqual(self.result, (['/foo'], False)) + self.assertEqual(self.parse('lock (/foo) steal ...'), ' ...') + self.assertEqual(self.result, (['/foo'], True)) + self.assertEqual(self.parse('lock (/foo) stear'), ' stear') + + def test_change_rev_prop(self): + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'change-rev-prop r3') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'change-rev-prop r svn:log') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'change-rev-prop rX svn:log') + self.assertEqual(self.parse('change-rev-prop r3 svn:log ...'), ' ...') + self.assertEqual(self.result, (3, 'svn:log')) + + def test_rev_proplist(self): + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'rev-proplist') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'rev-proplist r') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'rev-proplist rX') + self.assertEqual(self.parse('rev-proplist r3 ...'), ' ...') + self.assertEqual(self.result, (3,)) + + def test_rev_prop(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop') + self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop r') + self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop rX') + 
self.assertEqual(self.parse('rev-prop r3 foo ...'), ' ...') + self.assertEqual(self.result, (3, 'foo')) + + def test_unlock(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'unlock') + self.parse('unlock (/foo)') + self.assertEqual(self.result, (['/foo'], False)) + self.assertEqual(self.parse('unlock (/foo) break ...'), ' ...') + self.assertEqual(self.result, (['/foo'], True)) + self.assertEqual(self.parse('unlock (/foo) bear'), ' bear') + + def test_get_lock(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-lock') + self.parse('get-lock /foo') + self.assertEqual(self.result, ('/foo',)) + + def test_get_locks(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-locks') + self.parse('get-locks /foo') + self.assertEqual(self.result, ('/foo',)) + + def test_get_locations(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, + 'get-locations') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-locations /foo 3') + self.assertEqual(self.parse('get-locations /foo (3 4) ...'), ' ...') + self.assertEqual(self.result, ('/foo', [3, 4])) + self.assertEqual(self.parse('get-locations /foo (3)'), '') + self.assertEqual(self.result, ('/foo', [3])) + + def test_get_location_segments(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, + 'get-location-segments') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-location-segments /foo 3') + self.assertEqual(self.parse('get-location-segments /foo@2 r3:4'), '') + self.assertEqual(self.result, ('/foo', 2, 3, 4)) + + def test_get_file_revs(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-file-revs') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-file-revs /foo 3') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-file-revs /foo 3:a') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-file-revs /foo r3:a') + 
self.assertEqual(self.parse('get-file-revs /foo r3:4 ...'), ' ...') + self.assertEqual(self.result, ('/foo', 3, 4, False)) + self.assertEqual(self.parse('get-file-revs /foo r3:4' + ' include-merged-revisions ...'), ' ...') + self.assertEqual(self.result, ('/foo', 3, 4, True)) + + def test_get_mergeinfo(self): + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-mergeinfo') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-mergeinfo /foo') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-mergeinfo (/foo') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-mergeinfo (/foo /bar') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'get-mergeinfo (/foo)') + self.assertRaises(svn_server_log_parse.BadMergeinfoInheritanceError, + self.parse, 'get-mergeinfo (/foo) bork') + self.assertEqual(self.parse('get-mergeinfo (/foo) explicit'), '') + self.assertEqual(self.result, (['/foo'], + svn.core.svn_mergeinfo_explicit, False)) + self.assertEqual(self.parse('get-mergeinfo (/foo /bar) inherited ...'), + ' ...') + self.assertEqual(self.result, (['/foo', '/bar'], + svn.core.svn_mergeinfo_inherited, False)) + self.assertEqual(self.result, (['/foo', '/bar'], + svn.core.svn_mergeinfo_inherited, False)) + + def test_log(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'log') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'log /foo') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'log (/foo)') + self.assertEqual(self.parse('log (/foo) r3:4' + ' include-merged-revisions'), '') + self.assertEqual(self.result, + (['/foo'], 3, 4, 0, False, False, True, [])) + self.assertEqual(self.parse('log (/foo /bar) r3:4 revprops=all ...'), + ' ...') + self.assertEqual(self.result, + (['/foo', '/bar'], 3, 4, 0, False, False, False, None)) + self.assertEqual(self.parse('log (/foo) r3:4 revprops=(a b) ...'), + ' ...') + self.assertEqual(self.result, + (['/foo'], 3, 4, 0, False, False, 
False, ['a', 'b'])) + self.assertEqual(self.parse('log (/foo) r8:1 limit=3'), '') + self.assertEqual(self.result, + (['/foo'], 8, 1, 3, False, False, False, [])) + + def test_check_path(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'check-path') + self.assertEqual(self.parse('check-path /foo@9'), '') + self.assertEqual(self.result, ('/foo', 9)) + + def test_stat(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'stat') + self.assertEqual(self.parse('stat /foo@9'), '') + self.assertEqual(self.result, ('/foo', 9)) + + def test_replay(self): + self.assertRaises(svn_server_log_parse.Error, self.parse, 'replay') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'replay /foo') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'replay (/foo) r9') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'replay (/foo) r9:10') + self.assertEqual(self.parse('replay /foo r9'), '') + self.assertEqual(self.result, ('/foo', 9)) + + def test_checkout_or_export(self): + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'checkout-or-export') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'checkout-or-export /foo') + self.assertEqual(self.parse('checkout-or-export /foo r9'), '') + self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown)) + self.assertRaises(svn_server_log_parse.BadDepthError, self.parse, + 'checkout-or-export /foo r9 depth=INVALID-DEPTH') + self.assertRaises(svn_server_log_parse.BadDepthError, self.parse, + 'checkout-or-export /foo r9 depth=bork') + self.assertEqual(self.parse('checkout-or-export /foo r9 depth=files .'), + ' .') + self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files)) + + def test_diff_1path(self): + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'diff') + self.assertEqual(self.parse('diff /foo r9:10'), '') + self.assertEqual(self.result, ('/foo', 9, 10, + svn.core.svn_depth_unknown, False)) + 
self.assertEqual(self.parse('diff /foo r9:10' + ' ignore-ancestry ...'), ' ...') + self.assertEqual(self.result, ('/foo', 9, 10, + svn.core.svn_depth_unknown, True)) + self.assertEqual(self.parse('diff /foo r9:10 depth=files'), '') + self.assertEqual(self.result, ('/foo', 9, 10, + svn.core.svn_depth_files, False)) + + def test_diff_2paths(self): + self.assertEqual(self.parse('diff /foo@9 /bar@10'), '') + self.assertEqual(self.result, ('/foo', 9, '/bar', 10, + svn.core.svn_depth_unknown, False)) + self.assertEqual(self.parse('diff /foo@9 /bar@10' + ' ignore-ancestry ...'), ' ...') + self.assertEqual(self.result, ('/foo', 9, '/bar', 10, + svn.core.svn_depth_unknown, True)) + self.assertEqual(self.parse('diff /foo@9 /bar@10' + ' depth=files ignore-ancestry'), '') + self.assertEqual(self.result, ('/foo', 9, '/bar', 10, + svn.core.svn_depth_files, True)) + + def test_status(self): + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'status') + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'status /foo') + self.assertEqual(self.parse('status /foo r9'), '') + self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown)) + self.assertRaises(svn_server_log_parse.BadDepthError, self.parse, + 'status /foo r9 depth=INVALID-DEPTH') + self.assertRaises(svn_server_log_parse.BadDepthError, self.parse, + 'status /foo r9 depth=bork') + self.assertEqual(self.parse('status /foo r9 depth=files .'), + ' .') + self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files)) + + def test_switch(self): + self.assertEqual(self.parse('switch /foo /bar@10 ...'), ' ...') + self.assertEqual(self.result, ('/foo', '/bar', 10, + svn.core.svn_depth_unknown)) + self.assertEqual(self.parse('switch /foo /bar@10' + ' depth=files'), '') + self.assertEqual(self.result, ('/foo', '/bar', 10, + svn.core.svn_depth_files)) + + def test_update(self): + self.assertRaises(svn_server_log_parse.Error, + self.parse, 'update') + self.assertRaises(svn_server_log_parse.Error, + 
self.parse, 'update /foo') + self.assertEqual(self.parse('update /foo r9'), '') + self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown, + False)) + self.assertRaises(svn_server_log_parse.BadDepthError, self.parse, + 'update /foo r9 depth=INVALID-DEPTH') + self.assertRaises(svn_server_log_parse.BadDepthError, self.parse, + 'update /foo r9 depth=bork') + self.assertEqual(self.parse('update /foo r9 depth=files .'), ' .') + self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files, + False)) + self.assertEqual(self.parse('update /foo r9 send-copyfrom-args .'), + ' .') + self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown, + True)) + +if __name__ == '__main__': + if len(sys.argv) == 1: + # No arguments so run the unit tests. + unittest.main() + sys.stderr.write('unittest.main failed to exit\n') + sys.exit(2) + + # Use the argument as the path to a log file to test against. + + def uri_encode(s): + # urllib.parse.quote encodes :&@ characters, svn does not. + return urllib_parse_quote(s, safe='/:&@') + + # Define a class to reconstruct the SVN-ACTION string. 
+ class Test(svn_server_log_parse.Parser): + def handle_unknown(self, line): + sys.stderr.write('unknown log line at %d:\n%s\n' % (self.linenum, + line)) + sys.exit(2) + + def handle_open(self, protocol, capabilities, path, ra_client, client): + capabilities = ' '.join(capabilities) + if ra_client is None: + ra_client = '-' + if client is None: + client = '-' + path = uri_encode(path) + self.action = ('open %d cap=(%s) %s %s %s' + % (protocol, capabilities, path, ra_client, client)) + + def handle_reparent(self, path): + path = uri_encode(path) + self.action = 'reparent ' + path + + def handle_get_latest_rev(self): + self.action = 'get-latest-rev' + + def handle_get_dated_rev(self, date): + self.action = 'get-dated-rev ' + date + + def handle_commit(self, revision): + self.action = 'commit r%d' % (revision,) + + def handle_get_dir(self, path, revision, text, props): + path = uri_encode(path) + self.action = 'get-dir %s r%d' % (path, revision) + if text: + self.action += ' text' + if props: + self.action += ' props' + + def handle_get_file(self, path, revision, text, props): + path = uri_encode(path) + self.action = 'get-file %s r%d' % (path, revision) + if text: + self.action += ' text' + if props: + self.action += ' props' + + def handle_lock(self, paths, steal): + paths = [uri_encode(x) for x in paths] + self.action = 'lock (%s)' % (' '.join(paths),) + if steal: + self.action += ' steal' + + def handle_change_rev_prop(self, revision, revprop): + revprop = uri_encode(revprop) + self.action = 'change-rev-prop r%d %s' % (revision, revprop) + + def handle_rev_prop(self, revision, revprop): + revprop = uri_encode(revprop) + self.action = 'rev-prop r%d %s' % (revision, revprop) + + def handle_rev_proplist(self, revision): + self.action = 'rev-proplist r%d' % (revision,) + + def handle_unlock(self, paths, break_lock): + paths = [uri_encode(x) for x in paths] + self.action = 'unlock (%s)' % (' '.join(paths),) + if break_lock: + self.action += ' break' + + def 
handle_get_lock(self, path): + path = uri_encode(path) + self.action = 'get-lock ' + path + + def handle_get_locks(self, path): + self.action = 'get-locks ' + path + path = uri_encode(path) + + def handle_get_locations(self, path, revisions): + path = uri_encode(path) + self.action = ('get-locations %s (%s)' + % (path, ' '.join([str(x) for x in revisions]))) + + def handle_get_location_segments(self, path, peg, left, right): + path = uri_encode(path) + self.action = 'get-location-segments %s@%d r%d:%d' % (path, peg, + left, right) + + def handle_get_file_revs(self, path, left, right, + include_merged_revisions): + path = uri_encode(path) + self.action = 'get-file-revs %s r%d:%d' % (path, left, right) + if include_merged_revisions: + self.action += ' include-merged-revisions' + + def handle_get_mergeinfo(self, paths, inheritance, include_descendants): + paths = [uri_encode(x) for x in paths] + self.action = ('get-mergeinfo (%s) %s' + % (' '.join(paths), + svn.core.svn_inheritance_to_word(inheritance))) + if include_descendants: + self.action += ' include-descendants' + + def handle_log(self, paths, left, right, limit, discover_changed_paths, + strict, include_merged_revisions, revprops): + paths = [uri_encode(x) for x in paths] + self.action = 'log (%s) r%d:%d' % (' '.join(paths), + left, right) + if limit != 0: + self.action += ' limit=%d' % (limit,) + if discover_changed_paths: + self.action += ' discover-changed-paths' + if strict: + self.action += ' strict' + if include_merged_revisions: + self.action += ' include-merged-revisions' + if revprops is None: + self.action += ' revprops=all' + elif len(revprops) > 0: + revprops = [uri_encode(x) for x in revprops] + self.action += ' revprops=(%s)' % (' '.join(revprops),) + + def handle_check_path(self, path, revision): + path = uri_encode(path) + self.action = 'check-path %s@%d' % (path, revision) + + def handle_stat(self, path, revision): + path = uri_encode(path) + self.action = 'stat %s@%d' % (path, revision) + + 
def handle_replay(self, path, revision): + path = uri_encode(path) + self.action = 'replay %s r%d' % (path, revision) + + def maybe_depth(self, depth): + if depth != svn.core.svn_depth_unknown: + self.action += ' depth=%s' % ( + svn.core.svn_depth_to_word(depth),) + + def handle_checkout_or_export(self, path, revision, depth): + path = uri_encode(path) + self.action = 'checkout-or-export %s r%d' % (path, revision) + self.maybe_depth(depth) + + def handle_diff_1path(self, path, left, right, + depth, ignore_ancestry): + path = uri_encode(path) + self.action = 'diff %s r%d:%d' % (path, left, right) + self.maybe_depth(depth) + if ignore_ancestry: + self.action += ' ignore-ancestry' + + def handle_diff_2paths(self, from_path, from_rev, + to_path, to_rev, + depth, ignore_ancestry): + from_path = uri_encode(from_path) + to_path = uri_encode(to_path) + self.action = ('diff %s@%d %s@%d' + % (from_path, from_rev, to_path, to_rev)) + self.maybe_depth(depth) + if ignore_ancestry: + self.action += ' ignore-ancestry' + + def handle_status(self, path, revision, depth): + path = uri_encode(path) + self.action = 'status %s r%d' % (path, revision) + self.maybe_depth(depth) + + def handle_switch(self, from_path, to_path, to_rev, depth): + from_path = uri_encode(from_path) + to_path = uri_encode(to_path) + self.action = ('switch %s %s@%d' + % (from_path, to_path, to_rev)) + self.maybe_depth(depth) + + def handle_update(self, path, revision, depth, send_copyfrom_args): + path = uri_encode(path) + self.action = 'update %s r%d' % (path, revision) + self.maybe_depth(depth) + if send_copyfrom_args: + self.action += ' send-copyfrom-args' + + tmp = tempfile.mktemp() + try: + fp = open(tmp, 'w') + parser = Test() + parser.linenum = 0 + log_file = sys.argv[1] + log_type = None + for line in open(log_file): + if log_type is None: + # Figure out which log type we have. 
+ if re.match(r'\d+ \d\d\d\d-', line): + log_type = 'svnserve' + elif re.match(r'\[\d\d/', line): + log_type = 'mod_dav_svn' + else: + sys.stderr.write("unknown log format in '%s'" + % (log_file,)) + sys.exit(3) + sys.stderr.write('parsing %s log...\n' % (log_type,)) + sys.stderr.flush() + + words = line.split() + if log_type == 'svnserve': + # Skip over PID, date, client address, username, and repos. + if words[5].startswith('ERR'): + # Skip error lines. + fp.write(line) + continue + leading = ' '.join(words[:5]) + action = ' '.join(words[5:]) + else: + # Find the SVN-ACTION string from the CustomLog format + # davautocheck.sh uses. If that changes, this will need + # to as well. Currently it's + # %t %u %{SVN-REPOS-NAME}e %{SVN-ACTION}e + leading = ' '.join(words[:4]) + action = ' '.join(words[4:]) + + # Parse the action and write the reconstructed action to + # the temporary file. Ignore the returned trailing text, + # as we have none in the davautocheck ops log. + parser.linenum += 1 + try: + parser.parse(action) + except svn_server_log_parse.Error: + sys.stderr.write('error at line %d: %s\n' + % (parser.linenum, action)) + raise + fp.write(leading + ' ' + parser.action + '\n') + fp.close() + # Check differences between original and reconstructed files + # (should be identical). + result = os.spawnlp(os.P_WAIT, 'diff', 'diff', '-u', log_file, tmp) + if result == 0: + sys.stderr.write('OK\n') + sys.exit(result) + finally: + try: + os.unlink(tmp) + except Exception as e: + sys.stderr.write('os.unlink(tmp): %s\n' % (e,)) |