summaryrefslogtreecommitdiff
path: root/tools/backup/hot-backup.py.in
blob: 76bc91de871574d1552b1749a1fc6970120cd921 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#  hot-backup.py: perform a "hot" backup of a Subversion repository
#                 and clean any old Berkeley DB logfiles after the
#                 backup completes, if the repository backend is
#                 Berkeley DB.
#
#  Subversion is a tool for revision control. 
#  See http://subversion.apache.org for more information.
#    
# ====================================================================
#    Licensed to the Apache Software Foundation (ASF) under one
#    or more contributor license agreements.  See the NOTICE file
#    distributed with this work for additional information
#    regarding copyright ownership.  The ASF licenses this file
#    to you under the Apache License, Version 2.0 (the
#    "License"); you may not use this file except in compliance
#    with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing,
#    software distributed under the License is distributed on an
#    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#    KIND, either express or implied.  See the License for the
#    specific language governing permissions and limitations
#    under the License.
# ====================================================================

# $HeadURL: https://svn.apache.org/repos/asf/subversion/branches/1.10.x/tools/backup/hot-backup.py.in $
# $LastChangedDate: 2015-12-21 15:02:31 +0000 (Mon, 21 Dec 2015) $
# $LastChangedBy: danielsh $
# $LastChangedRevision: 1721179 $

######################################################################

import sys, os, getopt, stat, re, time, shutil, subprocess

######################################################################
# Global Settings

# Path to svnlook utility
svnlook = r"@SVN_BINDIR@/svnlook"

# Path to svnadmin utility
svnadmin = r"@SVN_BINDIR@/svnadmin"

# Default number of backups to keep around (0 for "keep them all")
num_backups = int(os.environ.get("SVN_HOTBACKUP_BACKUPS_NUMBER", 64))

# Archive types/extensions
archive_map = {
  'gz'  : ".tar.gz",
  'bz2' : ".tar.bz2",
  'zip' : ".zip",
  'zip64' : ".zip"
  }

# Chmod recursively on a whole subtree
def chmod_tree(path, mode, mask):
  for dirpath, dirs, files in os.walk(path):
    for name in dirs + files:
      fullname = os.path.join(dirpath, name)
      if not os.path.islink(fullname):
        new_mode = (os.stat(fullname)[stat.ST_MODE] & ~mask) | mode
        os.chmod(fullname, new_mode)

# For clearing away read-only directories
def safe_rmtree(dirname, retry=0):
  "Remove the tree at DIRNAME, making it writable first"
  def rmtree(dirname):
    chmod_tree(dirname, 0o666, 0o666)
    shutil.rmtree(dirname)

  if not os.path.exists(dirname):
    return

  if retry:
    for delay in (0.5, 1, 2, 4):
      try:
        rmtree(dirname)
        break
      except:
        time.sleep(delay)
    else:
      rmtree(dirname)
  else:
    rmtree(dirname)

######################################################################
# Command line arguments

def usage(out = sys.stdout):
  scriptname = os.path.basename(sys.argv[0])
  out.write(
"""USAGE: %s [OPTIONS] REPOS_PATH BACKUP_PATH

Create a backup of the repository at REPOS_PATH in a subdirectory of
the BACKUP_PATH location, named after the youngest revision.

Options:
  --archive-type=FMT Create an archive of the backup. FMT can be one of:
                       bz2  : Creates a bzip2 compressed tar file.
                       gz   : Creates a gzip compressed tar file.
                       zip  : Creates a compressed zip file.
                       zip64: Creates a zip64 file (can be > 2GB).
  --num-backups=N    Number of prior backups to keep around (0 to keep all).
  --verify           Verify the backup.
  --help      -h     Print this help message and exit.

""" % (scriptname,))


try:
  opts, args = getopt.gnu_getopt(sys.argv[1:], "h?", ["archive-type=",
                                                      "num-backups=",
                                                      "verify",
                                                      "help"])
except getopt.GetoptError as e:
  sys.stderr.write("ERROR: %s\n\n" % e)
  sys.stderr.flush()
  usage(sys.stderr)
  sys.exit(2)

archive_type = None
verify_copy = False

for o, a in opts:
  if o == "--archive-type":
    archive_type = a
  elif o == "--num-backups":
    num_backups = int(a)
  elif o == "--verify":
    verify_copy = True
  elif o in ("-h", "--help", "-?"):
    usage()
    sys.exit()

if archive_type not in (None, 'bz2', 'gz', 'zip', 'zip64'):
  sys.stderr.write("ERROR: Bad --archive-type\n")
  usage(sys.stderr)
  sys.exit(2)

if len(args) != 2:
  sys.stderr.write("ERROR: only two arguments allowed.\n\n")
  sys.stderr.flush()
  usage(sys.stderr)
  sys.exit(2)

# Path to repository
repo_dir = args[0]
repo = os.path.basename(os.path.abspath(repo_dir))

# Where to store the repository backup.  The backup will be placed in
# a *subdirectory* of this location, named after the youngest
# revision.
backup_dir = args[1]

# Added to the filename regexp, set when using --archive-type.
ext_re = ""

# Do we want to create an archive of the backup
if archive_type:
  if archive_type in archive_map:
    # Additionally find files with the archive extension.
    ext_re = "(" + re.escape(archive_map[archive_type]) + ")?"
  else:
    sys.stderr.write("Unknown archive type '%s'.\n\n\n" % archive_type)
    sys.stderr.flush()
    usage(sys.stderr)
    sys.exit(2)


######################################################################
# Helper functions

def comparator(a, b):
  # We pass in filenames so there is never a case where they are equal.
  regexp = re.compile("-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?" +
                      ext_re + "$")
  matcha = regexp.search(a)
  matchb = regexp.search(b)
  reva = int(matcha.groupdict()['revision'])
  revb = int(matchb.groupdict()['revision'])
  if (reva < revb):
    return -1
  elif (reva > revb):
    return 1
  else:
    inca = matcha.groupdict()['increment']
    incb = matchb.groupdict()['increment']
    if not inca:
      return -1
    elif not incb:
      return 1;
    elif (int(inca) < int(incb)):
      return -1
    else:
      return 1

def get_youngest_revision():
  """Examine the repository REPO_DIR using the svnlook binary
  specified by SVNLOOK, and return the youngest revision."""

  p = subprocess.Popen([svnlook, 'youngest', '--', repo_dir],
                       stdin=subprocess.PIPE,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)
  infile, outfile, errfile = p.stdin, p.stdout, p.stderr

  stdout_lines = outfile.readlines()
  stderr_lines = errfile.readlines()
  outfile.close()
  infile.close()
  errfile.close()

  if stderr_lines:
    raise Exception("Unable to find the youngest revision for repository '%s'"
                    ": %s" % (repo_dir, stderr_lines[0].rstrip()))

  return stdout_lines[0].strip()

######################################################################
# Main

print("Beginning hot backup of '"+ repo_dir + "'.")


### Step 1: get the youngest revision.

try:
  youngest = get_youngest_revision()
except Exception as e:
  sys.stderr.write("%s\n" % e)
  sys.stderr.flush()
  sys.exit(1)

print("Youngest revision is %s" % youngest)


### Step 2: Find next available backup path

backup_subdir = os.path.join(backup_dir, repo + "-" + youngest)

# If there is already a backup of this revision, then append the
# next highest increment to the path. We still need to do a backup
# because the repository might have changed despite no new revision
# having been created. We find the highest increment and add one
# rather than start from 1 and increment because the starting
# increments may have already been removed due to num_backups.

regexp = re.compile("^" + re.escape(repo) + "-" + re.escape(youngest) +
                    "(-(?P<increment>[0-9]+))?" + ext_re + "$")
directory_list = os.listdir(backup_dir)
young_list = [x for x in directory_list if regexp.search(x)]
if young_list:
  young_list.sort(comparator)
  increment = regexp.search(young_list.pop()).groupdict()['increment']
  if increment:
    backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-"
                                 + str(int(increment) + 1))
  else:
    backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-1")

### Step 3: Ask subversion to make a hot copy of a repository.
###         copied last.

print("Backing up repository to '" + backup_subdir + "'...")
err_code = subprocess.call([svnadmin, "hotcopy", "--clean-logs",
                            '--', repo_dir, backup_subdir])
if err_code != 0:
  sys.stderr.write("Unable to backup the repository.\n")
  sys.stderr.flush()
  sys.exit(err_code)
else:
  print("Done.")

### Step 4: Verify the hotcopy
if verify_copy:
  print("Verifying backup...")
  err_code = subprocess.call([svnadmin, "verify", "--quiet", '--', backup_subdir])
  if err_code != 0:
    sys.stderr.write("Backup verification failed.\n")
    sys.stderr.flush()
    sys.exit(err_code)
  else:
    print("Done.")

### Step 5: Make an archive of the backup if required.
if archive_type:
  archive_path = backup_subdir + archive_map[archive_type]
  err_msg = ""

  print("Archiving backup to '" + archive_path + "'...")
  if archive_type == 'gz' or archive_type == 'bz2':
    try:
      import tarfile
      tar = tarfile.open(archive_path, 'w:' + archive_type)
      tar.add(backup_subdir, os.path.basename(backup_subdir))
      tar.close()
    except ImportError as e:
      err_msg = "Import failed: " + str(e)
      err_code = -2
    except tarfile.TarError as e:
      err_msg = "Tar failed: " + str(e)
      err_code = -3

  elif archive_type == 'zip' or archive_type == 'zip64':
    try:
      import zipfile
      
      def add_to_zip(zp, root, dirname, names):
        root = os.path.join(root, '')
        
        for file in names:
          path = os.path.join(dirname, file)
          if os.path.isfile(path):
            zp.write(path, path[len(root):])
          elif os.path.isdir(path) and os.path.islink(path):
            for dirpath, dirs, files in os.walk(path):
              add_to_zip(zp, path, dirpath, dirs + files)
            
      zp = zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, archive_type == 'zip64')
      for dirpath, dirs, files in os.walk(backup_subdir):
        add_to_zip(zp, backup_dir, dirpath, dirs + files)
      zp.close()
    except ImportError as e:
      err_msg = "Import failed: " + str(e)
      err_code = -4
    except zipfile.error as e:
      err_msg = "Zip failed: " + str(e)
      err_code = -5


  if err_code != 0:
    sys.stderr.write("Unable to create an archive for the backup.\n%s\n" % err_msg)
    sys.stderr.flush()
    sys.exit(err_code)
  else:
    print("Archive created, removing backup '" + backup_subdir + "'...")
    safe_rmtree(backup_subdir, 1)

### Step 6: finally, remove all repository backups other than the last
###         NUM_BACKUPS.

if num_backups > 0:
  regexp = re.compile("^" + re.escape(repo) + "-[0-9]+(-[0-9]+)?" + ext_re + "$")
  directory_list = os.listdir(backup_dir)
  old_list = [x for x in directory_list if regexp.search(x)]
  old_list.sort(comparator)
  del old_list[max(0,len(old_list)-num_backups):]
  for item in old_list:
    old_backup_item = os.path.join(backup_dir, item)
    print("Removing old backup: " + old_backup_item)
    if os.path.isdir(old_backup_item):
      safe_rmtree(old_backup_item, 1)
    else:
      os.remove(old_backup_item)