summaryrefslogtreecommitdiff
path: root/contrib/expire_backups.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/expire_backups.py')
-rwxr-xr-xcontrib/expire_backups.py286
1 files changed, 286 insertions, 0 deletions
diff --git a/contrib/expire_backups.py b/contrib/expire_backups.py
new file mode 100755
index 0000000..971e21b
--- /dev/null
+++ b/contrib/expire_backups.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+'''
+expire_backups.py - this file is part of S3QL (http://s3ql.googlecode.com)
+
+Copyright (C) Nikolaus Rath <Nikolaus@rath.org>
+
+This program can be distributed under the terms of the GNU LGPL.
+'''
+
+from __future__ import division, print_function, absolute_import
+
+
+import sys
+import os
+import logging
+import re
+import textwrap
+import shutil
+import cPickle as pickle
+from datetime import datetime, timedelta
+from collections import defaultdict
+
+# We are running from the S3QL source directory, make sure
+# that we use modules from this directory
+basedir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..'))
+if (os.path.exists(os.path.join(basedir, 'setup.py')) and
+ os.path.exists(os.path.join(basedir, 'src', 's3ql', '__init__.py'))):
+ sys.path = [os.path.join(basedir, 'src')] + sys.path
+
+from s3ql.common import setup_logging, QuietError
+from s3ql.parse_args import ArgumentParser
+from s3ql.cli.remove import main as s3qlrm
+
+log = logging.getLogger('expire_backups')
+
+
+def parse_args(args):
+ '''Parse command line'''
+
+ parser = ArgumentParser(
+ description=textwrap.dedent('''\
+ ``expire_backups.py`` is a program to intelligently remove old backups
+ that are no longer needed.
+
+ To define what backups you want to keep for how long, you define a
+ number of *age ranges*. ``expire_backups`` ensures that you will
+ have at least one backup in each age range at all times. It will keep
+ exactly as many backups as are required for that and delete any
+ backups that become redundant.
+
+ Age ranges are specified by giving a list of range boundaries in terms
+ of backup cycles. Every time you create a new backup, the existing
+ backups age by one cycle.
+
+ Please refer to the S3QL documentation for details.
+ '''))
+
+ parser.add_quiet()
+ parser.add_debug()
+ parser.add_version()
+
+ parser.add_argument('cycles', nargs='+', type=int, metavar='<age>',
+ help='Age range boundaries in terms of backup cycles')
+ parser.add_argument('--state', metavar='<file>', type=str,
+ default='.expire_backups.dat',
+ # Add quotes around default to prevent groff
+ # from choking on leading . generated by buggy
+ # docutils man page generator.
+ help='File to save state information in (default: "%(default)s")')
+ parser.add_argument("-n", action="store_true", default=False,
+ help="Dry run. Just show which backups would be deleted.")
+ parser.add_argument('--reconstruct-state', action='store_true', default=False,
+ help='Try to reconstruct a missing state file from backup dates.')
+
+ parser.add_argument("--use-s3qlrm", action="store_true",
+ help="Use `s3qlrm` command to delete backups.")
+
+ options = parser.parse_args(args)
+
+ if sorted(options.cycles) != options.cycles:
+ parser.error('Age range boundaries must be in increasing order')
+
+ return options
+
+def main(args=None):
+
+ if args is None:
+ args = sys.argv[1:]
+
+ options = parse_args(args)
+ setup_logging(options)
+
+ # Determine available backups
+ backup_list = set(x for x in os.listdir('.')
+ if re.match(r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$', x))
+
+ if not os.path.exists(options.state) and len(backup_list) > 1:
+ if not options.reconstruct_state:
+ raise QuietError('Found more than one backup but no state file! Aborting.')
+ else:
+ log.warn('Trying to reconstruct state file..')
+ state = upgrade_to_state(backup_list)
+ elif not os.path.exists(options.state):
+ log.warn('Creating state file..')
+ state = dict()
+ else:
+ log.info('Reading state...')
+ state = pickle.load(open(options.state, 'rb'))
+
+ to_delete = process_backups(backup_list, state, options.cycles)
+
+ for x in to_delete:
+ log.info('Backup %s is no longer needed, removing...', x)
+ if not options.n:
+ if options.use_s3qlrm:
+ s3qlrm([x])
+ else:
+ shutil.rmtree(x)
+
+ if options.n:
+ log.info('Dry run, not saving state.')
+ else:
+ log.info('Saving state..')
+ pickle.dump(state, open(options.state, 'wb'), pickle.HIGHEST_PROTOCOL)
+
+def upgrade_to_state(backup_list):
+ log.info('Several existing backups detected, trying to convert absolute ages to cycles')
+
+ now = datetime.now()
+ age = dict()
+ for x in sorted(backup_list):
+ age[x] = now - datetime.strptime(x, '%Y-%m-%d_%H:%M:%S')
+ log.info('Backup %s is %s hours old', x, age[x])
+
+ deltas = [ abs(x - y) for x in age.itervalues()
+ for y in age.itervalues() if x != y ]
+ step = min(deltas)
+ log.info('Assuming backup interval of %s hours', step)
+
+ state = dict()
+ for x in sorted(age):
+ state[x] = 0
+ while age[x] > timedelta(0):
+ state[x] += 1
+ age[x] -= step
+ log.info('Backup %s is %d cycles old', x, state[x])
+
+ log.info('State construction complete.')
+ return state
+
+def simulate(args):
+
+ options = parse_args(args)
+ setup_logging(options)
+
+ state = dict()
+ backup_list = set()
+ for i in xrange(50):
+ backup_list.add('backup-%2d' % i)
+ delete = process_backups(backup_list, state, options.cycles)
+ log.info('Deleting %s', delete)
+ backup_list -= delete
+
+ log.info('Available backups on day %d:', i)
+ for x in sorted(backup_list):
+ log.info(x)
+
+def process_backups(backup_list, state, cycles):
+
+ # New backups
+ new_backups = backup_list - set(state)
+ for x in sorted(new_backups):
+ log.info('Found new backup %s', x)
+ for y in state:
+ state[y] += 1
+ state[x] = 0
+
+ for x in state:
+ log.debug('Backup %s has age %d', x, state[x])
+
+ # Missing backups
+ missing_backups = set(state) - backup_list
+ for x in missing_backups:
+ log.warn('Warning: backup %s is missing. Did you delete it manually?', x)
+ del state[x]
+
+ # Ranges
+ ranges = [ (0, cycles[0]) ]
+ for i in range(1, len(cycles)):
+ ranges.append((cycles[i-1], cycles[i]))
+
+ # Go forward in time to see what backups need to be kept
+ simstate = dict()
+ keep = set()
+ missing = defaultdict(list)
+ for step in xrange(max(cycles)):
+
+ log.debug('Considering situation after %d more backups', step)
+ for x in simstate:
+ simstate[x] += 1
+ log.debug('Backup x now has simulated age %d', simstate[x])
+
+ # Add the hypothetical backup that has been made "just now"
+ if step != 0:
+ simstate[step] = 0
+
+ for (min_, max_) in ranges:
+ log.debug('Looking for backup for age range %d to %d', min_, max_)
+
+ # Look in simstate
+ found = False
+ for (backup, age) in simstate.iteritems():
+ if min_ <= age < max_:
+ found = True
+ break
+ if found:
+ # backup and age will be defined
+ #pylint: disable=W0631
+ log.debug('Using backup %s (age %d)', backup, age)
+ continue
+
+ # Look in state
+ for (backup, age) in state.iteritems():
+ age += step
+ if min_ <= age < max_:
+ log.info('Keeping backup %s (current age %d) for age range %d to %d%s',
+ backup, state[backup], min_, max_,
+ (' in %d cycles' % step) if step else '')
+ simstate[backup] = age
+ keep.add(backup)
+ break
+
+ else:
+ if step == 0:
+ log.info('Note: there is currently no backup available '
+ 'for age range %d to %d', min_, max_)
+ else:
+ missing['%d to %d' % (min_, max_)].append(step)
+
+ for range_ in sorted(missing):
+ log.info('Note: there will be no backup for age range %s '
+ 'in (forthcoming) cycle(s): %s',
+ range_, format_list(missing[range_]))
+
+ to_delete = set(state) - keep
+ for x in to_delete:
+ del state[x]
+
+ return to_delete
+
+
+def format_list(l):
+ if not l:
+ return ''
+ l = l[:]
+
+ # Append bogus end element
+ l.append(l[-1] + 2)
+
+ range_start = l.pop(0)
+ cur = range_start
+ res = list()
+ for n in l:
+ if n == cur+1:
+ pass
+ elif range_start == cur:
+ res.append('%d' % cur)
+ elif range_start == cur - 1:
+ res.append('%d' % range_start)
+ res.append('%d' % cur)
+ else:
+ res.append('%d-%d' % (range_start, cur))
+
+ if n != cur+1:
+ range_start = n
+ cur = n
+
+ if len(res) > 1:
+ return ('%s and %s' % (', '.join(res[:-1]), res[-1]))
+ else:
+ return ', '.join(res)
+
+
+if __name__ == '__main__':
+ #simulate(sys.argv[1:])
+ main(sys.argv[1:]) \ No newline at end of file