#!/usr/bin/env python
'''
scramble_db.py - this file is part of S3QL (http://s3ql.googlecode.com)

Copyright (C) Nikolaus Rath <Nikolaus@rath.org>

This program can be distributed under the terms of the GNU GPLv3.
'''
# Data can be restored with:
#
#   from s3ql.metadata import restore_metadata
#   from s3ql.database import Connection
#   restore_metadata(open('s3ql_metadata.dat', 'rb+'), 'data.sqlite')
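#
# Example invocation (a sketch, not from the original file): the storage
# URL below is made up, and the path assumes this script lives one level
# below the S3QL source root (e.g. in contrib/), as the sys.path setup
# further down expects. A local metadata cache for the file system must
# already exist:
#
#   python contrib/scramble_db.py s3://my-bucket/my-fs
#
# The scrambled copy is written to s3ql_metadata.dat in the current
# working directory.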
from __future__ import division, print_function, absolute_import

import cPickle as pickle
import logging
import os
import shutil
import sys
import tempfile
import hashlib
# If we are running from the S3QL source tree, make sure that we use
# the modules from this tree rather than any installed version.
basedir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..'))
if (os.path.exists(os.path.join(basedir, 'setup.py')) and
    os.path.exists(os.path.join(basedir, 'src', 's3ql', '__init__.py'))):
    sys.path = [os.path.join(basedir, 'src')] + sys.path
from s3ql import CURRENT_FS_REV
from s3ql.common import setup_logging, get_backend_cachedir, QuietError
from s3ql.database import Connection
from s3ql.metadata import dump_metadata
from s3ql.parse_args import ArgumentParser
log = logging.getLogger("scramble_db")

DBNAME = 's3ql_metadata.dat'

def parse_args(args):
    parser = ArgumentParser(
        description="Create a metadata copy in which all file and directory "
                    "names, as well as extended attribute names and values, "
                    "have been scrambled. This is intended to preserve privacy "
                    "when a metadata copy needs to be provided to the "
                    "developers for debugging.")

    parser.add_debug_modules()
    parser.add_quiet()
    parser.add_version()
    parser.add_cachedir()
    parser.add_storage_url()

    return parser.parse_args(args)
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Check for cached metadata
    cachepath = get_backend_cachedir(options.storage_url, options.cachedir)
    if not os.path.exists(cachepath + '.params'):
        raise QuietError("No local metadata found.")
    with open(cachepath + '.params', 'rb') as fh:
        param = pickle.load(fh)

    # Check revision
    if param['revision'] < CURRENT_FS_REV:
        raise QuietError('File system revision too old.')
    elif param['revision'] > CURRENT_FS_REV:
        raise QuietError('File system revision too new.')

    if os.path.exists(DBNAME):
        raise QuietError('%s exists, aborting.' % DBNAME)

    # Work on a temporary copy of the cached database, so that the
    # local metadata cache itself is never modified.
    log.info('Copying database...')
    dst = tempfile.NamedTemporaryFile()
    with open(cachepath + '.db', 'rb') as src:
        shutil.copyfileobj(src, dst)
    dst.flush()
    db = Connection(dst.name)

    # Replace all potentially identifying strings (file and directory
    # names, symlink targets, extended attribute values) with their MD5
    # digests. Hashing is deterministic, so the file system structure
    # is preserved while the original names are hidden.
    log.info('Scrambling...')
    md5 = lambda x: hashlib.md5(x).hexdigest()
    for (id_, name) in db.query('SELECT id, name FROM names'):
        db.execute('UPDATE names SET name=? WHERE id=?',
                   (md5(name), id_))

    for (id_, target) in db.query('SELECT inode, target FROM symlink_targets'):
        db.execute('UPDATE symlink_targets SET target=? WHERE inode=?',
                   (md5(target), id_))

    for (id_, value) in db.query('SELECT rowid, value FROM ext_attributes'):
        db.execute('UPDATE ext_attributes SET value=? WHERE rowid=?',
                   (md5(value), id_))

    log.info('Saving...')
    with open(DBNAME, 'wb+') as fh:
        dump_metadata(db, fh)

if __name__ == '__main__':
    main(sys.argv[1:])