diff options
author | Jelmer Vernooij <jelmer@debian.org> | 2014-06-02 01:20:07 +0200 |
---|---|---|
committer | Jelmer Vernooij <jelmer@debian.org> | 2014-06-02 01:20:07 +0200 |
commit | a67a38c665bd646832a3abed7a18e54be7b5a5f0 (patch) | |
tree | 906d2fccdaaf23ea2bcbd449124128fdd7e6a6ca | |
parent | 54750a7567aa723bc19bae98795d8dd5d3df51f6 (diff) |
Imported Upstream version 1.3.0
83 files changed, 4198 insertions, 886 deletions
diff --git a/ABI/tdb-1.2.13.sigs b/ABI/tdb-1.2.13.sigs new file mode 100644 index 0000000..d727f21 --- /dev/null +++ b/ABI/tdb-1.2.13.sigs @@ -0,0 +1,67 @@ +tdb_add_flags: void (struct tdb_context *, unsigned int) +tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA) +tdb_chainlock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_read: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA) +tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_close: int (struct tdb_context *) +tdb_delete: int (struct tdb_context *, TDB_DATA) +tdb_dump_all: void (struct tdb_context *) +tdb_enable_seqnum: void (struct tdb_context *) +tdb_error: enum TDB_ERROR (struct tdb_context *) +tdb_errorstr: const char *(struct tdb_context *) +tdb_exists: int (struct tdb_context *, TDB_DATA) +tdb_fd: int (struct tdb_context *) +tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_firstkey: TDB_DATA (struct tdb_context *) +tdb_freelist_size: int (struct tdb_context *) +tdb_get_flags: int (struct tdb_context *) +tdb_get_logging_private: void *(struct tdb_context *) +tdb_get_seqnum: int (struct tdb_context *) +tdb_hash_size: int (struct tdb_context *) +tdb_increment_seqnum_nonblock: void (struct tdb_context *) +tdb_jenkins_hash: unsigned int (TDB_DATA *) +tdb_lock_nonblock: int (struct tdb_context *, int, int) +tdb_lockall: int (struct tdb_context *) +tdb_lockall_mark: int (struct tdb_context *) +tdb_lockall_nonblock: int (struct tdb_context *) +tdb_lockall_read: int (struct tdb_context *) +tdb_lockall_read_nonblock: int (struct tdb_context *) +tdb_lockall_unmark: int (struct tdb_context *) +tdb_log_fn: tdb_log_func (struct tdb_context *) +tdb_map_size: size_t (struct 
tdb_context *) +tdb_name: const char *(struct tdb_context *) +tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_null: dptr = 0xXXXX, dsize = 0 +tdb_open: struct tdb_context *(const char *, int, int, int, mode_t) +tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func) +tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_printfreelist: int (struct tdb_context *) +tdb_remove_flags: void (struct tdb_context *, unsigned int) +tdb_reopen: int (struct tdb_context *) +tdb_reopen_all: int (int) +tdb_repack: int (struct tdb_context *) +tdb_rescue: int (struct tdb_context *, void (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *) +tdb_set_max_dead: void (struct tdb_context *, int) +tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *) +tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int) +tdb_summary: char *(struct tdb_context *) +tdb_transaction_cancel: int (struct tdb_context *) +tdb_transaction_commit: int (struct tdb_context *) +tdb_transaction_prepare_commit: int (struct tdb_context *) +tdb_transaction_start: int (struct tdb_context *) +tdb_transaction_start_nonblock: int (struct tdb_context *) +tdb_transaction_write_lock_mark: int (struct tdb_context *) +tdb_transaction_write_lock_unmark: int (struct tdb_context *) +tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_unlock: int (struct tdb_context *, int, int) +tdb_unlockall: int (struct tdb_context *) +tdb_unlockall_read: int (struct tdb_context *) +tdb_validate_freelist: int (struct tdb_context *, int *) +tdb_wipe_all: int (struct tdb_context *) diff --git a/ABI/tdb-1.3.0.sigs b/ABI/tdb-1.3.0.sigs new file mode 100644 index 0000000..7d3e469 --- /dev/null +++ b/ABI/tdb-1.3.0.sigs @@ -0,0 
+1,68 @@ +tdb_add_flags: void (struct tdb_context *, unsigned int) +tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA) +tdb_chainlock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_read: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA) +tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_close: int (struct tdb_context *) +tdb_delete: int (struct tdb_context *, TDB_DATA) +tdb_dump_all: void (struct tdb_context *) +tdb_enable_seqnum: void (struct tdb_context *) +tdb_error: enum TDB_ERROR (struct tdb_context *) +tdb_errorstr: const char *(struct tdb_context *) +tdb_exists: int (struct tdb_context *, TDB_DATA) +tdb_fd: int (struct tdb_context *) +tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_firstkey: TDB_DATA (struct tdb_context *) +tdb_freelist_size: int (struct tdb_context *) +tdb_get_flags: int (struct tdb_context *) +tdb_get_logging_private: void *(struct tdb_context *) +tdb_get_seqnum: int (struct tdb_context *) +tdb_hash_size: int (struct tdb_context *) +tdb_increment_seqnum_nonblock: void (struct tdb_context *) +tdb_jenkins_hash: unsigned int (TDB_DATA *) +tdb_lock_nonblock: int (struct tdb_context *, int, int) +tdb_lockall: int (struct tdb_context *) +tdb_lockall_mark: int (struct tdb_context *) +tdb_lockall_nonblock: int (struct tdb_context *) +tdb_lockall_read: int (struct tdb_context *) +tdb_lockall_read_nonblock: int (struct tdb_context *) +tdb_lockall_unmark: int (struct tdb_context *) +tdb_log_fn: tdb_log_func (struct tdb_context *) +tdb_map_size: size_t (struct tdb_context *) +tdb_name: const char *(struct tdb_context *) +tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_null: dptr = 0xXXXX, dsize = 0 
+tdb_open: struct tdb_context *(const char *, int, int, int, mode_t) +tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func) +tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_printfreelist: int (struct tdb_context *) +tdb_remove_flags: void (struct tdb_context *, unsigned int) +tdb_reopen: int (struct tdb_context *) +tdb_reopen_all: int (int) +tdb_repack: int (struct tdb_context *) +tdb_rescue: int (struct tdb_context *, void (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_runtime_check_for_robust_mutexes: bool (void) +tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *) +tdb_set_max_dead: void (struct tdb_context *, int) +tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *) +tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int) +tdb_summary: char *(struct tdb_context *) +tdb_transaction_cancel: int (struct tdb_context *) +tdb_transaction_commit: int (struct tdb_context *) +tdb_transaction_prepare_commit: int (struct tdb_context *) +tdb_transaction_start: int (struct tdb_context *) +tdb_transaction_start_nonblock: int (struct tdb_context *) +tdb_transaction_write_lock_mark: int (struct tdb_context *) +tdb_transaction_write_lock_unmark: int (struct tdb_context *) +tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_unlock: int (struct tdb_context *, int, int) +tdb_unlockall: int (struct tdb_context *) +tdb_unlockall_read: int (struct tdb_context *) +tdb_validate_freelist: int (struct tdb_context *, int *) +tdb_wipe_all: int (struct tdb_context *) diff --git a/buildtools/wafadmin/Tools/ccroot.py b/buildtools/wafadmin/Tools/ccroot.py index f54c82f..264bdc7 100644 --- a/buildtools/wafadmin/Tools/ccroot.py +++ b/buildtools/wafadmin/Tools/ccroot.py @@ -597,9 +597,12 @@ def apply_vnum(self): 
path = self.install_path if not path: return - bld.install_as(path + os.sep + name3, node, env=self.env) - bld.symlink_as(path + os.sep + name2, name3) - bld.symlink_as(path + os.sep + libname, name3) + if self.env.DEST_OS == 'openbsd': + bld.install_as(path + os.sep + name2, node, env=self.env, chmod=self.link_task.chmod) + else: + bld.install_as(path + os.sep + name3, node, env=self.env) + bld.symlink_as(path + os.sep + name2, name3) + bld.symlink_as(path + os.sep + libname, name3) # the following task is just to enable execution from the build dir :-/ self.create_task('vnum', node, [node.parent.find_or_declare(name2), node.parent.find_or_declare(name3)]) diff --git a/buildtools/wafadmin/Tools/config_c.py b/buildtools/wafadmin/Tools/config_c.py index a32d8aa..d0bc617 100644 --- a/buildtools/wafadmin/Tools/config_c.py +++ b/buildtools/wafadmin/Tools/config_c.py @@ -73,6 +73,19 @@ def parse_flags(line, uselib, env): app('CCFLAGS_' + uselib, x) app('CXXFLAGS_' + uselib, x) app('LINKFLAGS_' + uselib, x) + # + # NOTE on special treatment of -Wl,-R and -Wl,-rpath: + # + # It is important to not put a library provided RPATH + # into the LINKFLAGS but in the RPATH instead, since + # the provided LINKFLAGS get prepended to our own internal + # RPATH later, and hence can potentially lead to linking + # in too old versions of our internal libs. + # + elif x.startswith('-Wl,-R'): + app('RPATH_' + uselib, x[6:]) + elif x.startswith('-Wl,-rpath,'): + app('RPATH_' + uselib, x[11:]) elif x.startswith('-Wl'): app('LINKFLAGS_' + uselib, x) elif x.startswith('-m') or x.startswith('-f'): diff --git a/buildtools/wafadmin/Tools/perl.py b/buildtools/wafadmin/Tools/perl.py index a6787a8..0f34e79 100644 --- a/buildtools/wafadmin/Tools/perl.py +++ b/buildtools/wafadmin/Tools/perl.py @@ -98,12 +98,53 @@ def check_perl_ext_devel(conf): conf.env.EXTUTILS_TYPEMAP = read_out('print "$Config{privlib}/ExtUtils/typemap"') conf.env.perlext_PATTERN = '%s.' 
+ read_out('print $Config{dlext}')[0] - if getattr(Options.options, 'perlarchdir', None): - conf.env.ARCHDIR_PERL = Options.options.perlarchdir - else: - conf.env.ARCHDIR_PERL = read_out('print $Config{sitearch}')[0] + def try_any(keys): + for k in keys: + conf.start_msg("Checking for perl $Config{%s}:" % k) + try: + v = read_out('print $Config{%s}' % k)[0] + conf.end_msg("'%s'" % (v), 'GREEN') + return v + except IndexError: + conf.end_msg(False, 'YELLOW') + pass + return None + + perl_arch_install_dir = None + if getattr(Options.options, 'perl_arch_install_dir', None): + perl_arch_install_dir = Options.options.perl_arch_install_dir + if perl_arch_install_dir is None: + perl_arch_install_dir = try_any(['vendorarch', 'sitearch', 'archlib']) + if perl_arch_install_dir is None: + conf.fatal('No perl arch install directory autodetected.' + + 'Please define it with --with-perl-arch-install-dir.') + conf.start_msg("PERL_ARCH_INSTALL_DIR: ") + conf.end_msg("'%s'" % (perl_arch_install_dir), 'GREEN') + conf.env.PERL_ARCH_INSTALL_DIR = perl_arch_install_dir + + perl_lib_install_dir = None + if getattr(Options.options, 'perl_lib_install_dir', None): + perl_lib_install_dir = Options.options.perl_lib_install_dir + if perl_lib_install_dir is None: + perl_lib_install_dir = try_any(['vendorlib', 'sitelib', 'privlib']) + if perl_lib_install_dir is None: + conf.fatal('No perl lib install directory autodetected. 
' + + 'Please define it with --with-perl-lib-install-dir.') + conf.start_msg("PERL_LIB_INSTALL_DIR: ") + conf.end_msg("'%s'" % (perl_lib_install_dir), 'GREEN') + conf.env.PERL_LIB_INSTALL_DIR = perl_lib_install_dir def set_options(opt): opt.add_option("--with-perl-binary", type="string", dest="perlbinary", help = 'Specify alternate perl binary', default=None) - opt.add_option("--with-perl-archdir", type="string", dest="perlarchdir", help = 'Specify directory where to install arch specific files', default=None) + opt.add_option("--with-perl-arch-install-dir", + type="string", + dest="perl_arch_install_dir", + help = ('Specify directory where to install arch specific files'), + default=None) + + opt.add_option("--with-perl-lib-install-dir", + type="string", + dest="perl_lib_install_dir", + help = ('Specify directory where to install vendor specific files'), + default=None) diff --git a/buildtools/wafadmin/Tools/python.py b/buildtools/wafadmin/Tools/python.py index a15e1f6..35c61c2 100644 --- a/buildtools/wafadmin/Tools/python.py +++ b/buildtools/wafadmin/Tools/python.py @@ -10,6 +10,7 @@ import TaskGen, Utils, Options from Logs import debug, warn, info from TaskGen import extension, before, after, feature from Configure import conf +from config_c import parse_flags EXT_PY = ['.py'] FRAG_2 = ''' @@ -192,6 +193,19 @@ MACOSX_DEPLOYMENT_TARGET = %r """ % (python, python_prefix, python_SO, python_SYSLIBS, python_LDFLAGS, python_SHLIBS, python_LIBDIR, python_LIBPL, INCLUDEPY, Py_ENABLE_SHARED, python_MACOSX_DEPLOYMENT_TARGET)) + # Allow some python overrides from env vars for cross-compiling + os_env = dict(os.environ) + + override_python_LDFLAGS = os_env.get('python_LDFLAGS', None) + if override_python_LDFLAGS is not None: + conf.log.write("python_LDFLAGS override from environment = %r\n" % (override_python_LDFLAGS)) + python_LDFLAGS = override_python_LDFLAGS + + override_python_LIBDIR = os_env.get('python_LIBDIR', None) + if override_python_LIBDIR is not None: + 
conf.log.write("python_LIBDIR override from environment = %r\n" % (override_python_LIBDIR)) + python_LIBDIR = override_python_LIBDIR + if python_MACOSX_DEPLOYMENT_TARGET: conf.env['MACOSX_DEPLOYMENT_TARGET'] = python_MACOSX_DEPLOYMENT_TARGET conf.environ['MACOSX_DEPLOYMENT_TARGET'] = python_MACOSX_DEPLOYMENT_TARGET @@ -213,7 +227,7 @@ MACOSX_DEPLOYMENT_TARGET = %r env.append_value('LINKFLAGS_PYEMBED', lib) if Options.platform != 'darwin' and python_LDFLAGS: - env.append_value('LINKFLAGS_PYEMBED', python_LDFLAGS.split()) + parse_flags(python_LDFLAGS, 'PYEMBED', env) result = False name = 'python' + env['PYTHON_VERSION'] diff --git a/buildtools/wafsamba/samba_autoconf.py b/buildtools/wafsamba/samba_autoconf.py index fe110bd..f60ce9d 100644 --- a/buildtools/wafsamba/samba_autoconf.py +++ b/buildtools/wafsamba/samba_autoconf.py @@ -304,23 +304,27 @@ def CHECK_FUNCS(conf, list, link=True, lib=None, headers=None): @conf -def CHECK_SIZEOF(conf, vars, headers=None, define=None): +def CHECK_SIZEOF(conf, vars, headers=None, define=None, critical=True): '''check the size of a type''' - ret = True for v in TO_LIST(vars): v_define = define + ret = False if v_define is None: v_define = 'SIZEOF_%s' % v.upper().replace(' ', '_') - if not CHECK_CODE(conf, - 'printf("%%u", (unsigned)sizeof(%s))' % v, - define=v_define, - execute=True, - define_ret=True, - quote=False, - headers=headers, - local_include=False, - msg="Checking size of %s" % v): - ret = False + for size in list((1, 2, 4, 8, 16, 32)): + if CHECK_CODE(conf, + 'static int test_array[1 - 2 * !(((long int)(sizeof(%s))) <= %d)];' % (v, size), + define=v_define, + quote=False, + headers=headers, + local_include=False, + msg="Checking if size of %s == %d" % (v, size)): + conf.DEFINE(v_define, size) + ret = True + break + if not ret and critical: + Logs.error("Couldn't determine size of '%s'" % v) + sys.exit(1) return ret @conf @@ -495,6 +499,14 @@ def CONFIG_SET(conf, option): if v == (): return False return True + +@conf +def 
CONFIG_RESET(conf, option): + if option not in conf.env: + return + del conf.env[option] + +Build.BuildContext.CONFIG_RESET = CONFIG_RESET Build.BuildContext.CONFIG_SET = CONFIG_SET Build.BuildContext.CONFIG_GET = CONFIG_GET @@ -559,7 +571,7 @@ int foo() if set_target: SET_TARGET_TYPE(conf, lib, 'EMPTY') else: - conf.define('HAVE_LIB%s' % lib.upper().replace('-','_'), 1) + conf.define('HAVE_LIB%s' % lib.upper().replace('-','_').replace('.','_'), 1) conf.env['LIB_' + lib.upper()] = lib if set_target: conf.SET_TARGET_TYPE(lib, 'SYSLIB') @@ -635,16 +647,32 @@ def SAMBA_CONFIG_H(conf, path=None): return if Options.options.debug: - conf.ADD_CFLAGS('-g', - testflags=True) + conf.ADD_CFLAGS('-g', testflags=True) if Options.options.developer: + conf.env.DEVELOPER_MODE = True + + conf.ADD_CFLAGS('-g', testflags=True) + conf.ADD_CFLAGS('-Wall', testflags=True) + conf.ADD_CFLAGS('-Wshadow', testflags=True) + conf.ADD_CFLAGS('-Wmissing-prototypes', testflags=True) + conf.ADD_CFLAGS('-Wcast-align -Wcast-qual', testflags=True) + conf.ADD_CFLAGS('-fno-common', testflags=True) + + conf.ADD_CFLAGS('-Werror=address', testflags=True) # we add these here to ensure that -Wstrict-prototypes is not set during configure - conf.ADD_CFLAGS('-Wall -g -Wshadow -Werror=strict-prototypes -Wstrict-prototypes -Werror=pointer-arith -Wpointer-arith -Wcast-align -Werror=write-strings -Wwrite-strings -Werror-implicit-function-declaration -Wformat=2 -Wno-format-y2k -Wmissing-prototypes -fno-common -Werror=address', + conf.ADD_CFLAGS('-Werror=strict-prototypes -Wstrict-prototypes', + testflags=True) + conf.ADD_CFLAGS('-Werror=write-strings -Wwrite-strings', + testflags=True) + conf.ADD_CFLAGS('-Werror-implicit-function-declaration', + testflags=True) + conf.ADD_CFLAGS('-Werror=pointer-arith -Wpointer-arith', + testflags=True) + conf.ADD_CFLAGS('-Werror=declaration-after-statement -Wdeclaration-after-statement', testflags=True) - conf.ADD_CFLAGS('-Wcast-qual', testflags=True) - conf.env.DEVELOPER_MODE = 
True + conf.ADD_CFLAGS('-Wformat=2 -Wno-format-y2k', testflags=True) # This check is because for ldb_search(), a NULL format string # is not an error, but some compilers complain about that. if CHECK_CFLAGS(conf, ["-Werror=format", "-Wformat=2"], ''' @@ -661,7 +689,7 @@ int main(void) { conf.env['EXTRA_CFLAGS'].extend(TO_LIST("-Werror=format")) if Options.options.picky_developer: - conf.ADD_CFLAGS('-Werror', testflags=True) + conf.ADD_NAMED_CFLAGS('PICKY_CFLAGS', '-Werror', testflags=True) if Options.options.fatal_errors: conf.ADD_CFLAGS('-Wfatal-errors', testflags=True) @@ -686,7 +714,7 @@ def CONFIG_PATH(conf, name, default): conf.env[name] = conf.env['PREFIX'] + default @conf -def ADD_CFLAGS(conf, flags, testflags=False): +def ADD_NAMED_CFLAGS(conf, name, flags, testflags=False): '''add some CFLAGS to the command line optionally set testflags to ensure all the flags work ''' @@ -696,9 +724,16 @@ def ADD_CFLAGS(conf, flags, testflags=False): if CHECK_CFLAGS(conf, f): ok_flags.append(f) flags = ok_flags - if not 'EXTRA_CFLAGS' in conf.env: - conf.env['EXTRA_CFLAGS'] = [] - conf.env['EXTRA_CFLAGS'].extend(TO_LIST(flags)) + if not name in conf.env: + conf.env[name] = [] + conf.env[name].extend(TO_LIST(flags)) + +@conf +def ADD_CFLAGS(conf, flags, testflags=False): + '''add some CFLAGS to the command line + optionally set testflags to ensure all the flags work + ''' + ADD_NAMED_CFLAGS(conf, 'EXTRA_CFLAGS', flags, testflags=testflags) @conf def ADD_LDFLAGS(conf, flags, testflags=False): @@ -728,14 +763,17 @@ def ADD_EXTRA_INCLUDES(conf, includes): -def CURRENT_CFLAGS(bld, target, cflags, hide_symbols=False): +def CURRENT_CFLAGS(bld, target, cflags, allow_warnings=True, hide_symbols=False): '''work out the current flags. 
local flags are added first''' + ret = TO_LIST(cflags) if not 'EXTRA_CFLAGS' in bld.env: list = [] else: list = bld.env['EXTRA_CFLAGS']; - ret = TO_LIST(cflags) ret.extend(list) + if not allow_warnings and 'PICKY_CFLAGS' in bld.env: + list = bld.env['PICKY_CFLAGS']; + ret.extend(list) if hide_symbols and bld.env.HAVE_VISIBILITY_ATTR: ret.append('-fvisibility=hidden') return ret diff --git a/buildtools/wafsamba/samba_bundled.py b/buildtools/wafsamba/samba_bundled.py index afcf708..45946d5 100644 --- a/buildtools/wafsamba/samba_bundled.py +++ b/buildtools/wafsamba/samba_bundled.py @@ -70,14 +70,23 @@ def minimum_library_version(conf, libname, default): @conf def LIB_MAY_BE_BUNDLED(conf, libname): - return ('NONE' not in conf.env.BUNDLED_LIBS and - '!%s' % libname not in conf.env.BUNDLED_LIBS) - + if libname in conf.env.BUNDLED_LIBS: + return True + if '!%s' % libname in conf.env.BUNDLED_LIBS: + return False + if 'NONE' in conf.env.BUNDLED_LIBS: + return False + return True @conf def LIB_MUST_BE_BUNDLED(conf, libname): - return ('ALL' in conf.env.BUNDLED_LIBS or - libname in conf.env.BUNDLED_LIBS) + if libname in conf.env.BUNDLED_LIBS: + return True + if '!%s' % libname in conf.env.BUNDLED_LIBS: + return False + if 'ALL' in conf.env.BUNDLED_LIBS: + return True + return False @conf def LIB_MUST_BE_PRIVATE(conf, libname): @@ -102,54 +111,18 @@ def CHECK_BUNDLED_SYSTEM_PKG(conf, libname, minversion='0.0.0', This only tries using pkg-config ''' - if conf.LIB_MUST_BE_BUNDLED(libname): - return False - found = 'FOUND_SYSTEMLIB_%s' % libname - if found in conf.env: - return conf.env[found] - - # see if the library should only use a system version if another dependent - # system version is found. 
That prevents possible use of mixed library - # versions - if onlyif: - missing = conf.CHECK_PREREQUISITES(onlyif) - if missing: - if not conf.LIB_MAY_BE_BUNDLED(libname): - Logs.error('ERROR: Use of system library %s depends on missing system library/libraries %r' % (libname, missing)) - sys.exit(1) - conf.env[found] = False - return False - - minversion = minimum_library_version(conf, libname, minversion) - - msg = 'Checking for system %s' % libname - if minversion != '0.0.0': - msg += ' >= %s' % minversion - - if pkg is None: - pkg = libname - - if conf.check_cfg(package=pkg, - args='"%s >= %s" --cflags --libs' % (pkg, minversion), - msg=msg, uselib_store=libname.upper()): - conf.SET_TARGET_TYPE(libname, 'SYSLIB') - conf.env[found] = True - if implied_deps: - conf.SET_SYSLIB_DEPS(libname, implied_deps) - return True - conf.env[found] = False - if not conf.LIB_MAY_BE_BUNDLED(libname): - Logs.error('ERROR: System library %s of version %s not found, and bundling disabled' % (libname, minversion)) - sys.exit(1) - return False - + return conf.CHECK_BUNDLED_SYSTEM(libname, + minversion=minversion, + onlyif=onlyif, + implied_deps=implied_deps, + pkg=pkg) @runonce @conf def CHECK_BUNDLED_SYSTEM(conf, libname, minversion='0.0.0', - checkfunctions=None, headers=None, + checkfunctions=None, headers=None, checkcode=None, onlyif=None, implied_deps=None, - require_headers=True): + require_headers=True, pkg=None, set_target=True): '''check if a library is available as a system library. 
this first tries via pkg-config, then if that fails tries by testing for a specified function in the specified lib @@ -160,14 +133,25 @@ def CHECK_BUNDLED_SYSTEM(conf, libname, minversion='0.0.0', if found in conf.env: return conf.env[found] - def check_functions_headers(): + def check_functions_headers_code(): '''helper function for CHECK_BUNDLED_SYSTEM''' - if checkfunctions is None: - return True if require_headers and headers and not conf.CHECK_HEADERS(headers, lib=libname): return False - return conf.CHECK_FUNCS_IN(checkfunctions, libname, headers=headers, - empty_decl=False, set_target=False) + if checkfunctions is not None: + ok = conf.CHECK_FUNCS_IN(checkfunctions, libname, headers=headers, + empty_decl=False, set_target=False) + if not ok: + return False + if checkcode is not None: + define='CHECK_BUNDLED_SYSTEM_%s' % libname.upper() + ok = conf.CHECK_CODE(checkcode, lib=libname, + headers=headers, local_include=False, + msg=msg, define=define) + conf.CONFIG_RESET(define) + if not ok: + return False + return True + # see if the library should only use a system version if another dependent # system version is found. 
That prevents possible use of mixed library @@ -187,22 +171,28 @@ def CHECK_BUNDLED_SYSTEM(conf, libname, minversion='0.0.0', if minversion != '0.0.0': msg += ' >= %s' % minversion + uselib_store=libname.upper() + if pkg is None: + pkg = libname + # try pkgconfig first - if (conf.check_cfg(package=libname, - args='"%s >= %s" --cflags --libs' % (libname, minversion), - msg=msg) and - check_functions_headers()): - conf.SET_TARGET_TYPE(libname, 'SYSLIB') + if (conf.check_cfg(package=pkg, + args='"%s >= %s" --cflags --libs' % (pkg, minversion), + msg=msg, uselib_store=uselib_store) and + check_functions_headers_code()): + if set_target: + conf.SET_TARGET_TYPE(libname, 'SYSLIB') conf.env[found] = True if implied_deps: conf.SET_SYSLIB_DEPS(libname, implied_deps) return True if checkfunctions is not None: - if check_functions_headers(): + if check_functions_headers_code(): conf.env[found] = True if implied_deps: conf.SET_SYSLIB_DEPS(libname, implied_deps) - conf.SET_TARGET_TYPE(libname, 'SYSLIB') + if set_target: + conf.SET_TARGET_TYPE(libname, 'SYSLIB') return True conf.env[found] = False if not conf.LIB_MAY_BE_BUNDLED(libname): diff --git a/buildtools/wafsamba/samba_deps.py b/buildtools/wafsamba/samba_deps.py index 74a70cf..c00744e 100644 --- a/buildtools/wafsamba/samba_deps.py +++ b/buildtools/wafsamba/samba_deps.py @@ -224,7 +224,8 @@ def add_init_functions(self): sname = sname.replace('/','_') cflags.append('-DSTATIC_%s_MODULES=%s' % (sname, sentinel)) if sentinel == 'NULL': - cflags.append('-DSTATIC_%s_MODULES_PROTO=' % sname) + proto = "extern void __%s_dummy_module_proto(void)" % (sname) + cflags.append('-DSTATIC_%s_MODULES_PROTO=%s' % (sname, proto)) self.ccflags = cflags return @@ -238,12 +239,14 @@ def add_init_functions(self): if init_fn_list == []: cflags.append('-DSTATIC_%s_MODULES=%s' % (m, sentinel)) if sentinel == 'NULL': - cflags.append('-DSTATIC_%s_MODULES_PROTO' % m) + proto = "extern void __%s_dummy_module_proto(void)" % (m) + 
cflags.append('-DSTATIC_%s_MODULES_PROTO=%s' % (m, proto)) else: cflags.append('-DSTATIC_%s_MODULES=%s' % (m, ','.join(init_fn_list) + ',' + sentinel)) proto='' for f in init_fn_list: - proto = proto + '_MODULE_PROTO(%s)' % f + proto += '_MODULE_PROTO(%s)' % f + proto += "extern void __%s_dummy_module_proto(void)" % (m) cflags.append('-DSTATIC_%s_MODULES_PROTO=%s' % (m, proto)) self.ccflags = cflags diff --git a/buildtools/wafsamba/samba_patterns.py b/buildtools/wafsamba/samba_patterns.py index 55ed540..1e2df90 100644 --- a/buildtools/wafsamba/samba_patterns.py +++ b/buildtools/wafsamba/samba_patterns.py @@ -59,6 +59,8 @@ def write_build_options_header(fp): fp.write("#include \"includes.h\"\n") fp.write("#include \"build_env.h\"\n") fp.write("#include \"dynconfig/dynconfig.h\"\n") + fp.write("#include \"lib/cluster_support.h\"\n") + fp.write("\n") fp.write("static int output(bool screen, const char *format, ...) PRINTF_ATTRIBUTE(2,3);\n") fp.write("void build_options(bool screen);\n") @@ -129,6 +131,9 @@ def write_build_options_header(fp): fp.write("\n") def write_build_options_footer(fp): + fp.write(" /* Output the sizes of the various cluster features */\n") + fp.write(" output(screen, \"\\n%s\", cluster_support_features());\n") + fp.write("\n") fp.write(" /* Output the sizes of the various types */\n") fp.write(" output(screen, \"\\nType sizes:\\n\");\n") fp.write(" output(screen, \" sizeof(char): %lu\\n\",(unsigned long)sizeof(char));\n") diff --git a/buildtools/wafsamba/samba_python.py b/buildtools/wafsamba/samba_python.py index aee9547..1ec2f7b 100644 --- a/buildtools/wafsamba/samba_python.py +++ b/buildtools/wafsamba/samba_python.py @@ -63,6 +63,7 @@ def SAMBA_PYTHON(bld, name, target_type='PYTHON', install_path='${PYTHONARCHDIR}', allow_undefined_symbols=True, + allow_warnings=True, enabled=enabled) Build.BuildContext.SAMBA_PYTHON = SAMBA_PYTHON diff --git a/buildtools/wafsamba/samba_utils.py b/buildtools/wafsamba/samba_utils.py index c1ac7e2..0b0bb48 
100644 --- a/buildtools/wafsamba/samba_utils.py +++ b/buildtools/wafsamba/samba_utils.py @@ -391,7 +391,7 @@ try: # Even if hashlib.md5 exists, it may be unusable. # Try to use MD5 function. In FIPS mode this will cause an exception # and we'll get to the replacement code - foo = md5.md5('abcd') + foo = md5('abcd') except: try: import md5 diff --git a/buildtools/wafsamba/wafsamba.py b/buildtools/wafsamba/wafsamba.py index aaa0939..8ec4cb8 100644 --- a/buildtools/wafsamba/wafsamba.py +++ b/buildtools/wafsamba/wafsamba.py @@ -130,6 +130,7 @@ def SAMBA_LIBRARY(bld, libname, source, private_library=False, grouping_library=False, allow_undefined_symbols=False, + allow_warnings=True, enabled=True): '''define a Samba library''' @@ -176,6 +177,7 @@ def SAMBA_LIBRARY(bld, libname, source, autoproto_extra_source=autoproto_extra_source, depends_on = depends_on, hide_symbols = hide_symbols, + allow_warnings = allow_warnings, pyembed = pyembed, pyext = pyext, local_include = local_include, @@ -348,6 +350,8 @@ def SAMBA_BINARY(bld, binname, source, if bld.env['ENABLE_PIE'] == True: pie_cflags += ' -fPIE' pie_ldflags.extend(TO_LIST('-pie')) + if bld.env['ENABLE_RELRO'] == True: + pie_ldflags.extend(TO_LIST('-Wl,-z,relro,-z,now')) # first create a target for building the object files for this binary # by separating in this way, we avoid recompiling the C files @@ -413,7 +417,8 @@ def SAMBA_MODULE(bld, modname, source, enabled=True, pyembed=False, manpages=None, - allow_undefined_symbols=False + allow_undefined_symbols=False, + allow_warnings=True ): '''define a Samba module.''' @@ -433,6 +438,7 @@ def SAMBA_MODULE(bld, modname, source, cflags=cflags, local_include=local_include, global_include=global_include, + allow_warnings=allow_warnings, enabled=enabled) bld.ADD_INIT_FUNCTION(subsystem, modname, init_function) @@ -479,7 +485,8 @@ def SAMBA_MODULE(bld, modname, source, install_path="${MODULESDIR}/%s" % subsystem, pyembed=pyembed, manpages=manpages, - 
allow_undefined_symbols=allow_undefined_symbols + allow_undefined_symbols=allow_undefined_symbols, + allow_warnings=allow_warnings ) @@ -511,6 +518,7 @@ def SAMBA_SUBSYSTEM(bld, modname, source, vars=None, subdir=None, hide_symbols=False, + allow_warnings=True, pyext=False, pyembed=False): '''define a Samba subsystem''' @@ -546,7 +554,9 @@ def SAMBA_SUBSYSTEM(bld, modname, source, features = features, source = source, target = modname, - samba_cflags = CURRENT_CFLAGS(bld, modname, cflags, hide_symbols=hide_symbols), + samba_cflags = CURRENT_CFLAGS(bld, modname, cflags, + allow_warnings=allow_warnings, + hide_symbols=hide_symbols), depends_on = depends_on, samba_deps = TO_LIST(deps), samba_includes = includes, @@ -804,7 +814,7 @@ def MANPAGES(bld, manpages, install): bld.INSTALL_FILES('${MANDIR}/man%s' % m[-1], m, flat=True) Build.BuildContext.MANPAGES = MANPAGES -def SAMBAMANPAGES(bld, manpages): +def SAMBAMANPAGES(bld, manpages, extra_source=None): '''build and install manual pages''' bld.env.SAMBA_EXPAND_XSL = bld.srcnode.abspath() + '/docs-xml/xslt/expand-sambadoc.xsl' bld.env.SAMBA_MAN_XSL = bld.srcnode.abspath() + '/docs-xml/xslt/man.xsl' @@ -812,13 +822,15 @@ def SAMBAMANPAGES(bld, manpages): for m in manpages.split(): source = m + '.xml' + if extra_source is not None: + source = [source, extra_source] bld.SAMBA_GENERATOR(m, source=source, target=m, group='final', rule='''XML_CATALOG_FILES="${SAMBA_CATALOGS}" export XML_CATALOG_FILES - ${XSLTPROC} --xinclude --stringparam noreference 0 -o ${TGT}.xml --nonet ${SAMBA_EXPAND_XSL} ${SRC} + ${XSLTPROC} --xinclude --stringparam noreference 0 -o ${TGT}.xml --nonet ${SAMBA_EXPAND_XSL} ${SRC[0].abspath(env)} ${XSLTPROC} --nonet -o ${TGT} ${SAMBA_MAN_XSL} ${TGT}.xml''' ) bld.INSTALL_FILES('${MANDIR}/man%s' % m[-1], m, flat=True) diff --git a/buildtools/wafsamba/wscript b/buildtools/wafsamba/wscript index 17aef27..1a2cfe6 100755 --- a/buildtools/wafsamba/wscript +++ b/buildtools/wafsamba/wscript @@ -80,8 +80,10 @@ def 
set_options(opt): match = ['Checking for library iconv', 'Checking for iconv_open', 'Checking for header iconv.h']) opt.add_option('--with-gettext', help='additional directory to search for gettext', - action='store', dest='gettext_location', default='/usr/local', - match = ['Checking for library intl', 'Checking for header libintl.h']) + action='store', dest='gettext_location', default='None') + opt.add_option('--without-gettext', + help=("Disable use of gettext"), + action="store_true", dest='disable_gettext', default=False) gr = opt.option_group('developer options') @@ -322,7 +324,7 @@ def configure(conf): else: conf.env.HAVE_LD_VERSION_SCRIPT = False - if sys.platform == "aix5" or sys.platform == "aix6": + if sys.platform.startswith('aix'): conf.DEFINE('_ALL_SOURCE', 1, add_to_cflags=True) # Might not be needed if ALL_SOURCE is defined # conf.DEFINE('_XOPEN_SOURCE', 600, add_to_cflags=True) @@ -388,9 +390,68 @@ def configure(conf): else: conf.define('SHLIBEXT', "so", quote=True) - conf.CHECK_CODE('long one = 1; return ((char *)(&one))[0]', - execute=True, - define='WORDS_BIGENDIAN') + # First try a header check for cross-compile friendlyness + conf.CHECK_CODE(code = """#ifdef __BYTE_ORDER + #define B __BYTE_ORDER + #elif defined(BYTE_ORDER) + #define B BYTE_ORDER + #endif + + #ifdef __LITTLE_ENDIAN + #define LITTLE __LITTLE_ENDIAN + #elif defined(LITTLE_ENDIAN) + #define LITTLE LITTLE_ENDIAN + #endif + + #if !defined(LITTLE) || !defined(B) || LITTLE != B + #error Not little endian. + #endif + int main(void) { return 0; }""", + addmain=False, + headers="endian.h sys/endian.h", + define="HAVE_LITTLE_ENDIAN") + conf.CHECK_CODE(code = """#ifdef __BYTE_ORDER + #define B __BYTE_ORDER + #elif defined(BYTE_ORDER) + #define B BYTE_ORDER + #endif + + #ifdef __BIG_ENDIAN + #define BIG __BIG_ENDIAN + #elif defined(BIG_ENDIAN) + #define BIG BIG_ENDIAN + #endif + + #if !defined(BIG) || !defined(B) || BIG != B + #error Not big endian. 
+ #endif + int main(void) { return 0; }""", + addmain=False, + headers="endian.h sys/endian.h", + define="HAVE_BIG_ENDIAN") + + if not conf.CONFIG_SET("HAVE_BIG_ENDIAN") and not conf.CONFIG_SET("HAVE_LITTLE_ENDIAN"): + # That didn't work! Do runtime test. + conf.CHECK_CODE("""union { int i; char c[sizeof(int)]; } u; + u.i = 0x01020304; + return u.c[0] == 0x04 && u.c[1] == 0x03 && u.c[2] == 0x02 && u.c[3] == 0x01 ? 0 : 1;""", + addmain=True, execute=True, + define='HAVE_LITTLE_ENDIAN', + msg="Checking for HAVE_LITTLE_ENDIAN - runtime") + conf.CHECK_CODE("""union { int i; char c[sizeof(int)]; } u; + u.i = 0x01020304; + return u.c[0] == 0x01 && u.c[1] == 0x02 && u.c[2] == 0x03 && u.c[3] == 0x04 ? 0 : 1;""", + addmain=True, execute=True, + define='HAVE_BIG_ENDIAN', + msg="Checking for HAVE_BIG_ENDIAN - runtime") + + # Extra sanity check. + if conf.CONFIG_SET("HAVE_BIG_ENDIAN") == conf.CONFIG_SET("HAVE_LITTLE_ENDIAN"): + Logs.error("Failed endian determination. The PDP-11 is back?") + sys.exit(1) + else: + if conf.CONFIG_SET("HAVE_BIG_ENDIAN"): + conf.DEFINE('WORDS_BIGENDIAN', 1) # check if signal() takes a void function if conf.CHECK_CODE('return *(signal (0, 0)) (0) == 1', diff --git a/common/check.c b/common/check.c index 9f9d870..e632af5 100644 --- a/common/check.c +++ b/common/check.c @@ -39,7 +39,9 @@ static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery) if (hdr.version != TDB_VERSION) goto corrupt; - if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) + if (hdr.rwlocks != 0 && + hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC && + hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) goto corrupt; tdb_header_hash(tdb, &h1, &h2); diff --git a/common/dump.c b/common/dump.c index 7193c1e..5f6a78b 100644 --- a/common/dump.c +++ b/common/dump.c @@ -40,7 +40,7 @@ static tdb_off_t tdb_dump_record(struct tdb_context *tdb, int hash, } printf(" rec: hash=%d offset=0x%08x next=0x%08x rec_len=%u " - "key_len=%u data_len=%u full_hash=0x%x magic=0x%x\n", + "key_len=%u 
data_len=%u full_hash=0x%08x magic=0x%08x\n", hash, offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic); diff --git a/common/freelist.c b/common/freelist.c index ea14dd0..2aeeb1c 100644 --- a/common/freelist.c +++ b/common/freelist.c @@ -273,7 +273,8 @@ static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, 0 is returned if the space could not be allocated */ -tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec) +static tdb_off_t tdb_allocate_from_freelist( + struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec) { tdb_off_t rec_ptr, last_ptr, newrec_ptr; struct { @@ -282,9 +283,6 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_rec } bestfit; float multiplier = 1.0; - if (tdb_lock(tdb, -1, F_WRLCK) == -1) - return 0; - /* over-allocate to reduce fragmentation */ length *= 1.25; @@ -297,7 +295,7 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_rec /* read in the freelist top */ if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1) - goto fail; + return 0; bestfit.rec_ptr = 0; bestfit.last_ptr = 0; @@ -310,7 +308,7 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_rec */ while (rec_ptr) { if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) { - goto fail; + return 0; } if (rec->rec_len >= length) { @@ -344,12 +342,11 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_rec if (bestfit.rec_ptr != 0) { if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) { - goto fail; + return 0; } newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr); - tdb_unlock(tdb, -1, F_WRLCK); return newrec_ptr; } @@ -357,12 +354,95 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_rec database and if we can then try again */ if (tdb_expand(tdb, length + sizeof(*rec)) == 0) goto again; - fail: - tdb_unlock(tdb, -1, F_WRLCK); + return 0; } 
+static bool tdb_alloc_dead( + struct tdb_context *tdb, int hash, tdb_len_t length, + tdb_off_t *rec_ptr, struct tdb_record *rec) +{ + tdb_off_t last_ptr; + + *rec_ptr = tdb_find_dead(tdb, hash, rec, length, &last_ptr); + if (*rec_ptr == 0) { + return false; + } + /* + * Unlink the record from the hash chain, it's about to be moved into + * another one. + */ + return (tdb_ofs_write(tdb, last_ptr, &rec->next) == 0); +} + +/* + * Chain "hash" is assumed to be locked + */ + +tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length, + struct tdb_record *rec) +{ + tdb_off_t ret; + int i; + + if (tdb->max_dead_records == 0) { + /* + * No dead records to expect anywhere. Do the blocking + * freelist lock without trying to steal from others + */ + goto blocking_freelist_allocate; + } + + /* + * The following loop tries to get the freelist lock nonblocking. If + * it gets the lock, allocate from there. If the freelist is busy, + * instead of waiting we try to steal dead records from other hash + * chains. + * + * Be aware that we do nonblocking locks on the other hash chains as + * well and fail gracefully. This way we avoid deadlocks (we block two + * hash chains, something which is pretty bad normally) + */ + + for (i=0; i<tdb->hash_size; i++) { + + int list; + + list = BUCKET(hash+i); + + if (tdb_lock_nonblock(tdb, list, F_WRLCK) == 0) { + bool got_dead; + + got_dead = tdb_alloc_dead(tdb, list, length, &ret, rec); + tdb_unlock(tdb, list, F_WRLCK); + + if (got_dead) { + return ret; + } + } + if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == 0) { + /* + * Under the freelist lock take the chance to give + * back our dead records. 
+ */ + tdb_purge_dead(tdb, hash); + + ret = tdb_allocate_from_freelist(tdb, length, rec); + tdb_unlock(tdb, -1, F_WRLCK); + return ret; + } + } + +blocking_freelist_allocate: + + if (tdb_lock(tdb, -1, F_WRLCK) == -1) { + return 0; + } + ret = tdb_allocate_from_freelist(tdb, length, rec); + tdb_unlock(tdb, -1, F_WRLCK); + return ret; +} /* return the size of the freelist - used to decide if we should repack diff --git a/common/io.c b/common/io.c index 11dfefd..fe47d18 100644 --- a/common/io.c +++ b/common/io.c @@ -28,6 +28,71 @@ #include "tdb_private.h" +/* + * We prepend the mutex area, so fixup offsets. See mutex.c for details. + * tdb->hdr_ofs is 0 or header.mutex_size. + * + * Note: that we only have the 4GB limit of tdb_off_t for + * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs! + */ + +static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off) +{ + off_t tmp = tdb->hdr_ofs + *off; + + if ((tmp < tdb->hdr_ofs) || (tmp < *off)) { + errno = EIO; + return false; + } + + *off = tmp; + return true; +} + +static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf, + size_t count, off_t offset) +{ + if (!tdb_adjust_offset(tdb, &offset)) { + return -1; + } + return pwrite(tdb->fd, buf, count, offset); +} + +static ssize_t tdb_pread(struct tdb_context *tdb, void *buf, + size_t count, off_t offset) +{ + if (!tdb_adjust_offset(tdb, &offset)) { + return -1; + } + return pread(tdb->fd, buf, count, offset); +} + +static int tdb_ftruncate(struct tdb_context *tdb, off_t length) +{ + if (!tdb_adjust_offset(tdb, &length)) { + return -1; + } + return ftruncate(tdb->fd, length); +} + +static int tdb_fstat(struct tdb_context *tdb, struct stat *buf) +{ + int ret; + + ret = fstat(tdb->fd, buf); + if (ret == -1) { + return -1; + } + + if (buf->st_size < tdb->hdr_ofs) { + errno = EIO; + return -1; + } + buf->st_size -= tdb->hdr_ofs; + + return ret; +} + /* check for an out of bounds access - if it is out of bounds then see if the database has been 
expanded by someone else and expand if necessary @@ -58,7 +123,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, return -1; } - if (fstat(tdb->fd, &st) == -1) { + if (tdb_fstat(tdb, &st) == -1) { tdb->ecode = TDB_ERR_IO; return -1; } @@ -122,16 +187,18 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t written = pwrite(tdb->fd, buf, len, off); + ssize_t written; + + written = tdb_pwrite(tdb, buf, len, off); + if ((written != (ssize_t)len) && (written != -1)) { /* try once more */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " "%zi of %u bytes at %u, trying once more\n", written, len, off)); - written = pwrite(tdb->fd, (const char *)buf+written, - len-written, - off+written); + written = tdb_pwrite(tdb, (const char *)buf+written, + len-written, off+written); } if (written == -1) { /* Ensure ecode is set for log fn. */ @@ -176,7 +243,9 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t ret = pread(tdb->fd, buf, len, off); + ssize_t ret; + + ret = tdb_pread(tdb, buf, len, off); if (ret != (ssize_t)len) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; @@ -258,7 +327,8 @@ int tdb_mmap(struct tdb_context *tdb) if (should_mmap(tdb)) { tdb->map_ptr = mmap(NULL, tdb->map_size, PROT_READ|(tdb->read_only? 0:PROT_WRITE), - MAP_SHARED|MAP_FILE, tdb->fd, 0); + MAP_SHARED|MAP_FILE, tdb->fd, + tdb->hdr_ofs); /* * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! 
@@ -303,12 +373,12 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad return -1; } - if (ftruncate(tdb->fd, new_size) == -1) { + if (tdb_ftruncate(tdb, new_size) == -1) { char b = 0; - ssize_t written = pwrite(tdb->fd, &b, 1, new_size - 1); + ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = pwrite(tdb->fd, &b, 1, new_size - 1); + written = tdb_pwrite(tdb, &b, 1, new_size - 1); } if (written == 0) { /* again - give up, guessing errno */ @@ -328,10 +398,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad memset(buf, TDB_PAD_BYTE, sizeof(buf)); while (addition) { size_t n = addition>sizeof(buf)?sizeof(buf):addition; - ssize_t written = pwrite(tdb->fd, buf, n, size); + ssize_t written = tdb_pwrite(tdb, buf, n, size); if (written == 0) { /* prevent infinite loops: try _once_ more */ - written = pwrite(tdb->fd, buf, n, size); + written = tdb_pwrite(tdb, buf, n, size); } if (written == 0) { /* give up, trying to provide a useful errno */ @@ -437,6 +507,14 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size) /* must know about any previous expansions by another process */ tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); + /* + * Note: that we don't care about tdb->hdr_ofs != 0 here + * + * The 4GB limitation is just related to tdb->map_size + * and the offset calculation in the records. 
+ * + * The file on disk can be up to 4GB + tdb->hdr_ofs + */ size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size); if (!tdb_add_off_t(tdb->map_size, size, &new_size)) { diff --git a/common/lock.c b/common/lock.c index 4dfefd5..6644c40 100644 --- a/common/lock.c +++ b/common/lock.c @@ -38,6 +38,15 @@ static int fcntl_lock(struct tdb_context *tdb, struct flock fl; int cmd; +#ifdef USE_TDB_MUTEX_LOCKING + { + int ret; + if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) { + return ret; + } + } +#endif + fl.l_type = rw; fl.l_whence = SEEK_SET; fl.l_start = off; @@ -110,6 +119,15 @@ static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len) fclose(locks); #endif +#ifdef USE_TDB_MUTEX_LOCKING + { + int ret; + if (tdb_mutex_unlock(tdb, rw, off, len, &ret)) { + return ret; + } + } +#endif + fl.l_type = F_UNLCK; fl.l_whence = SEEK_SET; fl.l_start = off; @@ -248,13 +266,27 @@ int tdb_allrecord_upgrade(struct tdb_context *tdb) return -1; } - ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, - TDB_LOCK_WAIT|TDB_LOCK_PROBE); + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_upgrade(tdb); + if (ret == -1) { + goto fail; + } + ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size), + 0, TDB_LOCK_WAIT|TDB_LOCK_PROBE); + if (ret == -1) { + tdb_mutex_allrecord_downgrade(tdb); + } + } else { + ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, + TDB_LOCK_WAIT|TDB_LOCK_PROBE); + } + if (ret == 0) { tdb->allrecord_lock.ltype = F_WRLCK; tdb->allrecord_lock.off = 0; return 0; } +fail: TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n")); return -1; } @@ -297,14 +329,17 @@ int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype, return 0; } - new_lck = (struct tdb_lock_type *)realloc( - tdb->lockrecs, - sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1)); - if (new_lck == NULL) { - errno = ENOMEM; - return -1; + if (tdb->num_lockrecs == tdb->lockrecs_array_length) { + new_lck = (struct tdb_lock_type 
*)realloc( + tdb->lockrecs, + sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1)); + if (new_lck == NULL) { + errno = ENOMEM; + return -1; + } + tdb->lockrecs_array_length = tdb->num_lockrecs+1; + tdb->lockrecs = new_lck; } - tdb->lockrecs = new_lck; /* Since fcntl locks don't nest, we do a lock for the first one, and simply bump the count for future ones */ @@ -312,9 +347,11 @@ int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype, return -1; } - tdb->lockrecs[tdb->num_lockrecs].off = offset; - tdb->lockrecs[tdb->num_lockrecs].count = 1; - tdb->lockrecs[tdb->num_lockrecs].ltype = ltype; + new_lck = &tdb->lockrecs[tdb->num_lockrecs]; + + new_lck->off = offset; + new_lck->count = 1; + new_lck->ltype = ltype; tdb->num_lockrecs++; return 0; @@ -481,10 +518,6 @@ int tdb_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype, * a completely idle tdb we should get rid of the locked array. */ - if (tdb->num_lockrecs == 0) { - SAFE_FREE(tdb->lockrecs); - } - if (ret) TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: An error occurred unlocking!\n")); return ret; @@ -592,6 +625,8 @@ static int tdb_chainlock_gradual(struct tdb_context *tdb, int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, enum tdb_lock_flags flags, bool upgradable) { + int ret; + switch (tdb_allrecord_check(tdb, ltype, flags, upgradable)) { case -1: return -1; @@ -606,16 +641,27 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, * * It is (1) which cause the starvation problem, so we're only * gradual for that. */ - if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, - tdb->hash_size * 4) == -1) { + + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_lock(tdb, ltype, flags); + } else { + ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, + tdb->hash_size * 4); + } + + if (ret == -1) { return -1; } /* Grab individual record locks. 
*/ if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0, flags) == -1) { - tdb_brunlock(tdb, ltype, FREELIST_TOP, - tdb->hash_size * 4); + if (tdb_have_mutexes(tdb)) { + tdb_mutex_allrecord_unlock(tdb); + } else { + tdb_brunlock(tdb, ltype, FREELIST_TOP, + tdb->hash_size * 4); + } return -1; } @@ -671,9 +717,25 @@ int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock) return 0; } - if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno))); - return -1; + if (!mark_lock) { + int ret; + + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_unlock(tdb); + if (ret == 0) { + ret = tdb_brunlock(tdb, ltype, + lock_offset(tdb->hash_size), + 0); + } + } else { + ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0); + } + + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed " + "(%s)\n", strerror(errno))); + return -1; + } } tdb->allrecord_lock.count = 0; @@ -894,9 +956,6 @@ void tdb_release_transaction_locks(struct tdb_context *tdb) } } tdb->num_lockrecs = active; - if (tdb->num_lockrecs == 0) { - SAFE_FREE(tdb->lockrecs); - } } /* Following functions are added specifically to support CTDB. */ diff --git a/common/mutex.c b/common/mutex.c new file mode 100644 index 0000000..bdc4c28 --- /dev/null +++ b/common/mutex.c @@ -0,0 +1,1000 @@ +/* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Volker Lendecke 2012,2013 + Copyright (C) Stefan Metzmacher 2013,2014 + Copyright (C) Michael Adam 2014 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "tdb_private.h" +#include "system/threads.h" + +#ifdef USE_TDB_MUTEX_LOCKING + +/* + * If we run with mutexes, we store the "struct tdb_mutexes" at the + * beginning of the file. We store an additional tdb_header right + * beyond the mutex area, page aligned. All the offsets within the tdb + * are relative to the area behind the mutex area. tdb->map_ptr points + * behind the mmap area as well, so the read and write path in the + * mutex case can remain unchanged. + * + * Early in the mutex development the mutexes were placed between the hash + * chain pointers and the real tdb data. This had two drawbacks: First, it + * made pointer calculations more complex. Second, we had to mmap the mutex + * area twice. One was the normal map_ptr in the tdb. This frequently changed + * from within tdb_oob. At least the Linux glibc robust mutex code assumes + * constant pointers in memory, so a constantly changing mmap area destroys + * the mutex list. So we had to mmap the first bytes of the file with a second + * mmap call. With that scheme, very weird errors happened that could be + * easily fixed by doing the mutex mmap in a second file. It seemed that + * mapping the same memory area twice does not end up in accessing the same + * physical page, looking at the mutexes in gdb it seemed that old data showed + * up after some re-mapping. To avoid a separate mutex file, the code now puts + * the real content of the tdb file after the mutex area. 
This way we do not + * have overlapping mmap areas, the mutex area is mmapped once and not + * changed, the tdb data area's mmap is constantly changed but does not + * overlap. + */ + +struct tdb_mutexes { + struct tdb_header hdr; + + /* protect allrecord_lock */ + pthread_mutex_t allrecord_mutex; + + /* + * F_UNLCK: free, + * F_RDLCK: shared, + * F_WRLCK: exclusive + */ + short int allrecord_lock; + + /* + * Index 0 is the freelist mutex, followed by + * one mutex per hashchain. + */ + pthread_mutex_t hashchains[1]; +}; + +bool tdb_have_mutexes(struct tdb_context *tdb) +{ + return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0); +} + +size_t tdb_mutex_size(struct tdb_context *tdb) +{ + size_t mutex_size; + + if (!tdb_have_mutexes(tdb)) { + return 0; + } + + mutex_size = sizeof(struct tdb_mutexes); + mutex_size += tdb->hash_size * sizeof(pthread_mutex_t); + + return TDB_ALIGN(mutex_size, tdb->page_size); +} + +/* + * Get the index for a chain mutex + */ +static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len, + unsigned *idx) +{ + /* + * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before + * the 4 bytes of the freelist start and the hash chain that is about + * to be locked. See lock_offset() where the freelist is -1 vs the + * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in + * the tdb file itself as data, we need to adjust the offset here. + */ + const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t); + + if (!tdb_have_mutexes(tdb)) { + return false; + } + if (len != 1) { + /* Possibly the allrecord lock */ + return false; + } + if (off < freelist_lock_ofs) { + /* One of the special locks */ + return false; + } + if (tdb->hash_size == 0) { + /* tdb not initialized yet, called from tdb_open_ex() */ + return false; + } + if (off >= TDB_DATA_START(tdb->hash_size)) { + /* Single record lock from traverses */ + return false; + } + + /* + * Now we know it's a freelist or hash chain lock. 
Those are always 4 + * byte aligned. Paranoia check. + */ + if ((off % sizeof(tdb_off_t)) != 0) { + abort(); + } + + /* + * Re-index the fcntl offset into an offset into the mutex array + */ + off -= freelist_lock_ofs; /* rebase to index 0 */ + off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */ + + *idx = off; + return true; +} + +static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb) +{ + size_t i; + + for (i=0; i < tdb->num_lockrecs; i++) { + bool ret; + unsigned idx; + + ret = tdb_mutex_index(tdb, + tdb->lockrecs[i].off, + tdb->lockrecs[i].count, + &idx); + if (!ret) { + continue; + } + + if (idx == 0) { + /* this is the freelist mutex */ + continue; + } + + return true; + } + + return false; +} + +static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag) +{ + int ret; + + if (waitflag) { + ret = pthread_mutex_lock(m); + } else { + ret = pthread_mutex_trylock(m); + } + if (ret != EOWNERDEAD) { + return ret; + } + + /* + * For chainlocks, we don't do any cleanup (yet?) + */ + return pthread_mutex_consistent(m); +} + +static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag) +{ + int ret; + + if (waitflag) { + ret = pthread_mutex_lock(&m->allrecord_mutex); + } else { + ret = pthread_mutex_trylock(&m->allrecord_mutex); + } + if (ret != EOWNERDEAD) { + return ret; + } + + /* + * The allrecord lock holder died. We need to reset the allrecord_lock + * to F_UNLCK. This should also be the indication for + * tdb_needs_recovery. 
+ */ + m->allrecord_lock = F_UNLCK; + + return pthread_mutex_consistent(&m->allrecord_mutex); +} + +bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, + bool waitflag, int *pret) +{ + struct tdb_mutexes *m = tdb->mutexes; + pthread_mutex_t *chain; + int ret; + unsigned idx; + bool allrecord_ok; + + if (!tdb_mutex_index(tdb, off, len, &idx)) { + return false; + } + chain = &m->hashchains[idx]; + +again: + ret = chain_mutex_lock(chain, waitflag); + if (ret == EBUSY) { + ret = EAGAIN; + } + if (ret != 0) { + errno = ret; + goto fail; + } + + if (idx == 0) { + /* + * This is a freelist lock, which is independent to + * the allrecord lock. So we're done once we got the + * freelist mutex. + */ + *pret = 0; + return true; + } + + if (tdb_have_mutex_chainlocks(tdb)) { + /* + * We can only check the allrecord lock once. If we do it with + * one chain mutex locked, we will deadlock with the allrecord + * locker process in the following way: We lock the first hash + * chain, we check for the allrecord lock. We keep the hash + * chain locked. Then the allrecord locker locks the + * allrecord_mutex. It walks the list of chain mutexes, + * locking them all in sequence. Meanwhile, we have the chain + * mutex locked, so the allrecord locker blocks trying to lock + * our chain mutex. Then we come in and try to lock the second + * chain lock, which in most cases will be the freelist. We + * see that the allrecord lock is locked and put ourselves on + * the allrecord_mutex. This will never be signalled though + * because the allrecord locker waits for us to give up the + * chain lock. + */ + + *pret = 0; + return true; + } + + /* + * Check if someone is has the allrecord lock: queue if so. 
+ */ + + allrecord_ok = false; + + if (m->allrecord_lock == F_UNLCK) { + /* + * allrecord lock not taken + */ + allrecord_ok = true; + } + + if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) { + /* + * allrecord shared lock taken, but we only want to read + */ + allrecord_ok = true; + } + + if (allrecord_ok) { + *pret = 0; + return true; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chain_mutex) failed: %s\n", strerror(ret))); + errno = ret; + goto fail; + } + ret = allrecord_mutex_lock(m, waitflag); + if (ret == EBUSY) { + ret = EAGAIN; + } + if (ret != 0) { + if (waitflag || (ret != EAGAIN)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock" + "(allrecord_mutex) failed: %s\n", + waitflag ? "" : "try_", strerror(ret))); + } + errno = ret; + goto fail; + } + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + errno = ret; + goto fail; + } + goto again; + +fail: + *pret = -1; + return true; +} + +bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, + int *pret) +{ + struct tdb_mutexes *m = tdb->mutexes; + pthread_mutex_t *chain; + int ret; + unsigned idx; + + if (!tdb_mutex_index(tdb, off, len, &idx)) { + return false; + } + chain = &m->hashchains[idx]; + + ret = pthread_mutex_unlock(chain); + if (ret == 0) { + *pret = 0; + return true; + } + errno = ret; + *pret = -1; + return true; +} + +int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags) +{ + struct tdb_mutexes *m = tdb->mutexes; + int ret; + uint32_t i; + bool waitflag = (flags & TDB_LOCK_WAIT); + int saved_errno; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if (flags & TDB_LOCK_MARK_ONLY) { + return 0; + } + + ret = allrecord_mutex_lock(m, waitflag); + if (!waitflag && (ret == EBUSY)) { + errno = EAGAIN; + tdb->ecode = 
TDB_ERR_LOCK; + return -1; + } + if (ret != 0) { + if (!(flags & TDB_LOCK_PROBE)) { + TDB_LOG((tdb, TDB_DEBUG_TRACE, + "allrecord_mutex_lock() failed: %s\n", + strerror(ret))); + } + tdb->ecode = TDB_ERR_LOCK; + return -1; + } + + if (m->allrecord_lock != F_UNLCK) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + goto fail_unlock_allrecord_mutex; + } + m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK; + + for (i=0; i<tdb->hash_size; i++) { + + /* ignore hashchains[0], the freelist */ + pthread_mutex_t *chain = &m->hashchains[i+1]; + + ret = chain_mutex_lock(chain, waitflag); + if (!waitflag && (ret == EBUSY)) { + errno = EAGAIN; + goto fail_unroll_allrecord_lock; + } + if (ret != 0) { + if (!(flags & TDB_LOCK_PROBE)) { + TDB_LOG((tdb, TDB_DEBUG_TRACE, + "chain_mutex_lock() failed: %s\n", + strerror(ret))); + } + errno = ret; + goto fail_unroll_allrecord_lock; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chainlock) failed: %s\n", strerror(ret))); + errno = ret; + goto fail_unroll_allrecord_lock; + } + } + /* + * We leave this routine with m->allrecord_mutex locked + */ + return 0; + +fail_unroll_allrecord_lock: + m->allrecord_lock = F_UNLCK; + +fail_unlock_allrecord_mutex: + saved_errno = errno; + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + } + errno = saved_errno; + tdb->ecode = TDB_ERR_LOCK; + return -1; +} + +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) +{ + struct tdb_mutexes *m = tdb->mutexes; + int ret; + uint32_t i; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + /* + * Our only caller tdb_allrecord_upgrade() + * garantees that we already own the allrecord lock. + * + * Which means m->allrecord_mutex is still locked by us. 
+ */ + + if (m->allrecord_lock != F_RDLCK) { + tdb->ecode = TDB_ERR_LOCK; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return -1; + } + + m->allrecord_lock = F_WRLCK; + + for (i=0; i<tdb->hash_size; i++) { + + /* ignore hashchains[0], the freelist */ + pthread_mutex_t *chain = &m->hashchains[i+1]; + + ret = chain_mutex_lock(chain, true); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock" + "(chainlock) failed: %s\n", strerror(ret))); + goto fail_unroll_allrecord_lock; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chainlock) failed: %s\n", strerror(ret))); + goto fail_unroll_allrecord_lock; + } + } + + return 0; + +fail_unroll_allrecord_lock: + m->allrecord_lock = F_RDLCK; + tdb->ecode = TDB_ERR_LOCK; + return -1; +} + +void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) +{ + struct tdb_mutexes *m = tdb->mutexes; + + /* + * Our only caller tdb_allrecord_upgrade() (in the error case) + * garantees that we already own the allrecord lock. + * + * Which means m->allrecord_mutex is still locked by us. + */ + + if (m->allrecord_lock != F_WRLCK) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return; + } + + m->allrecord_lock = F_RDLCK; + return; +} + + +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) +{ + struct tdb_mutexes *m = tdb->mutexes; + short old; + int ret; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + /* + * Our only callers tdb_allrecord_unlock() and + * tdb_allrecord_lock() (in the error path) + * garantee that we already own the allrecord lock. + * + * Which means m->allrecord_mutex is still locked by us. 
+ */ + + if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return -1; + } + + old = m->allrecord_lock; + m->allrecord_lock = F_UNLCK; + + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + m->allrecord_lock = old; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + return -1; + } + return 0; +} + +int tdb_mutex_init(struct tdb_context *tdb) +{ + struct tdb_mutexes *m; + pthread_mutexattr_t ma; + int i, ret; + + ret = tdb_mutex_mmap(tdb); + if (ret == -1) { + return -1; + } + m = tdb->mutexes; + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + goto fail_munmap; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto fail; + } + + for (i=0; i<tdb->hash_size+1; i++) { + pthread_mutex_t *chain = &m->hashchains[i]; + + ret = pthread_mutex_init(chain, &ma); + if (ret != 0) { + goto fail; + } + } + + m->allrecord_lock = F_UNLCK; + + ret = pthread_mutex_init(&m->allrecord_mutex, &ma); + if (ret != 0) { + goto fail; + } + ret = 0; +fail: + pthread_mutexattr_destroy(&ma); +fail_munmap: + tdb_mutex_munmap(tdb); + + if (ret == 0) { + return 0; + } + + errno = ret; + return -1; +} + +int tdb_mutex_mmap(struct tdb_context *tdb) +{ + size_t len; + void *ptr; + + len = tdb_mutex_size(tdb); + if (len == 0) { + return 0; + } + + ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, + tdb->fd, 0); + if (ptr == MAP_FAILED) { + return -1; + } + tdb->mutexes = (struct tdb_mutexes *)ptr; + + return 0; +} + +int tdb_mutex_munmap(struct tdb_context *tdb) +{ + size_t len; + + len = tdb_mutex_size(tdb); + if (len == 0) { + return 0; + } + + return 
munmap(tdb->mutexes, len); +} + +static bool tdb_mutex_locking_cached; + +static bool tdb_mutex_locking_supported(void) +{ + pthread_mutexattr_t ma; + pthread_mutex_t m; + int ret; + static bool initialized; + + if (initialized) { + return tdb_mutex_locking_cached; + } + + initialized = true; + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + return false; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_init(&m, &ma); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_lock(&m); + if (ret != 0) { + goto cleanup_m; + } + /* + * This makes sure we have real mutexes + * from a threading library instead of just + * stubs from libc. + */ + ret = pthread_mutex_lock(&m); + if (ret != EDEADLK) { + goto cleanup_lock; + } + ret = pthread_mutex_unlock(&m); + if (ret != 0) { + goto cleanup_m; + } + + tdb_mutex_locking_cached = true; + goto cleanup_m; + +cleanup_lock: + pthread_mutex_unlock(&m); +cleanup_m: + pthread_mutex_destroy(&m); +cleanup_ma: + pthread_mutexattr_destroy(&ma); + return tdb_mutex_locking_cached; +} + +static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR; +static pid_t tdb_robust_mutex_pid = -1; + +static void tdb_robust_mutex_handler(int sig) +{ + if (tdb_robust_mutex_pid != -1) { + pid_t pid; + int status; + + pid = waitpid(tdb_robust_mutex_pid, &status, WNOHANG); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + return; + } + } + + if (tdb_robust_mutext_old_handler == SIG_DFL) { + return; + } + if (tdb_robust_mutext_old_handler == SIG_IGN) { + return; + } + if (tdb_robust_mutext_old_handler == SIG_ERR) { + return; + } + + tdb_robust_mutext_old_handler(sig); +} + +_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) 
+{ + void *ptr; + pthread_mutex_t *m; + pthread_mutexattr_t ma; + int ret = 1; + int pipe_down[2] = { -1, -1 }; + int pipe_up[2] = { -1, -1 }; + ssize_t nread; + char c = 0; + bool ok; + int status; + static bool initialized; + + if (initialized) { + return tdb_mutex_locking_cached; + } + + initialized = true; + + ok = tdb_mutex_locking_supported(); + if (!ok) { + return false; + } + + tdb_mutex_locking_cached = false; + + ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_ANON, -1 /* fd */, 0); + if (ptr == MAP_FAILED) { + return false; + } + m = (pthread_mutex_t *)ptr; + + ret = pipe(pipe_down); + if (ret != 0) { + goto cleanup_mmap; + } + ret = pipe(pipe_up); + if (ret != 0) { + goto cleanup_pipe; + } + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + goto cleanup_pipe; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_init(m, &ma); + if (ret != 0) { + goto cleanup_ma; + } + + tdb_robust_mutext_old_handler = signal(SIGCHLD, + tdb_robust_mutex_handler); + + tdb_robust_mutex_pid = fork(); + if (tdb_robust_mutex_pid == 0) { + size_t nwritten; + close(pipe_down[1]); + close(pipe_up[0]); + ret = pthread_mutex_lock(m); + nwritten = write(pipe_up[1], &ret, sizeof(ret)); + if (nwritten != sizeof(ret)) { + exit(1); + } + if (ret != 0) { + exit(1); + } + nread = read(pipe_down[0], &c, 1); + if (nread != 1) { + exit(1); + } + /* leave locked */ + exit(0); + } + if (tdb_robust_mutex_pid == -1) { + goto cleanup_sig_child; + } + close(pipe_down[0]); + pipe_down[0] = -1; + close(pipe_up[1]); + pipe_up[1] = -1; + + nread = read(pipe_up[0], &ret, sizeof(ret)); + if (nread != sizeof(ret)) { + goto cleanup_child; + } + + ret = pthread_mutex_trylock(m); 
+ if (ret != EBUSY) { + if (ret == 0) { + pthread_mutex_unlock(m); + } + goto cleanup_child; + } + + if (write(pipe_down[1], &c, 1) != 1) { + goto cleanup_child; + } + + nread = read(pipe_up[0], &c, 1); + if (nread != 0) { + goto cleanup_child; + } + + while (tdb_robust_mutex_pid > 0) { + pid_t pid; + + errno = 0; + pid = waitpid(tdb_robust_mutex_pid, &status, 0); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + break; + } + if (pid == -1 && errno != EINTR) { + goto cleanup_child; + } + } + signal(SIGCHLD, tdb_robust_mutext_old_handler); + + ret = pthread_mutex_trylock(m); + if (ret != EOWNERDEAD) { + if (ret == 0) { + pthread_mutex_unlock(m); + } + goto cleanup_m; + } + + ret = pthread_mutex_consistent(m); + if (ret != 0) { + goto cleanup_m; + } + + ret = pthread_mutex_trylock(m); + if (ret != EDEADLK) { + pthread_mutex_unlock(m); + goto cleanup_m; + } + + ret = pthread_mutex_unlock(m); + if (ret != 0) { + goto cleanup_m; + } + + tdb_mutex_locking_cached = true; + goto cleanup_m; + +cleanup_child: + while (tdb_robust_mutex_pid > 0) { + pid_t pid; + + kill(tdb_robust_mutex_pid, SIGKILL); + + errno = 0; + pid = waitpid(tdb_robust_mutex_pid, &status, 0); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + break; + } + if (pid == -1 && errno != EINTR) { + break; + } + } +cleanup_sig_child: + signal(SIGCHLD, tdb_robust_mutext_old_handler); +cleanup_m: + pthread_mutex_destroy(m); +cleanup_ma: + pthread_mutexattr_destroy(&ma); +cleanup_pipe: + if (pipe_down[0] != -1) { + close(pipe_down[0]); + } + if (pipe_down[1] != -1) { + close(pipe_down[1]); + } + if (pipe_up[0] != -1) { + close(pipe_up[0]); + } + if (pipe_up[1] != -1) { + close(pipe_up[1]); + } +cleanup_mmap: + munmap(ptr, sizeof(pthread_mutex_t)); + + return tdb_mutex_locking_cached; +} + +#else + +size_t tdb_mutex_size(struct tdb_context *tdb) +{ + return 0; +} + +bool tdb_have_mutexes(struct tdb_context *tdb) +{ + return false; +} + +int tdb_mutex_allrecord_lock(struct 
tdb_context *tdb, int ltype, + enum tdb_lock_flags flags) +{ + tdb->ecode = TDB_ERR_LOCK; + return -1; +} + +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) +{ + return -1; +} + +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) +{ + tdb->ecode = TDB_ERR_LOCK; + return -1; +} + +void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) +{ + return; +} + +int tdb_mutex_mmap(struct tdb_context *tdb) +{ + errno = ENOSYS; + return -1; +} + +int tdb_mutex_munmap(struct tdb_context *tdb) +{ + errno = ENOSYS; + return -1; +} + +int tdb_mutex_init(struct tdb_context *tdb) +{ + errno = ENOSYS; + return -1; +} + +_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) +{ + return false; +} + +#endif diff --git a/common/open.c b/common/open.c index 05d7cae..16a76a3 100644 --- a/common/open.c +++ b/common/open.c @@ -76,6 +76,32 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, if (tdb->flags & TDB_INCOMPATIBLE_HASH) newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; + /* + * We create a tdb with TDB_FEATURE_FLAG_MUTEX support, + * the flag combination and runtime feature checks + * are done by the caller already. + */ + if (tdb->flags & TDB_MUTEX_LOCKING) { + newdb->feature_flags |= TDB_FEATURE_FLAG_MUTEX; + } + + /* + * If we have any features we add the FEATURE_FLAG_MAGIC, overwriting the + * TDB_HASH_RWLOCK_MAGIC above. + */ + if (newdb->feature_flags != 0) { + newdb->rwlocks = TDB_FEATURE_FLAG_MAGIC; + } + + /* + * It's required for some following code pathes + * to have the fields on 'tdb' up-to-date. + * + * E.g. 
tdb_mutex_size() requires it + */ + tdb->feature_flags = newdb->feature_flags; + tdb->hash_size = newdb->hash_size; + if (tdb->flags & TDB_INTERNAL) { tdb->map_size = size; tdb->map_ptr = (char *)newdb; @@ -90,6 +116,11 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, if (ftruncate(tdb->fd, 0) == -1) goto fail; + if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { + newdb->mutex_size = tdb_mutex_size(tdb); + tdb->hdr_ofs = newdb->mutex_size; + } + /* This creates an endian-converted header, as if read from disk */ CONVERT(*newdb); memcpy(header, newdb, sizeof(*header)); @@ -99,6 +130,37 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, if (!tdb_write_all(tdb->fd, newdb, size)) goto fail; + if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { + + /* + * Now we init the mutex area + * followed by a second header. + */ + + ret = ftruncate( + tdb->fd, + newdb->mutex_size + sizeof(struct tdb_header)); + if (ret == -1) { + goto fail; + } + ret = tdb_mutex_init(tdb); + if (ret == -1) { + goto fail; + } + + /* + * Write a second header behind the mutexes. That's the area + * that will be mmapp'ed. + */ + ret = lseek(tdb->fd, newdb->mutex_size, SEEK_SET); + if (ret == -1) { + goto fail; + } + if (!tdb_write_all(tdb->fd, newdb, size)) { + goto fail; + } + } + ret = 0; fail: SAFE_FREE(newdb); @@ -165,11 +227,76 @@ static bool check_header_hash(struct tdb_context *tdb, return check_header_hash(tdb, header, false, m1, m2); } +static bool tdb_mutex_open_ok(struct tdb_context *tdb, + const struct tdb_header *header) +{ + int locked; + + locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, + TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); + + if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) { + /* + * CLEAR_IF_FIRST still active. The tdb was created on this + * host, so we can assume the mutex implementation is + * compatible. Important for tools like tdbdump on a still + * open locking.tdb. 
+ */ + goto check_local_settings; + } + + /* + * We got the CLEAR_IF_FIRST lock. That means the database was + * potentially copied from somewhere else. The mutex implementation + * might be incompatible. + */ + + if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { + /* + * Should not happen + */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: " + "failed to release ACTIVE_LOCK on %s: %s\n", + tdb->name, strerror(errno))); + return false; + } + + if (tdb->flags & TDB_NOLOCK) { + /* + * We don't look at locks, so it does not matter to have a + * compatible mutex implementation. Allow the open. + */ + return true; + } + +check_local_settings: + + if (!(tdb->flags & TDB_MUTEX_LOCKING)) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: " + "Can use mutexes only with " + "MUTEX_LOCKING or NOLOCK\n", + tdb->name)); + return false; + } + + if (tdb_mutex_size(tdb) != header->mutex_size) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: " + "Mutex size changed from %u to %u\n.", + tdb->name, + (unsigned int)header->mutex_size, + (unsigned int)tdb_mutex_size(tdb))); + return false; + } + + return true; +} + _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, int open_flags, mode_t mode, const struct tdb_logging_context *log_ctx, tdb_hash_func hash_fn) { + int orig_errno = errno; struct tdb_header header; struct tdb_context *tdb; struct stat st; @@ -179,6 +306,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td unsigned v; const char *hash_alg; uint32_t magic1, magic2; + int ret; ZERO_STRUCT(header); @@ -188,6 +316,14 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td goto fail; } tdb_io_init(tdb); + + if (tdb_flags & TDB_INTERNAL) { + tdb_flags |= TDB_INCOMPATIBLE_HASH; + } + if (tdb_flags & TDB_MUTEX_LOCKING) { + tdb_flags |= TDB_INCOMPATIBLE_HASH; + } + tdb->fd = -1; #ifdef TDB_TRACE tdb->tracefd = -1; @@ -275,6 +411,64 @@ _PUBLIC_ 
struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td goto fail; } + if (tdb->flags & TDB_MUTEX_LOCKING) { + /* + * Here we catch bugs in the callers, + * the runtime check for existing tdb's comes later. + */ + + if (!(tdb->flags & TDB_CLEAR_IF_FIRST)) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " + "invalid flags for %s - TDB_MUTEX_LOCKING " + "requires TDB_CLEAR_IF_FIRST\n", name)); + errno = EINVAL; + goto fail; + } + + if (tdb->flags & TDB_INTERNAL) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " + "invalid flags for %s - TDB_MUTEX_LOCKING and " + "TDB_INTERNAL are not allowed together\n", name)); + errno = EINVAL; + goto fail; + } + + if (tdb->flags & TDB_NOMMAP) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " + "invalid flags for %s - TDB_MUTEX_LOCKING and " + "TDB_NOMMAP are not allowed together\n", name)); + errno = EINVAL; + goto fail; + } + + if (tdb->read_only) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " + "invalid flags for %s - TDB_MUTEX_LOCKING " + "not allowed read only\n", name)); + errno = EINVAL; + goto fail; + } + + /* + * The callers should have called + * tdb_runtime_check_for_robust_mutexes() + * before using TDB_MUTEX_LOCKING! + * + * This makes sure the caller understands + * that the locking may behave a bit differently + * than with pure fcntl locking. E.g. multiple + * read locks are not supported. 
+ */ + if (!tdb_runtime_check_for_robust_mutexes()) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " + "invalid flags for %s - TDB_MUTEX_LOCKING " + "requires support for robust_mutexes\n", + name)); + errno = ENOSYS; + goto fail; + } + } + if (getenv("TDB_NO_FSYNC")) { tdb->flags |= TDB_NOSYNC; } @@ -320,7 +514,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td if ((tdb_flags & TDB_CLEAR_IF_FIRST) && (!tdb->read_only) && (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { - int ret; ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT); if (ret == -1) { @@ -380,16 +573,56 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td tdb->flags |= TDB_CONVERT; tdb_convert(&header, sizeof(header)); } - if (fstat(tdb->fd, &st) == -1) + + /* + * We only use st.st_dev and st.st_ino from the raw fstat() + * call, everything else needs to use tdb_fstat() in order + * to skip tdb->hdr_ofs! 
+ */ + if (fstat(tdb->fd, &st) == -1) { goto fail; + } + tdb->device = st.st_dev; + tdb->inode = st.st_ino; + ZERO_STRUCT(st); if (header.rwlocks != 0 && + header.rwlocks != TDB_FEATURE_FLAG_MAGIC && header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); + errno = ENOSYS; goto fail; } tdb->hash_size = header.hash_size; + if (header.rwlocks == TDB_FEATURE_FLAG_MAGIC) { + tdb->feature_flags = header.feature_flags; + } + + if (tdb->feature_flags & ~TDB_SUPPORTED_FEATURE_FLAGS) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: unsupported " + "features in tdb %s: 0x%08x (supported: 0x%08x)\n", + name, (unsigned)tdb->feature_flags, + (unsigned)TDB_SUPPORTED_FEATURE_FLAGS)); + errno = ENOSYS; + goto fail; + } + + if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { + if (!tdb_mutex_open_ok(tdb, &header)) { + errno = EINVAL; + goto fail; + } + + /* + * We need to remember the hdr_ofs + * also for the TDB_NOLOCK case + * if the current library doesn't support + * mutex locking. + */ + tdb->hdr_ofs = header.mutex_size; + } + if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) { /* older TDB without magic hash references */ tdb->hash_fn = tdb_old_hash; @@ -411,28 +644,36 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td } /* Is it already in the open list? If so, fail. */ - if (tdb_already_open(st.st_dev, st.st_ino)) { + if (tdb_already_open(tdb->device, tdb->inode)) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " "%s (%d,%d) is already open in this process\n", - name, (int)st.st_dev, (int)st.st_ino)); + name, (int)tdb->device, (int)tdb->inode)); errno = EBUSY; goto fail; } - /* Beware truncation! */ - tdb->map_size = st.st_size; - if (tdb->map_size != st.st_size) { - /* Ensure ecode is set for log fn. 
*/ - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " - "len %llu too large!\n", (long long)st.st_size)); + /* + * We had tdb_mmap(tdb) here before, + * but we need to use tdb_fstat(), + * which is triggered from tdb_oob() before calling tdb_mmap(). + * As this skips tdb->hdr_ofs. + */ + tdb->map_size = 0; + ret = tdb->methods->tdb_oob(tdb, 0, 1, 0); + if (ret == -1) { errno = EIO; goto fail; } - tdb->device = st.st_dev; - tdb->inode = st.st_ino; - tdb_mmap(tdb); + if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { + if (!(tdb->flags & TDB_NOLOCK)) { + ret = tdb_mutex_mmap(tdb); + if (ret != 0) { + goto fail; + } + } + } + if (locked) { if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " @@ -484,6 +725,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td } tdb->next = tdbs; tdbs = tdb; + errno = orig_errno; return tdb; fail: @@ -542,6 +784,9 @@ _PUBLIC_ int tdb_close(struct tdb_context *tdb) else tdb_munmap(tdb); } + + tdb_mutex_munmap(tdb); + SAFE_FREE(tdb->name); if (tdb->fd != -1) { ret = close(tdb->fd); @@ -613,6 +858,11 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno))); goto fail; } + /* + * We only use st.st_dev and st.st_ino from the raw fstat() + * call, everything else needs to use tdb_fstat() in order + * to skip tdb->hdr_ofs! + */ if (fstat(tdb->fd, &st) != 0) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno))); goto fail; @@ -621,7 +871,16 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n")); goto fail; } - if (tdb_mmap(tdb) != 0) { + ZERO_STRUCT(st); + + /* + * We had tdb_mmap(tdb) here before, + * but we need to use tdb_fstat(), + * which is triggered from tdb_oob() before calling tdb_mmap(). 
+ * As this skips tdb->hdr_ofs. + */ + tdb->map_size = 0; + if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) { goto fail; } #endif /* fake pread or pwrite */ @@ -629,6 +888,7 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) /* We may still think we hold the active lock. */ tdb->num_lockrecs = 0; SAFE_FREE(tdb->lockrecs); + tdb->lockrecs_array_length = 0; if (active_lock && tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: failed to obtain active lock\n")); diff --git a/common/summary.c b/common/summary.c index 3c6f755..d786132 100644 --- a/common/summary.c +++ b/common/summary.c @@ -18,9 +18,12 @@ #include "tdb_private.h" #define SUMMARY_FORMAT \ - "Size of file/data: %u/%zu\n" \ + "Size of file/data: %llu/%zu\n" \ + "Header offset/logical size: %zu/%zu\n" \ "Number of records: %zu\n" \ "Incompatible hash: %s\n" \ + "Active/supported feature flags: 0x%08x/0x%08x\n" \ + "Robust mutexes locking: %s\n" \ "Smallest/average/largest keys: %zu/%zu/%zu\n" \ "Smallest/average/largest data: %zu/%zu/%zu\n" \ "Smallest/average/largest padding: %zu/%zu/%zu\n" \ @@ -87,12 +90,14 @@ static size_t get_hash_length(struct tdb_context *tdb, unsigned int i) _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) { + off_t file_size; tdb_off_t off, rec_off; - struct tally freet, keys, data, dead, extra, hash, uncoal; + struct tally freet, keys, data, dead, extra, hashval, uncoal; struct tdb_record rec; char *ret = NULL; bool locked; - size_t len, unc = 0; + size_t unc = 0; + int len; struct tdb_record recovery; /* Read-only databases use no locking at all: it's best-effort. 
@@ -114,7 +119,7 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) tally_init(&data); tally_init(&dead); tally_init(&extra); - tally_init(&hash); + tally_init(&hashval); tally_init(&uncoal); for (off = TDB_DATA_START(tdb->hash_size); @@ -161,18 +166,17 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) tally_add(&uncoal, unc - 1); for (off = 0; off < tdb->hash_size; off++) - tally_add(&hash, get_hash_length(tdb, off)); + tally_add(&hashval, get_hash_length(tdb, off)); - /* 20 is max length of a %zu. */ - len = strlen(SUMMARY_FORMAT) + 35*20 + 1; - ret = (char *)malloc(len); - if (!ret) - goto unlock; + file_size = tdb->hdr_ofs + tdb->map_size; - snprintf(ret, len, SUMMARY_FORMAT, - tdb->map_size, keys.total+data.total, + len = asprintf(&ret, SUMMARY_FORMAT, + (unsigned long long)file_size, keys.total+data.total, + (size_t)tdb->hdr_ofs, (size_t)tdb->map_size, keys.num, (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", + (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS, + (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX)?"yes":"no", keys.min, tally_mean(&keys), keys.max, data.min, tally_mean(&data), data.max, extra.min, tally_mean(&extra), extra.max, @@ -180,20 +184,23 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) dead.min, tally_mean(&dead), dead.max, freet.num, freet.min, tally_mean(&freet), freet.max, - hash.num, - hash.min, tally_mean(&hash), hash.max, + hashval.num, + hashval.min, tally_mean(&hashval), hashval.max, uncoal.total, uncoal.min, tally_mean(&uncoal), uncoal.max, - keys.total * 100.0 / tdb->map_size, - data.total * 100.0 / tdb->map_size, - extra.total * 100.0 / tdb->map_size, - freet.total * 100.0 / tdb->map_size, - dead.total * 100.0 / tdb->map_size, + keys.total * 100.0 / file_size, + data.total * 100.0 / file_size, + extra.total * 100.0 / file_size, + freet.total * 100.0 / file_size, + dead.total * 100.0 / file_size, (keys.num + freet.num + dead.num) * (sizeof(struct tdb_record) + sizeof(uint32_t)) - * 100.0 / tdb->map_size, + * 
100.0 / file_size, tdb->hash_size * sizeof(tdb_off_t) - * 100.0 / tdb->map_size); + * 100.0 / file_size); + if (len == -1) { + goto unlock; + } unlock: if (locked) { diff --git a/common/tdb.c b/common/tdb.c index 6256a05..ae98c96 100644 --- a/common/tdb.c +++ b/common/tdb.c @@ -345,13 +345,16 @@ static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash) /* * Purge all DEAD records from a hash chain */ -static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash) +int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash) { int res = -1; struct tdb_record rec; tdb_off_t rec_ptr; - if (tdb_lock(tdb, -1, F_WRLCK) == -1) { + if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) { + /* + * Don't block the freelist if not strictly necessary + */ return -1; } @@ -387,16 +390,20 @@ static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash) struct tdb_record rec; int ret; + rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec); + if (rec_ptr == 0) { + return -1; + } + if (tdb->max_dead_records != 0) { + uint32_t magic = TDB_DEAD_MAGIC; + /* * Allow for some dead records per hash chain, mainly for * tdb's with a very high create/delete rate like locking.tdb. */ - if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) - return -1; - if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) { /* * Don't let the per-chain freelist grow too large, @@ -405,22 +412,14 @@ static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash) tdb_purge_dead(tdb, hash); } - if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) { - tdb_unlock(tdb, BUCKET(hash), F_WRLCK); - return -1; - } - /* * Just mark the record as dead. 
*/ - rec.magic = TDB_DEAD_MAGIC; - ret = tdb_rec_write(tdb, rec_ptr, &rec); + ret = tdb_ofs_write( + tdb, rec_ptr + offsetof(struct tdb_record, magic), + &magic); } else { - if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, - &rec))) - return -1; - ret = tdb_do_delete(tdb, rec_ptr, &rec); } @@ -428,7 +427,7 @@ static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash) tdb_increment_seqnum(tdb); } - if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0) + if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0) TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n")); return ret; } @@ -446,13 +445,21 @@ _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key) /* * See if we have a dead record around with enough space */ -static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, - struct tdb_record *r, tdb_len_t length) +tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, + struct tdb_record *r, tdb_len_t length, + tdb_off_t *p_last_ptr) { - tdb_off_t rec_ptr; + tdb_off_t rec_ptr, last_ptr; + tdb_off_t best_rec_ptr = 0; + tdb_off_t best_last_ptr = 0; + struct tdb_record best = { .rec_len = UINT32_MAX }; + + length += sizeof(tdb_off_t); /* tailer */ + + last_ptr = TDB_HASH_TOP(hash); /* read in the hash top */ - if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) + if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1) return 0; /* keep looking until we find the right record */ @@ -460,16 +467,23 @@ static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, if (tdb_rec_read(tdb, rec_ptr, r) == -1) return 0; - if (TDB_DEAD(r) && r->rec_len >= length) { - /* - * First fit for simple coding, TODO: change to best - * fit - */ - return rec_ptr; + if (TDB_DEAD(r) && (r->rec_len >= length) && + (r->rec_len < best.rec_len)) { + best_rec_ptr = rec_ptr; + best_last_ptr = last_ptr; + best = *r; } + last_ptr = rec_ptr; rec_ptr = r->next; } - return 0; + + if (best.rec_len == UINT32_MAX) { 
+ return 0; + } + + *r = best; + *p_last_ptr = best_last_ptr; + return best_rec_ptr; } static int _tdb_store(struct tdb_context *tdb, TDB_DATA key, @@ -506,55 +520,8 @@ static int _tdb_store(struct tdb_context *tdb, TDB_DATA key, if (flag != TDB_INSERT) tdb_delete_hash(tdb, key, hash); - if (tdb->max_dead_records != 0) { - /* - * Allow for some dead records per hash chain, look if we can - * find one that can hold the new record. We need enough space - * for key, data and tailer. If we find one, we don't have to - * consult the central freelist. - */ - rec_ptr = tdb_find_dead( - tdb, hash, &rec, - key.dsize + dbuf.dsize + sizeof(tdb_off_t)); - - if (rec_ptr != 0) { - rec.key_len = key.dsize; - rec.data_len = dbuf.dsize; - rec.full_hash = hash; - rec.magic = TDB_MAGIC; - if (tdb_rec_write(tdb, rec_ptr, &rec) == -1 - || tdb->methods->tdb_write( - tdb, rec_ptr + sizeof(rec), - key.dptr, key.dsize) == -1 - || tdb->methods->tdb_write( - tdb, rec_ptr + sizeof(rec) + key.dsize, - dbuf.dptr, dbuf.dsize) == -1) { - goto fail; - } - goto done; - } - } - - /* - * We have to allocate some space from the freelist, so this means we - * have to lock it. Use the chance to purge all the DEAD records from - * the hash chain under the freelist lock. 
- */ - - if (tdb_lock(tdb, -1, F_WRLCK) == -1) { - goto fail; - } - - if ((tdb->max_dead_records != 0) - && (tdb_purge_dead(tdb, hash) == -1)) { - tdb_unlock(tdb, -1, F_WRLCK); - goto fail; - } - /* we have to allocate some space */ - rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec); - - tdb_unlock(tdb, -1, F_WRLCK); + rec_ptr = tdb_allocate(tdb, hash, key.dsize + dbuf.dsize, &rec); if (rec_ptr == 0) { goto fail; @@ -756,6 +723,15 @@ _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags) return; } + if ((flags & TDB_NOLOCK) && + (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && + (tdb->mutexes == NULL)) { + tdb->ecode = TDB_ERR_LOCK; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " + "Can not remove NOLOCK flag on mutexed databases")); + return; + } + if (flags & TDB_ALLOW_NESTING) { tdb->flags |= TDB_DISALLOW_NESTING; } diff --git a/common/tdb_private.h b/common/tdb_private.h index ce92188..de8d9e6 100644 --- a/common/tdb_private.h +++ b/common/tdb_private.h @@ -53,6 +53,7 @@ typedef uint32_t tdb_off_t; #define TDB_RECOVERY_MAGIC (0xf53bc0e7U) #define TDB_RECOVERY_INVALID_MAGIC (0x0) #define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U) +#define TDB_FEATURE_FLAG_MAGIC (0xbad1a52U) #define TDB_ALIGNMENT 4 #define DEFAULT_HASH_SIZE 131 #define FREELIST_TOP (sizeof(struct tdb_header)) @@ -68,6 +69,12 @@ typedef uint32_t tdb_off_t; #define TDB_PAD_BYTE 0x42 #define TDB_PAD_U32 0x42424242 +#define TDB_FEATURE_FLAG_MUTEX 0x00000001 + +#define TDB_SUPPORTED_FEATURE_FLAGS ( \ + TDB_FEATURE_FLAG_MUTEX | \ + 0) + /* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style * argument. */ @@ -152,7 +159,9 @@ struct tdb_header { tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */ uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ uint32_t magic2_hash; /* hash of TDB_MAGIC. 
*/ - tdb_off_t reserved[27]; + uint32_t feature_flags; + tdb_len_t mutex_size; /* set if TDB_FEATURE_FLAG_MUTEX is set */ + tdb_off_t reserved[25]; }; struct tdb_lock_type { @@ -186,6 +195,8 @@ struct tdb_methods { int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t ); }; +struct tdb_mutexes; + struct tdb_context { char *name; /* the name of the database */ void *map_ptr; /* where it is currently mapped */ @@ -197,8 +208,14 @@ struct tdb_context { struct tdb_lock_type allrecord_lock; /* .offset == upgradable */ int num_lockrecs; struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ + int lockrecs_array_length; + + tdb_off_t hdr_ofs; /* this is 0 or header.mutex_size */ + struct tdb_mutexes *mutexes; /* mmap of the mutex area */ + enum TDB_ERROR ecode; /* error code for last tdb error */ uint32_t hash_size; + uint32_t feature_flags; uint32_t flags; /* the flags passed to tdb_open */ struct tdb_traverse_lock travlocks; /* current traversal locks */ struct tdb_context *next; /* all tdbs to avoid multiple opens */ @@ -254,7 +271,8 @@ int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); void *tdb_convert(void *buf, uint32_t size); int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec); -tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec); +tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length, + struct tdb_record *rec); int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off); @@ -271,6 +289,10 @@ int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key, void *private_data); tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype, struct tdb_record *rec); +tdb_off_t 
tdb_find_dead(struct tdb_context *tdb, uint32_t hash, + struct tdb_record *r, tdb_len_t length, + tdb_off_t *p_last_ptr); +int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash); void tdb_io_init(struct tdb_context *tdb); int tdb_expand(struct tdb_context *tdb, tdb_off_t size); tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size); @@ -286,4 +308,20 @@ bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret); /* tdb_off_t and tdb_len_t right now are both uint32_t */ #define tdb_add_len_t tdb_add_off_t + +size_t tdb_mutex_size(struct tdb_context *tdb); +bool tdb_have_mutexes(struct tdb_context *tdb); +int tdb_mutex_init(struct tdb_context *tdb); +int tdb_mutex_mmap(struct tdb_context *tdb); +int tdb_mutex_munmap(struct tdb_context *tdb); +bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, + bool waitflag, int *pret); +bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, + int *pret); +int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags); +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb); +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb); +void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb); + #endif /* TDB_PRIVATE_H */ diff --git a/common/transaction.c b/common/transaction.c index a2c3bbd..caef0be 100644 --- a/common/transaction.c +++ b/common/transaction.c @@ -421,7 +421,8 @@ static int _tdb_transaction_start(struct tdb_context *tdb, enum tdb_lock_flags lockflags) { /* some sanity checks */ - if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { + if (tdb->read_only || (tdb->flags & (TDB_INTERNAL|TDB_MUTEX_LOCKING)) + || tdb->traverse_read) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); tdb->ecode = TDB_ERR_EINVAL; return -1; diff --git a/docs/README b/docs/README index fe0e258..be2224f 100644 --- a/docs/README +++ b/docs/README 
@@ -64,7 +64,7 @@ TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags, possible tdb_flags are: TDB_CLEAR_IF_FIRST - clear database if we are the only one with it open - TDB_INTERNAL - don't use a file, instaed store the data in + TDB_INTERNAL - don't use a file, instead store the data in memory. The filename is ignored in this case. TDB_NOLOCK - don't do any locking TDB_NOMMAP - don't use mmap diff --git a/docs/mutex.txt b/docs/mutex.txt new file mode 100644 index 0000000..7625662 --- /dev/null +++ b/docs/mutex.txt @@ -0,0 +1,136 @@ +Tdb is a hashtable database with multiple concurrent writer and external +record lock support. For speed reasons, wherever possible tdb uses a shared +memory mapped area for data access. In its currently released form, it uses +fcntl byte-range locks to coordinate access to the data itself. + +The tdb data is organized as a hashtable. Hash collisions are dealt with by +forming a linked list of records that share a hash value. The individual +linked lists are protected across processes with 1-byte fcntl locks on the +starting pointer of the linked list representing a hash value. + +The external locking API of tdb allows to lock individual records. Instead of +really locking individual records, the tdb API locks a complete linked list +with a fcntl lock. + +The external locking API of tdb also allows to lock the complete database, and +ctdb uses this facility to freeze databases during a recovery. While the +so-called allrecord lock is held, all linked lists and all individual records +are frozen altogether. Tdb achieves this by locking the complete file range +with a single fcntl lock. Individual 1-byte locks for the linked lists +conflict with this. Access to records is prevented by the one large fcntl byte +range lock. + +Fcntl locks have been chosen for tdb for two reasons: First they are portable +across all current unixes. Secondly they provide auto-cleanup. 
If a process +dies while holding a fcntl lock, the lock is given up as if it was explicitly +unlocked. Thus fcntl locks provide a very robust locking scheme, if a process +dies for any reason the database will not stay blocked until reboot. This +robustness is very important for long-running services, a reboot is not an +option for most users of tdb. + +Unfortunately, during stress testing, fcntl locks have turned out to be a major +problem for performance. The particular problem that was seen happens when +ctdb on a busy server does a recovery. A recovery means that ctdb has to +freeze all tdb databases for some time, usually a few seconds. This is done +with the allrecord lock. During the recovery phase on a busy server many smbd +processes try to access the tdb file with blocking fcntl calls. The specific +test in question easily reproduces 7,000 processes piling up waiting for +1-byte fcntl locks. When ctdb is done with the recovery, it gives up the +allrecord lock, covering the whole file range. All 7,000 processes waiting for +1-byte fcntl locks are woken up, trying to acquire their lock. The special +implementation of fcntl locks in Linux (up to 2013-02-12 at least) protects +all fcntl lock operations with a single system-wide spinlock. If 7,000 processes are +waiting for the allrecord lock to be released, this leads to a thundering +herd condition: all CPUs are spinning on that single spinlock. + +Functionally the kernel is fine, eventually the thundering herd slows down and +every process correctly gets its share and locking range, but the performance +of the system while the herd is active is worse than expected. + +The thundering herd is only the worst case scenario for fcntl lock use. The +single spinlock for fcntl operations is also a performance penalty for normal +operations. In the cluster case, every read and write SMB request has to do +two fcntl calls to provide correct SMB mandatory locks. 
The single spinlock +is one source of serialization for the SMB read/write requests, limiting the +parallelism that can be achieved in a multi-core system. + +While trying to tune his servers, Ira Cooper, Samba Team member, found fcntl +locks to be a problem on Solaris as well. Ira pointed out that there is a +potential alternative locking mechanism that might be more scalable: Process +shared robust mutexes, as defined by Posix 2008 for example via + +http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setpshared.html +http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setrobust.html + +Pthread mutexes provide one of the core mechanisms in posix threads to protect +in-process data structures from concurrent access by multiple threads. In the +Linux implementation, a pthread_mutex_t is represented by a data structure in +user space that requires no kernel calls in the uncontended case for locking +and unlocking. Locking and unlocking in the uncontended case is implemented +purely in user space with atomic CPU instructions and thus is very fast. + +The setpshared functions indicate to the kernel that the mutex is about to be +shared between processes in a common shared memory area. + +The process shared posix mutexes have the potential to replace fcntl locking +to coordinate mmap access for tdbs. However, they are missing the critical +auto-cleanup property that fcntl provides when a process dies. A process that +dies hard while holding a shared mutex has no chance to clean up the protected +data structures and unlock the shared mutex. Thus with a pure process shared +mutex the mutex will remain locked forever until the data structures are +re-initialized from scratch. + +With the robust mutexes defined by Posix the process shared mutexes have been +extended with a limited auto-cleanup property. 
If a mutex has been declared +robust, when a process exits while holding that mutex, the next process trying +to lock the mutex will get the special error message EOWNERDEAD. This informs +the caller that the data structures the mutex protects are potentially corrupt +and need to be cleaned up. + +The error message EOWNERDEAD when trying to lock a mutex is an extension over +the fcntl functionality. A process that does a blocking fcntl lock call is not +informed about whether the lock was explicitly freed by a process still alive +or due to an unplanned process exit. At the time of this writing (February +2013), at least Linux and OpenSolaris also implement the robustness feature of +process-shared mutexes. + +Converting the tdb locking mechanism from fcntl to mutexes has to take care of +both types of locks that are used on tdb files. + +The easy part is to use mutexes to replace the 1-byte linked list locks +covering the individual hashes. Those can be represented by a mutex each. + +Covering the allrecord lock is more difficult. The allrecord lock uses a fcntl +lock spanning all hash list locks simultaneously. This basic functionality is +not easily possible with mutexes. A mutex carries 1 bit of information, a +fcntl lock can carry an arbitrary amount of information. + +In order to support the allrecord lock, we have an allrecord_lock variable +protected by an allrecord_mutex. The coordination between the allrecord lock +and the chainlocks works like this: + +- Getting a chain lock works like this: + + 1. get chain mutex + 2. return success if allrecord_lock is F_UNLCK (not locked) + 3. return success if allrecord_lock is F_RDLCK (locked readonly) + and we only need a read lock. + 4. release chain mutex + 5. wait for allrecord_mutex + 6. unlock allrecord_mutex + 7. goto 1. + +- Getting the allrecord lock: + + 1. get the allrecord mutex + 2. return error if allrecord_lock is not F_UNLCK (it's locked) + 3. set allrecord_lock to the desired value. + 4. 
in a loop: lock(blocking) / unlock each chain mutex. + 5. return success. + +- allrecord lock upgrade: + + 1. check we already have the allrecord lock with F_RDLCK. + 3. set allrecord_lock to F_WRLCK + 4. in a loop: lock(blocking) / unlock each chain mutex. + 5. return success. diff --git a/include/tdb.h b/include/tdb.h index e371e33..5ea5e60 100644 --- a/include/tdb.h +++ b/include/tdb.h @@ -80,6 +80,9 @@ extern "C" { #define TDB_ALLOW_NESTING 512 /** Allow transactions to nest */ #define TDB_DISALLOW_NESTING 1024 /** Disallow transactions to nest */ #define TDB_INCOMPATIBLE_HASH 2048 /** Better hashing: can't be opened by tdb < 1.2.6. */ +#define TDB_MUTEX_LOCKING 4096 /** optimized locking using robust mutexes if supported, + only with tdb >= 1.3.0 and TDB_CLEAR_IF_FIRST + after checking tdb_runtime_check_for_robust_mutexes() */ /** The tdb error codes */ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK, @@ -132,7 +135,7 @@ struct tdb_logging_context { * @param[in] tdb_flags The flags to use to open the db:\n\n * TDB_CLEAR_IF_FIRST - Clear database if we are the * only one with it open\n - * TDB_INTERNAL - Don't use a file, instaed store the + * TDB_INTERNAL - Don't use a file, instead store the * data in memory. The filename is * ignored in this case.\n * TDB_NOLOCK - Don't do any locking\n @@ -143,6 +146,11 @@ struct tdb_logging_context { * default 5.\n * TDB_ALLOW_NESTING - Allow transactions to nest.\n * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n + * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n + * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, + * can't be opened by tdb < 1.3.0. + * Only valid in combination with TDB_CLEAR_IF_FIRST + * after checking tdb_runtime_check_for_robust_mutexes()\n * * @param[in] open_flags Flags for the open(2) function. 
* @@ -168,7 +176,7 @@ struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags, * @param[in] tdb_flags The flags to use to open the db:\n\n * TDB_CLEAR_IF_FIRST - Clear database if we are the * only one with it open\n - * TDB_INTERNAL - Don't use a file, instaed store the + * TDB_INTERNAL - Don't use a file, instead store the * data in memory. The filename is * ignored in this case.\n * TDB_NOLOCK - Don't do any locking\n @@ -179,6 +187,11 @@ struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags, * default 5.\n * TDB_ALLOW_NESTING - Allow transactions to nest.\n * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n + * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n + * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, + * can't be opened by tdb < 1.3.0. + * Only valid in combination with TDB_CLEAR_IF_FIRST + * after checking tdb_runtime_check_for_robust_mutexes()\n * * @param[in] open_flags Flags for the open(2) function. * @@ -402,7 +415,7 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key); /** * @brief Traverse the entire database. * - * While travering the function fn(tdb, key, data, state) is called on each + * While traversing the function fn(tdb, key, data, state) is called on each * element. If fn is NULL then it is not called. A non-zero return value from * fn() indicates that the traversal should stop. Traversal callbacks may not * start transactions. @@ -777,7 +790,7 @@ void tdb_enable_seqnum(struct tdb_context *tdb); * @brief Increment the tdb sequence number. * * This only works if the tdb has been opened using the TDB_SEQNUM flag or - * enabled useing tdb_enable_seqnum(). + * enabled using tdb_enable_seqnum(). * * @param[in] tdb The database to increment the sequence number. 
* @@ -842,6 +855,27 @@ int tdb_rescue(struct tdb_context *tdb, void (*walk) (TDB_DATA key, TDB_DATA data, void *private_data), void *private_data); +/** + * @brief Check if support for TDB_MUTEX_LOCKING is available at runtime. + * + * On some systems the API for pthread_mutexattr_setrobust() is not available. + * On other systems there are some bugs in the interaction between glibc and + * the linux kernel. + * + * This function provides a runtime check if robust mutexes are really + * available. + * + * This needs to be called and return true before TDB_MUTEX_LOCKING + * can be used at runtime. + * + * @note This calls fork(), but the SIGCHILD handling should be transparent. + * + * @return true if supported, false otherwise. + * + * @see TDB_MUTEX_LOCKING + */ +bool tdb_runtime_check_for_robust_mutexes(void); + /* @} ******************************************************************/ /* Low level locking functions: use with care */ diff --git a/lib/replace/cwrap.c b/lib/replace/cwrap.c new file mode 100644 index 0000000..adc5c1e --- /dev/null +++ b/lib/replace/cwrap.c @@ -0,0 +1,46 @@ +/* + * Unix SMB/CIFS implementation. + * + * Replaceable functions by cwrap + * + * Copyright (c) 2014 Andreas Schneider <asn@samba.org> + * + * ** NOTE! The following LGPL license applies to the replace + * ** library. This does NOT imply that all of Samba is released + * ** under the LGPL + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "replace.h" + +bool nss_wrapper_enabled(void) +{ + return false; +} + +bool nss_wrapper_hosts_enabled(void) +{ + return false; +} + +bool socket_wrapper_enabled(void) +{ + return false; +} + +bool uid_wrapper_enabled(void) +{ + return false; +} diff --git a/lib/replace/getaddrinfo.c b/lib/replace/getaddrinfo.c index c5cd52b..8440d8e 100644 --- a/lib/replace/getaddrinfo.c +++ b/lib/replace/getaddrinfo.c @@ -321,12 +321,8 @@ void rep_freeaddrinfo(struct addrinfo *res) for (;res; res = next) { next = res->ai_next; - if (res->ai_canonname) { - free(res->ai_canonname); - } - if (res->ai_addr) { - free(res->ai_addr); - } + free(res->ai_canonname); + free(res->ai_addr); free(res); } } diff --git a/lib/replace/getifaddrs.c b/lib/replace/getifaddrs.c index 8da022f..c2d20f8 100644 --- a/lib/replace/getifaddrs.c +++ b/lib/replace/getifaddrs.c @@ -23,8 +23,6 @@ License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#define SOCKET_WRAPPER_NOT_REPLACE - #include "replace.h" #include "system/network.h" @@ -113,11 +111,23 @@ int rep_getifaddrs(struct ifaddrs **ifap) for (i=n-1; i>=0; i--) { if (ioctl(fd, SIOCGIFFLAGS, &ifr[i]) == -1) { freeifaddrs(*ifap); + close(fd); return -1; } curif = calloc(1, sizeof(struct ifaddrs)); + if (curif == NULL) { + freeifaddrs(*ifap); + close(fd); + return -1; + } curif->ifa_name = strdup(ifr[i].ifr_name); + if (curif->ifa_name == NULL) { + free(curif); + freeifaddrs(*ifap); + close(fd); + return -1; + } curif->ifa_flags = ifr[i].ifr_flags; curif->ifa_dstaddr = NULL; curif->ifa_data = NULL; @@ -126,11 +136,28 @@ int rep_getifaddrs(struct ifaddrs **ifap) curif->ifa_addr = NULL; if (ioctl(fd, SIOCGIFADDR, &ifr[i]) != -1) { curif->ifa_addr = sockaddr_dup(&ifr[i].ifr_addr); + if (curif->ifa_addr == NULL) { + free(curif->ifa_name); + free(curif); + freeifaddrs(*ifap); + close(fd); + return -1; + } } curif->ifa_netmask = NULL; if (ioctl(fd, SIOCGIFNETMASK, &ifr[i]) != -1) { curif->ifa_netmask = sockaddr_dup(&ifr[i].ifr_addr); + if (curif->ifa_netmask == NULL) { + if (curif->ifa_addr != NULL) { + free(curif->ifa_addr); + } + free(curif->ifa_name); + free(curif); + freeifaddrs(*ifap); + close(fd); + return -1; + } } if (lastif == NULL) { diff --git a/lib/replace/repdir_getdents.c b/lib/replace/repdir_getdents.c deleted file mode 100644 index afc634a..0000000 --- a/lib/replace/repdir_getdents.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - Unix SMB/CIFS implementation. - - Copyright (C) Andrew Tridgell 2005 - - ** NOTE! The following LGPL license applies to the replace - ** library. This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. 
- - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see <http://www.gnu.org/licenses/>. -*/ -/* - a replacement for opendir/readdir/telldir/seekdir/closedir for BSD systems - - This is needed because the existing directory handling in FreeBSD - and OpenBSD (and possibly NetBSD) doesn't correctly handle unlink() - on files in a directory where telldir() has been used. On a block - boundary it will occasionally miss a file when seekdir() is used to - return to a position previously recorded with telldir(). - - This also fixes a severe performance and memory usage problem with - telldir() on BSD systems. Each call to telldir() in BSD adds an - entry to a linked list, and those entries are cleaned up on - closedir(). This means with a large directory closedir() can take an - arbitrary amount of time, causing network timeouts as millions of - telldir() entries are freed - - Note! This replacement code is not portable. It relies on getdents() - always leaving the file descriptor at a seek offset that is a - multiple of DIR_BUF_SIZE. If the code detects that this doesn't - happen then it will abort(). It also does not handle directories - with offsets larger than can be stored in a long, - - This code is available under other free software licenses as - well. Contact the author. 
-*/ - -#include <stdlib.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/types.h> -#include <errno.h> -#include <fcntl.h> -#include <dirent.h> - -#define DIR_BUF_BITS 9 -#define DIR_BUF_SIZE (1<<DIR_BUF_BITS) - -struct dir_buf { - int fd; - int nbytes, ofs; - off_t seekpos; - char buf[DIR_BUF_SIZE]; -}; - -DIR *opendir(const char *dname) -{ - struct dir_buf *d; - struct stat sb; - d = malloc(sizeof(*d)); - if (d == NULL) { - errno = ENOMEM; - return NULL; - } - d->fd = open(dname, O_RDONLY); - if (d->fd == -1) { - free(d); - return NULL; - } - if (fstat(d->fd, &sb) < 0) { - close(d->fd); - free(d); - return NULL; - } - if (!S_ISDIR(sb.st_mode)) { - close(d->fd); - free(d); - errno = ENOTDIR; - return NULL; - } - d->ofs = 0; - d->seekpos = 0; - d->nbytes = 0; - return (DIR *)d; -} - -struct dirent *readdir(DIR *dir) -{ - struct dir_buf *d = (struct dir_buf *)dir; - struct dirent *de; - - if (d->ofs >= d->nbytes) { - d->seekpos = lseek(d->fd, 0, SEEK_CUR); - d->nbytes = getdents(d->fd, d->buf, DIR_BUF_SIZE); - d->ofs = 0; - } - if (d->ofs >= d->nbytes) { - return NULL; - } - de = (struct dirent *)&d->buf[d->ofs]; - d->ofs += de->d_reclen; - return de; -} - -long telldir(DIR *dir) -{ - struct dir_buf *d = (struct dir_buf *)dir; - if (d->ofs >= d->nbytes) { - d->seekpos = lseek(d->fd, 0, SEEK_CUR); - d->ofs = 0; - d->nbytes = 0; - } - /* this relies on seekpos always being a multiple of - DIR_BUF_SIZE. Is that always true on BSD systems? 
*/ - if (d->seekpos & (DIR_BUF_SIZE-1)) { - abort(); - } - return d->seekpos + d->ofs; -} - -void seekdir(DIR *dir, long ofs) -{ - struct dir_buf *d = (struct dir_buf *)dir; - d->seekpos = lseek(d->fd, ofs & ~(DIR_BUF_SIZE-1), SEEK_SET); - d->nbytes = getdents(d->fd, d->buf, DIR_BUF_SIZE); - d->ofs = 0; - while (d->ofs < (ofs & (DIR_BUF_SIZE-1))) { - if (readdir(dir) == NULL) break; - } -} - -void rewinddir(DIR *dir) -{ - seekdir(dir, 0); -} - -int closedir(DIR *dir) -{ - struct dir_buf *d = (struct dir_buf *)dir; - int r = close(d->fd); - if (r != 0) { - return r; - } - free(d); - return 0; -} - -#ifndef dirfd -/* darn, this is a macro on some systems. */ -int dirfd(DIR *dir) -{ - struct dir_buf *d = (struct dir_buf *)dir; - return d->fd; -} -#endif diff --git a/lib/replace/repdir_getdirentries.c b/lib/replace/repdir_getdirentries.c deleted file mode 100644 index 197e593..0000000 --- a/lib/replace/repdir_getdirentries.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - Unix SMB/CIFS implementation. - - Copyright (C) Andrew Tridgell 2005 - - ** NOTE! The following LGPL license applies to the replace - ** library. This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see <http://www.gnu.org/licenses/>. 
-*/ -/* - a replacement for opendir/readdir/telldir/seekdir/closedir for BSD - systems using getdirentries - - This is needed because the existing directory handling in FreeBSD - and OpenBSD (and possibly NetBSD) doesn't correctly handle unlink() - on files in a directory where telldir() has been used. On a block - boundary it will occasionally miss a file when seekdir() is used to - return to a position previously recorded with telldir(). - - This also fixes a severe performance and memory usage problem with - telldir() on BSD systems. Each call to telldir() in BSD adds an - entry to a linked list, and those entries are cleaned up on - closedir(). This means with a large directory closedir() can take an - arbitrary amount of time, causing network timeouts as millions of - telldir() entries are freed - - Note! This replacement code is not portable. It relies on - getdirentries() always leaving the file descriptor at a seek offset - that is a multiple of DIR_BUF_SIZE. If the code detects that this - doesn't happen then it will abort(). It also does not handle - directories with offsets larger than can be stored in a long, - - This code is available under other free software licenses as - well. Contact the author. 
-*/ - -#include "replace.h" -#include <stdlib.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/types.h> -#include <errno.h> -#include <fcntl.h> -#include <dirent.h> - -#define DIR_BUF_BITS 9 -#define DIR_BUF_SIZE (1<<DIR_BUF_BITS) - -struct dir_buf { - int fd; - int nbytes, ofs; - off_t seekpos; - char buf[DIR_BUF_SIZE]; -}; - -DIR *opendir(const char *dname) -{ - struct dir_buf *d; - struct stat sb; - d = malloc(sizeof(*d)); - if (d == NULL) { - errno = ENOMEM; - return NULL; - } - d->fd = open(dname, O_RDONLY); - if (d->fd == -1) { - free(d); - return NULL; - } - if (fstat(d->fd, &sb) < 0) { - close(d->fd); - free(d); - return NULL; - } - if (!S_ISDIR(sb.st_mode)) { - close(d->fd); - free(d); - errno = ENOTDIR; - return NULL; - } - d->ofs = 0; - d->seekpos = 0; - d->nbytes = 0; - return (DIR *)d; -} - -struct dirent *readdir(DIR *dir) -{ - struct dir_buf *d = (struct dir_buf *)dir; - struct dirent *de; - - if (d->ofs >= d->nbytes) { - long pos; - d->nbytes = getdirentries(d->fd, d->buf, DIR_BUF_SIZE, &pos); - d->seekpos = pos; - d->ofs = 0; - } - if (d->ofs >= d->nbytes) { - return NULL; - } - de = (struct dirent *)&d->buf[d->ofs]; - d->ofs += de->d_reclen; - return de; -} - -#ifdef TELLDIR_TAKES_CONST_DIR -long telldir(const DIR *dir) -#else -long telldir(DIR *dir) -#endif -{ - struct dir_buf *d = (struct dir_buf *)dir; - if (d->ofs >= d->nbytes) { - d->seekpos = lseek(d->fd, 0, SEEK_CUR); - d->ofs = 0; - d->nbytes = 0; - } - /* this relies on seekpos always being a multiple of - DIR_BUF_SIZE. Is that always true on BSD systems? 
*/ - if (d->seekpos & (DIR_BUF_SIZE-1)) { - abort(); - } - return d->seekpos + d->ofs; -} - -#ifdef SEEKDIR_RETURNS_INT -int seekdir(DIR *dir, long ofs) -#else -void seekdir(DIR *dir, long ofs) -#endif -{ - struct dir_buf *d = (struct dir_buf *)dir; - long pos; - d->seekpos = lseek(d->fd, ofs & ~(DIR_BUF_SIZE-1), SEEK_SET); - d->nbytes = getdirentries(d->fd, d->buf, DIR_BUF_SIZE, &pos); - d->ofs = 0; - while (d->ofs < (ofs & (DIR_BUF_SIZE-1))) { - if (readdir(dir) == NULL) break; - } -#ifdef SEEKDIR_RETURNS_INT - return -1; -#endif -} - -void rewinddir(DIR *dir) -{ - seekdir(dir, 0); -} - -int closedir(DIR *dir) -{ - struct dir_buf *d = (struct dir_buf *)dir; - int r = close(d->fd); - if (r != 0) { - return r; - } - free(d); - return 0; -} - -#ifndef dirfd -/* darn, this is a macro on some systems. */ -int dirfd(DIR *dir) -{ - struct dir_buf *d = (struct dir_buf *)dir; - return d->fd; -} -#endif - - diff --git a/lib/replace/replace.c b/lib/replace/replace.c index 37edb31..2a9ca3e 100644 --- a/lib/replace/replace.c +++ b/lib/replace/replace.c @@ -84,7 +84,7 @@ size_t rep_strlcpy(char *d, const char *s, size_t bufsize) be one more than the maximum resulting string length */ size_t rep_strlcat(char *d, const char *s, size_t bufsize) { - size_t len1 = strlen(d); + size_t len1 = strnlen(d, bufsize); size_t len2 = strlen(s); size_t ret = len1 + len2; diff --git a/lib/replace/replace.h b/lib/replace/replace.h index c0b7997..f940898 100644 --- a/lib/replace/replace.h +++ b/lib/replace/replace.h @@ -899,4 +899,9 @@ int usleep(useconds_t); void rep_setproctitle(const char *fmt, ...) 
PRINTF_ATTRIBUTE(1, 2); #endif +bool nss_wrapper_enabled(void); +bool nss_wrapper_hosts_enabled(void); +bool socket_wrapper_enabled(void); +bool uid_wrapper_enabled(void); + #endif /* _LIBREPLACE_REPLACE_H */ diff --git a/lib/replace/system/filesys.h b/lib/replace/system/filesys.h index c8ac2b4..b234f10 100644 --- a/lib/replace/system/filesys.h +++ b/lib/replace/system/filesys.h @@ -206,17 +206,6 @@ #define mkdir(d,m) _mkdir(d) #endif -#ifdef UID_WRAPPER -# ifndef UID_WRAPPER_DISABLE -# ifndef UID_WRAPPER_NOT_REPLACE -# define UID_WRAPPER_REPLACE -# endif /* UID_WRAPPER_NOT_REPLACE */ -# include "../uid_wrapper/uid_wrapper.h" -# endif /* UID_WRAPPER_DISABLE */ -#else /* UID_WRAPPER */ -# define uwrap_enabled() 0 -#endif /* UID_WRAPPER */ - /* this allows us to use a uniform error handling for our xattr wrappers diff --git a/lib/replace/system/network.h b/lib/replace/system/network.h index 7cb8d7b..273cb71 100644 --- a/lib/replace/system/network.h +++ b/lib/replace/system/network.h @@ -367,24 +367,4 @@ struct addrinfo { #endif /* HAVE_LINUX_IPV6_V6ONLY_26 */ #endif /* HAVE_IPV6 */ -#ifdef SOCKET_WRAPPER -#ifndef SOCKET_WRAPPER_DISABLE -#ifndef SOCKET_WRAPPER_NOT_REPLACE -#define SOCKET_WRAPPER_REPLACE -#endif /* SOCKET_WRAPPER_NOT_REPLACE */ -#include "../socket_wrapper/socket_wrapper.h" -#endif /* SOCKET_WRAPPER_DISABLE */ -#endif /* SOCKET_WRAPPER */ - -#ifdef UID_WRAPPER -# ifndef UID_WRAPPER_DISABLE -# ifndef UID_WRAPPER_NOT_REPLACE -# define UID_WRAPPER_REPLACE -# endif /* UID_WRAPPER_NOT_REPLACE */ -# include "../uid_wrapper/uid_wrapper.h" -# endif /* UID_WRAPPER_DISABLE */ -#else /* UID_WRAPPER */ -# define uwrap_enabled() 0 -#endif /* UID_WRAPPER */ - #endif diff --git a/lib/replace/system/passwd.h b/lib/replace/system/passwd.h index 8257e06..f8b9a77 100644 --- a/lib/replace/system/passwd.h +++ b/lib/replace/system/passwd.h @@ -27,9 +27,6 @@ */ -/* this needs to be included before nss_wrapper.h on some systems */ -#include <unistd.h> - #ifdef HAVE_PWD_H 
#include <pwd.h> #endif @@ -77,9 +74,6 @@ #define PASSWORD_LENGTH 8 #endif -#if defined(HAVE_PUTPRPWNAM) && defined(AUTH_CLEARTEXT_SEG_CHARS) -#define OSF1_ENH_SEC 1 -#endif #ifndef ALLOW_CHANGE_PASSWORD #if (defined(HAVE_TERMIOS_H) && defined(HAVE_DUP2) && defined(HAVE_SETSID)) @@ -91,24 +85,4 @@ #define ULTRIX_AUTH 1 #endif -#ifdef NSS_WRAPPER -#ifndef NSS_WRAPPER_DISABLE -#ifndef NSS_WRAPPER_NOT_REPLACE -#define NSS_WRAPPER_REPLACE -#endif /* NSS_WRAPPER_NOT_REPLACE */ -#include "../nss_wrapper/nss_wrapper.h" -#endif /* NSS_WRAPPER_DISABLE */ -#endif /* NSS_WRAPPER */ - -#ifdef UID_WRAPPER -# ifndef UID_WRAPPER_DISABLE -# ifndef UID_WRAPPER_NOT_REPLACE -# define UID_WRAPPER_REPLACE -# endif /* UID_WRAPPER_NOT_REPLACE */ -# include "../uid_wrapper/uid_wrapper.h" -# endif /* UID_WRAPPER_DISABLE */ -#else /* UID_WRAPPER */ -# define uwrap_enabled() 0 -#endif /* UID_WRAPPER */ - #endif diff --git a/lib/replace/system/threads.h b/lib/replace/system/threads.h new file mode 100644 index 0000000..25d3502 --- /dev/null +++ b/lib/replace/system/threads.h @@ -0,0 +1,48 @@ +#ifndef _system_threads_h +#define _system_threads_h +/* + Unix SMB/CIFS implementation. + + macros to go along with the lib/replace/ portability layer code + + Copyright (C) Volker Lendecke 2012 + + ** NOTE! The following LGPL license applies to the replace + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <pthread.h> + +#if defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP) && \ + !defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST) + +#define pthread_mutexattr_setrobust pthread_mutexattr_setrobust_np + +/* + * We assume that PTHREAD_MUTEX_ROBUST_NP goes along with + * pthread_mutexattr_setrobust_np() + */ +#define PTHREAD_MUTEX_ROBUST PTHREAD_MUTEX_ROBUST_NP + +#endif + +#if defined(HAVE_PTHREAD_MUTEX_CONSISTENT_NP) && \ + !defined(HAVE_PTHREAD_MUTEX_CONSISTENT) +#define pthread_mutex_consistent pthread_mutex_consistent_np +#endif + +#endif diff --git a/lib/replace/wscript b/lib/replace/wscript index 2117f56..a26de0f 100644 --- a/lib/replace/wscript +++ b/lib/replace/wscript @@ -89,9 +89,13 @@ struct foo bar = { .y = 'X', .x = 1 }; sys/sockio.h sys/un.h''', together=True) conf.CHECK_HEADERS('sys/uio.h ifaddrs.h direct.h dirent.h') conf.CHECK_HEADERS('windows.h winsock2.h ws2tcpip.h') - conf.CHECK_HEADERS('libintl.h errno.h') + conf.CHECK_HEADERS('errno.h') conf.CHECK_HEADERS('gcrypt.h getopt.h iconv.h') - conf.CHECK_HEADERS('sys/inotify.h memory.h nss.h sasl/sasl.h') + conf.CHECK_HEADERS('memory.h nss.h sasl/sasl.h') + + conf.CHECK_FUNCS_IN('inotify_init', 'inotify', checklibc=True, + headers='sys/inotify.h') + conf.CHECK_HEADERS('security/pam_appl.h zlib.h asm/unistd.h') conf.CHECK_HEADERS('aio.h sys/unistd.h rpc/rpc.h rpc/nettype.h alloca.h float.h') @@ -363,19 +367,124 @@ removeea setea headers='netinet/in.h arpa/nameser.h resolv.h') - if not conf.CHECK_FUNCS_IN('gettext', 'intl', checklibc=True, headers='libintl.h'): - # Some hosts need lib iconv for linking with lib intl - # So we try with flags just in case it helps. 
- oldflags = conf.env['LDFLAGS_INTL'] - conf.env['LDFLAGS_INTL'] = "-liconv" - if not conf.CHECK_LIB('intl'): - conf.env['LDFLAGS_INTL'] = oldflags + # try to find libintl (if --without-gettext is not given) + conf.env.intl_libs='' + if not Options.options.disable_gettext: + # any extra path given to look at? + if not Options.options.gettext_location == 'None': + conf.env['CFLAGS'].extend(["-I%s" % Options.options.gettext_location]); + conf.env['LDFLAGS'].extend(["-L%s" % Options.options.gettext_location]); + else: + conf.env['CFLAGS'].extend(["-I/usr/local"]); + conf.env['LDFLAGS'].extend(["-L/usr/local"]); + conf.CHECK_HEADERS('libintl.h') + conf.CHECK_LIB('intl') + conf.CHECK_DECLS('dgettext gettext bindtextdomain textdomain bind_textdomain_codeset', headers="libintl.h") + # *textdomain functions are not strictly necessary + conf.CHECK_FUNCS_IN('bindtextdomain textdomain bind_textdomain_codeset', + '', checklibc=True, headers='libintl.h') + # gettext and dgettext must exist + # on some systems (the ones with glibc, those are in libc) + if conf.CHECK_FUNCS_IN('dgettext gettext', '', checklibc=True, headers='libintl.h'): + # save for dependency definitions + conf.env.intl_libs='' + # others (e.g. FreeBSD) have seperate libintl + elif conf.CHECK_FUNCS_IN('dgettext gettext', 'intl', checklibc=False, headers='libintl.h'): + # save for dependency definitions + conf.env.intl_libs='intl' + # recheck with libintl + conf.CHECK_FUNCS_IN('bindtextdomain textdomain bind_textdomain_codeset', + 'intl', checklibc=False, headers='libintl.h') else: - conf.CHECK_FUNCS_IN('gettext', 'intl', checklibc=True, headers='libintl.h') + # Some hosts need lib iconv for linking with lib intl + # So we try with flags just in case it helps. 
+ oldflags = list(conf.env['EXTRA_LDFLAGS']); + conf.env['EXTRA_LDFLAGS'].extend(["-liconv"]) + conf.CHECK_FUNCS_IN('dgettext gettext bindtextdomain textdomain bind_textdomain_codeset', + 'intl', checklibc=False, headers='libintl.h') + conf.env['EXTRA_LDFLAGS'] = oldflags + if conf.env['HAVE_GETTEXT'] and conf.env['HAVE_DGETTEXT']: + # save for dependency definitions + conf.env.intl_libs='iconv intl' + + # did we find both prototypes and a library to link against? + # if not, unset the detected values (see Bug #9911) + if not (conf.env['HAVE_GETTEXT'] and conf.env['HAVE_DECL_GETTEXT']): + conf.undefine('HAVE_GETTEXT') + conf.undefine('HAVE_DECL_GETTEXT') + if not (conf.env['HAVE_DGETTEXT'] and conf.env['HAVE_DECL_DGETTEXT']): + conf.undefine('HAVE_DGETTEXT') + conf.undefine('HAVE_DECL_DGETTEXT') + + # did the user insist on gettext (--with-gettext)? + if Options.options.gettext_location != 'None' and (not conf.env['HAVE_GETTEXT'] or not conf.env['HAVE_DGETTEXT']): + conf.fatal('library gettext not found at specified location') - conf.CHECK_FUNCS_IN('dgettext gettext', 'intl', headers='libintl.h') conf.CHECK_FUNCS_IN('pthread_create', 'pthread', checklibc=True, headers='pthread.h') + PTHREAD_CFLAGS='error' + PTHREAD_LDFLAGS='error' + + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthread'): + PTHREAD_CFLAGS='-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS' + PTHREAD_LDFLAGS='-lpthread' + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthreads'): + PTHREAD_CFLAGS='-D_THREAD_SAFE' + PTHREAD_LDFLAGS='-lpthreads' + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS_IN('pthread_attr_init', 'c_r'): + PTHREAD_CFLAGS='-D_THREAD_SAFE -pthread' + PTHREAD_LDFLAGS='-pthread' + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS('pthread_attr_init'): + PTHREAD_CFLAGS='-D_REENTRANT' + PTHREAD_LDFLAGS='-lpthread' + # especially for HP-UX, where the CHECK_FUNC macro fails to test for + # pthread_attr_init. 
On pthread_mutex_lock it works there... + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS_IN('pthread_mutex_lock', 'pthread'): + PTHREAD_CFLAGS='-D_REENTRANT' + PTHREAD_LDFLAGS='-lpthread' + + if PTHREAD_CFLAGS != 'error' and PTHREAD_LDFLAGS != 'error': + if conf.CONFIG_SET('replace_add_global_pthread'): + conf.ADD_CFLAGS(PTHREAD_CFLAGS) + conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) + conf.CHECK_HEADERS('pthread.h') + conf.DEFINE('HAVE_PTHREAD', '1') + + if conf.CONFIG_SET('HAVE_PTHREAD'): + + conf.CHECK_DECLS('pthread_mutexattr_setrobust', headers='pthread.h') + if not conf.CONFIG_SET('HAVE_DECL_PTHREAD_MUTEXATTR_SETROBUST'): + conf.CHECK_DECLS('pthread_mutexattr_setrobust_np', + headers='pthread.h') + + conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust', 'pthread', + checklibc=True, headers='pthread.h') + if not conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST'): + conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', + checklibc=True, headers='pthread.h') + + conf.CHECK_DECLS('pthread_mutex_consistent', headers='pthread.h') + if not conf.CONFIG_SET('HAVE_DECL_PTHREAD_MUTEX_CONSISTENT'): + conf.CHECK_DECLS('pthread_mutex_consistent_np', + headers='pthread.h') + + conf.CHECK_FUNCS_IN('pthread_mutex_consistent', 'pthread', + checklibc=True, headers='pthread.h') + if not conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT'): + conf.CHECK_FUNCS_IN('pthread_mutex_consistent_np', 'pthread', + checklibc=True, headers='pthread.h') + + if ((conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or + conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')) and + (conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT') or + conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT_NP'))): + conf.DEFINE('HAVE_ROBUST_MUTEXES', 1) + conf.CHECK_FUNCS_IN('crypt', 'crypt', checklibc=True) conf.CHECK_VARIABLE('rl_event_hook', define='HAVE_DECL_RL_EVENT_HOOK', always=True, @@ -515,7 +624,6 @@ removeea setea #define %s 1 #define NO_CONFIG_H 1 #define AUTOCONF_TEST 1 - #define SOCKET_WRAPPER_NOT_REPLACE 
#include "replace.c" #include "inet_ntop.c" #include "snprintf.c" @@ -571,12 +679,13 @@ def build(bld): REPLACE_HOSTCC_SOURCE, use_hostcc=True, use_global_deps=False, - cflags='-DSOCKET_WRAPPER_DISABLE=1 -DNSS_WRAPPER_DISABLE=1 -DUID_WRAPPER_DISABLE=1 -D_SAMBA_HOSTCC_', + cflags='-D_SAMBA_HOSTCC_', group='compiler_libraries', deps = extra_libs ) REPLACE_SOURCE = REPLACE_HOSTCC_SOURCE + REPLACE_SOURCE += ' cwrap.c' if not bld.CONFIG_SET('HAVE_CRYPT'): REPLACE_SOURCE += ' crypt.c' if not bld.CONFIG_SET('HAVE_DLOPEN'): REPLACE_SOURCE += ' dlfcn.c' @@ -627,6 +736,8 @@ def build(bld): target='stdbool.h', enabled = not bld.CONFIG_SET('HAVE_STDBOOL_H')) + bld.SAMBA_SUBSYSTEM('samba_intl', source='', use_global_deps=False,deps=bld.env.intl_libs) + def dist(): '''makes a tarball for distribution''' samba_dist.dist() diff --git a/lib/replace/xattr.c b/lib/replace/xattr.c index a26ff67..ce52d1a 100644 --- a/lib/replace/xattr.c +++ b/lib/replace/xattr.c @@ -25,6 +25,7 @@ License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ +#define UID_WRAPPER_NOT_REPLACE #include "replace.h" #include "system/filesys.h" #include "system/dir.h" @@ -194,6 +195,10 @@ static ssize_t bsd_attr_list (int type, extattr_arg arg, char *list, size_t size char *buf; /* Iterate through extattr(2) namespaces */ for(t = 0; t < ARRAY_SIZE(extattr); t++) { + if (t != EXTATTR_NAMESPACE_USER && geteuid() != 0) { + /* ignore all but user namespace when we are not root, see bug 10247 */ + continue; + } switch(type) { #if defined(HAVE_EXTATTR_LIST_FILE) case 0: diff --git a/man/tdbbackup.8.xml b/man/tdbbackup.8.xml index c15cc14..30a658d 100644 --- a/man/tdbbackup.8.xml +++ b/man/tdbbackup.8.xml @@ -22,6 +22,7 @@ <arg choice="opt">-s suffix</arg> <arg choice="opt">-v</arg> <arg choice="opt">-h</arg> + <arg choice="opt">-l</arg> </cmdsynopsis> </refsynopsisdiv> @@ -54,7 +55,7 @@ <varlistentry> <term>-s suffix</term> <listitem><para> - The <command>-s</command> option allows the adminisistrator to specify a file + The <command>-s</command> option allows the administrator to specify a file backup extension. This way it is possible to keep a history of tdb backup files by using a new suffix for each backup. </para> </listitem> @@ -63,11 +64,22 @@ <varlistentry> <term>-v</term> <listitem><para> - The <command>-v</command> will check the database for damages (currupt data) + The <command>-v</command> will check the database for damages (corrupt data) which if detected causes the backup to be restored. </para></listitem> </varlistentry> + <varlistentry> + <term>-l</term> + <listitem><para> + This option disables any locking, by passing TDB_NOLOCK + to tdb_open_ex(). Only use this for database files which + are not used by any other process! And also only if it is otherwise not + possible to open the database, e.g. databases which were created with + mutex locking. 
+ </para></listitem> + </varlistentry> + </variablelist> </refsect1> diff --git a/man/tdbtool.8.xml b/man/tdbtool.8.xml index bc5c001..cedc7eb 100644 --- a/man/tdbtool.8.xml +++ b/man/tdbtool.8.xml @@ -24,6 +24,7 @@ <cmdsynopsis> <command>tdbtool</command> + <arg choice="opt">-l</arg> <arg choice="plain"> <replaceable>TDBFILE</replaceable> </arg> @@ -48,6 +49,26 @@ </refsect1> +<refsect1> + <title>OPTIONS</title> + + <variablelist> + + <varlistentry> + <term>-l</term> + <listitem><para> + This option disables any locking, by passing TDB_NOLOCK + to tdb_open_ex(). Only use this for database files which + are not used by any other process! And also only if it is otherwise not + possible to open the database, e.g. databases which were created with + mutex locking. + </para></listitem> + </varlistentry> + + </variablelist> +</refsect1> + + <refsect1> <title>COMMANDS</title> @@ -201,6 +222,14 @@ <varlistentry> <term> + <option>repack</option> + </term> + <listitem><para>Repack a database using a temporary file to remove fragmentation. + </para></listitem> + </varlistentry> + + <varlistentry> + <term> <option>quit</option> </term> <listitem><para>Exit <command>tdbtool</command>. 
@@ -220,7 +249,7 @@ <refsect1> <title>VERSION</title> - <para>This man page is correct for version 3.0.25 of the Samba suite.</para> + <para>This man page is correct for version 3.6 of the Samba suite.</para> </refsect1> <refsect1> @@ -28,10 +28,6 @@ #include "replace.h" #include "system/filesys.h" -#ifndef Py_RETURN_NONE -#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None -#endif - /* Include tdb headers */ #include <tdb.h> @@ -95,9 +91,10 @@ static PyObject *py_tdb_open(PyTypeObject *type, PyObject *args, PyObject *kwarg int hash_size = 0, tdb_flags = TDB_DEFAULT, flags = O_RDWR, mode = 0600; TDB_CONTEXT *ctx; PyTdbObject *ret; - const char *kwnames[] = { "name", "hash_size", "tdb_flags", "flags", "mode", NULL }; + const char *_kwnames[] = { "name", "hash_size", "tdb_flags", "flags", "mode", NULL }; + char **kwnames = discard_const_p(char *, _kwnames); - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siiii", (char **)kwnames, &name, &hash_size, &tdb_flags, &flags, &mode)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siiii", kwnames, &name, &hash_size, &tdb_flags, &flags, &mode)) return NULL; if (name == NULL) { @@ -552,13 +549,21 @@ static PyObject *obj_get_seqnum(PyTdbObject *self, void *closure) static PyGetSetDef tdb_object_getsetters[] = { - { (char *)"hash_size", (getter)obj_get_hash_size, NULL, NULL }, - { (char *)"map_size", (getter)obj_get_map_size, NULL, NULL }, - { (char *)"freelist_size", (getter)obj_get_freelist_size, NULL, NULL }, - { (char *)"flags", (getter)obj_get_flags, NULL, NULL }, - { (char *)"max_dead", NULL, (setter)obj_set_max_dead, NULL }, - { (char *)"filename", (getter)obj_get_filename, NULL, (char *)"The filename of this TDB file."}, - { (char *)"seqnum", (getter)obj_get_seqnum, NULL, NULL }, + { discard_const_p(char, "hash_size"), + (getter)obj_get_hash_size, NULL, NULL }, + { discard_const_p(char, "map_size"), + (getter)obj_get_map_size, NULL, NULL }, + { discard_const_p(char, "freelist_size"), + 
(getter)obj_get_freelist_size, NULL, NULL }, + { discard_const_p(char, "flags"), + (getter)obj_get_flags, NULL, NULL }, + { discard_const_p(char, "max_dead"), + NULL, (setter)obj_set_max_dead, NULL }, + { discard_const_p(char, "filename"), + (getter)obj_get_filename, NULL, + discard_const_p(char, "The filename of this TDB file.") }, + { discard_const_p(char, "seqnum"), + (getter)obj_get_seqnum, NULL, NULL }, { NULL } }; diff --git a/python/tests/simple.py b/python/tests/simple.py index 7e295a8..4751f9b 100644 --- a/python/tests/simple.py +++ b/python/tests/simple.py @@ -20,13 +20,8 @@ class OpenTdbTests(TestCase): class CloseTdbTests(TestCase): def test_double_close(self): - # No hash size in tdb2. - if tdb.__version__.startswith("2"): - self.tdb = tdb.Tdb(tempfile.mkstemp()[1], tdb.DEFAULT, - os.O_CREAT|os.O_RDWR) - else: - self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, - os.O_CREAT|os.O_RDWR) + self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, + os.O_CREAT|os.O_RDWR) self.assertNotEqual(None, self.tdb) # ensure that double close does not crash python @@ -52,12 +47,8 @@ class SimpleTdbTests(TestCase): def setUp(self): super(SimpleTdbTests, self).setUp() - if tdb.__version__.startswith("2"): - self.tdb = tdb.Tdb(tempfile.mkstemp()[1], tdb.DEFAULT, - os.O_CREAT|os.O_RDWR) - else: - self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, - os.O_CREAT|os.O_RDWR) + self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, + os.O_CREAT|os.O_RDWR) self.assertNotEqual(None, self.tdb) def tearDown(self): @@ -70,8 +61,7 @@ class SimpleTdbTests(TestCase): self.tdb.lock_all() def test_max_dead(self): - if not tdb.__version__.startswith("2"): - self.tdb.max_dead = 20 + self.tdb.max_dead = 20 def test_unlockall(self): self.tdb.lock_all() @@ -82,8 +72,7 @@ class SimpleTdbTests(TestCase): self.tdb.read_unlock_all() def test_reopen(self): - if not tdb.__version__.startswith("2"): - self.tdb.reopen() + self.tdb.reopen() def test_store(self): 
self.tdb.store("bar", "bla") @@ -91,15 +80,14 @@ class SimpleTdbTests(TestCase): def test_getitem(self): self.tdb["bar"] = "foo" - if not tdb.__version__.startswith("2"): - self.tdb.reopen() + self.tdb.reopen() self.assertEquals("foo", self.tdb["bar"]) def test_delete(self): self.tdb["bar"] = "foo" del self.tdb["bar"] self.assertRaises(KeyError, lambda: self.tdb["bar"]) - + def test_contains(self): self.tdb["bla"] = "bloe" self.assertTrue("bla" in self.tdb) @@ -108,16 +96,13 @@ class SimpleTdbTests(TestCase): self.assertRaises(KeyError, lambda: self.tdb["bla"]) def test_hash_size(self): - if not tdb.__version__.startswith("2"): - self.tdb.hash_size + self.tdb.hash_size def test_map_size(self): - if not tdb.__version__.startswith("2"): - self.tdb.map_size + self.tdb.map_size def test_freelist_size(self): - if not tdb.__version__.startswith("2"): - self.tdb.freelist_size + self.tdb.freelist_size def test_name(self): self.tdb.filename @@ -165,19 +150,17 @@ class SimpleTdbTests(TestCase): self.assertEquals(0, len(list(self.tdb))) def test_repack(self): - if not tdb.__version__.startswith("2"): - self.tdb["foo"] = "abc" - self.tdb["bar"] = "def" - del self.tdb["foo"] - self.tdb.repack() + self.tdb["foo"] = "abc" + self.tdb["bar"] = "def" + del self.tdb["foo"] + self.tdb.repack() def test_seqnum(self): - if not tdb.__version__.startswith("2"): - self.tdb.enable_seqnum() - seq1 = self.tdb.seqnum - self.tdb.increment_seqnum_nonblock() - seq2 = self.tdb.seqnum - self.assertEquals(seq2-seq1, 1) + self.tdb.enable_seqnum() + seq1 = self.tdb.seqnum + self.tdb.increment_seqnum_nonblock() + seq2 = self.tdb.seqnum + self.assertEquals(seq2-seq1, 1) def test_len(self): self.assertEquals(0, len(list(self.tdb))) @@ -185,12 +168,8 @@ class SimpleTdbTests(TestCase): self.assertEquals(1, len(list(self.tdb))) def test_add_flags(self): - if tdb.__version__.startswith("2"): - self.tdb.add_flag(tdb.NOMMAP) - self.tdb.remove_flag(tdb.NOMMAP) - else: - self.tdb.add_flags(tdb.NOMMAP) - 
self.tdb.remove_flags(tdb.NOMMAP) + self.tdb.add_flags(tdb.NOMMAP) + self.tdb.remove_flags(tdb.NOMMAP) class VersionTests(TestCase): diff --git a/test/external-agent.c b/test/external-agent.c index 8140e70..443d382 100644 --- a/test/external-agent.c +++ b/test/external-agent.c @@ -27,7 +27,7 @@ static enum agent_return do_operation(enum operation op, const char *name) return OTHER_FAILURE; } - k.dptr = (void *)name; + k.dptr = discard_const_p(uint8_t, name); k.dsize = strlen(name); locking_would_block = 0; @@ -87,6 +87,15 @@ static enum agent_return do_operation(enum operation op, const char *name) ret = tdb_close(tdb) == 0 ? SUCCESS : OTHER_FAILURE; tdb = NULL; break; + case PING: + ret = SUCCESS; + break; + case UNMAP: + ret = tdb_munmap(tdb) == 0 ? SUCCESS : OTHER_FAILURE; + if (ret == SUCCESS) { + tdb->flags |= TDB_NOMMAP; + } + break; default: ret = OTHER_FAILURE; } @@ -99,29 +108,29 @@ static enum agent_return do_operation(enum operation op, const char *name) struct agent { int cmdfd, responsefd; + pid_t pid; }; /* Do this before doing any tdb stuff. Return handle, or NULL. 
*/ struct agent *prepare_external_agent(void) { - int pid, ret; + int ret; int command[2], response[2]; char name[1+PATH_MAX]; + struct agent *agent = malloc(sizeof(*agent)); if (pipe(command) != 0 || pipe(response) != 0) { fprintf(stderr, "pipe failed: %s\n", strerror(errno)); exit(1); } - pid = fork(); - if (pid < 0) { + agent->pid = fork(); + if (agent->pid < 0) { fprintf(stderr, "fork failed: %s\n", strerror(errno)); exit(1); } - if (pid != 0) { - struct agent *agent = malloc(sizeof(*agent)); - + if (agent->pid != 0) { close(command[0]); close(response[1]); agent->cmdfd = command[1]; @@ -146,6 +155,20 @@ struct agent *prepare_external_agent(void) exit(0); } +void shutdown_agent(struct agent *agent) +{ + pid_t p; + + close(agent->cmdfd); + close(agent->responsefd); + p = waitpid(agent->pid, NULL, WNOHANG); + if (p == 0) { + kill(agent->pid, SIGKILL); + } + waitpid(agent->pid, NULL, 0); + free(agent); +} + /* Ask the external agent to try to do an operation. */ enum agent_return external_agent_operation(struct agent *agent, enum operation op, @@ -193,6 +216,8 @@ const char *operation_name(enum operation op) case CHECK: return "CHECK"; case NEEDS_RECOVERY: return "NEEDS_RECOVERY"; case CLOSE: return "CLOSE"; + case PING: return "PING"; + case UNMAP: return "UNMAP"; } return "**INVALID**"; } diff --git a/test/external-agent.h b/test/external-agent.h index dffdca9..de9d0ac 100644 --- a/test/external-agent.h +++ b/test/external-agent.h @@ -13,10 +13,13 @@ enum operation { CHECK, NEEDS_RECOVERY, CLOSE, + PING, + UNMAP, }; /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ struct agent *prepare_external_agent(void); +void shutdown_agent(struct agent *agent); enum agent_return { SUCCESS, diff --git a/test/run-3G-file.c b/test/run-3G-file.c index 3ee9de1..748c972 100644 --- a/test/run-3G-file.c +++ b/test/run-3G-file.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -22,12 +23,12 @@ static int tdb_expand_file_sparse(struct tdb_context *tdb, return -1; } - if (ftruncate(tdb->fd, size+addition) == -1) { + if (tdb_ftruncate(tdb, size+addition) == -1) { char b = 0; - ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + ssize_t written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); } if (written == 0) { /* again - give up, guessing errno */ @@ -66,7 +67,7 @@ int main(int argc, char *argv[]) { struct tdb_context *tdb; TDB_DATA key, orig_data, data; - uint32_t hash; + uint32_t hashval; tdb_off_t rec_ptr; struct tdb_record rec; int ret; @@ -79,9 +80,9 @@ int main(int argc, char *argv[]) tdb->methods = &large_io_methods; key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); orig_data.dsize = strlen("world"); - orig_data.dptr = (void *)"world"; + orig_data.dptr = discard_const_p(uint8_t, "world"); /* Enlarge the file (internally multiplies by 2). */ ret = tdb_expand(tdb, 1500000000); @@ -113,8 +114,8 @@ int main(int argc, char *argv[]) free(data.dptr); /* That currently fills at the end, make sure that's true. 
*/ - hash = tdb->hash_fn(&key); - rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec); + hashval = tdb->hash_fn(&key); + rec_ptr = tdb_find_lock_hash(tdb, key, hashval, F_RDLCK, &rec); ok1(rec_ptr); ok1(rec_ptr > 2U*1024*1024*1024); tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); diff --git a/test/run-bad-tdb-header.c b/test/run-bad-tdb-header.c index b00fb89..9d29fdf 100644 --- a/test/run-bad-tdb-header.c +++ b/test/run-bad-tdb-header.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" diff --git a/test/run-check.c b/test/run-check.c index 05f7aec..ce389a2 100644 --- a/test/run-check.c +++ b/test/run-check.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -26,9 +27,9 @@ int main(int argc, char *argv[]) ok1(tdb_check(tdb, NULL, NULL) == 0); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0); diff --git a/test/run-corrupt.c b/test/run-corrupt.c index 1a3c769..e6fc751 100644 --- a/test/run-corrupt.c +++ b/test/run-corrupt.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -60,9 +61,9 @@ static void check_test(struct tdb_context *tdb) ok1(tdb_check(tdb, NULL, NULL) == 0); - key.dptr = (void *)"hello"; + key.dptr = discard_const_p(uint8_t, "hello"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); /* Key and data 
size respectively. */ dsize = ksize = 0; diff --git a/test/run-die-during-transaction.c b/test/run-die-during-transaction.c index 6e3a70d..c636d87 100644 --- a/test/run-die-during-transaction.c +++ b/test/run-die-during-transaction.c @@ -19,6 +19,7 @@ static int ftruncate_check(int fd, off_t length); #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include <stdbool.h> @@ -156,7 +157,7 @@ reset: /* Put key for agent to fetch. */ key.dsize = strlen(KEY_STRING); - key.dptr = (void *)KEY_STRING; + key.dptr = discard_const_p(uint8_t, KEY_STRING); if (tdb_store(tdb, key, key, TDB_INSERT) != 0) return false; diff --git a/test/run-endian.c b/test/run-endian.c index b19ffd3..9d4d5f5 100644 --- a/test/run-endian.c +++ b/test/run-endian.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -25,9 +26,9 @@ int main(int argc, char *argv[]) ok1(tdb); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); ok1(tdb_store(tdb, key, data, TDB_MODIFY) < 0); ok1(tdb_error(tdb) == TDB_ERR_NOEXIST); @@ -52,7 +53,7 @@ int main(int argc, char *argv[]) ok1(tdb); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data = tdb_fetch(tdb, key); ok1(data.dsize == strlen("world")); ok1(memcmp(data.dptr, "world", strlen("world")) == 0); diff --git a/test/run-incompatible.c b/test/run-incompatible.c index 628927c..b8e95b5 100644 --- a/test/run-incompatible.c +++ b/test/run-incompatible.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" 
#include <stdlib.h> @@ -61,7 +62,7 @@ int main(int argc, char *argv[]) NULL); ok1(tdb); ok1(log_count == 0); - d.dptr = (void *)"Hello"; + d.dptr = discard_const_p(uint8_t, "Hello"); d.dsize = 5; ok1(tdb_store(tdb, d, d, TDB_INSERT) == 0); tdb_close(tdb); @@ -106,7 +107,7 @@ int main(int argc, char *argv[]) NULL); ok1(tdb); ok1(log_count == 0); - d.dptr = (void *)"Hello"; + d.dptr = discard_const_p(uint8_t, "Hello"); d.dsize = 5; ok1(tdb_store(tdb, d, d, TDB_INSERT) == 0); tdb_close(tdb); @@ -153,7 +154,7 @@ int main(int argc, char *argv[]) tdb_dumb_hash); ok1(tdb); ok1(log_count == 0); - d.dptr = (void *)"Hello"; + d.dptr = discard_const_p(uint8_t, "Hello"); d.dsize = 5; ok1(tdb_store(tdb, d, d, TDB_INSERT) == 0); tdb_close(tdb); diff --git a/test/run-marklock-deadlock.c b/test/run-marklock-deadlock.c new file mode 100644 index 0000000..ff03a11 --- /dev/null +++ b/test/run-marklock-deadlock.c @@ -0,0 +1,278 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdarg.h> +#include "logging.h" + +static TDB_DATA key, data; + +static void do_chainlock(const char *name, int tdb_flags, int up, int down) +{ + struct tdb_context *tdb; + int ret; + ssize_t nread, nwritten; + char c = 0; + + tdb = tdb_open_ex(name, 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &taplogctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed"); + + nwritten = write(up, &c, sizeof(c)); + ok(nwritten == sizeof(c), "write should succeed"); + + nread = read(down, &c, sizeof(c)); + ok(nread == sizeof(c), "read 
should succeed"); + + exit(0); +} + +static void do_allrecord_lock(const char *name, int tdb_flags, int up, int down) +{ + struct tdb_context *tdb; + int ret; + ssize_t nread, nwritten; + char c = 0; + + tdb = tdb_open_ex(name, 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &taplogctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed"); + + nwritten = write(up, &c, sizeof(c)); + ok(nwritten == sizeof(c), "write should succeed"); + + nread = read(down, &c, sizeof(c)); + ok(nread == sizeof(c), "read should succeed"); + + exit(0); +} + +/* The code should barf on TDBs created with rwlocks. */ +static int do_tests(const char *name, int tdb_flags) +{ + struct tdb_context *tdb; + int ret; + pid_t chainlock_child, allrecord_child; + int chainlock_down[2]; + int chainlock_up[2]; + int allrecord_down[2]; + int allrecord_up[2]; + char c; + ssize_t nread, nwritten; + + key.dsize = strlen("hi"); + key.dptr = discard_const_p(uint8_t, "hi"); + data.dsize = strlen("world"); + data.dptr = discard_const_p(uint8_t, "world"); + + ret = pipe(chainlock_down); + ok(ret == 0, "pipe should succeed"); + + ret = pipe(chainlock_up); + ok(ret == 0, "pipe should succeed"); + + ret = pipe(allrecord_down); + ok(ret == 0, "pipe should succeed"); + + ret = pipe(allrecord_up); + ok(ret == 0, "pipe should succeed"); + + chainlock_child = fork(); + ok(chainlock_child != -1, "fork should succeed"); + + if (chainlock_child == 0) { + close(chainlock_up[0]); + close(chainlock_down[1]); + close(allrecord_up[0]); + close(allrecord_up[1]); + close(allrecord_down[0]); + close(allrecord_down[1]); + do_chainlock(name, tdb_flags, + chainlock_up[1], chainlock_down[0]); + exit(0); + } + close(chainlock_up[1]); + close(chainlock_down[0]); + + nread = read(chainlock_up[0], &c, sizeof(c)); + ok(nread == sizeof(c), "read should succeed"); + + /* + * Now we have a process holding a chainlock. 
Start another process + * trying the allrecord lock. This will block. + */ + + allrecord_child = fork(); + ok(allrecord_child != -1, "fork should succeed"); + + if (allrecord_child == 0) { + close(chainlock_up[0]); + close(chainlock_up[1]); + close(chainlock_down[0]); + close(chainlock_down[1]); + close(allrecord_up[0]); + close(allrecord_down[1]); + do_allrecord_lock(name, tdb_flags, + allrecord_up[1], allrecord_down[0]); + exit(0); + } + close(allrecord_up[1]); + close(allrecord_down[0]); + + poll(NULL, 0, 500); + + tdb = tdb_open_ex(name, 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &taplogctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + /* + * Someone already holds a chainlock, but we're able to get the + * freelist lock. + * + * The freelist lock/mutex is independent from the allrecord lock/mutex. + */ + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed"); + + ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_lock_nonblock should succeed"); + + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed"); + + /* + * We have someone else having done the lock for us. Just mark it. + */ + + ret = tdb_chainlock_mark(tdb, key); + ok(ret == 0, "tdb_chainlock_mark should succeed"); + + /* + * The tdb_store below will block the freelist. In one version of the + * mutex patches, the freelist was already blocked here by the + * allrecord child, which was waiting for the chainlock child to give + * up its chainlock. Make sure that we don't run into this + * deadlock. To exercise the deadlock, just comment out the "ok" + * line. + * + * The freelist lock/mutex is independent from the allrecord lock/mutex. 
+ */ + + ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_lock_nonblock should succeed"); + + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed"); + + ret = tdb_store(tdb, key, data, TDB_INSERT); + ok(ret == 0, "tdb_store should succeed"); + + ret = tdb_chainlock_unmark(tdb, key); + ok(ret == 0, "tdb_chainlock_unmark should succeed"); + + nwritten = write(chainlock_down[1], &c, sizeof(c)); + ok(nwritten == sizeof(c), "write should succeed"); + + nread = read(chainlock_up[0], &c, sizeof(c)); + ok(nread == 0, "read should succeed"); + + nread = read(allrecord_up[0], &c, sizeof(c)); + ok(nread == sizeof(c), "read should succeed"); + + /* + * Someone already holds the allrecord lock, but we're able to get the + * freelist lock. + * + * The freelist lock/mutex is independent from the allrecord lock/mutex. + */ + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed"); + + ret = tdb_lockall_nonblock(tdb); + ok(ret == -1, "tdb_lockall_nonblock should not succeed"); + + ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_lock_nonblock should succeed"); + + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed"); + + /* + * We have someone else having done the lock for us. Just mark it. 
+ */ + + ret = tdb_lockall_mark(tdb); + ok(ret == 0, "tdb_lockall_mark should succeed"); + + ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_lock_nonblock should succeed"); + + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed"); + + ret = tdb_store(tdb, key, data, TDB_REPLACE); + ok(ret == 0, "tdb_store should succeed"); + + ret = tdb_lockall_unmark(tdb); + ok(ret == 0, "tdb_lockall_unmark should succeed"); + + nwritten = write(allrecord_down[1], &c, sizeof(c)); + ok(nwritten == sizeof(c), "write should succeed"); + + nread = read(allrecord_up[0], &c, sizeof(c)); + ok(nread == 0, "read should succeed"); + + close(chainlock_up[0]); + close(chainlock_down[1]); + close(allrecord_up[0]); + close(allrecord_down[1]); + diag("%s tests done", name); + return exit_status(); +} + +int main(int argc, char *argv[]) +{ + int ret; + bool mutex_support; + + mutex_support = tdb_runtime_check_for_robust_mutexes(); + + ret = do_tests("marklock-deadlock-fcntl.tdb", + TDB_CLEAR_IF_FIRST | + TDB_INCOMPATIBLE_HASH); + ok(ret == 0, "marklock-deadlock-fcntl.tdb tests should succeed"); + + if (!mutex_support) { + skip(1, "No robust mutex support, " + "skipping marklock-deadlock-mutex.tdb tests"); + return exit_status(); + } + + ret = do_tests("marklock-deadlock-mutex.tdb", + TDB_CLEAR_IF_FIRST | + TDB_MUTEX_LOCKING | + TDB_INCOMPATIBLE_HASH); + ok(ret == 0, "marklock-deadlock-mutex.tdb tests should succeed"); + + return exit_status(); +} diff --git a/test/run-mutex-allrecord-bench.c b/test/run-mutex-allrecord-bench.c new file mode 100644 index 0000000..b81e597 --- /dev/null +++ b/test/run-mutex-allrecord-bench.c @@ -0,0 +1,82 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include 
"../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdarg.h> + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static double timeval_elapsed2(const struct timeval *tv1, const struct timeval *tv2) +{ + return (tv2->tv_sec - tv1->tv_sec) + + (tv2->tv_usec - tv1->tv_usec)*1.0e-6; +} + +static double timeval_elapsed(const struct timeval *tv) +{ + struct timeval tv2; + gettimeofday(&tv2, NULL); + return timeval_elapsed2(tv, &tv2); +} + +/* The code should barf on TDBs created with rwlocks. */ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + struct timeval start; + double elapsed; + bool runtime_support; + + runtime_support = tdb_runtime_check_for_robust_mutexes(); + + if (!runtime_support) { + skip(1, "No robust mutex support"); + return exit_status(); + } + + key.dsize = strlen("hi"); + key.dptr = discard_const_p(uint8_t, "hi"); + data.dsize = strlen("world"); + data.dptr = discard_const_p(uint8_t, "world"); + + tdb = tdb_open_ex("mutex-allrecord-bench.tdb", 1000000, + TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + gettimeofday(&start, NULL); + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + elapsed = timeval_elapsed(&start); + + ok(ret == 0, "tdb_allrecord_lock should succeed"); + + diag("allrecord_lock took %f seconds", elapsed); + + return exit_status(); +} diff --git a/test/run-mutex-allrecord-block.c b/test/run-mutex-allrecord-block.c new file mode 100644 index 0000000..fcd3b4f --- /dev/null +++ b/test/run-mutex-allrecord-block.c @@ -0,0 +1,120 @@ +#include 
"../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdarg.h> + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int to, int from) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-allrecord-block.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed"); + + write(to, &c, sizeof(c)); + + read(from, &c, sizeof(c)); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_unlock should succeed"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int fromchild[2]; + int tochild[2]; + char c; + int tdb_flags; + bool runtime_support; + + runtime_support = tdb_runtime_check_for_robust_mutexes(); + + if (!runtime_support) { + skip(1, "No robust mutex support"); + return exit_status(); + } + + key.dsize = strlen("hi"); + key.dptr = discard_const_p(uint8_t, "hi"); + data.dsize = strlen("world"); + data.dptr = discard_const_p(uint8_t, "world"); + + pipe(fromchild); + pipe(tochild); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + close(fromchild[0]); + close(tochild[1]); + return do_child(tdb_flags, fromchild[1], tochild[0]); + } + close(fromchild[1]); + close(tochild[0]); + + read(fromchild[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-allrecord-block.tdb", 0, + tdb_flags, O_RDWR|O_CREAT, 0755, + &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed"); + + write(tochild[1], &c, sizeof(c)); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should not succeed"); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly"); + + diag("done"); + return exit_status(); +} diff --git a/test/run-mutex-allrecord-trylock.c b/test/run-mutex-allrecord-trylock.c new file mode 100644 index 0000000..4b683db --- /dev/null +++ b/test/run-mutex-allrecord-trylock.c @@ -0,0 +1,113 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include 
"../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdarg.h> + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int to, int from) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed"); + + write(to, &c, sizeof(c)); + + read(from, &c, sizeof(c)); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int fromchild[2]; + int tochild[2]; + char c; + int tdb_flags; + bool runtime_support; + + runtime_support = tdb_runtime_check_for_robust_mutexes(); + + if (!runtime_support) { + skip(1, "No robust mutex support"); + return exit_status(); + } + + key.dsize = strlen("hi"); + key.dptr = discard_const_p(uint8_t, "hi"); + data.dsize = strlen("world"); + data.dptr = discard_const_p(uint8_t, "world"); + + pipe(fromchild); + pipe(tochild); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + close(fromchild[0]); + close(tochild[1]); + return do_child(tdb_flags, fromchild[1], tochild[0]); + } + close(fromchild[1]); + close(tochild[0]); + + read(fromchild[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false); + ok(ret == -1, "tdb_allrecord_lock (nowait) should not succeed"); + + write(tochild[1], &c, sizeof(c)); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly"); + + diag("done"); + return exit_status(); +} diff --git a/test/run-mutex-die.c b/test/run-mutex-die.c new file mode 100644 index 0000000..4b8eac1 --- /dev/null +++ b/test/run-mutex-die.c @@ -0,0 +1,269 @@ +#include "../common/tdb_private.h" +#include "lock-tracking.h" +static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); +static ssize_t write_check(int fd, const void *buf, size_t count); +static int ftruncate_check(int fd, off_t length); + +#define pwrite pwrite_check +#define write write_check +#define fcntl fcntl_with_lockcheck +#define ftruncate ftruncate_check + +#include "../common/io.c" 
+#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include <stdlib.h> +#include <stdbool.h> +#include <stdarg.h> +#include "external-agent.h" +#include "logging.h" + +#undef write +#undef pwrite +#undef fcntl +#undef ftruncate + +static int target, current; +#define TEST_DBNAME "run-mutex-die.tdb" +#define KEY_STRING "helloworld" + +static void maybe_die(int fd) +{ + if (target == 0) { + return; + } + current += 1; + if (current == target) { + _exit(1); + } +} + +static ssize_t pwrite_check(int fd, + const void *buf, size_t count, off_t offset) +{ + ssize_t ret; + + maybe_die(fd); + + ret = pwrite(fd, buf, count, offset); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static ssize_t write_check(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + maybe_die(fd); + + ret = write(fd, buf, count); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static int ftruncate_check(int fd, off_t length) +{ + int ret; + + maybe_die(fd); + + ret = ftruncate(fd, length); + + maybe_die(fd); + return ret; +} + +static enum agent_return flakey_ops(struct agent *a) +{ + enum agent_return ret; + + /* + * Run in the external agent child + */ + + ret = external_agent_operation(a, OPEN_WITH_CLEAR_IF_FIRST, TEST_DBNAME); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to open: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, UNMAP, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to unmap: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, STORE, "xyz"); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to store: %s\n", + agent_return_name(ret)); + 
return ret; + } + ret = external_agent_operation(a, STORE, KEY_STRING); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed store: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, FETCH, KEY_STRING); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed find key: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, PING, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed ping: %s\n", + agent_return_name(ret)); + return ret; + } + return ret; +} + +static bool prep_db(void) { + struct tdb_context *tdb; + TDB_DATA key; + TDB_DATA data; + + key.dptr = discard_const_p(uint8_t, KEY_STRING); + key.dsize = strlen((char *)key.dptr); + data.dptr = discard_const_p(uint8_t, "foo"); + data.dsize = strlen((char *)data.dptr); + + unlink(TEST_DBNAME); + + tdb = tdb_open_ex( + TEST_DBNAME, 2, + TDB_INCOMPATIBLE_HASH|TDB_MUTEX_LOCKING|TDB_CLEAR_IF_FIRST, + O_CREAT|O_TRUNC|O_RDWR, 0600, &taplogctx, NULL); + if (tdb == NULL) { + return false; + } + + if (tdb_store(tdb, key, data, TDB_INSERT) != 0) { + return false; + } + + tdb_close(tdb); + tdb = NULL; + + forget_locking(); + + return true; +} + +static bool test_db(void) { + struct tdb_context *tdb; + int ret; + + tdb = tdb_open_ex( + TEST_DBNAME, 1024, TDB_INCOMPATIBLE_HASH, + O_RDWR, 0600, &taplogctx, NULL); + + if (tdb == NULL) { + perror("tdb_open_ex failed"); + return false; + } + + ret = tdb_traverse(tdb, NULL, NULL); + if (ret == -1) { + perror("traverse failed"); + goto fail; + } + + tdb_close(tdb); + + forget_locking(); + + return true; + +fail: + tdb_close(tdb); + return false; +} + +static bool test_one(void) +{ + enum agent_return ret; + + ret = AGENT_DIED; + target = 19; + + while (ret != SUCCESS) { + struct agent *agent; + + { + int child_target = target; + bool pret; + target = 0; + pret = prep_db(); + ok1(pret); + target = child_target; + } + + agent = prepare_external_agent(); + + ret = flakey_ops(agent); + + diag("Agent 
(target=%d) returns %s", + target, agent_return_name(ret)); + + if (ret == SUCCESS) { + ok((target > 19), "At least one AGENT_DIED expected"); + } else { + ok(ret == AGENT_DIED, "AGENT_DIED expected"); + } + + shutdown_agent(agent); + + { + int child_target = target; + bool tret; + target = 0; + tret = test_db(); + ok1(tret); + target = child_target; + } + + target += 1; + } + + return true; +} + +int main(int argc, char *argv[]) +{ + bool ret; + bool runtime_support; + + runtime_support = tdb_runtime_check_for_robust_mutexes(); + + if (!runtime_support) { + skip(1, "No robust mutex support"); + return exit_status(); + } + + plan_tests(12); + unlock_callback = maybe_die; + + ret = test_one(); + ok1(ret); + + diag("done"); + return exit_status(); +} diff --git a/test/run-mutex-openflags2.c b/test/run-mutex-openflags2.c new file mode 100644 index 0000000..57ac7e3 --- /dev/null +++ b/test/run-mutex-openflags2.c @@ -0,0 +1,152 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <poll.h> +#include <stdarg.h> + +static TDB_DATA key, data; + +static void log_void(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ +} + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + struct tdb_logging_context nolog_ctx = { log_void, NULL }; + char c; + + read(fd, &c, 1); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_DEFAULT, + O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), "TDB_DEFAULT without " + "TDB_MUTEX_LOCKING should fail with EINVAL - %d", errno); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_CLEAR_IF_FIRST, + O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), "TDB_CLEAR_IF_FIRST without " + "TDB_MUTEX_LOCKING should fail with EINVAL - %d", errno); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_CLEAR_IF_FIRST | + TDB_MUTEX_LOCKING | + TDB_INTERNAL, + O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING with " + "TDB_INTERNAL should fail with EINVAL - %d", errno); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_CLEAR_IF_FIRST | + TDB_MUTEX_LOCKING | + TDB_NOMMAP, + O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING with " + "TDB_NOMMAP should fail with EINVAL - %d", errno); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_CLEAR_IF_FIRST | + TDB_MUTEX_LOCKING, + O_RDONLY, 0755, &nolog_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING with " + "O_RDONLY should fail with EINVAL - %d", errno); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_CLEAR_IF_FIRST | + TDB_MUTEX_LOCKING, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok((tdb != NULL), "TDB_MUTEX_LOCKING with TDB_CLEAR_IF_FIRST" + "TDB_NOMMAP should work - %d", errno); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + struct tdb_logging_context nolog_ctx = { log_void, NULL }; + int ret, status; + pid_t child, wait_ret; + int pipefd[2]; + char c = 0; + bool runtime_support; + + runtime_support = tdb_runtime_check_for_robust_mutexes(); + + ret = pipe(pipefd); + ok1(ret == 0); + + key.dsize = strlen("hi"); + key.dptr = discard_const_p(uint8_t, "hi"); + data.dsize = strlen("world"); + data.dptr = discard_const_p(uint8_t, "world"); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING, + O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING without " + "TDB_CLEAR_IF_FIRST should fail with EINVAL - %d", errno); + + if (!runtime_support) { + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_CLEAR_IF_FIRST| + TDB_MUTEX_LOCKING, + O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); + ok((tdb == NULL) && (errno == ENOSYS), "TDB_MUTEX_LOCKING without " + "runtime support should fail with ENOSYS - %d", errno); + + skip(1, "No robust mutex support"); + return exit_status(); + } + + child = fork(); + if (child == 0) { + return do_child(pipefd[0]); + } + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_CLEAR_IF_FIRST| + TDB_MUTEX_LOCKING, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok((tdb != NULL), "tdb_open_ex with mutexes should succeed"); + + write(pipefd[1], &c, 1); + + wait_ret = wait(&status); + ok((wait_ret == child) && (status == 0), + "child should have exited correctly"); + + diag("done"); + return exit_status(); +} diff --git a/test/run-mutex-trylock.c b/test/run-mutex-trylock.c new file mode 100644 index 0000000..c96b635 --- /dev/null +++ b/test/run-mutex-trylock.c @@ -0,0 +1,122 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include 
"../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdarg.h> + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int to, int from) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed"); + + write(to, &c, sizeof(c)); + + read(from, &c, sizeof(c)); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed"); + + write(to, &c, sizeof(c)); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int fromchild[2]; + int tochild[2]; + char c; + int tdb_flags; + bool runtime_support; + + runtime_support = tdb_runtime_check_for_robust_mutexes(); + + if (!runtime_support) { + skip(1, "No robust mutex support"); + return exit_status(); + } + + key.dsize = strlen("hi"); + key.dptr = discard_const_p(uint8_t, "hi"); + data.dsize = strlen("world"); + data.dptr = discard_const_p(uint8_t, "world"); + + pipe(fromchild); + pipe(tochild); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + close(fromchild[0]); + close(tochild[1]); + return do_child(tdb_flags, fromchild[1], tochild[0]); + } + close(fromchild[1]); + close(tochild[0]); + + read(fromchild[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed"); + + write(tochild[1], &c, sizeof(c)); + + read(fromchild[0], &c, sizeof(c)); + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == 0, "tdb_chainlock_nonblock should succeed"); + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly"); + + diag("done"); + return exit_status(); +} diff --git a/test/run-mutex1.c b/test/run-mutex1.c new file mode 100644 index 0000000..eb75946 --- /dev/null +++ b/test/run-mutex1.c @@ -0,0 +1,138 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" 
+#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdarg.h> + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int to, int from) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed"); + + write(to, &c, sizeof(c)); + read(from, &c, sizeof(c)); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed"); + + write(to, &c, sizeof(c)); + read(from, &c, sizeof(c)); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed"); + + write(to, &c, sizeof(c)); + read(from, &c, sizeof(c)); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_lock should succeed"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int fromchild[2]; + int tochild[2]; + char c; + int tdb_flags; + bool runtime_support; + + runtime_support = tdb_runtime_check_for_robust_mutexes(); + + if (!runtime_support) { + skip(1, "No robust mutex support"); + return exit_status(); + } + + key.dsize = strlen("hi"); + key.dptr = discard_const_p(uint8_t, "hi"); + data.dsize = strlen("world"); + data.dptr = discard_const_p(uint8_t, "world"); + + pipe(fromchild); + pipe(tochild); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + close(fromchild[0]); + close(tochild[1]); + return do_child(tdb_flags, fromchild[1], tochild[0]); + } + close(fromchild[1]); + close(tochild[0]); + + read(fromchild[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed"); + + write(tochild[1], &c, sizeof(c)); + read(fromchild[0], &c, sizeof(c)); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed"); + + ret = tdb_store(tdb, key, data, 0); + ok(ret == 0, "tdb_store should succeed"); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_unlock should succeed"); + + write(tochild[1], &c, sizeof(c)); + read(fromchild[0], &c, sizeof(c)); + write(tochild[1], &c, sizeof(c)); + + ret = tdb_delete(tdb, key); + ok(ret == 0, "tdb_delete should succeed"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly"); + + diag("done"); + return exit_status(); +} diff --git a/test/run-nested-transactions.c b/test/run-nested-transactions.c index 8c84bca..864adf2 100644 --- a/test/run-nested-transactions.c +++ b/test/run-nested-transactions.c @@ -9,6 +9,7 @@ #include 
"../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include <stdbool.h> @@ -21,7 +22,7 @@ int main(int argc, char *argv[]) plan_tests(27); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); tdb = tdb_open_ex("run-nested-transactions.tdb", 1024, TDB_CLEAR_IF_FIRST|TDB_DISALLOW_NESTING, @@ -30,7 +31,7 @@ int main(int argc, char *argv[]) /* Nesting disallowed. */ ok1(tdb_transaction_start(tdb) == 0); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); data.dsize = strlen("world"); ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); data = tdb_fetch(tdb, key); diff --git a/test/run-nested-traverse.c b/test/run-nested-traverse.c index 37d57c0..22ee3e2 100644 --- a/test/run-nested-traverse.c +++ b/test/run-nested-traverse.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #undef fcntl #include <stdlib.h> @@ -74,8 +75,8 @@ int main(int argc, char *argv[]) == SUCCESS); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dptr = (void *)"world"; + key.dptr = discard_const_p(uint8_t, "hi"); + data.dptr = discard_const_p(uint8_t, "world"); data.dsize = strlen("world"); ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); diff --git a/test/run-no-lock-during-traverse.c b/test/run-no-lock-during-traverse.c index 0a72282..737a32f 100644 --- a/test/run-no-lock-during-traverse.c +++ b/test/run-no-lock-during-traverse.c @@ -13,6 +13,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -30,7 +31,7 @@ static bool prepare_entries(struct tdb_context *tdb) key.dsize = sizeof(i); key.dptr = (void *)&i; data.dsize = strlen("world"); - data.dptr = (void *)"world"; + 
data.dptr = discard_const_p(uint8_t, "world"); if (tdb_store(tdb, key, data, 0) != 0) return false; diff --git a/test/run-oldhash.c b/test/run-oldhash.c index 535336c..aaee6f6 100644 --- a/test/run-oldhash.c +++ b/test/run-oldhash.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" diff --git a/test/run-open-during-transaction.c b/test/run-open-during-transaction.c index a825e62..1605376 100644 --- a/test/run-open-during-transaction.c +++ b/test/run-open-during-transaction.c @@ -20,6 +20,7 @@ static int ftruncate_check(int fd, off_t length); #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include <stdbool.h> @@ -153,7 +154,7 @@ int main(int argc, char *argv[]) unlock_callback = after_unlock; for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) { clear_if_first = (flags[i] & TDB_CLEAR_IF_FIRST); - diag("Test with %s and %s\n", + diag("Test with %s and %s", clear_if_first ? "CLEAR" : "DEFAULT", (flags[i] & TDB_NOMMAP) ? 
"no mmap" : "mmap"); unlink(TEST_DBNAME); @@ -165,8 +166,8 @@ int main(int argc, char *argv[]) opened = true; ok1(tdb_transaction_start(tdb) == 0); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dptr = (void *)"world"; + key.dptr = discard_const_p(uint8_t, "hi"); + data.dptr = discard_const_p(uint8_t, "world"); data.dsize = strlen("world"); ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); diff --git a/test/run-readonly-check.c b/test/run-readonly-check.c index fdd9507..c5e0f7d 100644 --- a/test/run-readonly-check.c +++ b/test/run-readonly-check.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -27,9 +28,9 @@ int main(int argc, char *argv[]) ok1(tdb); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0); diff --git a/test/run-rescue-find_entry.c b/test/run-rescue-find_entry.c index 25f4f1c..5d6f8f7 100644 --- a/test/run-rescue-find_entry.c +++ b/test/run-rescue-find_entry.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/rescue.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" diff --git a/test/run-rescue.c b/test/run-rescue.c index a26c493..e43f53b 100644 --- a/test/run-rescue.c +++ b/test/run-rescue.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/rescue.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -29,7 +30,7 @@ static inline bool tdb_deq(TDB_DATA a, TDB_DATA b) static inline TDB_DATA tdb_mkdata(const void *p, size_t len) { TDB_DATA d; - d.dptr = (void *)p; + 
d.dptr = discard_const_p(uint8_t, p); d.dsize = len; return d; } @@ -53,7 +54,7 @@ static void count_records(TDB_DATA key, TDB_DATA data, void *_wd) struct walk_data *wd = _wd; if (!tdb_deq(key, wd->key) || !tdb_deq(data, wd->data)) - diag("%.*s::%.*s\n", + diag("%.*s::%.*s", (int)key.dsize, key.dptr, (int)data.dsize, data.dptr); wd->count++; } @@ -76,9 +77,9 @@ int main(int argc, char *argv[]) O_CREAT|O_TRUNC|O_RDWR, 0600, &log_ctx, NULL); wd.key.dsize = strlen("hi"); - wd.key.dptr = (void *)"hi"; + wd.key.dptr = discard_const_p(uint8_t, "hi"); wd.data.dsize = strlen("world"); - wd.data.dptr = (void *)"world"; + wd.data.dptr = discard_const_p(uint8_t, "world"); wd.count = 0; wd.fail = false; diff --git a/test/run-rwlock-check.c b/test/run-rwlock-check.c index 8b8072d..2ac9dc3 100644 --- a/test/run-rwlock-check.c +++ b/test/run-rwlock-check.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> diff --git a/test/run-summary.c b/test/run-summary.c index 2231284..8b9a1a0 100644 --- a/test/run-summary.c +++ b/test/run-summary.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/summary.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> diff --git a/test/run-transaction-expand.c b/test/run-transaction-expand.c index 1271d92..d36b894 100644 --- a/test/run-transaction-expand.c +++ b/test/run-transaction-expand.c @@ -37,6 +37,7 @@ static inline int fake_fdatasync(int fd) #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -46,7 +47,7 @@ static void write_record(struct tdb_context *tdb, size_t extra_len, { TDB_DATA key; key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data->dsize += extra_len; 
tdb_transaction_start(tdb); @@ -73,6 +74,11 @@ int main(int argc, char *argv[]) data.dsize = 0; data.dptr = calloc(1000, getpagesize()); + if (data.dptr == NULL) { + diag("Unable to allocate memory for data.dptr"); + tdb_close(tdb); + exit(1); + } /* Simulate a slowly growing record. */ for (i = 0; i < 1000; i++) diff --git a/test/run-traverse-in-transaction.c b/test/run-traverse-in-transaction.c index bcdc354..17d6412 100644 --- a/test/run-traverse-in-transaction.c +++ b/test/run-traverse-in-transaction.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #undef fcntl_with_lockcheck #include <stdlib.h> @@ -54,8 +55,8 @@ int main(int argc, char *argv[]) ok1(tdb); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dptr = (void *)"world"; + key.dptr = discard_const_p(uint8_t, "hi"); + data.dptr = discard_const_p(uint8_t, "world"); data.dsize = strlen("world"); ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); diff --git a/test/run-wronghash-fail.c b/test/run-wronghash-fail.c index 74bbc30..c44b0f5 100644 --- a/test/run-wronghash-fail.c +++ b/test/run-wronghash-fail.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> @@ -34,7 +35,7 @@ int main(int argc, char *argv[]) O_CREAT|O_RDWR|O_TRUNC, 0600, &log_ctx, NULL); ok1(tdb); ok1(log_count == 0); - d.dptr = (void *)"Hello"; + d.dptr = discard_const_p(uint8_t, "Hello"); d.dsize = 5; ok1(tdb_store(tdb, d, d, TDB_INSERT) == 0); tdb_close(tdb); diff --git a/test/run-zero-append.c b/test/run-zero-append.c index 36bf699..f9eba1b 100644 --- a/test/run-zero-append.c +++ b/test/run-zero-append.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include 
"logging.h" @@ -25,8 +26,8 @@ int main(int argc, char *argv[]) /* Tickle bug on appending zero length buffer to zero length buffer. */ key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dptr = (void *)"world"; + key.dptr = discard_const_p(uint8_t, "hi"); + data.dptr = discard_const_p(uint8_t, "world"); data.dsize = 0; ok1(tdb_append(tdb, key, data) == 0); @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include <stdlib.h> #include "logging.h" @@ -24,9 +25,9 @@ int main(int argc, char *argv[]) ok1(tdb); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); ok1(tdb_store(tdb, key, data, TDB_MODIFY) < 0); ok1(tdb_error(tdb) == TDB_ERR_NOEXIST); diff --git a/test/tap-interface.h b/test/tap-interface.h index d9ed6e8..8f742d8 100644 --- a/test/tap-interface.h +++ b/test/tap-interface.h @@ -31,9 +31,28 @@ #endif #define plan_tests(num) -#define ok(e, ...) do { if (e) { (void)printf("."); } else { fprintf(stderr, __VA_ARGS__); exit(1); } } while(0) +#define fail(...) do { \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ + exit(1); \ +} while(0) +#define diag(...) do { \ + fprintf(stdout, __VA_ARGS__); \ + fprintf(stdout, "\n"); \ + fflush(stdout); \ +} while(0) +#define pass(...) do { \ + fprintf(stdout, "."); \ + fflush(stdout); \ +} while(0) +#define ok(e, ...) do { \ + if (e) { \ + pass(); \ + } else { \ + fail(__VA_ARGS__); \ + } \ +} while(0) #define ok1(e) ok((e), "%s:%s", __location__, #e) -#define pass(...) printf(".") -#define fail(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while(0) -#define diag printf +#define skip(n, ...) 
diag(__VA_ARGS__) #define exit_status() 0 diff --git a/tools/tdbbackup.c b/tools/tdbbackup.c index 276a281..eb33e25 100644 --- a/tools/tdbbackup.c +++ b/tools/tdbbackup.c @@ -104,7 +104,8 @@ static int test_fn(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void *state) only doing the backup if its OK this function is also used for restore */ -static int backup_tdb(const char *old_name, const char *new_name, int hash_size) +static int backup_tdb(const char *old_name, const char *new_name, + int hash_size, int nolock) { TDB_CONTEXT *tdb; TDB_CONTEXT *tdb_new; @@ -122,7 +123,8 @@ static int backup_tdb(const char *old_name, const char *new_name, int hash_size) } /* open the old tdb */ - tdb = tdb_open_ex(old_name, 0, 0, + tdb = tdb_open_ex(old_name, 0, + TDB_DEFAULT | (nolock ? TDB_NOLOCK : 0), O_RDWR, 0, &log_ctx, NULL); if (!tdb) { printf("Failed to open %s\n", old_name); @@ -249,7 +251,7 @@ static int verify_tdb(const char *fname, const char *bak_name) /* count is < 0 means an error */ if (count < 0) { printf("restoring %s\n", fname); - return backup_tdb(bak_name, fname, 0); + return backup_tdb(bak_name, fname, 0, 0); } printf("%s : %d records\n", fname, count); @@ -279,6 +281,7 @@ static void usage(void) printf(" -s suffix set the backup suffix\n"); printf(" -v verify mode (restore if corrupt)\n"); printf(" -n hashsize set the new hash size for the backup\n"); + printf(" -l open without locking to back up mutex dbs\n"); } int main(int argc, char *argv[]) @@ -288,11 +291,12 @@ static void usage(void) int c; int verify = 0; int hashsize = 0; + int nolock = 0; const char *suffix = ".bak"; log_ctx.log_fn = tdb_log; - while ((c = getopt(argc, argv, "vhs:n:")) != -1) { + while ((c = getopt(argc, argv, "vhs:n:l")) != -1) { switch (c) { case 'h': usage(); @@ -306,6 +310,9 @@ static void usage(void) case 'n': hashsize = atoi(optarg); break; + case 'l': + nolock = 1; + break; } } @@ -329,7 +336,8 @@ static void usage(void) } } else { if (file_newer(fname, bak_name) && - 
backup_tdb(fname, bak_name, hashsize) != 0) { + backup_tdb(fname, bak_name, hashsize, + nolock) != 0) { ret = 1; } } diff --git a/tools/tdbdump.c b/tools/tdbdump.c index e66ea56..9a0a7fe 100644 --- a/tools/tdbdump.c +++ b/tools/tdbdump.c @@ -99,15 +99,23 @@ static int dump_tdb(const char *fname, const char *keyname, bool emergency) TDB_CONTEXT *tdb; TDB_DATA key, value; struct tdb_logging_context logfn = { log_stderr }; + int tdb_flags = TDB_DEFAULT; - tdb = tdb_open_ex(fname, 0, 0, O_RDONLY, 0, &logfn, NULL); + /* + * Note: that O_RDONLY implies TDB_NOLOCK, but we want to make it + * explicit as it's important when working on databases which were + * created with mutex locking. + */ + tdb_flags |= TDB_NOLOCK; + + tdb = tdb_open_ex(fname, 0, tdb_flags, O_RDONLY, 0, &logfn, NULL); if (!tdb) { printf("Failed to open %s\n", fname); return 1; } if (emergency) { - return tdb_rescue(tdb, emergency_walk, keyname) == 0; + return tdb_rescue(tdb, emergency_walk, discard_const(keyname)) == 0; } if (!keyname) { return tdb_traverse(tdb, traverse_fn, NULL) == -1 ? 1 : 0; diff --git a/tools/tdbtool.c b/tools/tdbtool.c index d007796..2f93e33 100644 --- a/tools/tdbtool.c +++ b/tools/tdbtool.c @@ -36,6 +36,7 @@ char *line; TDB_DATA iterate_kbuf; char cmdline[1024]; static int disable_mmap; +static int disable_lock; enum commands { CMD_CREATE_TDB, @@ -118,6 +119,33 @@ static double _end_timer(void) } #ifdef PRINTF_ATTRIBUTE +static void tdb_log_open(struct tdb_context *tdb, enum tdb_debug_level level, + const char *format, ...) PRINTF_ATTRIBUTE(3,4); +#endif +static void tdb_log_open(struct tdb_context *tdb, enum tdb_debug_level level, + const char *format, ...) +{ + const char *mutex_msg = + "Can use mutexes only with MUTEX_LOCKING or NOLOCK\n"; + char *p; + va_list ap; + + p = strstr(format, mutex_msg); + if (p != NULL) { + /* + * Yes, this is a hack, but we don't want to see this + * message on first open, but we want to see + * everything else. 
+ */ + return; + } + + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +} + +#ifdef PRINTF_ATTRIBUTE static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4); #endif static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) @@ -222,11 +250,14 @@ static void terror(const char *why) static void create_tdb(const char *tdbname) { - struct tdb_logging_context log_ctx; + struct tdb_logging_context log_ctx = { NULL, NULL}; log_ctx.log_fn = tdb_log; if (tdb) tdb_close(tdb); - tdb = tdb_open_ex(tdbname, 0, TDB_CLEAR_IF_FIRST | (disable_mmap?TDB_NOMMAP:0), + tdb = tdb_open_ex(tdbname, 0, + TDB_CLEAR_IF_FIRST | + (disable_mmap?TDB_NOMMAP:0) | + (disable_lock?TDB_NOLOCK:0), O_RDWR | O_CREAT | O_TRUNC, 0600, &log_ctx, NULL); if (!tdb) { printf("Could not create %s: %s\n", tdbname, strerror(errno)); @@ -235,12 +266,32 @@ static void create_tdb(const char *tdbname) static void open_tdb(const char *tdbname) { - struct tdb_logging_context log_ctx; - log_ctx.log_fn = tdb_log; + struct tdb_logging_context log_ctx = { NULL, NULL }; + log_ctx.log_fn = tdb_log_open; if (tdb) tdb_close(tdb); - tdb = tdb_open_ex(tdbname, 0, disable_mmap?TDB_NOMMAP:0, O_RDWR, 0600, + tdb = tdb_open_ex(tdbname, 0, + (disable_mmap?TDB_NOMMAP:0) | + (disable_lock?TDB_NOLOCK:0), + O_RDWR, 0600, &log_ctx, NULL); + + log_ctx.log_fn = tdb_log; + if (tdb != NULL) { + tdb_set_logging_function(tdb, &log_ctx); + } + + if ((tdb == NULL) && (errno == EINVAL)) { + /* + * Retry NOLOCK and readonly. There we want to see all + * error messages. 
+ */ + tdb = tdb_open_ex(tdbname, 0, + (disable_mmap?TDB_NOMMAP:0) |TDB_NOLOCK, + O_RDONLY, 0600, + &log_ctx, NULL); + } + if (!tdb) { printf("Could not open %s: %s\n", tdbname, strerror(errno)); } @@ -736,6 +787,13 @@ int main(int argc, char *argv[]) arg2 = NULL; arg2len = 0; + if (argv[1] && (strcmp(argv[1], "-l") == 0)) { + disable_lock = 1; + argv[1] = argv[0]; + argv += 1; + argc -= 1; + } + if (argv[1]) { cmdname = "open"; arg1 = argv[1]; diff --git a/tools/tdbtorture.c b/tools/tdbtorture.c index a23d154..3e26f65 100644 --- a/tools/tdbtorture.c +++ b/tools/tdbtorture.c @@ -33,6 +33,7 @@ static int always_transaction = 0; static int hash_size = 2; static int loopnum; static int count_pipe; +static bool mutex = false; static struct tdb_logging_context log_ctx; #ifdef PRINTF_ATTRIBUTE @@ -119,6 +120,7 @@ static void addrec_db(void) #if TRANSACTION_PROB if (in_transaction == 0 && + ((tdb_get_flags(db) & TDB_MUTEX_LOCKING) == 0) && (always_transaction || random() % TRANSACTION_PROB == 0)) { if (tdb_transaction_start(db) != 0) { fatal("tdb_transaction_start failed"); @@ -216,7 +218,7 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, static void usage(void) { - printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); + printf("Usage: tdbtorture [-t] [-k] [-m] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); exit(0); } @@ -230,7 +232,13 @@ static void send_count_and_suicide(int sig) static int run_child(const char *filename, int i, int seed, unsigned num_loops, unsigned start) { - db = tdb_open_ex(filename, hash_size, TDB_DEFAULT, + int tdb_flags = TDB_DEFAULT|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH; + + if (mutex) { + tdb_flags |= TDB_MUTEX_LOCKING; + } + + db = tdb_open_ex(filename, hash_size, tdb_flags, O_RDWR | O_CREAT, 0600, &log_ctx, NULL); if (!db) { fatal("db open failed"); @@ -302,7 +310,7 @@ int main(int argc, char * const *argv) log_ctx.log_fn = tdb_log; - while ((c = 
getopt(argc, argv, "n:l:s:H:thk")) != -1) { + while ((c = getopt(argc, argv, "n:l:s:H:thkm")) != -1) { switch (c) { case 'n': num_procs = strtol(optarg, NULL, 0); @@ -322,6 +330,13 @@ int main(int argc, char * const *argv) case 'k': kill_random = 1; break; + case 'm': + mutex = tdb_runtime_check_for_robust_mutexes(); + if (!mutex) { + printf("tdb_runtime_check_for_robust_mutexes() returned false\n"); + exit(1); + } + break; default: usage(); } @@ -342,7 +357,15 @@ int main(int argc, char * const *argv) } pids = (pid_t *)calloc(sizeof(pid_t), num_procs); + if (pids == NULL) { + perror("Unable to allocate memory for pids"); + exit(1); + } done = (int *)calloc(sizeof(int), num_procs); + if (done == NULL) { + perror("Unable to allocate memory for done"); + exit(1); + } if (pipe(pfds) != 0) { perror("Creating pipe"); @@ -435,7 +458,13 @@ int main(int argc, char * const *argv) done: if (error_count == 0) { - db = tdb_open_ex(test_tdb, hash_size, TDB_DEFAULT, + int tdb_flags = TDB_DEFAULT; + + if (mutex) { + tdb_flags |= TDB_NOLOCK; + } + + db = tdb_open_ex(test_tdb, hash_size, tdb_flags, O_RDWR, 0, &log_ctx, NULL); if (!db) { fatal("db open failed\n"); @@ -1,7 +1,7 @@ #!/usr/bin/env python APPNAME = 'tdb' -VERSION = '1.2.12' +VERSION = '1.3.0' blddir = 'bin' @@ -17,10 +17,47 @@ import wafsamba, samba_dist, Options, Logs samba_dist.DIST_DIRS('lib/tdb:. 
lib/replace:lib/replace buildtools:buildtools') +tdb1_unit_tests = [ + 'run-3G-file', + 'run-bad-tdb-header', + 'run', + 'run-check', + 'run-corrupt', + 'run-die-during-transaction', + 'run-endian', + 'run-incompatible', + 'run-nested-transactions', + 'run-nested-traverse', + 'run-no-lock-during-traverse', + 'run-oldhash', + 'run-open-during-transaction', + 'run-readonly-check', + 'run-rescue', + 'run-rescue-find_entry', + 'run-rwlock-check', + 'run-summary', + 'run-transaction-expand', + 'run-traverse-in-transaction', + 'run-wronghash-fail', + 'run-zero-append', + 'run-marklock-deadlock', + 'run-mutex-openflags2', + 'run-mutex-trylock', + 'run-mutex-allrecord-bench', + 'run-mutex-allrecord-trylock', + 'run-mutex-allrecord-block', + 'run-mutex-die', + 'run-mutex1', +] + def set_options(opt): opt.BUILTIN_DEFAULT('replace') opt.PRIVATE_EXTENSION_DEFAULT('tdb', noextension='tdb') opt.RECURSE('lib/replace') + opt.add_option('--disable-tdb-mutex-locking', + help=("Disable the use of pthread robust mutexes"), + action="store_true", dest='disable_tdb_mutex_locking', + default=False) if opt.IN_LAUNCH_DIR(): opt.add_option('--disable-python', help=("disable the pytdb module"), @@ -28,6 +65,11 @@ def set_options(opt): def configure(conf): + conf.env.disable_tdb_mutex_locking = getattr(Options.options, + 'disable_tdb_mutex_locking', + False) + if not conf.env.disable_tdb_mutex_locking: + conf.env.replace_add_global_pthread = True conf.RECURSE('lib/replace') conf.env.standalone_tdb = conf.IN_LAUNCH_DIR() @@ -43,6 +85,11 @@ def configure(conf): conf.env.disable_python = getattr(Options.options, 'disable_python', False) + if (conf.CONFIG_SET('HAVE_ROBUST_MUTEXES') and + conf.env.building_tdb and + not conf.env.disable_tdb_mutex_locking): + conf.define('USE_TDB_MUTEX_LOCKING', 1) + conf.CHECK_XSLTPROC_MANPAGES() if not conf.env.disable_python: @@ -62,10 +109,12 @@ def configure(conf): def build(bld): bld.RECURSE('lib/replace') - COMMON_SRC = bld.SUBDIR('common', - '''check.c 
error.c tdb.c traverse.c - freelistcheck.c lock.c dump.c freelist.c - io.c open.c transaction.c hash.c summary.c rescue.c''') + COMMON_FILES='''check.c error.c tdb.c traverse.c + freelistcheck.c lock.c dump.c freelist.c + io.c open.c transaction.c hash.c summary.c rescue.c + mutex.c''' + + COMMON_SRC = bld.SUBDIR('common', COMMON_FILES) if bld.env.standalone_tdb: bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig' @@ -74,9 +123,15 @@ def build(bld): private_library = True if not bld.CONFIG_SET('USING_SYSTEM_TDB'): + + tdb_deps = 'replace' + + if bld.CONFIG_SET('USE_TDB_MUTEX_LOCKING'): + tdb_deps += ' pthread' + bld.SAMBA_LIBRARY('tdb', COMMON_SRC, - deps='replace', + deps=tdb_deps, includes='include', abi_directory='ABI', abi_match='tdb_*', @@ -112,53 +167,14 @@ def build(bld): # FIXME: This hardcoded list is stupid, stupid, stupid. bld.SAMBA_SUBSYSTEM('tdb-test-helpers', 'test/external-agent.c test/lock-tracking.c test/logging.c', - 'replace', + tdb_deps, includes='include') - bld.SAMBA_BINARY('tdb1-run-3G-file', 'test/run-3G-file.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-bad-tdb-header', 'test/run-bad-tdb-header.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run', 'test/run.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-check', 'test/run-check.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-corrupt', 'test/run-corrupt.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-die-during-transaction', 'test/run-die-during-transaction.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-endian', 'test/run-endian.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-incompatible', 'test/run-incompatible.c', - 'replace tdb-test-helpers', 
includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-nested-transactions', 'test/run-nested-transactions.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-nested-traverse', 'test/run-nested-traverse.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-no-lock-during-traverse', 'test/run-no-lock-during-traverse.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-oldhash', 'test/run-oldhash.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-open-during-transaction', 'test/run-open-during-transaction.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-readonly-check', 'test/run-readonly-check.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-rescue', 'test/run-rescue.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-rescue-find_entry', 'test/run-rescue-find_entry.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-rwlock-check', 'test/run-rwlock-check.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-summary', 'test/run-summary.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-transaction-expand', 'test/run-transaction-expand.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-traverse-in-transaction', 'test/run-traverse-in-transaction.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-wronghash-fail', 'test/run-wronghash-fail.c', - 'replace tdb-test-helpers', includes='include', install=False) - bld.SAMBA_BINARY('tdb1-run-zero-append', 'test/run-zero-append.c', - 'replace tdb-test-helpers', 
includes='include', install=False) + for t in tdb1_unit_tests: + b = "tdb1-" + t + s = "test/" + t + ".c" + bld.SAMBA_BINARY(b, s, 'replace tdb-test-helpers', + includes='include', install=False) if not bld.CONFIG_SET('USING_SYSTEM_PYTDB'): bld.SAMBA_PYTHON('pytdb', @@ -189,7 +205,8 @@ def testonly(ctx): if not os.path.exists(link): os.symlink(os.path.abspath(os.path.join(env.cwd, 'test')), link) - for f in 'tdb1-run-3G-file', 'tdb1-run-bad-tdb-header', 'tdb1-run', 'tdb1-run-check', 'tdb1-run-corrupt', 'tdb1-run-die-during-transaction', 'tdb1-run-endian', 'tdb1-run-incompatible', 'tdb1-run-nested-transactions', 'tdb1-run-nested-traverse', 'tdb1-run-no-lock-during-traverse', 'tdb1-run-oldhash', 'tdb1-run-open-during-transaction', 'tdb1-run-readonly-check', 'tdb1-run-rescue', 'tdb1-run-rescue-find_entry', 'tdb1-run-rwlock-check', 'tdb1-run-summary', 'tdb1-run-transaction-expand', 'tdb1-run-traverse-in-transaction', 'tdb1-run-wronghash-fail', 'tdb1-run-zero-append': + for t in tdb1_unit_tests: + f = "tdb1-" + t cmd = "cd " + testdir + " && " + os.path.abspath(os.path.join(Utils.g_module.blddir, f)) + " > test-output 2>&1" print("..." + f) ret = samba_utils.RUN_COMMAND(cmd) |