summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2014-06-06 11:42:25 +0200
committerLennart Poettering <lennart@poettering.net>2014-06-06 14:37:40 +0200
commitd6797c920e9eb70f46a893c00fdd9ecb86d15f84 (patch)
tree7029ba9333ceb289752c85f154f4fa1350fa941d
parentc8835999c33c0443bf91e1a8fa6dd716a8ff0b0f (diff)
namespace: beef up read-only bind mount logic
Instead of blindly creating another bind mount for read-only mounts, check if there's already one we can use, and if so, use it. Also, recursively mark all submounts read-only too. Also, ignore autofs mounts when remounting read-only unless they are already triggered.
-rw-r--r--man/systemd.exec.xml14
-rw-r--r--src/core/namespace.c28
-rw-r--r--src/nspawn/nspawn.c25
-rw-r--r--src/shared/util.c145
-rw-r--r--src/shared/util.h2
5 files changed, 183 insertions, 31 deletions
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index c5bb55c55..c419424d9 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -777,8 +777,8 @@
<term><varname>ReadOnlyDirectories=</varname></term>
<term><varname>InaccessibleDirectories=</varname></term>
- <listitem><para>Sets up a new
- file system namespace for executed
+ <listitem><para>Sets up a new file
+ system namespace for executed
processes. These options may be used
to limit access a process might have
to the main file system
@@ -799,16 +799,14 @@
processes inside the namespace. Note
that restricting access with these
options does not extend to submounts
- of a directory. You must list
- submounts separately in these settings
- to ensure the same limited
- access. These options may be specified
+ of a directory that are created later
+ on. These options may be specified
more than once in which case all
directories listed will have limited
access from within the namespace. If
the empty string is assigned to this
- option, the specific list is reset, and
- all prior assignments have no
+ option, the specific list is reset,
+ and all prior assignments have no
effect.</para>
<para>Paths in
<varname>ReadOnlyDirectories=</varname>
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 43b904580..f11065ee4 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -280,9 +280,6 @@ static int apply_mount(
switch (m->mode) {
- case PRIVATE_DEV:
- return mount_dev(m);
-
case INACCESSIBLE:
/* First, get rid of everything that is below if there
@@ -295,8 +292,9 @@ static int apply_mount(
case READONLY:
case READWRITE:
- what = m->path;
- break;
+ /* Nothing to mount here, we just later toggle the
+ * MS_RDONLY bit for the mount point */
+ return 0;
case PRIVATE_TMP:
what = tmp_dir;
@@ -306,6 +304,9 @@ static int apply_mount(
what = var_tmp_dir;
break;
+ case PRIVATE_DEV:
+ return mount_dev(m);
+
default:
assert_not_reached("Unknown mode");
}
@@ -316,7 +317,7 @@ static int apply_mount(
if (r >= 0)
log_debug("Successfully mounted %s to %s", what, m->path);
else if (m->ignore && errno == ENOENT)
- r = 0;
+ return 0;
return r;
}
@@ -326,14 +327,17 @@ static int make_read_only(BindMount *m) {
assert(m);
- if (m->mode != INACCESSIBLE && m->mode != READONLY)
- return 0;
+ if (IN_SET(m->mode, INACCESSIBLE, READONLY))
+ r = bind_remount_recursive(m->path, true);
+ else if (m->mode == READWRITE)
+ r = bind_remount_recursive(m->path, false);
+ else
+ r = 0;
- r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
- if (r < 0 && !(m->ignore && errno == ENOENT))
- return -errno;
+ if (m->ignore && r == -ENOENT)
+ return 0;
- return 0;
+ return r;
}
int setup_namespace(
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index a1d77244f..19fb086e7 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -635,7 +635,7 @@ static int mount_all(const char *dest) {
return r;
}
-static int mount_binds(const char *dest, char **l, unsigned long flags) {
+static int mount_binds(const char *dest, char **l, bool ro) {
char **x, **y;
STRV_FOREACH_PAIR(x, y, l) {
@@ -686,9 +686,12 @@ static int mount_binds(const char *dest, char **l, unsigned long flags) {
return -errno;
}
- if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
- log_error("mount(%s) failed: %m", where);
- return -errno;
+ if (ro) {
+ r = bind_remount_recursive(where, true);
+ if (r < 0) {
+ log_error("Read-Only bind mount failed: %s", strerror(-r));
+ return r;
+ }
}
}
@@ -2941,15 +2944,17 @@ int main(int argc, char *argv[]) {
/* Turn directory into bind mount */
if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
- log_error("Failed to make bind mount.");
+ log_error("Failed to make bind mount: %m");
goto child_fail;
}
- if (arg_read_only)
- if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
- log_error("Failed to make read-only.");
+ if (arg_read_only) {
+ k = bind_remount_recursive(arg_directory, true);
+ if (k < 0) {
+ log_error("Failed to make tree read-only: %s", strerror(-k));
goto child_fail;
}
+ }
if (mount_all(arg_directory) < 0)
goto child_fail;
@@ -2985,10 +2990,10 @@ int main(int argc, char *argv[]) {
if (setup_journal(arg_directory) < 0)
goto child_fail;
- if (mount_binds(arg_directory, arg_bind, 0) < 0)
+ if (mount_binds(arg_directory, arg_bind, false) < 0)
goto child_fail;
- if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
+ if (mount_binds(arg_directory, arg_bind_ro, true) < 0)
goto child_fail;
if (setup_kdbus(arg_directory, kdbus_domain) < 0)
diff --git a/src/shared/util.c b/src/shared/util.c
index 011fb36f4..7fa3742b4 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -6540,7 +6540,6 @@ int umount_recursive(const char *prefix, int flags) {
"%*s" /* (11) mount options 2 */
"%*[^\n]", /* some rubbish at the end */
&path);
-
if (k != 1) {
if (k == EOF)
break;
@@ -6570,3 +6569,147 @@ int umount_recursive(const char *prefix, int flags) {
return r ? r : n;
}
+
+int bind_remount_recursive(const char *prefix, bool ro) {
+ _cleanup_set_free_free_ Set *done = NULL;
+ _cleanup_free_ char *cleaned = NULL;
+ int r;
+
+ /* Recursively remount a directory (and all its submounts)
+ * read-only or read-write. If the directory is already
+ * mounted, we reuse the mount and simply mark it
+ * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+ * operation). If it isn't we first make it one. Afterwards we
+ * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
+ * submounts we can access, too. When mounts are stacked on
+ * the same mount point we only care for each individual
+ * "top-level" mount on each point, as we cannot
+ * influence/access the underlying mounts anyway. We do not
+ * have any effect on future submounts that might get
+ * propagated, they migt be writable. This includes future
+ * submounts that have been triggered via autofs. */
+
+ cleaned = strdup(prefix);
+ if (!cleaned)
+ return -ENOMEM;
+
+ path_kill_slashes(cleaned);
+
+ done = set_new(string_hash_func, string_compare_func);
+ if (!done)
+ return -ENOMEM;
+
+ for (;;) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ _cleanup_set_free_free_ Set *todo = NULL;
+ bool top_autofs = false;
+ char *x;
+
+ todo = set_new(string_hash_func, string_compare_func);
+ if (!todo)
+ return -ENOMEM;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
+ int k;
+
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) mount options (superblock) */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%ms " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%*s" /* (11) mount options (bind mount) */
+ "%*[^\n]", /* some rubbish at the end */
+ &path,
+ &type);
+ if (k != 2) {
+ if (k == EOF)
+ break;
+
+ continue;
+ }
+
+ p = cunescape(path);
+ if (!p)
+ return -ENOMEM;
+
+ /* Let's ignore autofs mounts. If they aren't
+ * triggered yet, we want to avoid triggering
+ * them, as we don't make any guarantees for
+ * future submounts anyway. If they are
+ * already triggered, then we will find
+ * another entry for this. */
+ if (streq(type, "autofs")) {
+ top_autofs = top_autofs || path_equal(cleaned, p);
+ continue;
+ }
+
+ if (path_startswith(p, cleaned) &&
+ !set_contains(done, p)) {
+
+ r = set_consume(todo, p);
+ p = NULL;
+
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* If we have no submounts to process anymore and if
+ * the root is either already done, or an autofs, we
+ * are done */
+ if (set_isempty(todo) &&
+ (top_autofs || set_contains(done, cleaned)))
+ return 0;
+
+ if (!set_contains(done, cleaned) &&
+ !set_contains(todo, cleaned)) {
+ /* The prefix directory itself is not yet a
+ * mount, make it one. */
+ if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
+ return -errno;
+
+ if (mount(NULL, prefix, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+ return -errno;
+
+ x = strdup(cleaned);
+ if (!x)
+ return -ENOMEM;
+
+ r = set_consume(done, x);
+ if (r < 0)
+ return r;
+ }
+
+ while ((x = set_steal_first(todo))) {
+
+ r = set_consume(done, x);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+
+ if (mount(NULL, x, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
+
+ /* Deal with mount points that are
+ * obstructed by a later mount */
+
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ }
+ }
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index ac851fa4d..952239e7e 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -944,3 +944,5 @@ union file_handle_union {
int update_reboot_param_file(const char *param);
int umount_recursive(const char *target, int flags);
+
+int bind_remount_recursive(const char *prefix, bool ro);