summaryrefslogtreecommitdiff
path: root/src/core/mount-setup.c
diff options
context:
space:
mode:
authorTejun Heo <htejun@fb.com>2016-11-21 14:45:53 -0500
committerSven Eden <yamakuzure@gmx.net>2017-07-17 17:58:35 +0200
commitf000ad56fe2346cc8391eeb684681e707711b3ce (patch)
treec87d454e456ce891cfc3ed25d1adff39d62d2c43 /src/core/mount-setup.c
parent2f30582bd3d00ce582d190ddb379e46e0a67f813 (diff)
core: make hybrid cgroup unified mode keep compat /sys/fs/cgroup/elogind hierarchy
Currently the hybrid mode mounts cgroup v2 on /sys/fs/cgroup instead of the v1 name=elogind hierarchy. While this works fine for elogind itself, it breaks tools which expect cgroup v1 hierarchy on /sys/fs/cgroup/elogind. This patch updates the hybrid mode so that it mounts v2 hierarchy on /sys/fs/cgroup/unified and keeps v1 "name=elogind" hierarchy on /sys/fs/cgroup/elogind for compatibility. elogind itself doesn't depend on the "name=elogind" hierarchy at all. All operations take place on the v2 hierarchy as before but the v1 hierarchy is kept in sync so that any tools which expect it to be there can keep doing so. This allows elogind to take advantage of cgroup v2 process management without requiring other tools to be aware of the hybrid mode. The hybrid mode is implemented by mapping the special elogind controller to /sys/fs/cgroup/unified and making the basic cgroup utility operations - cg_attach(), cg_create(), cg_rmdir() and cg_trim() - also operate on the /sys/fs/cgroup/elogind hierarchy whenever the cgroup2 hierarchy is updated. While a bit messy, this will allow dropping complications from using cgroup v1 for process management a lot sooner than otherwise possible which should make it a net gain in terms of maintainability. v2: Fixed !cgns breakage reported by @evverx and renamed the unified mount point to /sys/fs/cgroup/unified as suggested by @brauner. v3: chown the compat hierarchy too on delegation. Suggested by @evverx. v4: [zj] - drop the change to default, full "legacy" is still the default.
Diffstat (limited to 'src/core/mount-setup.c')
-rw-r--r--src/core/mount-setup.c28
1 files changed, 13 insertions, 15 deletions
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 7dfaf27df..65165b7ef 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -105,10 +105,10 @@ static const MountPoint mount_table[] = {
{ "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
#if 0 /// UNNEEDED by elogind
- { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER },
+ cg_is_legacy_wanted, MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
#else
{ "cgroup", "/sys/fs/cgroup/elogind", "cgroup", "none,name=elogind,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
@@ -377,7 +377,6 @@ int mount_setup(bool loaded_policy) {
int r = 0;
r = mount_points_setup(ELEMENTSOF(mount_table), loaded_policy);
-
if (r < 0)
return r;
@@ -409,26 +408,25 @@ int mount_setup(bool loaded_policy) {
* udevd. */
dev_setup(NULL, UID_INVALID, GID_INVALID);
- /* Mark the root directory as shared in regards to mount
- * propagation. The kernel defaults to "private", but we think
- * it makes more sense to have a default of "shared" so that
- * nspawn and the container tools work out of the box. If
- * specific setups need other settings they can reset the
- * propagation mode to private if needed. */
+ /* Mark the root directory as shared in regards to mount propagation. The kernel defaults to "private", but we
+ * think it makes more sense to have a default of "shared" so that nspawn and the container tools work out of
+ * the box. If specific setups need other settings they can reset the propagation mode to private if
+ * needed. Note that we set this only when we are invoked directly by the kernel. If we are invoked by a
+ * container manager we assume the container manager knows what it is doing (for example, because it set up
+ * some directories with different propagation modes). */
if (detect_container() <= 0)
if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
- /* Create a few directories we always want around, Note that
- * sd_booted() checks for /run/systemd/system, so this mkdir
- * really needs to stay for good, otherwise software that
- * copied sd-daemon.c into their sources will misdetect
- * systemd. */
#endif // 0
+ /* Create a few directories we always want around, Note that sd_booted() checks for /run/systemd/system, so
+ * this mkdir really needs to stay for good, otherwise software that copied sd-daemon.c into their sources will
+ * misdetect systemd. */
(void) mkdir_label("/run/systemd", 0755);
(void) mkdir_label("/run/systemd/system", 0755);
- (void) mkdir_label("/run/systemd/inaccessible", 0000);
+
/* Set up inaccessible items */
+ (void) mkdir_label("/run/systemd/inaccessible", 0000);
(void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0);
(void) mkdir_label("/run/systemd/inaccessible/dir", 0000);
(void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0));