From e89725d34d1d43980147dfd71d80760443a750b5 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 5 Sep 2017 19:27:53 +0200 Subject: cgroup, unit, fragment parser: make use of new firewall functions --- src/basic/cgroup-util.c | 2 +- src/basic/fs-util.c | 1 + src/basic/terminal-util.c | 1 + src/core/cgroup.c | 189 +++++++++++++++++++++++++++++++---- src/core/cgroup.h | 18 +++- src/libelogind/sd-bus/sd-bus.c | 1 + src/libelogind/sd-daemon/sd-daemon.c | 1 + src/libelogind/sd-event/test-event.c | 1 + src/login/logind-session.c | 1 + src/login/logind.c | 1 + src/shared/udev-util.c | 1 + src/test/test-cgroup.c | 1 + src/test/test-log.c | 1 + src/test/test-signal-util.c | 1 + 14 files changed, 199 insertions(+), 21 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 83fd7e5f8..a991f29da 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -1007,7 +1007,7 @@ int cg_get_xattr(const char *controller, const char *path, const char *name, voi int cg_pid_get_path(const char *controller, pid_t pid, char **path) { _cleanup_fclose_ FILE *f = NULL; char line[LINE_MAX]; - const char *fs, *controller_str; + const char *fs, *controller_str = NULL; size_t cs = 0; int unified; diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c index 3701528ed..00c3a4e1b 100644 --- a/src/basic/fs-util.c +++ b/src/basic/fs-util.c @@ -38,6 +38,7 @@ #include "mkdir.h" #include "parse-util.h" #include "path-util.h" +#include "process-util.h" #include "stat-util.h" #include "stdio-util.h" #include "string-util.h" diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c index 6de772117..d580c6dac 100644 --- a/src/basic/terminal-util.c +++ b/src/basic/terminal-util.c @@ -47,6 +47,7 @@ #include "log.h" #include "macro.h" #include "parse-util.h" +#include "path-util.h" #include "process-util.h" #include "socket-util.h" #include "stat-util.h" diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 17d3eb2e5..8f64d224b 100644 --- a/src/core/cgroup.c 
+++ b/src/core/cgroup.c @@ -30,9 +30,9 @@ #include "path-util.h" #include "process-util.h" //#include "special.h" +#include "stdio-util.h" #include "string-table.h" #include "string-util.h" -#include "stdio-util.h" #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) @@ -649,7 +649,27 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_ "Failed to set %s: %m", file); } -static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { +static void cgroup_apply_firewall(Unit *u, CGroupContext *c) { + int r; + + if (u->type == UNIT_SLICE) /* Skip this for slice units, they are inner cgroup nodes, and since bpf/cgroup is + * not recursive we don't ever touch the bpf on them */ + return; + + r = bpf_firewall_compile(u); + if (r < 0) + return; + + (void) bpf_firewall_install(u); + return; +} + +static void cgroup_context_apply( + Unit *u, + CGroupMask apply_mask, + bool apply_bpf, + ManagerState state) { + const char *path; CGroupContext *c; bool is_root; @@ -663,7 +683,8 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { assert(c); assert(path); - if (mask == 0) + /* Nothing to do? Exit early! */ + if (apply_mask == 0 && !apply_bpf) return; /* Some cgroup attributes are not supported on the root cgroup, @@ -677,9 +698,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { * cgroup trees (assuming we are running in a container then), * and missing cgroups, i.e. EROFS and ENOENT. 
*/ - if ((mask & CGROUP_MASK_CPU) && !is_root) { - bool has_weight = cgroup_context_has_cpu_weight(c); - bool has_shares = cgroup_context_has_cpu_shares(c); + if ((apply_mask & CGROUP_MASK_CPU) && !is_root) { + bool has_weight, has_shares; + + has_weight = cgroup_context_has_cpu_weight(c); + has_shares = cgroup_context_has_cpu_shares(c); if (cg_all_unified() > 0) { uint64_t weight; @@ -716,7 +739,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { } } - if (mask & CGROUP_MASK_IO) { + if (apply_mask & CGROUP_MASK_IO) { bool has_io = cgroup_context_has_io_config(c); bool has_blockio = cgroup_context_has_blockio_config(c); @@ -793,7 +816,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { } } - if (mask & CGROUP_MASK_BLKIO) { + if (apply_mask & CGROUP_MASK_BLKIO) { bool has_io = cgroup_context_has_io_config(c); bool has_blockio = cgroup_context_has_blockio_config(c); @@ -860,7 +883,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { } } - if ((mask & CGROUP_MASK_MEMORY) && !is_root) { + if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) { if (cg_all_unified() > 0) { uint64_t max, swap_max = CGROUP_LIMIT_MAX; @@ -900,7 +923,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { } } - if ((mask & CGROUP_MASK_DEVICES) && !is_root) { + if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) { CGroupDeviceAllow *a; /* Changing the devices list of a populated cgroup @@ -965,7 +988,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { } } - if ((mask & CGROUP_MASK_PIDS) && !is_root) { + if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) { if (c->tasks_max != CGROUP_LIMIT_MAX) { char buf[DECIMAL_STR_MAX(uint64_t) + 2]; @@ -979,6 +1002,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, "Failed to set pids.max: %m"); } + + if (apply_bpf) + cgroup_apply_firewall(u, c); } CGroupMask cgroup_context_get_mask(CGroupContext *c) { @@ -1125,6 +1151,39 @@ CGroupMask unit_get_enable_mask(Unit *u) { return mask; } +bool unit_get_needs_bpf(Unit *u) { + CGroupContext *c; + Unit *p; + assert(u); + + /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive at the + * moment. */ + if (u->type == UNIT_SLICE) + return false; + + c = unit_get_cgroup_context(u); + if (!c) + return false; + + if (c->ip_accounting || + c->ip_address_allow || + c->ip_address_deny) + return true; + + /* If any parent slice has an IP access list defined, it applies too */ + for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) { + c = unit_get_cgroup_context(p); + if (!c) + return false; + + if (c->ip_address_allow || + c->ip_address_deny) + return true; + } + + return false; +} + /* Recurse from a unit up through its containing slices, propagating * mask bits upward. A unit is also member of itself. */ void unit_update_cgroup_members_masks(Unit *u) { @@ -1300,7 +1359,8 @@ int unit_watch_cgroup(Unit *u) { static int unit_create_cgroup( Unit *u, CGroupMask target_mask, - CGroupMask enable_mask) { + CGroupMask enable_mask, + bool needs_bpf) { CGroupContext *c; int r; @@ -1342,6 +1402,7 @@ static int unit_create_cgroup( u->cgroup_realized = true; u->cgroup_realized_mask = target_mask; u->cgroup_enabled_mask = enable_mask; + u->cgroup_bpf_state = needs_bpf ? 
UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF; if (u->type != UNIT_SLICE && !c->delegate) { @@ -1391,10 +1452,19 @@ static void cgroup_xattr_apply(Unit *u) { log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path); } -static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) { +static bool unit_has_mask_realized( + Unit *u, + CGroupMask target_mask, + CGroupMask enable_mask, + bool needs_bpf) { + assert(u); - return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask; + return u->cgroup_realized && + u->cgroup_realized_mask == target_mask && + u->cgroup_enabled_mask == enable_mask && + ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) || + (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF)); } /* Check if necessary controllers and attributes for a unit are in place. @@ -1405,6 +1475,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask e * Returns 0 on success and < 0 on failure. */ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { CGroupMask target_mask, enable_mask; + bool needs_bpf, apply_bpf; int r; assert(u); @@ -1416,10 +1487,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { target_mask = unit_get_target_mask(u); enable_mask = unit_get_enable_mask(u); + needs_bpf = unit_get_needs_bpf(u); - if (unit_has_mask_realized(u, target_mask, enable_mask)) + if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf)) return 0; + /* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously + * the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it + * this will trickle down properly to cgroupfs. 
*/ + apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF; + /* First, realize parents */ if (UNIT_ISSET(u->slice)) { r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state); @@ -1428,12 +1505,12 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { } /* And then do the real work */ - r = unit_create_cgroup(u, target_mask, enable_mask); + r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf); if (r < 0) return r; /* Finally, apply the necessary attributes. */ - cgroup_context_apply(u, target_mask, state); + cgroup_context_apply(u, target_mask, apply_bpf, state); cgroup_xattr_apply(u); return 0; @@ -1497,7 +1574,10 @@ static void unit_queue_siblings(Unit *u) { /* If the unit doesn't need any new controllers * and has current ones realized, it doesn't need * any changes. */ - if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m))) + if (unit_has_mask_realized(m, + unit_get_target_mask(m), + unit_get_enable_mask(m), + unit_get_needs_bpf(m))) continue; unit_add_to_cgroup_queue(m); @@ -2179,7 +2259,34 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) { return 0; } -int unit_reset_cpu_usage(Unit *u) { +int unit_get_ip_accounting( + Unit *u, + CGroupIPAccountingMetric metric, + uint64_t *ret) { + + int fd, r; + + assert(u); + assert(metric >= 0); + assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX); + assert(ret); + + fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ? 
+ u->ip_accounting_ingress_map_fd : + u->ip_accounting_egress_map_fd; + + if (fd < 0) + return -ENODATA; + + if (IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES)) + r = bpf_firewall_read_accounting(fd, ret, NULL); + else + r = bpf_firewall_read_accounting(fd, NULL, ret); + + return r; +} + +int unit_reset_cpu_accounting(Unit *u) { nsec_t ns; int r; @@ -2197,6 +2304,20 @@ int unit_reset_cpu_usage(Unit *u) { return 0; } +int unit_reset_ip_accounting(Unit *u) { + int r = 0, q = 0; + + assert(u); + + if (u->ip_accounting_ingress_map_fd >= 0) + r = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd); + + if (u->ip_accounting_egress_map_fd >= 0) + q = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd); + + return r < 0 ? r : q; +} + bool unit_cgroup_delegate(Unit *u) { CGroupContext *c; @@ -2232,6 +2353,36 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) { unit_add_to_cgroup_queue(u); } +void unit_invalidate_cgroup_bpf(Unit *u) { + assert(u); + + if (!UNIT_HAS_CGROUP_CONTEXT(u)) + return; + + if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED) + return; + + u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED; + unit_add_to_cgroup_queue(u); + + /* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP access + * list of our children includes our own. 
*/ + if (u->type == UNIT_SLICE) { + Unit *member; + Iterator i; + + SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) { + if (member == u) + continue; + + if (UNIT_DEREF(member->slice) != u) + continue; + + unit_invalidate_cgroup_bpf(member); + } + } +} + void manager_invalidate_startup_units(Manager *m) { Iterator i; Unit *u; diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 11705be00..a7100d1fc 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -128,6 +128,16 @@ struct CGroupContext { bool delegate; }; +/* Used when querying IP accounting data */ +typedef enum CGroupIPAccountingMetric { + CGROUP_IP_INGRESS_BYTES, + CGROUP_IP_INGRESS_PACKETS, + CGROUP_IP_EGRESS_BYTES, + CGROUP_IP_EGRESS_PACKETS, + _CGROUP_IP_ACCOUNTING_METRIC_MAX, + _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1, +} CGroupIPAccountingMetric; + #include "unit.h" void cgroup_context_init(CGroupContext *c); @@ -150,6 +160,8 @@ CGroupMask unit_get_subtree_mask(Unit *u); CGroupMask unit_get_target_mask(Unit *u); CGroupMask unit_get_enable_mask(Unit *u); +bool unit_get_needs_bpf(Unit *u); + void unit_update_cgroup_members_masks(Unit *u); char *unit_default_cgroup_path(Unit *u); @@ -181,7 +193,10 @@ int unit_watch_all_pids(Unit *u); int unit_get_memory_current(Unit *u, uint64_t *ret); int unit_get_tasks_current(Unit *u, uint64_t *ret); int unit_get_cpu_usage(Unit *u, nsec_t *ret); -int unit_reset_cpu_usage(Unit *u); +int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret); + +int unit_reset_cpu_accounting(Unit *u); +int unit_reset_ip_accounting(Unit *u); bool unit_cgroup_delegate(Unit *u); @@ -191,6 +206,7 @@ int manager_notify_cgroup_empty(Manager *m, const char *group); #if 0 /// UNNEEDED by elogind void unit_invalidate_cgroup(Unit *u, CGroupMask m); +void unit_invalidate_cgroup_bpf(Unit *u); void manager_invalidate_startup_units(Manager *m); diff --git a/src/libelogind/sd-bus/sd-bus.c b/src/libelogind/sd-bus/sd-bus.c index 7bc0c056b..d9800f1cd 100644 --- 
a/src/libelogind/sd-bus/sd-bus.c +++ b/src/libelogind/sd-bus/sd-bus.c @@ -49,6 +49,7 @@ #include "macro.h" #include "missing.h" #include "parse-util.h" +#include "process-util.h" #include "string-util.h" #include "strv.h" #include "util.h" diff --git a/src/libelogind/sd-daemon/sd-daemon.c b/src/libelogind/sd-daemon/sd-daemon.c index 85459e166..06d3c64ff 100644 --- a/src/libelogind/sd-daemon/sd-daemon.c +++ b/src/libelogind/sd-daemon/sd-daemon.c @@ -38,6 +38,7 @@ #include "fs-util.h" #include "parse-util.h" #include "path-util.h" +#include "process-util.h" #include "socket-util.h" #include "strv.h" #include "util.h" diff --git a/src/libelogind/sd-event/test-event.c b/src/libelogind/sd-event/test-event.c index 1a581ae23..656f08d56 100644 --- a/src/libelogind/sd-event/test-event.c +++ b/src/libelogind/sd-event/test-event.c @@ -24,6 +24,7 @@ #include "fd-util.h" #include "log.h" #include "macro.h" +#include "process-util.h" #include "signal-util.h" #include "util.h" diff --git a/src/login/logind-session.c b/src/login/logind-session.c index 142ba55e9..d2125112d 100644 --- a/src/login/logind-session.c +++ b/src/login/logind-session.c @@ -33,6 +33,7 @@ #include "bus-error.h" #include "bus-util.h" #include "escape.h" +#include "extract-word.h" #include "fd-util.h" #include "fileio.h" #include "format-util.h" diff --git a/src/login/logind.c b/src/login/logind.c index 89c6a49e8..1fc636a7a 100644 --- a/src/login/logind.c +++ b/src/login/logind.c @@ -47,6 +47,7 @@ #include "elogind.h" #include "label.h" #include "musl_missing.h" +#include "process-util.h" static void manager_free(Manager *m); diff --git a/src/shared/udev-util.c b/src/shared/udev-util.c index f708dcfa1..ed32f0305 100644 --- a/src/shared/udev-util.c +++ b/src/shared/udev-util.c @@ -19,6 +19,7 @@ #include +#include "alloc-util.h" #include "fileio.h" #include "log.h" #include "string-util.h" diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c index 71e318a15..2ed91c780 100644 --- a/src/test/test-cgroup.c 
+++ b/src/test/test-cgroup.c @@ -22,6 +22,7 @@ #include "cgroup-util.h" #include "path-util.h" +#include "process-util.h" #include "string-util.h" #include "util.h" diff --git a/src/test/test-log.c b/src/test/test-log.c index 8ab569f47..ec1bc2a63 100644 --- a/src/test/test-log.c +++ b/src/test/test-log.c @@ -22,6 +22,7 @@ #include "format-util.h" #include "log.h" +#include "process-util.h" #include "util.h" assert_cc(LOG_REALM_REMOVE_LEVEL(LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, LOG_FTP | LOG_DEBUG)) diff --git a/src/test/test-signal-util.c b/src/test/test-signal-util.c index 92e392778..1830396ac 100644 --- a/src/test/test-signal-util.c +++ b/src/test/test-signal-util.c @@ -21,6 +21,7 @@ #include #include "macro.h" +#include "process-util.h" #include "signal-util.h" static void test_block_signals(void) { -- cgit v1.2.3