Diffstat (limited to 'src/core/cgroup.c')
 src/core/cgroup.c | 189
 1 file changed, 170 insertions(+), 19 deletions(-)
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 17d3eb2e5..8f64d224b 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -30,9 +30,9 @@
#include "path-util.h"
#include "process-util.h"
//#include "special.h"
+#include "stdio-util.h"
#include "string-table.h"
#include "string-util.h"
-#include "stdio-util.h"
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
@@ -649,7 +649,27 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
"Failed to set %s: %m", file);
}
-static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
+static void cgroup_apply_firewall(Unit *u, CGroupContext *c) {
+ int r;
+
+ if (u->type == UNIT_SLICE) /* Skip this for slice units, they are inner cgroup nodes, and since bpf/cgroup is
+ * not recursive we don't ever touch the bpf on them */
+ return;
+
+ r = bpf_firewall_compile(u);
+ if (r < 0)
+ return;
+
+ (void) bpf_firewall_install(u);
+ return;
+}
+
+static void cgroup_context_apply(
+ Unit *u,
+ CGroupMask apply_mask,
+ bool apply_bpf,
+ ManagerState state) {
+
const char *path;
CGroupContext *c;
bool is_root;
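The new cgroup_apply_firewall() helper compiles the unit's IP access lists into a BPF program and attaches it to the unit's cgroup; a compile failure is deliberately ignored here (bpf_firewall_compile() is expected to log on its own), and slices are skipped because cgroup/BPF attachment is not recursive, so only leaf cgroups carry a program. The kernel-facing half of the install step boils down to a bpf(2) BPF_PROG_ATTACH on the cgroup directory fd. A minimal standalone sketch of that step, using libbpf's bpf_prog_attach() wrapper for brevity (systemd issues the bpf(2) syscall through its own helpers in bpf-firewall.c; prog_fd and the cgroup path are this sketch's assumptions):

/* Attach an already-loaded BPF_PROG_TYPE_CGROUP_SKB program to a cgroup's
 * egress hook -- roughly the kernel-facing half of bpf_firewall_install(). */
#include <fcntl.h>
#include <unistd.h>
#include <bpf/bpf.h>

static int attach_egress(int prog_fd, const char *cgroup_path) {
        int cg_fd, r;

        cg_fd = open(cgroup_path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
        if (cg_fd < 0)
                return -1;

        /* BPF_F_ALLOW_OVERRIDE permits a descendant cgroup to install its
         * own program in place of this one; without it the attachment is
         * exclusive for the whole subtree. */
        r = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_EGRESS,
                            BPF_F_ALLOW_OVERRIDE);

        close(cg_fd);
        return r;
}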
@@ -663,7 +683,8 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
assert(c);
assert(path);
- if (mask == 0)
+ /* Nothing to do? Exit early! */
+ if (apply_mask == 0 && !apply_bpf)
return;
/* Some cgroup attributes are not supported on the root cgroup,
@@ -677,9 +698,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
* cgroup trees (assuming we are running in a container then),
* and missing cgroups, i.e. EROFS and ENOENT. */
- if ((mask & CGROUP_MASK_CPU) && !is_root) {
- bool has_weight = cgroup_context_has_cpu_weight(c);
- bool has_shares = cgroup_context_has_cpu_shares(c);
+ if ((apply_mask & CGROUP_MASK_CPU) && !is_root) {
+ bool has_weight, has_shares;
+
+ has_weight = cgroup_context_has_cpu_weight(c);
+ has_shares = cgroup_context_has_cpu_shares(c);
if (cg_all_unified() > 0) {
uint64_t weight;
@@ -716,7 +739,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if (mask & CGROUP_MASK_IO) {
+ if (apply_mask & CGROUP_MASK_IO) {
bool has_io = cgroup_context_has_io_config(c);
bool has_blockio = cgroup_context_has_blockio_config(c);
@@ -793,7 +816,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if (mask & CGROUP_MASK_BLKIO) {
+ if (apply_mask & CGROUP_MASK_BLKIO) {
bool has_io = cgroup_context_has_io_config(c);
bool has_blockio = cgroup_context_has_blockio_config(c);
@@ -860,7 +883,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) {
if (cg_all_unified() > 0) {
uint64_t max, swap_max = CGROUP_LIMIT_MAX;
@@ -900,7 +923,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
CGroupDeviceAllow *a;
/* Changing the devices list of a populated cgroup
@@ -965,7 +988,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_PIDS) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) {
if (c->tasks_max != CGROUP_LIMIT_MAX) {
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
@@ -979,6 +1002,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set pids.max: %m");
}
+
+ if (apply_bpf)
+ cgroup_apply_firewall(u, c);
}
CGroupMask cgroup_context_get_mask(CGroupContext *c) {
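The pids.max branch above shows systemd's fixed-size formatting idiom: DECIMAL_STR_MAX() gives a compile-time upper bound on the decimal width of an integer type, so the attribute value can be formatted on the stack with no allocation. A self-contained sketch of the same pattern, with the macro reproduced from systemd's macro.h for illustration and snprintf() standing in for xsprintf():

#include <inttypes.h>
#include <stdio.h>

/* Reproduced from systemd's macro.h: bound on the decimal string width of
 * an integer type, including sign and terminating NUL. */
#define DECIMAL_STR_MAX(type)                                           \
        (2+(sizeof(type) <= 1 ? 3 :                                     \
            sizeof(type) <= 2 ? 5 :                                     \
            sizeof(type) <= 4 ? 10 :                                    \
            sizeof(type) <= 8 ? 20 : sizeof(int[-2*(sizeof(type) > 8)])))

int main(void) {
        uint64_t tasks_max = 4096;
        char buf[DECIMAL_STR_MAX(uint64_t) + 2]; /* +2: trailing newline, mirroring the hunk */

        snprintf(buf, sizeof buf, "%" PRIu64 "\n", tasks_max);
        fputs(buf, stdout); /* systemd hands this string to cg_set_attribute() instead */
        return 0;
}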
@@ -1125,6 +1151,39 @@ CGroupMask unit_get_enable_mask(Unit *u) {
return mask;
}
+bool unit_get_needs_bpf(Unit *u) {
+ CGroupContext *c;
+ Unit *p;
+ assert(u);
+
+ /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive at the
+ * moment. */
+ if (u->type == UNIT_SLICE)
+ return false;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return false;
+
+ if (c->ip_accounting ||
+ c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+
+ /* If any parent slice has an IP access list defined, it applies too */
+ for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) {
+ c = unit_get_cgroup_context(p);
+ if (!c)
+ return false;
+
+ if (c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+ }
+
+ return false;
+}
+
/* Recurse from a unit up through its containing slices, propagating
 * mask bits upward. A unit is also a member of itself. */
void unit_update_cgroup_members_masks(Unit *u) {
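unit_get_needs_bpf() walks up the slice chain because of the same non-recursiveness noted above: a program attached to a parent slice's cgroup would not filter the children's traffic, so the leaf unit's program has to fold in every ancestor's access list. In practice a unit with no list of its own still needs a program when any parent slice defines one, e.g. with hypothetical units like:

    # limited.slice
    [Slice]
    IPAddressDeny=any
    IPAddressAllow=localhost

    # foo.service, placed below it
    [Service]
    Slice=limited.slice

Here unit_get_needs_bpf() returns true for foo.service via the parent loop, even though the service itself sets neither IPAddressAllow= nor IPAddressDeny=.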
@@ -1300,7 +1359,8 @@ int unit_watch_cgroup(Unit *u) {
static int unit_create_cgroup(
Unit *u,
CGroupMask target_mask,
- CGroupMask enable_mask) {
+ CGroupMask enable_mask,
+ bool needs_bpf) {
CGroupContext *c;
int r;
@@ -1342,6 +1402,7 @@ static int unit_create_cgroup(
u->cgroup_realized = true;
u->cgroup_realized_mask = target_mask;
u->cgroup_enabled_mask = enable_mask;
+ u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
if (u->type != UNIT_SLICE && !c->delegate) {
@@ -1391,10 +1452,19 @@ static void cgroup_xattr_apply(Unit *u) {
log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
}
-static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
+static bool unit_has_mask_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask,
+ bool needs_bpf) {
+
assert(u);
- return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask;
+ return u->cgroup_realized &&
+ u->cgroup_realized_mask == target_mask &&
+ u->cgroup_enabled_mask == enable_mask &&
+ ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
+ (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
}
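The expanded BPF clause is equivalent to one comparison against the expected state; folding it makes clear that the third state, UNIT_CGROUP_BPF_INVALIDATED, matches neither arm, so an invalidated unit is never considered realized and always goes through cgroup_context_apply() again. An equivalent rewrite of the return (same fields, not the author's code):

        return u->cgroup_realized &&
               u->cgroup_realized_mask == target_mask &&
               u->cgroup_enabled_mask == enable_mask &&
               u->cgroup_bpf_state == (needs_bpf ? UNIT_CGROUP_BPF_ON
                                                 : UNIT_CGROUP_BPF_OFF);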
/* Check if necessary controllers and attributes for a unit are in place.
@@ -1405,6 +1475,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask e
* Returns 0 on success and < 0 on failure. */
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
CGroupMask target_mask, enable_mask;
+ bool needs_bpf, apply_bpf;
int r;
assert(u);
@@ -1416,10 +1487,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
target_mask = unit_get_target_mask(u);
enable_mask = unit_get_enable_mask(u);
+ needs_bpf = unit_get_needs_bpf(u);
- if (unit_has_mask_realized(u, target_mask, enable_mask))
+ if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
return 0;
+ /* Make sure we apply the BPF filter either when one is configured, or if none is configured but the state was
+ * previously anything but off. This way, if a unit that had a BPF filter applied is reconfigured to lose it, the
+ * removal trickles down properly to cgroupfs. */
+ apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
+
/* First, realize parents */
if (UNIT_ISSET(u->slice)) {
r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
@@ -1428,12 +1505,12 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
}
/* And then do the real work */
- r = unit_create_cgroup(u, target_mask, enable_mask);
+ r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
if (r < 0)
return r;
/* Finally, apply the necessary attributes. */
- cgroup_context_apply(u, target_mask, state);
+ cgroup_context_apply(u, target_mask, apply_bpf, state);
cgroup_xattr_apply(u);
return 0;
@@ -1497,7 +1574,10 @@ static void unit_queue_siblings(Unit *u) {
/* If the unit doesn't need any new controllers
* and has current ones realized, it doesn't need
* any changes. */
- if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m)))
+ if (unit_has_mask_realized(m,
+ unit_get_target_mask(m),
+ unit_get_enable_mask(m),
+ unit_get_needs_bpf(m)))
continue;
unit_add_to_cgroup_queue(m);
@@ -2179,7 +2259,34 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
return 0;
}
-int unit_reset_cpu_usage(Unit *u) {
+int unit_get_ip_accounting(
+ Unit *u,
+ CGroupIPAccountingMetric metric,
+ uint64_t *ret) {
+
+ int fd, r;
+
+ assert(u);
+ assert(metric >= 0);
+ assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
+ assert(ret);
+
+ fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
+ u->ip_accounting_ingress_map_fd :
+ u->ip_accounting_egress_map_fd;
+
+ if (fd < 0)
+ return -ENODATA;
+
+ if (IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES))
+ r = bpf_firewall_read_accounting(fd, ret, NULL);
+ else
+ r = bpf_firewall_read_accounting(fd, NULL, ret);
+
+ return r;
+}
+
+int unit_reset_cpu_accounting(Unit *u) {
nsec_t ns;
int r;
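A usage sketch for unit_get_ip_accounting() above, assuming systemd-internal context (Unit, the log_unit_* macros); the metric-names table is this sketch's own. The getter returns -ENODATA when no BPF map fd exists (accounting disabled, or the unit never started), which callers should treat as "nothing to show" rather than as an error:

static void dump_ip_counters(Unit *u) {
        static const char *const names[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
                "IP ingress bytes", "IP ingress packets",
                "IP egress bytes",  "IP egress packets",
        };
        CGroupIPAccountingMetric m;
        uint64_t v;
        int r;

        for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
                r = unit_get_ip_accounting(u, m, &v);
                if (r == -ENODATA)
                        continue; /* accounting off, or unit never ran */
                if (r < 0) {
                        log_unit_warning_errno(u, r, "Failed to read IP accounting: %m");
                        continue;
                }
                log_unit_info(u, "%s: %" PRIu64, names[m], v);
        }
}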
@@ -2197,6 +2304,20 @@ int unit_reset_cpu_usage(Unit *u) {
return 0;
}
+int unit_reset_ip_accounting(Unit *u) {
+ int r = 0, q = 0;
+
+ assert(u);
+
+ if (u->ip_accounting_ingress_map_fd >= 0)
+ r = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
+
+ if (u->ip_accounting_egress_map_fd >= 0)
+ q = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);
+
+ return r < 0 ? r : q;
+}
+
bool unit_cgroup_delegate(Unit *u) {
CGroupContext *c;
@@ -2232,6 +2353,36 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
unit_add_to_cgroup_queue(u);
}
+void unit_invalidate_cgroup_bpf(Unit *u) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return;
+
+ if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED)
+ return;
+
+ u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+ unit_add_to_cgroup_queue(u);
+
+ /* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP access
+ * list of our children includes our own. */
+ if (u->type == UNIT_SLICE) {
+ Unit *member;
+ Iterator i;
+
+ SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
+ if (member == u)
+ continue;
+
+ if (UNIT_DEREF(member->slice) != u)
+ continue;
+
+ unit_invalidate_cgroup_bpf(member);
+ }
+ }
+}
+
void manager_invalidate_startup_units(Manager *m) {
Iterator i;
Unit *u;
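A note on the child walk in unit_invalidate_cgroup_bpf(): Unit keeps no direct list of a slice's children, but member units are ordered after their containing slice by default dependencies, so a slice's UNIT_BEFORE set contains, among others, all of its members. Iterating u->dependencies[UNIT_BEFORE] and keeping only entries whose slice reference points back at u therefore enumerates exactly the direct children; the recursion descends one level per nested slice, and the UNIT_CGROUP_BPF_INVALIDATED early-return keeps repeated invalidations from re-walking an already-queued subtree.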