Diffstat (limited to 'src/core/cgroup.c')
 src/core/cgroup.c | 189
 1 file changed, 170 insertions(+), 19 deletions(-)
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 17d3eb2e5..8f64d224b 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -30,9 +30,9 @@
#include "path-util.h"
#include "process-util.h"
//#include "special.h"
+#include "stdio-util.h"
#include "string-table.h"
#include "string-util.h"
-#include "stdio-util.h"
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
@@ -649,7 +649,27 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
"Failed to set %s: %m", file);
}
-static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
+static void cgroup_apply_firewall(Unit *u, CGroupContext *c) {
+ int r;
+
+ if (u->type == UNIT_SLICE) /* Skip this for slice units, they are inner cgroup nodes, and since bpf/cgroup is
+ * not recursive we don't ever touch the bpf on them */
+ return;
+
+ r = bpf_firewall_compile(u);
+ if (r < 0)
+ return;
+
+ (void) bpf_firewall_install(u);
+ return;
+}
+
+static void cgroup_context_apply(
+ Unit *u,
+ CGroupMask apply_mask,
+ bool apply_bpf,
+ ManagerState state) {
+
const char *path;
CGroupContext *c;
bool is_root;
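The new cgroup_apply_firewall() helper compiles the unit's IP access lists into a BPF program and attaches it to the unit's cgroup; a compile failure is deliberately ignored here (bpf_firewall_compile() is expected to log on its own), and slices are skipped because cgroup/BPF attachment is not recursive, so only leaf cgroups carry a program. The kernel-facing half of the install step boils down to a bpf(2) BPF_PROG_ATTACH on the cgroup directory fd. A minimal standalone sketch of that step, using libbpf's bpf_prog_attach() wrapper for brevity (systemd issues the bpf(2) syscall through its own helpers in bpf-firewall.c; prog_fd and the cgroup path are this sketch's assumptions):

/* Attach an already-loaded BPF_PROG_TYPE_CGROUP_SKB program to a cgroup's
 * egress hook -- roughly the kernel-facing half of bpf_firewall_install(). */
#include <fcntl.h>
#include <unistd.h>
#include <bpf/bpf.h>

static int attach_egress(int prog_fd, const char *cgroup_path) {
        int cg_fd, r;

        cg_fd = open(cgroup_path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
        if (cg_fd < 0)
                return -1;

        /* BPF_F_ALLOW_OVERRIDE permits a descendant cgroup to install its
         * own program in place of this one; without it the attachment is
         * exclusive for the whole subtree. */
        r = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_EGRESS,
                            BPF_F_ALLOW_OVERRIDE);

        close(cg_fd);
        return r;
}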
@@ -663,7 +683,8 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
assert(c);
assert(path);
- if (mask == 0)
+ /* Nothing to do? Exit early! */
+ if (apply_mask == 0 && !apply_bpf)
return;
/* Some cgroup attributes are not supported on the root cgroup,
@@ -677,9 +698,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
* cgroup trees (assuming we are running in a container then),
* and missing cgroups, i.e. EROFS and ENOENT. */
- if ((mask & CGROUP_MASK_CPU) && !is_root) {
- bool has_weight = cgroup_context_has_cpu_weight(c);
- bool has_shares = cgroup_context_has_cpu_shares(c);
+ if ((apply_mask & CGROUP_MASK_CPU) && !is_root) {
+ bool has_weight, has_shares;
+
+ has_weight = cgroup_context_has_cpu_weight(c);
+ has_shares = cgroup_context_has_cpu_shares(c);
if (cg_all_unified() > 0) {
uint64_t weight;
@@ -716,7 +739,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if (mask & CGROUP_MASK_IO) {
+ if (apply_mask & CGROUP_MASK_IO) {
bool has_io = cgroup_context_has_io_config(c);
bool has_blockio = cgroup_context_has_blockio_config(c);
@@ -793,7 +816,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if (mask & CGROUP_MASK_BLKIO) {
+ if (apply_mask & CGROUP_MASK_BLKIO) {
bool has_io = cgroup_context_has_io_config(c);
bool has_blockio = cgroup_context_has_blockio_config(c);
@@ -860,7 +883,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) {
if (cg_all_unified() > 0) {
uint64_t max, swap_max = CGROUP_LIMIT_MAX;
@@ -900,7 +923,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
CGroupDeviceAllow *a;
/* Changing the devices list of a populated cgroup
@@ -965,7 +988,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_PIDS) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) {
if (c->tasks_max != CGROUP_LIMIT_MAX) {
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
@@ -979,6 +1002,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set pids.max: %m");
}
+
+ if (apply_bpf)
+ cgroup_apply_firewall(u, c);
}
CGroupMask cgroup_context_get_mask(CGroupContext *c) {
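The pids.max branch above shows systemd's fixed-size formatting idiom: DECIMAL_STR_MAX() gives a compile-time upper bound on the decimal width of an integer type, so the attribute value can be formatted on the stack with no allocation. A self-contained sketch of the same pattern, with the macro reproduced from systemd's macro.h for illustration and snprintf() standing in for xsprintf():

#include <inttypes.h>
#include <stdio.h>

/* Reproduced from systemd's macro.h: bound on the decimal string width of
 * an integer type, including sign and terminating NUL. */
#define DECIMAL_STR_MAX(type)                                           \
        (2+(sizeof(type) <= 1 ? 3 :                                     \
            sizeof(type) <= 2 ? 5 :                                     \
            sizeof(type) <= 4 ? 10 :                                    \
            sizeof(type) <= 8 ? 20 : sizeof(int[-2*(sizeof(type) > 8)])))

int main(void) {
        uint64_t tasks_max = 4096;
        char buf[DECIMAL_STR_MAX(uint64_t) + 2]; /* +2: trailing newline, mirroring the hunk */

        snprintf(buf, sizeof buf, "%" PRIu64 "\n", tasks_max);
        fputs(buf, stdout); /* systemd hands this string to cg_set_attribute() instead */
        return 0;
}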
@@ -1125,6 +1151,39 @@ CGroupMask unit_get_enable_mask(Unit *u) {
return mask;
}
+bool unit_get_needs_bpf(Unit *u) {
+ CGroupContext *c;
+ Unit *p;
+ assert(u);
+
+ /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive at the
+ * moment. */
+ if (u->type == UNIT_SLICE)
+ return false;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return false;
+
+ if (c->ip_accounting ||
+ c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+
+ /* If any parent slice has an IP access list defined, it applies too */
+ for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) {
+ c = unit_get_cgroup_context(p);
+ if (!c)
+ return false;
+
+ if (c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+ }
+
+ return false;
+}
+
/* Recurse from a unit up through its containing slices, propagating
 * mask bits upward. A unit is also a member of itself. */
void unit_update_cgroup_members_masks(Unit *u) {
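unit_get_needs_bpf() walks up the slice chain because of the same non-recursiveness noted above: a program attached to a parent slice's cgroup would not filter the children's traffic, so the leaf unit's program has to fold in every ancestor's access list. In practice a unit with no list of its own still needs a program when any parent slice defines one, e.g. with hypothetical units like:

    # limited.slice
    [Slice]
    IPAddressDeny=any
    IPAddressAllow=localhost

    # foo.service, placed below it
    [Service]
    Slice=limited.slice

Here unit_get_needs_bpf() returns true for foo.service via the parent loop, even though the service itself sets neither IPAddressAllow= nor IPAddressDeny=.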
@@ -1300,7 +1359,8 @@ int unit_watch_cgroup(Unit *u) {
static int unit_create_cgroup(
Unit *u,
CGroupMask target_mask,
- CGroupMask enable_mask) {
+ CGroupMask enable_mask,
+ bool needs_bpf) {
CGroupContext *c;
int r;
@@ -1342,6 +1402,7 @@ static int unit_create_cgroup(
u->cgroup_realized = true;
u->cgroup_realized_mask = target_mask;
u->cgroup_enabled_mask = enable_mask;
+ u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
if (u->type != UNIT_SLICE && !c->delegate) {
@@ -1391,10 +1452,19 @@ static void cgroup_xattr_apply(Unit *u) {
log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
}
-static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
+static bool unit_has_mask_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask,
+ bool needs_bpf) {
+
assert(u);
- return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask;
+ return u->cgroup_realized &&
+ u->cgroup_realized_mask == target_mask &&
+ u->cgroup_enabled_mask == enable_mask &&
+ ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
+ (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
}
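The expanded BPF clause is equivalent to one comparison against the expected state; folding it makes clear that the third state, UNIT_CGROUP_BPF_INVALIDATED, matches neither arm, so an invalidated unit is never considered realized and always goes through cgroup_context_apply() again. An equivalent rewrite of the return (same fields, not the author's code):

        return u->cgroup_realized &&
               u->cgroup_realized_mask == target_mask &&
               u->cgroup_enabled_mask == enable_mask &&
               u->cgroup_bpf_state == (needs_bpf ? UNIT_CGROUP_BPF_ON
                                                 : UNIT_CGROUP_BPF_OFF);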
/* Check if necessary controllers and attributes for a unit are in place.
@@ -1405,6 +1475,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask e
* Returns 0 on success and < 0 on failure. */
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
CGroupMask target_mask, enable_mask;
+ bool needs_bpf, apply_bpf;
int r;
assert(u);
@@ -1416,10 +1487,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
target_mask = unit_get_target_mask(u);
enable_mask = unit_get_enable_mask(u);
+ needs_bpf = unit_get_needs_bpf(u);
- if (unit_has_mask_realized(u, target_mask, enable_mask))
+ if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
return 0;
+ /* Make sure we apply the BPF filter either when one is configured, or if none is configured but the state was
+ * previously anything but off. This way, if a unit that had a BPF filter applied is reconfigured to lose it, the
+ * removal trickles down properly to cgroupfs. */
+ apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
+
/* First, realize parents */
if (UNIT_ISSET(u->slice)) {
r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
@@ -1428,12 +1505,12 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
}
/* And then do the real work */
- r = unit_create_cgroup(u, target_mask, enable_mask);
+ r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
if (r < 0)
return r;
/* Finally, apply the necessary attributes. */
- cgroup_context_apply(u, target_mask, state);
+ cgroup_context_apply(u, target_mask, apply_bpf, state);
cgroup_xattr_apply(u);
return 0;
@@ -1497,7 +1574,10 @@ static void unit_queue_siblings(Unit *u) {
/* If the unit doesn't need any new controllers
* and has current ones realized, it doesn't need
* any changes. */
- if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m)))
+ if (unit_has_mask_realized(m,
+ unit_get_target_mask(m),
+ unit_get_enable_mask(m),
+ unit_get_needs_bpf(m)))
continue;
unit_add_to_cgroup_queue(m);
@@ -2179,7 +2259,34 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
return 0;
}
-int unit_reset_cpu_usage(Unit *u) {
+int unit_get_ip_accounting(
+ Unit *u,
+ CGroupIPAccountingMetric metric,
+ uint64_t *ret) {
+
+ int fd, r;
+
+ assert(u);
+ assert(metric >= 0);
+ assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
+ assert(ret);
+
+ fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
+ u->ip_accounting_ingress_map_fd :
+ u->ip_accounting_egress_map_fd;
+
+ if (fd < 0)
+ return -ENODATA;
+
+ if (IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES))
+ r = bpf_firewall_read_accounting(fd, ret, NULL);
+ else
+ r = bpf_firewall_read_accounting(fd, NULL, ret);
+
+ return r;
+}
+
+int unit_reset_cpu_accounting(Unit *u) {
nsec_t ns;
int r;
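A usage sketch for unit_get_ip_accounting() above, assuming systemd-internal context (Unit, the log_unit_* macros); the metric-names table is this sketch's own. The getter returns -ENODATA when no BPF map fd exists (accounting disabled, or the unit never started), which callers should treat as "nothing to show" rather than as an error:

static void dump_ip_counters(Unit *u) {
        static const char *const names[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
                "IP ingress bytes", "IP ingress packets",
                "IP egress bytes",  "IP egress packets",
        };
        CGroupIPAccountingMetric m;
        uint64_t v;
        int r;

        for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
                r = unit_get_ip_accounting(u, m, &v);
                if (r == -ENODATA)
                        continue; /* accounting off, or unit never ran */
                if (r < 0) {
                        log_unit_warning_errno(u, r, "Failed to read IP accounting: %m");
                        continue;
                }
                log_unit_info(u, "%s: %" PRIu64, names[m], v);
        }
}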
@@ -2197,6 +2304,20 @@ int unit_reset_cpu_usage(Unit *u) {
return 0;
}
+int unit_reset_ip_accounting(Unit *u) {
+ int r = 0, q = 0;
+
+ assert(u);
+
+ if (u->ip_accounting_ingress_map_fd >= 0)
+ r = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
+
+ if (u->ip_accounting_egress_map_fd >= 0)
+ q = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);
+
+ return r < 0 ? r : q;
+}
+
bool unit_cgroup_delegate(Unit *u) {
CGroupContext *c;
@@ -2232,6 +2353,36 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
unit_add_to_cgroup_queue(u);
}
+void unit_invalidate_cgroup_bpf(Unit *u) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return;
+
+ if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED)
+ return;
+
+ u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+ unit_add_to_cgroup_queue(u);
+
+ /* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP access
+ * list of our children includes our own. */
+ if (u->type == UNIT_SLICE) {
+ Unit *member;
+ Iterator i;
+
+ SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
+ if (member == u)
+ continue;
+
+ if (UNIT_DEREF(member->slice) != u)
+ continue;
+
+ unit_invalidate_cgroup_bpf(member);
+ }
+ }
+}
+
void manager_invalidate_startup_units(Manager *m) {
Iterator i;
Unit *u;
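A note on the child walk in unit_invalidate_cgroup_bpf(): Unit keeps no direct list of a slice's children, but member units are ordered after their containing slice by default dependencies, so a slice's UNIT_BEFORE set contains, among others, all of its members. Iterating u->dependencies[UNIT_BEFORE] and keeping only entries whose slice reference points back at u therefore enumerates exactly the direct children; the recursion descends one level per nested slice, and the UNIT_CGROUP_BPF_INVALIDATED early-return keeps repeated invalidations from re-walking an already-queued subtree.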