summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2015-01-06 00:26:25 +0100
committerLennart Poettering <lennart@poettering.net>2015-01-06 03:16:39 +0100
commita354329f724d6ce913d2ccffb2be8f3327a67faa (patch)
treeb6b05cc2c42c274f8385a16b1896d6c88bf4fc8f /src/core
parent75399049653f2d5e22032da70cf96f20d7b4d9a6 (diff)
core: add new logic for services to store file descriptors in PID 1
With this change it is possible to send file descriptors to PID 1, via sd_pid_notify_with_fds() which PID 1 will store individually for each service, and pass via the usual fd passing logic on next invocation. This is useful for enable daemon reload schemes where daemons serialize their state to /run, push their fds into PID 1 and terminate, restoring their state on next start from the data in /run and passed in from PID 1. The fds are kept by PID 1 as long as no POLLHUP or POLLERR is seen on them, and the service they belong to are either not dead or failed, or have a job queued.
Diffstat (limited to 'src/core')
-rw-r--r--src/core/dbus-service.c1
-rw-r--r--src/core/load-fragment-gperf.gperf.m41
-rw-r--r--src/core/manager.c78
-rw-r--r--src/core/service.c166
-rw-r--r--src/core/service.h14
-rw-r--r--src/core/unit.c21
-rw-r--r--src/core/unit.h6
7 files changed, 240 insertions, 47 deletions
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index 2b50ac93d..6d4713bab 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -59,6 +59,7 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("MainPID", "u", bus_property_get_pid, offsetof(Service, main_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Service, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("BusName", "s", NULL, offsetof(Service, bus_name), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FileDescriptorStoreMax", "u", NULL, offsetof(Service, n_fd_store_max), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StatusText", "s", NULL, offsetof(Service, status_text), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("StatusErrno", "i", NULL, offsetof(Service, status_errno), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index e0ffaa605..9e87d91e7 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -226,6 +226,7 @@ Service.SuccessExitStatus, config_parse_set_status, 0,
Service.SysVStartPriority, config_parse_warn_compat, DISABLED_LEGACY, 0
Service.NonBlocking, config_parse_bool, 0, offsetof(Service, exec_context.non_blocking)
Service.BusName, config_parse_unit_string_printf, 0, offsetof(Service, bus_name)
+Service.FileDescriptorStoreMax, config_parse_unsigned, 0, offsetof(Service, n_fd_store_max)
Service.NotifyAccess, config_parse_notify_access, 0, offsetof(Service, notify_access)
Service.Sockets, config_parse_service_sockets, 0, 0
m4_ifdef(`ENABLE_KDBUS',
diff --git a/src/core/manager.c b/src/core/manager.c
index 519b37438..c18312a36 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -84,6 +84,9 @@
#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
+#define NOTIFY_FD_MAX 768
+#define NOTIFY_BUFFER_MAX PIPE_BUF
+
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
@@ -1449,7 +1452,7 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) {
return n;
}
-static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *buf, size_t n) {
+static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *buf, size_t n, FDSet *fds) {
_cleanup_strv_free_ char **tags = NULL;
assert(m);
@@ -1466,12 +1469,13 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *
log_unit_debug(u->id, "Got notification message for unit %s", u->id);
if (UNIT_VTABLE(u)->notify_message)
- UNIT_VTABLE(u)->notify_message(u, pid, tags);
+ UNIT_VTABLE(u)->notify_message(u, pid, tags, fds);
}
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
Manager *m = userdata;
ssize_t n;
+ int r;
assert(m);
assert(m->notify_fd == fd);
@@ -1482,73 +1486,101 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
}
for (;;) {
- char buf[4096];
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ char buf[NOTIFY_BUFFER_MAX+1];
struct iovec iovec = {
.iov_base = buf,
.iov_len = sizeof(buf)-1,
};
- bool found = false;
-
union {
struct cmsghdr cmsghdr;
- uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)];
} control = {};
-
struct msghdr msghdr = {
.msg_iov = &iovec,
.msg_iovlen = 1,
.msg_control = &control,
.msg_controllen = sizeof(control),
};
- struct ucred *ucred;
+ struct cmsghdr *cmsg;
+ struct ucred *ucred = NULL;
+ bool found = false;
Unit *u1, *u2, *u3;
+ int *fd_array = NULL;
+ unsigned n_fds = 0;
- n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT);
- if (n <= 0) {
- if (n == 0)
- return -EIO;
-
+ n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (n < 0) {
if (errno == EAGAIN || errno == EINTR)
break;
return -errno;
}
+ if (n == 0)
+ return -ECONNRESET;
+
+ for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+
+ fd_array = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
- if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) ||
- control.cmsghdr.cmsg_level != SOL_SOCKET ||
- control.cmsghdr.cmsg_type != SCM_CREDENTIALS ||
- control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) {
- log_warning("Received notify message without credentials. Ignoring.");
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ }
+ }
+
+ if (n_fds > 0) {
+ assert(fd_array);
+
+ r = fdset_new_array(&fds, fd_array, n_fds);
+ if (r < 0) {
+ close_many(fd_array, n_fds);
+ return log_oom();
+ }
+ }
+
+ if (!ucred || ucred->pid <= 0) {
+ log_warning("Received notify message without valid credentials. Ignoring.");
continue;
}
- ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr);
+ if ((size_t) n >= sizeof(buf)) {
+ log_warning("Received notify message exceeded maximum size. Ignoring.");
+ continue;
+ }
- assert((size_t) n < sizeof(buf));
buf[n] = 0;
/* Notify every unit that might be interested, but try
* to avoid notifying the same one multiple times. */
u1 = manager_get_unit_by_pid(m, ucred->pid);
if (u1) {
- manager_invoke_notify_message(m, u1, ucred->pid, buf, n);
+ manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds);
found = true;
}
u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(ucred->pid));
if (u2 && u2 != u1) {
- manager_invoke_notify_message(m, u2, ucred->pid, buf, n);
+ manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds);
found = true;
}
u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(ucred->pid));
if (u3 && u3 != u2 && u3 != u1) {
- manager_invoke_notify_message(m, u3, ucred->pid, buf, n);
+ manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds);
found = true;
}
if (!found)
log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid);
+
+ if (fdset_size(fds) > 0)
+ log_warning("Got auxiliary fds with notification message, closing all.");
}
return 0;
diff --git a/src/core/service.c b/src/core/service.c
index bfbe959ed..78232ee71 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -242,6 +242,42 @@ static void service_reset_watchdog(Service *s) {
service_start_watchdog(s);
}
+static void service_fd_store_unlink(ServiceFDStore *fs) {
+
+ if (!fs)
+ return;
+
+ if (fs->service) {
+ assert(fs->service->n_fd_store > 0);
+ LIST_REMOVE(fd_store, fs->service->fd_store, fs);
+ fs->service->n_fd_store--;
+ }
+
+ if (fs->event_source) {
+ sd_event_source_set_enabled(fs->event_source, SD_EVENT_OFF);
+ sd_event_source_unref(fs->event_source);
+ }
+
+ safe_close(fs->fd);
+ free(fs);
+}
+
+static void service_release_resources(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (!s->fd_store)
+ return;
+
+ log_debug("Releasing all resources for %s", u->id);
+
+ while (s->fd_store)
+ service_fd_store_unlink(s->fd_store);
+
+ assert(s->n_fd_store == 0);
+}
+
static void service_done(Unit *u) {
Service *s = SERVICE(u);
@@ -286,6 +322,8 @@ static void service_done(Unit *u) {
service_stop_watchdog(s);
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ service_release_resources(u);
}
static int service_arm_timer(Service *s, usec_t usec) {
@@ -549,6 +587,14 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
if (s->status_text)
fprintf(f, "%sStatus Text: %s\n",
prefix, s->status_text);
+
+ if (s->n_fd_store_max > 0) {
+ fprintf(f,
+ "%sFile Descriptor Store Max: %u\n"
+ "%sFile Descriptor Store Current: %u\n",
+ prefix, s->n_fd_store_max,
+ prefix, s->n_fd_store);
+ }
}
static int service_load_pid_file(Service *s, bool may_warn) {
@@ -806,10 +852,10 @@ static int service_coldplug(Unit *u) {
}
static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) {
+ _cleanup_free_ int *rfds = NULL;
+ unsigned rn_fds = 0;
Iterator i;
int r;
- int *rfds = NULL;
- unsigned rn_fds = 0;
Unit *u;
assert(s);
@@ -831,10 +877,12 @@ static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) {
r = socket_collect_fds(sock, &cfds, &cn_fds);
if (r < 0)
- goto fail;
+ return r;
- if (!cfds)
+ if (cn_fds <= 0) {
+ free(cfds);
continue;
+ }
if (!rfds) {
rfds = cfds;
@@ -842,32 +890,39 @@ static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) {
} else {
int *t;
- t = new(int, rn_fds+cn_fds);
+ t = realloc(rfds, (rn_fds + cn_fds) * sizeof(int));
if (!t) {
free(cfds);
- r = -ENOMEM;
- goto fail;
+ return -ENOMEM;
}
- memcpy(t, rfds, rn_fds * sizeof(int));
- memcpy(t+rn_fds, cfds, cn_fds * sizeof(int));
- free(rfds);
+ memcpy(t + rn_fds, cfds, cn_fds * sizeof(int));
+ rfds = t;
+ rn_fds += cn_fds;
+
free(cfds);
- rfds = t;
- rn_fds = rn_fds+cn_fds;
}
}
+ if (s->n_fd_store > 0) {
+ ServiceFDStore *fs;
+ int *t;
+
+ t = realloc(rfds, (rn_fds + s->n_fd_store) * sizeof(int));
+ if (!t)
+ return -ENOMEM;
+
+ rfds = t;
+ LIST_FOREACH(fd_store, fs, s->fd_store)
+ rfds[rn_fds++] = fs->fd;
+ }
+
*fds = rfds;
*n_fds = rn_fds;
+ rfds = NULL;
return 0;
-
-fail:
- free(rfds);
-
- return r;
}
static int service_spawn(
@@ -2543,7 +2598,75 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void
return 0;
}
-static void service_notify_message(Unit *u, pid_t pid, char **tags) {
+static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ ServiceFDStore *fs = userdata;
+
+ assert(e);
+ assert(fs);
+
+ /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+ service_fd_store_unlink(fs);
+ return 0;
+}
+
+static int service_add_fd_set(Service *s, FDSet *fds) {
+ int r;
+
+ assert(s);
+
+ if (fdset_size(fds) <= 0)
+ return 0;
+
+ while (s->n_fd_store < s->n_fd_store_max) {
+ _cleanup_close_ int fd = -1;
+ ServiceFDStore *fs;
+ bool same = false;
+
+ fd = fdset_steal_first(fds);
+ if (fd < 0)
+ break;
+
+ LIST_FOREACH(fd_store, fs, s->fd_store) {
+ r = same_fd(fs->fd, fd);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s)->id, r, "%s: Couldn't check if same fd: %m", UNIT(s)->id);
+ if (r > 0) {
+ same = true;
+ break;
+ }
+ }
+
+ if (same)
+ continue;
+
+ fs = new0(ServiceFDStore, 1);
+ if (!fs)
+ return log_oom();
+
+ fs->fd = fd;
+ fs->service = s;
+
+ r = sd_event_add_io(UNIT(s)->manager->event, &fs->event_source, fd, 0, on_fd_store_io, fs);
+ if (r < 0) {
+ free(fs);
+ return log_unit_error_errno(UNIT(s)->id, r, "%s: Failed to add even source: %m", UNIT(s)->id);
+ }
+
+ LIST_PREPEND(fd_store, s->fd_store, fs);
+ s->n_fd_store++;
+
+ fd = -1;
+
+ log_unit_debug(UNIT(s)->id, "%s: added fd to fd store.", UNIT(s)->id);
+ }
+
+ if (fdset_size(fds) > 0)
+ log_unit_warning(UNIT(s)->id, "%s: tried to store more fds than FDStoreMax=%u allows, closing remaining.", UNIT(s)->id, s->n_fd_store_max);
+
+ return 0;
+}
+
+static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds) {
Service *s = SERVICE(u);
_cleanup_free_ char *cc = NULL;
bool notify_dbus = false;
@@ -2675,6 +2798,12 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
service_reset_watchdog(s);
}
+ /* Add the passed fds to the fd store */
+ if (strv_find(tags, "FDSTORE=1")) {
+ log_unit_debug(u->id, "%s: got FDSTORE=1", u->id);
+ service_add_fd_set(s, fds);
+ }
+
/* Notify clients about changed status or main pid */
if (notify_dbus)
unit_add_to_dbus_queue(u);
@@ -2917,6 +3046,7 @@ const UnitVTable service_vtable = {
.init = service_init,
.done = service_done,
.load = service_load,
+ .release_resources = service_release_resources,
.coldplug = service_coldplug,
diff --git a/src/core/service.h b/src/core/service.h
index f6a78c403..dfeee6a68 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -22,6 +22,7 @@
***/
typedef struct Service Service;
+typedef struct ServiceFDStore ServiceFDStore;
#include "unit.h"
#include "path.h"
@@ -115,6 +116,15 @@ typedef enum ServiceResult {
_SERVICE_RESULT_INVALID = -1
} ServiceResult;
+struct ServiceFDStore {
+ Service *service;
+
+ int fd;
+ sd_event_source *event_source;
+
+ LIST_FIELDS(ServiceFDStore, fd_store);
+};
+
struct Service {
Unit meta;
@@ -198,6 +208,10 @@ struct Service {
NotifyAccess notify_access;
NotifyState notify_state;
+
+ ServiceFDStore *fd_store;
+ unsigned n_fd_store;
+ unsigned n_fd_store_max;
};
extern const UnitVTable service_vtable;
diff --git a/src/core/unit.c b/src/core/unit.c
index 229bd0f73..7311c5804 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -278,21 +278,32 @@ int unit_set_description(Unit *u, const char *description) {
}
bool unit_check_gc(Unit *u) {
+ UnitActiveState state;
assert(u);
- if (UNIT_VTABLE(u)->no_gc)
+ if (u->job)
return true;
- if (u->no_gc)
+ if (u->nop_job)
return true;
- if (u->job)
+ state = unit_active_state(u);
+
+ /* If the unit is inactive and failed and no job is queued for
+ * it, then release its runtime resources */
+ if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
+ UNIT_VTABLE(u)->release_resources)
+ UNIT_VTABLE(u)->release_resources(u);
+
+ /* But we keep the unit object around for longer when it is
+ * referenced or configured to not be gc'ed */
+ if (state != UNIT_INACTIVE)
return true;
- if (u->nop_job)
+ if (UNIT_VTABLE(u)->no_gc)
return true;
- if (unit_active_state(u) != UNIT_INACTIVE)
+ if (u->no_gc)
return true;
if (u->refs)
diff --git a/src/core/unit.h b/src/core/unit.h
index 19fa2f058..53b8a7f66 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -345,6 +345,10 @@ struct UnitVTable {
* way */
bool (*check_gc)(Unit *u);
+ /* When the unit is not running and no job for it queued we
+ * shall release its runtime resources */
+ void (*release_resources)(Unit *u);
+
/* Return true when this unit is suitable for snapshotting */
bool (*check_snapshot)(Unit *u);
@@ -359,7 +363,7 @@ struct UnitVTable {
void (*notify_cgroup_empty)(Unit *u);
/* Called whenever a process of this unit sends us a message */
- void (*notify_message)(Unit *u, pid_t pid, char **tags);
+ void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds);
/* Called whenever a name this Unit registered for comes or
* goes away. */