summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile-man.am31
-rw-r--r--man/sd_listen_fds.xml21
-rw-r--r--man/sd_notify.xml122
-rw-r--r--man/systemd.service.xml29
-rw-r--r--src/core/dbus-service.c1
-rw-r--r--src/core/load-fragment-gperf.gperf.m41
-rw-r--r--src/core/manager.c78
-rw-r--r--src/core/service.c166
-rw-r--r--src/core/service.h14
-rw-r--r--src/core/unit.c21
-rw-r--r--src/core/unit.h6
-rw-r--r--src/libsystemd/libsystemd.sym.m45
-rw-r--r--src/libsystemd/sd-daemon/sd-daemon.c94
-rw-r--r--src/shared/fdset.c54
-rw-r--r--src/shared/fdset.h11
-rw-r--r--src/shared/util.c25
-rw-r--r--src/shared/util.h2
-rw-r--r--src/systemd/sd-daemon.h13
18 files changed, 594 insertions, 100 deletions
diff --git a/Makefile-man.am b/Makefile-man.am
index 68e7483e7..8dc8febcf 100644
--- a/Makefile-man.am
+++ b/Makefile-man.am
@@ -198,6 +198,9 @@ MANPAGES_ALIAS += \
man/sd_journal_wait.3 \
man/sd_machine_get_ifindices.3 \
man/sd_notifyf.3 \
+ man/sd_pid_notify.3 \
+ man/sd_pid_notify_with_fds.3 \
+ man/sd_pid_notifyf.3 \
man/sleep.conf.d.5 \
man/system.conf.d.5 \
man/systemd-ask-password-console.path.8 \
@@ -308,6 +311,9 @@ man/sd_journal_test_cursor.3: man/sd_journal_get_cursor.3
man/sd_journal_wait.3: man/sd_journal_get_fd.3
man/sd_machine_get_ifindices.3: man/sd_machine_get_class.3
man/sd_notifyf.3: man/sd_notify.3
+man/sd_pid_notify.3: man/sd_notify.3
+man/sd_pid_notify_with_fds.3: man/sd_notify.3
+man/sd_pid_notifyf.3: man/sd_notify.3
man/sleep.conf.d.5: man/systemd-sleep.conf.5
man/system.conf.d.5: man/systemd-system.conf.5
man/systemd-ask-password-console.path.8: man/systemd-ask-password-console.service.8
@@ -566,6 +572,15 @@ man/sd_machine_get_ifindices.html: man/sd_machine_get_class.html
man/sd_notifyf.html: man/sd_notify.html
$(html-alias)
+man/sd_pid_notify.html: man/sd_notify.html
+ $(html-alias)
+
+man/sd_pid_notify_with_fds.html: man/sd_notify.html
+ $(html-alias)
+
+man/sd_pid_notifyf.html: man/sd_notify.html
+ $(html-alias)
+
man/sleep.conf.d.html: man/systemd-sleep.conf.html
$(html-alias)
@@ -674,12 +689,6 @@ man/systemd-user.conf.html: man/systemd-system.conf.html
man/user.conf.d.html: man/systemd-system.conf.html
$(html-alias)
-if ENABLE_HWDB
-MANPAGES += \
- man/hwdb.7 \
- man/systemd-hwdb.8
-
-endif
if ENABLE_BACKLIGHT
MANPAGES += \
@@ -750,6 +759,16 @@ man/systemd-hostnamed.html: man/systemd-hostnamed.service.html
endif
+if ENABLE_HWDB
+MANPAGES += \
+ man/hwdb.7 \
+ man/systemd-hwdb.8
+MANPAGES_ALIAS += \
+ #
+
+
+endif
+
if ENABLE_KDBUS
MANPAGES += \
man/sd_bus_creds_get_pid.3 \
diff --git a/man/sd_listen_fds.xml b/man/sd_listen_fds.xml
index 6999db980..437774563 100644
--- a/man/sd_listen_fds.xml
+++ b/man/sd_listen_fds.xml
@@ -73,7 +73,7 @@
<para>If the <parameter>unset_environment</parameter>
parameter is non-zero,
<function>sd_listen_fds()</function> will unset the
- <varname>$LISTEN_FDS</varname>/<varname>$LISTEN_PID</varname>
+ <varname>$LISTEN_FDS</varname> and <varname>$LISTEN_PID</varname>
environment variables before returning (regardless of
whether the function call itself succeeded or
not). Further calls to
@@ -83,10 +83,11 @@
<para>If a daemon receives more than one file
descriptor, they will be passed in the same order as
- configured in the systemd socket definition
- file. Nonetheless, it is recommended to verify the
- correct socket types before using them. To simplify
- this checking, the functions
+ configured in the systemd socket unit file (see
+ <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ for details). Nonetheless, it is recommended to verify
+ the correct socket types before using them. To
+ simplify this checking, the functions
<citerefentry><refentrytitle>sd_is_fifo</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_is_socket</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_is_socket_inet</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
@@ -103,6 +104,16 @@
<para>This function call will set the FD_CLOEXEC flag
for all passed file descriptors to avoid further
inheritance to children of the calling process.</para>
+
+ <para>If multiple socket units activate the same
+ service, the order of the file descriptors passed to
+ its main process is undefined. If additional file
+ descriptors have been passed to the service manager
+ using
+ <citerefentry><refentrytitle>sd_pid_notify_with_fds</refentrytitle><manvolnum>3</manvolnum></citerefentry>'s
+ <literal>FDSTORE=1</literal> messages, these file
+ descriptors are passed last, in arbitrary order, and
+ with duplicates removed.</para>
</refsect1>
<refsect1>
diff --git a/man/sd_notify.xml b/man/sd_notify.xml
index 35f6f71ab..2bf3383c0 100644
--- a/man/sd_notify.xml
+++ b/man/sd_notify.xml
@@ -46,6 +46,9 @@
<refnamediv>
<refname>sd_notify</refname>
<refname>sd_notifyf</refname>
+ <refname>sd_pid_notify</refname>
+ <refname>sd_pid_notifyf</refname>
+ <refname>sd_pid_notify_with_fds</refname>
<refpurpose>Notify service manager about start-up completion and other service status changes</refpurpose>
</refnamediv>
@@ -65,6 +68,30 @@
<paramdef>const char *<parameter>format</parameter></paramdef>
<paramdef>...</paramdef>
</funcprototype>
+
+ <funcprototype>
+ <funcdef>int <function>sd_pid_notify</function></funcdef>
+ <paramdef>pid_t <parameter>pid</parameter></paramdef>
+ <paramdef>int <parameter>unset_environment</parameter></paramdef>
+ <paramdef>const char *<parameter>state</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>int <function>sd_pid_notifyf</function></funcdef>
+ <paramdef>pid_t <parameter>pid</parameter></paramdef>
+ <paramdef>int <parameter>unset_environment</parameter></paramdef>
+ <paramdef>const char *<parameter>format</parameter></paramdef>
+ <paramdef>...</paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>int <function>sd_pid_notify_with_fds</function></funcdef>
+ <paramdef>pid_t <parameter>pid</parameter></paramdef>
+ <paramdef>int <parameter>unset_environment</parameter></paramdef>
+ <paramdef>const char *<parameter>state</parameter></paramdef>
+ <paramdef>const int *<parameter>fds</parameter></paramdef>
+ <paramdef>unsigned <parameter>n_fds</parameter></paramdef>
+ </funcprototype>
</funcsynopsis>
</refsynopsisdiv>
@@ -175,7 +202,7 @@
<varlistentry>
<term>MAINPID=...</term>
- <listitem><para>The main pid of the
+ <listitem><para>The main process ID (PID) of the
service, in case the service manager did
not fork off the process
itself. Example:
@@ -185,7 +212,7 @@
<varlistentry>
<term>WATCHDOG=1</term>
- <listitem><para>Tells systemd to
+ <listitem><para>Tells the service manager to
update the watchdog timestamp. This is
the keep-alive ping that services need
to issue in regular intervals if
@@ -199,12 +226,53 @@
check if the watchdog is enabled.
</para></listitem>
</varlistentry>
+
+
+ <varlistentry>
+ <term>FDSTORE=1</term>
+
+ <listitem><para>Stores additional file
+ descriptors in the service
+ manager. File descriptors sent this
+ way will be maintained per-service by
+ the service manager and be passed
+ again using the usual file descriptor
+ passing logic on the next invocation
+ of the service (see
+ <citerefentry><refentrytitle>sd_listen_fds</refentrytitle><manvolnum>3</manvolnum></citerefentry>). This
+ is useful for implementing service
+ restart schemes where services
+ serialize their state to
+ <filename>/run</filename>, push their
+ file descriptors to the system
+ manager, and are then restarted,
+ retrieving their state again via
+ socket passing and
+ <filename>/run</filename>. Note that
+ the service manager will accept
+ messages for a service only if
+ <varname>FileDescriptorStoreMax=</varname>
+ is set to non-zero for it (defaults to
+ zero). See
+ <citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ for details. Multiple arrays of file
+ descriptors may be sent in separate
+ messages, in which case the arrays are
+ combined. Note that the service
+ manager removes duplicate file
+ descriptors before passing them to the
+ service. Use
+ <function>sd_pid_notify_with_fds()</function>
+ to send messages with
+ <literal>FDSTORE=1</literal>, see
+ below.</para></listitem>
+ </varlistentry>
+
</variablelist>
<para>It is recommended to prefix variable names that
- are not shown in the list above with
- <varname>X_</varname> to avoid namespace
- clashes.</para>
+ are not listed above with <varname>X_</varname> to
+ avoid namespace clashes.</para>
<para>Note that systemd will accept status data sent
from a service only if the
@@ -217,6 +285,36 @@
<function>sd_notify()</function> but takes a
<function>printf()</function>-like format string plus
arguments.</para>
+
+ <para><function>sd_pid_notify()</function> and
+ <function>sd_pid_notifyf()</function> are similar to
+ <function>sd_notify()</function> and
+ <function>sd_notifyf()</function> but take a process
+ ID (PID) to use as originating PID for the message as
+ first argument. This is useful to send notification
+ messages on behalf of other processes, provided the
+ appropriate privileges are available. If the PID
+ argument is specified as 0 the process ID of the
+ calling process is used, in which case the calls are
+ fully equivalent to <function>sd_notify()</function>
+ and <function>sd_notifyf()</function>.</para>
+
+ <para><function>sd_pid_notify_with_fds()</function> is
+ similar to <function>sd_pid_notify()</function> but
+ takes an additional array of file descriptors. These
+ file descriptors are sent along with the notification
+ message to the service manager. This is particularly
+ useful for sending <literal>FDSTORE=1</literal>
+ messages, as described above. The additional arguments
+ are a pointer to the file descriptor array plus the
+ number of file descriptors in the array. If the number
+ of file descriptors is passed as 0, the call is fully
+ equivalent to <function>sd_pid_notify()</function>,
+ i.e. no file descriptors are passed. Note that file
+ descriptors sent to the service manager on messages
+ that do not expect them (i.e. without
+ <literal>FDSTORE=1</literal>) are immediately
+ closed on reception.</para>
</refsect1>
<refsect1>
@@ -295,13 +393,25 @@
<example>
<title>Error Cause Notification</title>
- <para>A service could send the following shortly before exiting, on failure</para>
+ <para>A service could send the following shortly before exiting, on failure:</para>
<programlisting>sd_notifyf(0, "STATUS=Failed to start up: %s\n"
"ERRNO=%i",
strerror(errno),
errno);</programlisting>
</example>
+
+ <example>
+ <title>Store a File Descriptor in the Service Manager</title>
+
+ <para>To store an open file descriptor in the
+ service manager, in order to continue
+ operation after a service restart without
+ losing state, use
+ <literal>FDSTORE=1</literal>:</para>
+
+ <programlisting>sd_pid_notify_with_fds(0, 0, "FDSTORE=1", &amp;fd, 1);</programlisting>
+ </example>
</refsect1>
<refsect1>
diff --git a/man/systemd.service.xml b/man/systemd.service.xml
index 0b68aa089..4c890dfb7 100644
--- a/man/systemd.service.xml
+++ b/man/systemd.service.xml
@@ -1117,6 +1117,35 @@
command.</para></listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>FileDescriptorStoreMax=</varname></term>
+ <listitem><para>Configure how many
+ file descriptors may be stored in the
+ service manager for the service using
+ <citerefentry><refentrytitle>sd_pid_notify_with_fds</refentrytitle><manvolnum>3</manvolnum></citerefentry>'s
+ <literal>FDSTORE=1</literal>
+ messages. This is useful for
+ implementing service restart schemes
+ where the state is serialized to
+ <filename>/run</filename> and the file
+ descriptors passed to the service
+ manager, to allow restarts without
+ losing state. Defaults to 0, i.e. no
+ file descriptors may be stored in the
+ service manager by default. All file
+ descriptors passed to the service
+ manager from a specific service are
+ passed back to the service's main
+ process on the next service
+ restart. Any file descriptors passed
+ to the service manager are
+ automatically closed when POLLHUP or
+ POLLERR is seen on them, or when the
+ service is fully stopped and no job is
+ queued or being executed for
+ it.</para></listitem>
+ </varlistentry>
+
</variablelist>
<para>Check
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index 2b50ac93d..6d4713bab 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -59,6 +59,7 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("MainPID", "u", bus_property_get_pid, offsetof(Service, main_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Service, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("BusName", "s", NULL, offsetof(Service, bus_name), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FileDescriptorStoreMax", "u", NULL, offsetof(Service, n_fd_store_max), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StatusText", "s", NULL, offsetof(Service, status_text), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("StatusErrno", "i", NULL, offsetof(Service, status_errno), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index e0ffaa605..9e87d91e7 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -226,6 +226,7 @@ Service.SuccessExitStatus, config_parse_set_status, 0,
Service.SysVStartPriority, config_parse_warn_compat, DISABLED_LEGACY, 0
Service.NonBlocking, config_parse_bool, 0, offsetof(Service, exec_context.non_blocking)
Service.BusName, config_parse_unit_string_printf, 0, offsetof(Service, bus_name)
+Service.FileDescriptorStoreMax, config_parse_unsigned, 0, offsetof(Service, n_fd_store_max)
Service.NotifyAccess, config_parse_notify_access, 0, offsetof(Service, notify_access)
Service.Sockets, config_parse_service_sockets, 0, 0
m4_ifdef(`ENABLE_KDBUS',
diff --git a/src/core/manager.c b/src/core/manager.c
index 519b37438..c18312a36 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -84,6 +84,9 @@
#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
+#define NOTIFY_FD_MAX 768
+#define NOTIFY_BUFFER_MAX PIPE_BUF
+
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
@@ -1449,7 +1452,7 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) {
return n;
}
-static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *buf, size_t n) {
+static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *buf, size_t n, FDSet *fds) {
_cleanup_strv_free_ char **tags = NULL;
assert(m);
@@ -1466,12 +1469,13 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *
log_unit_debug(u->id, "Got notification message for unit %s", u->id);
if (UNIT_VTABLE(u)->notify_message)
- UNIT_VTABLE(u)->notify_message(u, pid, tags);
+ UNIT_VTABLE(u)->notify_message(u, pid, tags, fds);
}
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
Manager *m = userdata;
ssize_t n;
+ int r;
assert(m);
assert(m->notify_fd == fd);
@@ -1482,73 +1486,101 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
}
for (;;) {
- char buf[4096];
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ char buf[NOTIFY_BUFFER_MAX+1];
struct iovec iovec = {
.iov_base = buf,
.iov_len = sizeof(buf)-1,
};
- bool found = false;
-
union {
struct cmsghdr cmsghdr;
- uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)];
} control = {};
-
struct msghdr msghdr = {
.msg_iov = &iovec,
.msg_iovlen = 1,
.msg_control = &control,
.msg_controllen = sizeof(control),
};
- struct ucred *ucred;
+ struct cmsghdr *cmsg;
+ struct ucred *ucred = NULL;
+ bool found = false;
Unit *u1, *u2, *u3;
+ int *fd_array = NULL;
+ unsigned n_fds = 0;
- n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT);
- if (n <= 0) {
- if (n == 0)
- return -EIO;
-
+ n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (n < 0) {
if (errno == EAGAIN || errno == EINTR)
break;
return -errno;
}
+ if (n == 0)
+ return -ECONNRESET;
+
+ for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+
+ fd_array = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
- if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) ||
- control.cmsghdr.cmsg_level != SOL_SOCKET ||
- control.cmsghdr.cmsg_type != SCM_CREDENTIALS ||
- control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) {
- log_warning("Received notify message without credentials. Ignoring.");
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ }
+ }
+
+ if (n_fds > 0) {
+ assert(fd_array);
+
+ r = fdset_new_array(&fds, fd_array, n_fds);
+ if (r < 0) {
+ close_many(fd_array, n_fds);
+ return log_oom();
+ }
+ }
+
+ if (!ucred || ucred->pid <= 0) {
+ log_warning("Received notify message without valid credentials. Ignoring.");
continue;
}
- ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr);
+ if ((size_t) n >= sizeof(buf)) {
+ log_warning("Received notify message exceeded maximum size. Ignoring.");
+ continue;
+ }
- assert((size_t) n < sizeof(buf));
buf[n] = 0;
/* Notify every unit that might be interested, but try
* to avoid notifying the same one multiple times. */
u1 = manager_get_unit_by_pid(m, ucred->pid);
if (u1) {
- manager_invoke_notify_message(m, u1, ucred->pid, buf, n);
+ manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds);
found = true;
}
u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(ucred->pid));
if (u2 && u2 != u1) {
- manager_invoke_notify_message(m, u2, ucred->pid, buf, n);
+ manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds);
found = true;
}
u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(ucred->pid));
if (u3 && u3 != u2 && u3 != u1) {
- manager_invoke_notify_message(m, u3, ucred->pid, buf, n);
+ manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds);
found = true;
}
if (!found)
log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid);
+
+ if (fdset_size(fds) > 0)
+ log_warning("Got auxiliary fds with notification message, closing all.");
}
return 0;
diff --git a/src/core/service.c b/src/core/service.c
index bfbe959ed..78232ee71 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -242,6 +242,42 @@ static void service_reset_watchdog(Service *s) {
service_start_watchdog(s);
}
+static void service_fd_store_unlink(ServiceFDStore *fs) {
+
+ if (!fs)
+ return;
+
+ if (fs->service) {
+ assert(fs->service->n_fd_store > 0);
+ LIST_REMOVE(fd_store, fs->service->fd_store, fs);
+ fs->service->n_fd_store--;
+ }
+
+ if (fs->event_source) {
+ sd_event_source_set_enabled(fs->event_source, SD_EVENT_OFF);
+ sd_event_source_unref(fs->event_source);
+ }
+
+ safe_close(fs->fd);
+ free(fs);
+}
+
+static void service_release_resources(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (!s->fd_store)
+ return;
+
+ log_debug("Releasing all resources for %s", u->id);
+
+ while (s->fd_store)
+ service_fd_store_unlink(s->fd_store);
+
+ assert(s->n_fd_store == 0);
+}
+
static void service_done(Unit *u) {
Service *s = SERVICE(u);
@@ -286,6 +322,8 @@ static void service_done(Unit *u) {
service_stop_watchdog(s);
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ service_release_resources(u);
}
static int service_arm_timer(Service *s, usec_t usec) {
@@ -549,6 +587,14 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
if (s->status_text)
fprintf(f, "%sStatus Text: %s\n",
prefix, s->status_text);
+
+ if (s->n_fd_store_max > 0) {
+ fprintf(f,
+ "%sFile Descriptor Store Max: %u\n"
+ "%sFile Descriptor Store Current: %u\n",
+ prefix, s->n_fd_store_max,
+ prefix, s->n_fd_store);
+ }
}
static int service_load_pid_file(Service *s, bool may_warn) {
@@ -806,10 +852,10 @@ static int service_coldplug(Unit *u) {
}
static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) {
+ _cleanup_free_ int *rfds = NULL;
+ unsigned rn_fds = 0;
Iterator i;
int r;
- int *rfds = NULL;
- unsigned rn_fds = 0;
Unit *u;
assert(s);
@@ -831,10 +877,12 @@ static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) {
r = socket_collect_fds(sock, &cfds, &cn_fds);
if (r < 0)
- goto fail;
+ return r;
- if (!cfds)
+ if (cn_fds <= 0) {
+ free(cfds);
continue;
+ }
if (!rfds) {
rfds = cfds;
@@ -842,32 +890,39 @@ static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) {
} else {
int *t;
- t = new(int, rn_fds+cn_fds);
+ t = realloc(rfds, (rn_fds + cn_fds) * sizeof(int));
if (!t) {
free(cfds);
- r = -ENOMEM;
- goto fail;
+ return -ENOMEM;
}
- memcpy(t, rfds, rn_fds * sizeof(int));
- memcpy(t+rn_fds, cfds, cn_fds * sizeof(int));
- free(rfds);
+ memcpy(t + rn_fds, cfds, cn_fds * sizeof(int));
+ rfds = t;
+ rn_fds += cn_fds;
+
free(cfds);
- rfds = t;
- rn_fds = rn_fds+cn_fds;
}
}
+ if (s->n_fd_store > 0) {
+ ServiceFDStore *fs;
+ int *t;
+
+ t = realloc(rfds, (rn_fds + s->n_fd_store) * sizeof(int));
+ if (!t)
+ return -ENOMEM;
+
+ rfds = t;
+ LIST_FOREACH(fd_store, fs, s->fd_store)
+ rfds[rn_fds++] = fs->fd;
+ }
+
*fds = rfds;
*n_fds = rn_fds;
+ rfds = NULL;
return 0;
-
-fail:
- free(rfds);
-
- return r;
}
static int service_spawn(
@@ -2543,7 +2598,75 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void
return 0;
}
-static void service_notify_message(Unit *u, pid_t pid, char **tags) {
+static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ ServiceFDStore *fs = userdata;
+
+ assert(e);
+ assert(fs);
+
+ /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+ service_fd_store_unlink(fs);
+ return 0;
+}
+
+static int service_add_fd_set(Service *s, FDSet *fds) {
+ int r;
+
+ assert(s);
+
+ if (fdset_size(fds) <= 0)
+ return 0;
+
+ while (s->n_fd_store < s->n_fd_store_max) {
+ _cleanup_close_ int fd = -1;
+ ServiceFDStore *fs;
+ bool same = false;
+
+ fd = fdset_steal_first(fds);
+ if (fd < 0)
+ break;
+
+ LIST_FOREACH(fd_store, fs, s->fd_store) {
+ r = same_fd(fs->fd, fd);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s)->id, r, "%s: Couldn't check if same fd: %m", UNIT(s)->id);
+ if (r > 0) {
+ same = true;
+ break;
+ }
+ }
+
+ if (same)
+ continue;
+
+ fs = new0(ServiceFDStore, 1);
+ if (!fs)
+ return log_oom();
+
+ fs->fd = fd;
+ fs->service = s;
+
+ r = sd_event_add_io(UNIT(s)->manager->event, &fs->event_source, fd, 0, on_fd_store_io, fs);
+ if (r < 0) {
+ free(fs);
+ return log_unit_error_errno(UNIT(s)->id, r, "%s: Failed to add even source: %m", UNIT(s)->id);
+ }
+
+ LIST_PREPEND(fd_store, s->fd_store, fs);
+ s->n_fd_store++;
+
+ fd = -1;
+
+ log_unit_debug(UNIT(s)->id, "%s: added fd to fd store.", UNIT(s)->id);
+ }
+
+ if (fdset_size(fds) > 0)
+ log_unit_warning(UNIT(s)->id, "%s: tried to store more fds than FDStoreMax=%u allows, closing remaining.", UNIT(s)->id, s->n_fd_store_max);
+
+ return 0;
+}
+
+static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds) {
Service *s = SERVICE(u);
_cleanup_free_ char *cc = NULL;
bool notify_dbus = false;
@@ -2675,6 +2798,12 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
service_reset_watchdog(s);
}
+ /* Add the passed fds to the fd store */
+ if (strv_find(tags, "FDSTORE=1")) {
+ log_unit_debug(u->id, "%s: got FDSTORE=1", u->id);
+ service_add_fd_set(s, fds);
+ }
+
/* Notify clients about changed status or main pid */
if (notify_dbus)
unit_add_to_dbus_queue(u);
@@ -2917,6 +3046,7 @@ const UnitVTable service_vtable = {
.init = service_init,
.done = service_done,
.load = service_load,
+ .release_resources = service_release_resources,
.coldplug = service_coldplug,
diff --git a/src/core/service.h b/src/core/service.h
index f6a78c403..dfeee6a68 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -22,6 +22,7 @@
***/
typedef struct Service Service;
+typedef struct ServiceFDStore ServiceFDStore;
#include "unit.h"
#include "path.h"
@@ -115,6 +116,15 @@ typedef enum ServiceResult {
_SERVICE_RESULT_INVALID = -1
} ServiceResult;
+struct ServiceFDStore {
+ Service *service;
+
+ int fd;
+ sd_event_source *event_source;
+
+ LIST_FIELDS(ServiceFDStore, fd_store);
+};
+
struct Service {
Unit meta;
@@ -198,6 +208,10 @@ struct Service {
NotifyAccess notify_access;
NotifyState notify_state;
+
+ ServiceFDStore *fd_store;
+ unsigned n_fd_store;
+ unsigned n_fd_store_max;
};
extern const UnitVTable service_vtable;
diff --git a/src/core/unit.c b/src/core/unit.c
index 229bd0f73..7311c5804 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -278,21 +278,32 @@ int unit_set_description(Unit *u, const char *description) {
}
bool unit_check_gc(Unit *u) {
+ UnitActiveState state;
assert(u);
- if (UNIT_VTABLE(u)->no_gc)
+ if (u->job)
return true;
- if (u->no_gc)
+ if (u->nop_job)
return true;
- if (u->job)
+ state = unit_active_state(u);
+
+ /* If the unit is inactive or failed and no job is queued for
+ * it, then release its runtime resources */
+ if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
+ UNIT_VTABLE(u)->release_resources)
+ UNIT_VTABLE(u)->release_resources(u);
+
+ /* But we keep the unit object around for longer when it is
+ * referenced or configured to not be gc'ed */
+ if (state != UNIT_INACTIVE)
return true;
- if (u->nop_job)
+ if (UNIT_VTABLE(u)->no_gc)
return true;
- if (unit_active_state(u) != UNIT_INACTIVE)
+ if (u->no_gc)
return true;
if (u->refs)
diff --git a/src/core/unit.h b/src/core/unit.h
index 19fa2f058..53b8a7f66 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -345,6 +345,10 @@ struct UnitVTable {
* way */
bool (*check_gc)(Unit *u);
+ /* When the unit is not running and no job is queued for it,
+ * we shall release its runtime resources */
+ void (*release_resources)(Unit *u);
+
/* Return true when this unit is suitable for snapshotting */
bool (*check_snapshot)(Unit *u);
@@ -359,7 +363,7 @@ struct UnitVTable {
void (*notify_cgroup_empty)(Unit *u);
/* Called whenever a process of this unit sends us a message */
- void (*notify_message)(Unit *u, pid_t pid, char **tags);
+ void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds);
/* Called whenever a name this Unit registered for comes or
* goes away. */
diff --git a/src/libsystemd/libsystemd.sym.m4 b/src/libsystemd/libsystemd.sym.m4
index 80a61baab..19a49f45d 100644
--- a/src/libsystemd/libsystemd.sym.m4
+++ b/src/libsystemd/libsystemd.sym.m4
@@ -158,6 +158,11 @@ global:
sd_session_get_desktop;
} LIBSYSTEMD_216;
+LIBSYSTEMD_219 {
+global:
+ sd_pid_notify_with_fds;
+} LIBSYSTEMD_217;
+
m4_ifdef(`ENABLE_KDBUS',
LIBSYSTEMD_FUTURE {
global:
diff --git a/src/libsystemd/sd-daemon/sd-daemon.c b/src/libsystemd/sd-daemon/sd-daemon.c
index 1f2a53393..028c2a7a5 100644
--- a/src/libsystemd/sd-daemon/sd-daemon.c
+++ b/src/libsystemd/sd-daemon/sd-daemon.c
@@ -340,16 +340,28 @@ _public_ int sd_is_mq(int fd, const char *path) {
return 1;
}
-_public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) {
- union sockaddr_union sockaddr = {};
- _cleanup_close_ int fd = -1;
- struct msghdr msghdr = {};
- struct iovec iovec = {};
- const char *e;
+_public_ int sd_pid_notify_with_fds(pid_t pid, int unset_environment, const char *state, const int *fds, unsigned n_fds) {
+ union sockaddr_union sockaddr = {
+ .sa.sa_family = AF_UNIX,
+ };
+ struct iovec iovec = {
+ .iov_base = (char*) state,
+ };
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_name = &sockaddr,
+ };
union {
struct cmsghdr cmsghdr;
- uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
- } control = {};
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * n_fds)];
+ } control;
+ _cleanup_close_ int fd = -1;
+ struct cmsghdr *cmsg = NULL;
+ const char *e;
+ size_t controllen_without_ucred = 0;
+ bool try_without_ucred = false;
int r;
if (!state) {
@@ -357,6 +369,11 @@ _public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state)
goto finish;
}
+ if (n_fds > 0 && !fds) {
+ r = -EINVAL;
+ goto finish;
+ }
+
e = getenv("NOTIFY_SOCKET");
if (!e)
return 0;
@@ -373,42 +390,50 @@ _public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state)
goto finish;
}
- sockaddr.sa.sa_family = AF_UNIX;
- strncpy(sockaddr.un.sun_path, e, sizeof(sockaddr.un.sun_path));
+ iovec.iov_len = strlen(state);
+ strncpy(sockaddr.un.sun_path, e, sizeof(sockaddr.un.sun_path));
if (sockaddr.un.sun_path[0] == '@')
sockaddr.un.sun_path[0] = 0;
- iovec.iov_base = (char*) state;
- iovec.iov_len = strlen(state);
-
- msghdr.msg_name = &sockaddr;
msghdr.msg_namelen = offsetof(struct sockaddr_un, sun_path) + strlen(e);
-
if (msghdr.msg_namelen > sizeof(struct sockaddr_un))
msghdr.msg_namelen = sizeof(struct sockaddr_un);
- msghdr.msg_iov = &iovec;
- msghdr.msg_iovlen = 1;
+ if (n_fds > 0) {
+ msghdr.msg_control = &control;
+ msghdr.msg_controllen = CMSG_LEN(sizeof(int) * n_fds);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int) * n_fds);
+
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * n_fds);
+ }
if (pid != 0 && pid != getpid()) {
- struct cmsghdr *cmsg;
- struct ucred ucred = {};
+ struct ucred *ucred;
+
+ try_without_ucred = true;
+ controllen_without_ucred = msghdr.msg_controllen;
msghdr.msg_control = &control;
- msghdr.msg_controllen = sizeof(control);
+ msghdr.msg_controllen += CMSG_LEN(sizeof(struct ucred));
+
+ if (cmsg)
+ cmsg = CMSG_NXTHDR(&msghdr, cmsg);
+ else
+ cmsg = CMSG_FIRSTHDR(&msghdr);
- cmsg = CMSG_FIRSTHDR(&msghdr);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_CREDENTIALS;
cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
- ucred.pid = pid;
- ucred.uid = getuid();
- ucred.gid = getgid();
-
- memcpy(CMSG_DATA(cmsg), &ucred, sizeof(struct ucred));
- msghdr.msg_controllen = cmsg->cmsg_len;
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ ucred->pid = pid;
+ ucred->uid = getuid();
+ ucred->gid = getgid();
}
/* First try with fake ucred data, as requested */
@@ -417,10 +442,11 @@ _public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state)
goto finish;
}
- /* If that failed, try with our own instead */
- if (msghdr.msg_control) {
- msghdr.msg_control = NULL;
- msghdr.msg_controllen = 0;
+ /* If that failed, try with our own ucred instead */
+ if (try_without_ucred) {
+ if (controllen_without_ucred <= 0)
+ msghdr.msg_control = NULL;
+ msghdr.msg_controllen = controllen_without_ucred;
if (sendmsg(fd, &msghdr, MSG_NOSIGNAL) >= 0) {
r = 1;
@@ -437,8 +463,12 @@ finish:
return r;
}
+_public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) {
+ return sd_pid_notify_with_fds(pid, unset_environment, state, NULL, 0);
+}
+
_public_ int sd_notify(int unset_environment, const char *state) {
- return sd_pid_notify(0, unset_environment, state);
+ return sd_pid_notify_with_fds(0, unset_environment, state, NULL, 0);
}
_public_ int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) {
diff --git a/src/shared/fdset.c b/src/shared/fdset.c
index 46f7773a9..9e35ce5ce 100644
--- a/src/shared/fdset.c
+++ b/src/shared/fdset.c
@@ -41,7 +41,31 @@ FDSet *fdset_new(void) {
return MAKE_FDSET(set_new(NULL));
}
-void fdset_free(FDSet *s) {
+int fdset_new_array(FDSet **ret, int *fds, unsigned n_fds) {
+        FDSet *set;
+        unsigned idx = 0;
+
+        /* Allocates a new FDSet and fills it with the n_fds descriptors from the fds array.
+         *
+         * Returns 0 and stores the new set in *ret on success. Returns -ENOMEM if the set cannot be
+         * allocated, or the negative value reported by fdset_put() if inserting an entry fails; *ret
+         * is left untouched in both failure cases. */
+
+        assert(ret);
+
+        set = fdset_new();
+        if (!set)
+                return -ENOMEM;
+
+        while (idx < n_fds) {
+                int k = fdset_put(set, fds[idx++]);
+
+                if (k < 0) {
+                        /* Note that this releases the set via set_free(), not fdset_free() --
+                         * presumably so that the caller's fds are not closed on failure. */
+                        set_free(MAKE_SET(set));
+                        return k;
+                }
+        }
+
+        *ret = set;
+        return 0;
+}
+
+FDSet* fdset_free(FDSet *s) {
void *p;
while ((p = set_steal_first(MAKE_SET(s)))) {
@@ -61,6 +85,7 @@ void fdset_free(FDSet *s) {
}
set_free(MAKE_SET(s));
+ return NULL;
}
int fdset_put(FDSet *s, int fd) {
@@ -70,6 +95,19 @@ int fdset_put(FDSet *s, int fd) {
return set_put(MAKE_SET(s), FD_TO_PTR(fd));
}
+int fdset_consume(FDSet *s, int fd) {
+        int r;
+
+        /* Hands ownership of fd over to the set s and returns whatever fdset_put() returned.
+         *
+         * If the insertion fails (negative return) the fd is closed right here, so the caller never
+         * has to clean it up itself.
+         *
+         * A return of 0 must NOT close the fd: it is taken to mean that this descriptor number is
+         * already a member of s. Closing it would leave a stale entry in the set, which could later
+         * alias an unrelated, newly opened descriptor or be closed a second time.
+         * NOTE(review): assumes fdset_put() yields 0 for an already-present fd -- confirm against
+         * set_put(). */
+
+        assert(s);
+        assert(fd >= 0);
+
+        r = fdset_put(s, fd);
+        if (r < 0)
+                safe_close(fd);
+
+        return r;
+}
+
int fdset_put_dup(FDSet *s, int fd) {
int copy, r;
@@ -223,6 +261,10 @@ unsigned fdset_size(FDSet *fds) {
return set_size(MAKE_SET(fds));
}
+bool fdset_isempty(FDSet *fds) { /* Emptiness check: returns the result of set_isempty() on the underlying set. */
+ return set_isempty(MAKE_SET(fds));
+}
+
int fdset_iterate(FDSet *s, Iterator *i) {
void *p;
@@ -232,3 +274,13 @@ int fdset_iterate(FDSet *s, Iterator *i) {
return PTR_TO_FD(p);
}
+
+int fdset_steal_first(FDSet *fds) {
+        void *entry;
+
+        /* Takes one entry out of the set via set_steal_first() and returns it as an fd number; the
+         * fd is not closed here. Returns -ENOENT when set_steal_first() yields nothing. */
+
+        entry = set_steal_first(MAKE_SET(fds));
+        return entry ? PTR_TO_FD(entry) : -ENOENT;
+}
diff --git a/src/shared/fdset.h b/src/shared/fdset.h
index 907acd76d..c3c5e5228 100644
--- a/src/shared/fdset.h
+++ b/src/shared/fdset.h
@@ -27,25 +27,30 @@
typedef struct FDSet FDSet;
FDSet* fdset_new(void);
-void fdset_free(FDSet *s);
+FDSet* fdset_free(FDSet *s);
int fdset_put(FDSet *s, int fd);
int fdset_put_dup(FDSet *s, int fd);
+int fdset_consume(FDSet *s, int fd);
bool fdset_contains(FDSet *s, int fd);
int fdset_remove(FDSet *s, int fd);
-int fdset_new_fill(FDSet **_s);
-int fdset_new_listen_fds(FDSet **_s, bool unset);
+int fdset_new_array(FDSet **ret, int *fds, unsigned n_fds);
+int fdset_new_fill(FDSet **ret);
+int fdset_new_listen_fds(FDSet **ret, bool unset);
int fdset_cloexec(FDSet *fds, bool b);
int fdset_close_others(FDSet *fds);
unsigned fdset_size(FDSet *fds);
+bool fdset_isempty(FDSet *fds);
int fdset_iterate(FDSet *s, Iterator *i);
+int fdset_steal_first(FDSet *fds);
+
#define FDSET_FOREACH(fd, fds, i) \
for ((i) = ITERATOR_FIRST, (fd) = fdset_iterate((fds), &(i)); (fd) >= 0; (fd) = fdset_iterate((fds), &(i)))
diff --git a/src/shared/util.c b/src/shared/util.c
index bda3c93ae..f01022ed0 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -7672,3 +7672,28 @@ int fd_setcrtime(int fd, usec_t usec) {
return 0;
}
+
+int same_fd(int a, int b) {
+        struct stat st_a, st_b;
+
+        /* Checks whether two file descriptors refer to the same file.
+         *
+         * Returns > 0 (true) if they do, 0 (false) if they do not, and a negative errno value if
+         * fstat() fails on either descriptor. The comparison is by file type plus device/inode
+         * number, hence two independently opened fds on the same inode count as "same". */
+
+        assert(a >= 0);
+        assert(b >= 0);
+
+        /* Identical descriptor numbers: nothing to look up. */
+        if (a == b)
+                return true;
+
+        if (fstat(a, &st_a) < 0 || fstat(b, &st_b) < 0)
+                return -errno;
+
+        /* Objects of different file types can never be the same. */
+        if ((st_a.st_mode & S_IFMT) != (st_b.st_mode & S_IFMT))
+                return false;
+
+        /* We consider all device fds different... */
+        if (!S_ISREG(st_a.st_mode) &&
+            !S_ISDIR(st_a.st_mode) &&
+            !S_ISFIFO(st_a.st_mode) &&
+            !S_ISSOCK(st_a.st_mode) &&
+            !S_ISLNK(st_a.st_mode))
+                return false;
+
+        /* ...while for everything else the device/inode pair decides. */
+        return st_a.st_dev == st_b.st_dev && st_a.st_ino == st_b.st_ino;
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index a131a3c0f..4b7e12e62 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -1069,3 +1069,5 @@ int fd_setcrtime(int fd, usec_t usec);
int fd_getcrtime(int fd, usec_t *usec);
int path_getcrtime(const char *p, usec_t *usec);
int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags);
+
+int same_fd(int a, int b);
diff --git a/src/systemd/sd-daemon.h b/src/systemd/sd-daemon.h
index 351b4e59c..b878b4d8a 100644
--- a/src/systemd/sd-daemon.h
+++ b/src/systemd/sd-daemon.h
@@ -190,6 +190,12 @@ int sd_is_mq(int fd, const char *path);
timestamps to detect failed services. Also see
sd_watchdog_enabled() below.
+ FDSTORE=1 Store the file descriptors passed along with the
+ message in the per-service file descriptor store,
+ and pass them to the main process again on the next
+ invocation. This variable is only supported with
+ sd_pid_notify_with_fds().
+
Daemons can choose to send additional variables. However, it is
recommended to prefix variable names not listed above with X_.
@@ -243,6 +249,13 @@ int sd_pid_notify(pid_t pid, int unset_environment, const char *state);
int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) _sd_printf_(3,4);
/*
+ Similar to sd_pid_notify(), but also passes the specified fd array
+ to the service manager for storage. This is particularly useful for
+ FDSTORE=1 messages.
+*/
+int sd_pid_notify_with_fds(pid_t pid, int unset_environment, const char *state, const int *fds, unsigned n_fds);
+
+/*
Returns > 0 if the system was booted with systemd. Returns < 0 on
error. Returns 0 if the system was not booted with systemd. Note
that all of the functions above handle non-systemd boots just