summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/basic/fd-util.c199
-rw-r--r--src/basic/fd-util.h2
-rw-r--r--src/test/test-fd-util.c88
3 files changed, 289 insertions, 0 deletions
diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c
index 5adbd05f6..9b2fd332b 100644
--- a/src/basic/fd-util.c
+++ b/src/basic/fd-util.c
@@ -12,10 +12,13 @@
#include <sys/stat.h>
#include <unistd.h>
+//#include "alloc-util.h"
+//#include "copy.h"
#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
+//#include "io-util.h"
#include "macro.h"
#include "memfd-util.h"
#include "missing.h"
@@ -566,6 +569,202 @@ try_dev_shm_without_o_tmpfile:
return -EOPNOTSUPP;
}
+/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
+#define DATA_FD_MEMORY_LIMIT (64U*1024U)
+
+/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's large than that use /var/tmp instead. */
+#define DATA_FD_TMP_LIMIT (1024U*1024U)
+
+int fd_duplicate_data_fd(int fd) {
+
+ _cleanup_close_ int copy_fd = -1, tmp_fd = -1;
+ _cleanup_free_ void *remains = NULL;
+ _cleanup_free_ char *t = NULL;
+ size_t remains_size = 0;
+ const char *td;
+ struct stat st;
+ int r;
+
+ /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
+ * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
+ * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
+ * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
+ * /var/tmp. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* For now, let's only accept regular files, sockets, pipes and char devices */
+ if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
+ return -EBADFD;
+
+ /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
+ * that we use the reported regular file size only as a hint, given that there are plenty special files in
+ * /proc and /sys which report a zero file size but can be read from. */
+
+ if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) {
+
+ /* Try a memfd first */
+ copy_fd = memfd_new("data-fd");
+ if (copy_fd >= 0) {
+ off_t f;
+
+ r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
+ if (r < 0)
+ return r;
+
+ f = lseek(copy_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ if (r == 0) {
+ /* Did it fit into the limit? If so, we are done. */
+ r = memfd_set_sealed(copy_fd);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(copy_fd);
+ }
+
+ /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
+
+ } else {
+ _cleanup_(close_pairp) int pipefds[2] = { -1, -1 };
+ int isz;
+
+ /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
+ * then block indefinitely when we hit the pipe size limit */
+
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ /* Try to enlarge the pipe size if necessary */
+ if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {
+
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+ }
+
+ if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {
+
+ r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size);
+ if (r < 0 && r != -EAGAIN)
+ return r; /* If we get EAGAIN it could be because of the source or because of
+ * the destination fd, we can't know, as sendfile() and friends won't
+ * tell us. Hence, treat this as reason to fall back, just to be
+ * sure. */
+ if (r == 0) {
+ /* Everything fit in, yay! */
+ (void) fd_nonblock(pipefds[0], false);
+
+ return TAKE_FD(pipefds[0]);
+ }
+
+ /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
+ * when writing the new file we incorporate this first. */
+ copy_fd = TAKE_FD(pipefds[0]);
+ }
+ }
+ }
+
+ /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
+ if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) &&
+ (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
+ off_t f;
+
+ tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
+ * temporary file first. */
+
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
+ * failed copy operation, let's flush them out next. */
+
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ goto finish; /* Yay, it fit in */
+
+ /* It didn't fit in. Let's not forget to use what we already used */
+ f = lseek(tmp_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ safe_close(copy_fd);
+ copy_fd = TAKE_FD(tmp_fd);
+
+ remains = mfree(remains);
+ remains_size = 0;
+ }
+
+ /* As last fallback use /var/tmp */
+ r = var_tmp_dir(&td);
+ if (r < 0)
+ return r;
+
+ tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, than copy this
+ * into the temporary file first. */
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* Then, copy in any read but not yet written bytes. */
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* Copy in the rest */
+ r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+
+finish:
+ /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
+ * file again */
+
+ return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC);
+}
+
int fd_move_above_stdio(int fd) {
int flags, copy;
PROTECT_ERRNO;
diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h
index 78207ee8d..5d52e3b19 100644
--- a/src/basic/fd-util.h
+++ b/src/basic/fd-util.h
@@ -85,6 +85,8 @@ enum {
int acquire_data_fd(const void *data, size_t size, unsigned flags);
+int fd_duplicate_data_fd(int fd);
+
/* Hint: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5 */
#define ERRNO_IS_DISCONNECT(r) \
IN_SET(r, ENOTCONN, ECONNRESET, ECONNREFUSED, ECONNABORTED, EPIPE, ENETUNREACH)
diff --git a/src/test/test-fd-util.c b/src/test/test-fd-util.c
index f266e580b..edcb8ab75 100644
--- a/src/test/test-fd-util.c
+++ b/src/test/test-fd-util.c
@@ -230,6 +230,93 @@ static void test_rearrange_stdio(void) {
}
}
+static void assert_equal_fd(int fd1, int fd2) {
+
+ for (;;) {
+ uint8_t a[4096], b[4096];
+ ssize_t x, y;
+
+ x = read(fd1, a, sizeof(a));
+ assert(x >= 0);
+
+ y = read(fd2, b, sizeof(b));
+ assert(y >= 0);
+
+ assert(x == y);
+
+ if (x == 0)
+ break;
+
+ assert(memcmp(a, b, x) == 0);
+ }
+}
+
+static void test_fd_duplicate_data_fd(void) {
+ _cleanup_close_ int fd1 = -1, fd2 = -1;
+ _cleanup_(close_pairp) int sfd[2] = { -1, -1 };
+ _cleanup_(sigkill_waitp) pid_t pid = -1;
+ uint64_t i, j;
+ int r;
+
+ fd1 = open("/etc/fstab", O_RDONLY|O_CLOEXEC);
+ if (fd1 >= 0) {
+
+ fd2 = fd_duplicate_data_fd(fd1);
+ assert_se(fd2 >= 0);
+
+ assert_se(lseek(fd1, 0, SEEK_SET) == 0);
+ assert_equal_fd(fd1, fd2);
+ }
+
+ fd1 = safe_close(fd1);
+ fd2 = safe_close(fd2);
+
+ fd1 = acquire_data_fd("hallo", 6, 0);
+ assert_se(fd1 >= 0);
+
+ fd2 = fd_duplicate_data_fd(fd1);
+ assert_se(fd2 >= 0);
+
+ safe_close(fd1);
+ fd1 = acquire_data_fd("hallo", 6, 0);
+ assert_se(fd1 >= 0);
+
+ assert_equal_fd(fd1, fd2);
+
+ fd1 = safe_close(fd1);
+ fd2 = safe_close(fd2);
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, sfd) >= 0);
+
+ r = safe_fork("(sd-pipe)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* child */
+
+ sfd[0] = safe_close(sfd[0]);
+
+ for (i = 0; i < 1536*1024 / sizeof(uint64_t); i++)
+ assert_se(write(sfd[1], &i, sizeof(i)) == sizeof(i));
+
+ sfd[1] = safe_close(sfd[1]);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ sfd[1] = safe_close(sfd[1]);
+
+ fd2 = fd_duplicate_data_fd(sfd[0]);
+ assert_se(fd2 >= 0);
+
+ for (i = 0; i < 1536*1024 / sizeof(uint64_t); i++) {
+ assert_se(read(fd2, &j, sizeof(j)) == sizeof(j));
+ assert_se(i == j);
+ }
+
+ assert_se(read(fd2, &j, sizeof(j)) == 0);
+}
+
int main(int argc, char *argv[]) {
test_close_many();
test_close_nointr();
@@ -240,6 +327,7 @@ int main(int argc, char *argv[]) {
test_acquire_data_fd();
test_fd_move_above_stdio();
test_rearrange_stdio();
+ test_fd_duplicate_data_fd();
return 0;
}