191 files changed, 13546 insertions, 2127 deletions
diff --git a/.gitignore b/.gitignore
index 144ebb3b..82620f3c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,80 +1,97 @@
 *.o
 *.static.o
 *.o.d
-tags
-.cc-defines.h
-version.h
-version
-man/*.gz
-Documentation/*.gz
-Documentation/*.html
-btrfs
-btrfs.static
-btrfs-map-logical
-btrfs-fragments
-btrfsck
-ioctl-test
-dir-test
-send-test
-quick-test
-find-root
-mkfs.btrfs
-mkfs.btrfs.static
-repair
-restore
-btrfs-convert
-btrfs-find-root
-btrfs-find-root.static
-btrfs-image
-btrfs-corrupt-block
-btrfs-select-super
-btrfs-crc
-btrfstune
-mktables
-libbtrfs.a
-libbtrfs.so
-libbtrfs.so.0
-libbtrfs.so.0.1
-library-test
-library-test-static
+
+/.cc-defines.h
+/version.h
+/btrfs
+/btrfs.static
+/btrfs-map-logical
+/btrfs-map-logical.static
+/btrfs-fragments
+/btrfs-fragments.static
+/btrfsck
+/mkfs.btrfs
+/mkfs.btrfs.static
+/btrfs-convert
+/btrfs-convert.static
+/btrfs-find-root
+/btrfs-find-root.static
+/btrfs-image
+/btrfs-image.static
+/btrfs-corrupt-block
+/btrfs-corrupt-block.static
+/btrfs-select-super
+/btrfs-select-super.static
+/btrfs-crc
+/btrfstune
+/btrfstune.static
+/mktables
+
+/libbtrfs.a
+/libbtrfs.so*
+/library-test
+/library-test-static
 /libbtrfsutil.so*
 /libbtrfsutil.a
-/fssum
-testsuite-id
 
+/fssum
+/testsuite-id
 /tests/*-tests-results.txt
 /tests/test-console.txt
 /tests/test.img
 /tests/mnt/
 *.restored
+/ioctl-test
+/send-test
+/quick-test
+
+/aclocal.m4
+/autom4te.cache
+/config.cache
+/config/*
+/config.h
+/config.h.in
+/config.h.in~
+/config.log
+/config.status
+/configure
+/Makefile.inc
 
-aclocal.m4
-autom4te.cache
-compile
-config.cache
-config.guess
-config.h
-config.h.in
-config.h.in~
-config.log
-config.rpath
-config.status
-config.sub
-config/ltmain.sh
-config/py-compile
-config/test-driver
-configure
-cscope.out
+/tags
+/cscope.out
+/cscope.files
+/cscope.in.out
+/cscope.po.out
 .clang_complete
-depcomp
-libtool
-Makefile.inc
-Documentation/Makefile
-missing
-mkinstalldirs
-stamp-h
-stamp-h.in
-stamp-h1
-config/*
+
+/Documentation/Makefile
+/Documentation/*.html
+/Documentation/btrfs-balance.8
+/Documentation/btrfs-check.8
+/Documentation/btrfs-convert.8
+/Documentation/btrfs-device.8
+/Documentation/btrfs-filesystem.8
+/Documentation/btrfs-find-root.8
+/Documentation/btrfs-image.8
+/Documentation/btrfs-inspect-internal.8
+/Documentation/btrfs-ioctl.3
+/Documentation/btrfs-man5.5
+/Documentation/btrfs-map-logical.8
+/Documentation/btrfs-property.8
+/Documentation/btrfs-qgroup.8
+/Documentation/btrfs-quota.8
+/Documentation/btrfs-receive.8
+/Documentation/btrfs-replace.8
+/Documentation/btrfs-rescue.8
+/Documentation/btrfs-restore.8
+/Documentation/btrfs-scrub.8
+/Documentation/btrfs-select-super.8
+/Documentation/btrfs-send.8
+/Documentation/btrfs-subvolume.8
+/Documentation/btrfs.8
+/Documentation/btrfstune.8
+/Documentation/fsck.btrfs.8
+/Documentation/mkfs.btrfs.8
 
 *.patch
diff --git a/CHANGES b/CHANGES
index ae0614ef..f4017f43 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,57 @@
+btrfs-progs-4.19.1 (2018-12-05)
+  * build fixes
+    * big-endian builds fail due to bswap helper clashes
+    * 'swap' macro is too generic, renamed to prevent build failures
+  * libbtrfs
+    * minor version update to 1.1.0
+    * fix default search to top=0 as documented
+    * rename 'async' to avoid future python binding problems
+    * add support for unprivileged subvolume listing ioctls
+    * added tests, API docs
+  * other
+    * lots of typos fixed
+    * warning cleanups
+    * doc formatting updates
+    * CI tests against zstd 1.3.7
+
+btrfs-progs-4.19 (2018-11-03)
+  * check: support repair of fs with free-space-tree feature
+  * core:
+    * port delayed ref infrastructure from kernel
+    * support write to free space tree
+  * dump-tree: new options for BFS and DFS enumeration of b-trees
+  * quota: rescan is now done automatically after 'assign'
+  * btrfstune: incomplete fix to uuid change
+  * subvol: fix 255 char limit checks
+  * completion: complete block devices and now regular files too
+  * docs:
+    * ship uncompressed manual pages
+    * btrfsck uses a manual page link instead of symlink
+  * other
+    * improved error handling
+    * docs
+    * new tests
+
+btrfs-progs-4.17.1 (2018-08-06)
+  * check:
+    * add ability to fix wrong ram_bytes for compressed inline files
+    * beautify progress output
+  * btrfstune: allow to continue uuid change after unclean interruption
+  * several fuzz fixes:
+    * detect overlapping chunks
+    * chunk loading error handling
+    * don't crash with unexpected root refs to extents
+  * relax option parsing again to allow mixing options and non-option
+    arguments
+  * fix qgroup rescan status reporting
+  * build:
+    * drop obsolete dir-test
+    * new configure option to disable building of tools
+    * add compatibility options --disable-static and --disable-shared
+  * other:
+    * cleanups and preparatory work
+    * new test images
+
 btrfs-progs-4.17 (2018-06-14)
   * check
     * many lowmem mode improvements
@@ -143,7 +197,7 @@ btrfs-progs-4.13 (2017-09-08)
   * check: new option --force to allow check of a mounted filesystem (no repair)
   * mkfs: --rootdir will now copy special files
   * dump-tree: minor output changes
-  * inspect rootid: accept file as arugment
+  * inspect rootid: accept file as argument
   * dev usage: don't calculate slack space for missing devices
   * fi du: don't print error on EMPTY_SUBVOL (inode number 2)
   * build:
@@ -206,7 +260,7 @@ btrfs-progs-4.11 (2017-05-18)
 btrfs-progs-4.10.2 (2017-03-31)
   * check: lowmem mode fix for false alert about lost backrefs
   * convert: minor bugfix
-  * library: fix build, misisng symbols, added tests
+  * library: fix build, missing symbols, added tests
 
 btrfs-progs-4.10.1 (2017-03-17)
   * receive: handle subvolume in path clone
@@ -317,7 +371,7 @@ btrfs-progs-4.8.3 (2016-11-11)
 
 btrfs-progs-4.8.2 (2016-10-26)
   * convert: also convert file attributes
-  * convert: fix wrong tree block alignment for unalianged block group
+  * convert: fix wrong tree block alignment for unaligned block group
   * check: quota verify fixes, handle reloc tree
   * build: add stub for FIEMAP_EXTENT_SHARED, compiles on ancient kernels
   * build: add stub for BUILD_ASSERT when ioctl.h is included
@@ -592,7 +646,7 @@ btrfs-progs-4.3 (2015-11-06)
     * test for broken 'subvolume sync'
     * basic tests for mkfs, raid option combinations
     * basic tests for fuzzed images (check)
-    * command intrumentation (eg valgrind)
+    * command instrumentation (eg valgrind)
     * print commands if requested
     * add README for tests
 
diff --git a/Documentation/DocConventions b/Documentation/DocConventions
index e84ed7a7..969209cf 100644
--- a/Documentation/DocConventions
+++ b/Documentation/DocConventions
@@ -23,7 +23,7 @@ Quotation in subcommands:
 - command reference: bold *btrfs fi show*
 - section references: italics 'EXAMPLES'
 
-- argument name in option desciption: caps in angle brackets <NAME>
+- argument name in option description: caps in angle brackets <NAME>
   - reference in help text: caps NAME
     also possible: caps italics 'NAME'
 
@@ -34,6 +34,6 @@ Quotation in subcommands:
   - optional parameter with argument: [-p <path>]
 
 
-Refrences:
+References:
 - full asciidoc syntax: http://asciidoc.org/userguide.html
 - cheatsheet: http://powerman.name/doc/asciidoc
diff --git a/Documentation/Makefile.in b/Documentation/Makefile.in
index 184647c4..afc16980 100644
--- a/Documentation/Makefile.in
+++ b/Documentation/Makefile.in
@@ -37,9 +37,9 @@ MAN3_TXT += btrfs-ioctl.asciidoc
 MAN_TXT = $(MAN3_TXT) $(MAN8_TXT) $(MAN5_TXT)
 MAN_XML = $(patsubst %.asciidoc,%.xml,$(MAN_TXT))
 MAN_HTML = $(patsubst %.asciidoc,%.html,$(MAN_TXT))
-GZ_MAN3 = $(patsubst %.asciidoc,%.3.gz,$(MAN3_TXT))
-GZ_MAN5 = $(patsubst %.asciidoc,%.5.gz,$(MAN5_TXT))
-GZ_MAN8 = $(patsubst %.asciidoc,%.8.gz,$(MAN8_TXT))
+MAN3 = $(patsubst %.asciidoc,%.3,$(MAN3_TXT))
+MAN5 = $(patsubst %.asciidoc,%.5,$(MAN5_TXT))
+MAN8 = $(patsubst %.asciidoc,%.8,$(MAN8_TXT))
 
 mandir ?= $(prefix)/share/man
 man3dir = $(mandir)/man3
@@ -65,7 +65,6 @@ MANPAGE_XSL = manpage-normal.xsl
 XMLTO = @XMLTO@
 XMLTO_EXTRA =
 XMLTO_EXTRA = -m manpage-bold-literal.xsl
-GZIPCMD = @GZIP@
 INSTALL = @INSTALL@
 RM = @RM@
 RMDIR = @RMDIR@
@@ -80,15 +79,14 @@ ifndef V
 	QUIET_RM	= @
 	QUIET_ASCIIDOC	= @echo "    [ASCII]  $@";
 	QUIET_XMLTO	= @echo "    [XMLTO]  $@";
-	QUIET_GZIP	= @echo "    [GZ]     $@";
 endif
 endif
 
 all: man
 man: man3 man5 man8
-man3: $(GZ_MAN3)
-man5: $(GZ_MAN5)
-man8: $(GZ_MAN8)
+man3: $(MAN3)
+man5: $(MAN5)
+man8: $(MAN8)
 html: $(MAN_HTML)
 
 install: install-man
@@ -96,28 +94,19 @@ install: install-man
 install-man: man
 	$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
 	$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
-	$(INSTALL) -m 644 $(GZ_MAN5) $(DESTDIR)$(man5dir)
+	$(INSTALL) -m 644 $(MAN5) $(DESTDIR)$(man5dir)
 	# the source file name of btrfs.5 clashes with section 8 page, but we
 	# want to keep the code generic
-	$(MV) $(DESTDIR)$(man5dir)/btrfs-man5.5.gz $(DESTDIR)$(man5dir)/btrfs.5.gz
-	$(INSTALL) -m 644 $(GZ_MAN8) $(DESTDIR)$(man8dir)
-	$(LN_S) -f btrfs-check.8.gz $(DESTDIR)$(man8dir)/btrfsck.8.gz
+	$(MV) $(DESTDIR)$(man5dir)/btrfs-man5.5 $(DESTDIR)$(man5dir)/btrfs.5
+	$(INSTALL) -m 644 $(MAN8) $(DESTDIR)$(man8dir)
+	$(INSTALL) -m 644 btrfsck.8 $(DESTDIR)$(man8dir)
 
 uninstall:
-	cd $(DESTDIR)$(man8dir); rm -f btrfs-check.8.gz $(GZ_MAN8)
+	cd $(DESTDIR)$(man8dir); rm -f btrfs-check.8 $(MAN8)
 	$(RMDIR) -p --ignore-fail-on-non-empty $(DESTDIR)$(man8dir)
 
 clean:
-	$(QUIET_RM)$(RM) -f *.xml *.xml+ *.3 *.3.gz *.5 *.5.gz *.8 *.8.gz *.html
-
-%.3.gz : %.3
-	$(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@
-
-%.5.gz : %.5
-	$(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@
-
-%.8.gz : %.8
-	$(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@
+	$(QUIET_RM)$(RM) -f $(MAN_XML) *.xml+ $(MAN3) $(MAN5) $(MAN8) $(MAN_HTML)
 
 %.3 : %.xml
 	$(QUIET_XMLTO)$(RM) -f $@ && \
diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf
index 1ea74591..be36a7b5 100644
--- a/Documentation/asciidoc.conf
+++ b/Documentation/asciidoc.conf
@@ -29,6 +29,16 @@ backtick=&#96;
 litdd=&#45;&#45;
 
 ifdef::doctype-manpage[]
+
+[quotes]
+^=#exponent
+
+[tags]
+exponent={1?[{1}]}^|
+
+endif::doctype-manpage[]
+
+ifdef::doctype-manpage[]
 ifdef::backend-docbook[]
 [header]
 template::[header-declarations]
@@ -46,3 +56,4 @@ template::[header-declarations]
 </refnamediv>
 endif::backend-docbook[]
 endif::doctype-manpage[]
+
diff --git a/Documentation/btrfs-balance.8 b/Documentation/btrfs-balance.8
new file mode 100644
index 00000000..c686381c
--- /dev/null
+++ b/Documentation/btrfs-balance.8
@@ -0,0 +1,547 @@
+'\" t
+.\"     Title: btrfs-balance
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-BALANCE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-balance \- balance block groups on a btrfs filesystem
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs balance\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+The primary purpose of the balance feature is to spread block groups across all devices so they match constraints defined by the respective profiles\&. See \fBmkfs\&.btrfs\fR(8) section \fIPROFILES\fR for more details\&. The scope of the balancing process can be further tuned by use of filters that can select the block groups to process\&. Balance works only on a mounted filesystem\&.
+.sp
+The balance operation is cancellable by the user\&. The on\-disk state of the filesystem is always consistent so an unexpected interruption (eg\&. system crash, reboot) does not corrupt the filesystem\&. The progress of the balance operation is temporarily stored as an internal state and will be resumed upon mount, unless the mount option \fIskip_balance\fR is specified\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+.sp
+running balance without filters will take a lot of time as it basically rewrites the entire filesystem and needs to update all block pointers\&.
+.sp .5v
+.RE
+.sp
+The filters can be used to perform the following actions:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+convert block group profiles (filter
+\fIconvert\fR)
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+make block group usage more compact (filter
+\fIusage\fR)
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+perform actions only on a given device (filters
+\fIdevid\fR,
+\fIdrange\fR)
+.RE
+.sp
+The filters can be applied to a combination of block group types (data, metadata, system)\&. Note that changing \fIsystem\fR needs the force option\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+the balance operation needs enough work space, ie\&. space that is completely unused in the filesystem, otherwise this may lead to ENOSPC reports\&. See the section \fIENOSPC\fR for more details\&.
+.sp .5v
+.RE
+.SH "COMPATIBILITY"
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+The balance subcommand also exists under the \fBbtrfs filesystem\fR namespace\&. This still works for backward compatibility but is deprecated and should not be used any more\&.
+.sp .5v
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+A short syntax \fBbtrfs balance \fR\fB\fI<path>\fR\fR works due to backward compatibility but is deprecated and should not be used any more\&. Use \fBbtrfs balance start\fR command instead\&.
+.sp .5v
+.RE
+.SH "PERFORMANCE IMPLICATIONS"
+.sp
+Balancing operations are very IO intensive and can also be quite CPU intensive, impacting other ongoing filesystem operations\&. Typically large amounts of data are copied from one location to another, with corresponding metadata updates\&.
+.sp
+Depending upon the block group layout, it can also be seek heavy\&. Performance on rotational devices is noticeably worse compared to SSDs or fast arrays\&.
+.SH "SUBCOMMAND"
+.PP
+\fBcancel\fR \fI<path>\fR
+.RS 4
+cancels a running or paused balance, the command will block and wait until the current blockgroup being processed completes
+.RE
+.PP
+\fBpause\fR \fI<path>\fR
+.RS 4
+pause running balance operation, this will store the state of the balance progress and used filters to the filesystem
+.RE
+.PP
+\fBresume\fR \fI<path>\fR
+.RS 4
+resume interrupted balance, the balance status must be stored on the filesystem from previous run, eg\&. after it was forcibly interrupted and mounted again with
+\fIskip_balance\fR
+.RE
+.PP
+\fBstart\fR [options] \fI<path>\fR
+.RS 4
+start the balance operation according to the specified filters, no filters will rewrite the entire filesystem\&. The process runs in the foreground\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+the balance command without filters will basically rewrite everything in the filesystem\&. The run time is potentially very long, depending on the filesystem size\&. To prevent starting a full balance by accident, the user is warned and has a few seconds to cancel the operation before it starts\&. The warning and delay can be skipped with
+\fI\-\-full\-balance\fR
+option\&.
+.sp .5v
+.RE
+Please note that the filters must be written together with the
+\fI\-d\fR,
+\fI\-m\fR
+and
+\fI\-s\fR
+options, because they\(cqre optional and bare
+\fI\-d\fR
+etc also work and mean no filters\&.
+.sp
+\fBOptions\fR
+.PP
+\-d[\fI<filters>\fR]
+.RS 4
+act on data block groups, see
+\fBFILTERS\fR
+section for details about
+\fIfilters\fR
+.RE
+.PP
+\-m[\fI<filters>\fR]
+.RS 4
+act on metadata chunks, see
+\fBFILTERS\fR
+section for details about
+\fIfilters\fR
+.RE
+.PP
+\-s[\fI<filters>\fR]
+.RS 4
+act on system chunks (requires
+\fI\-f\fR), see
+\fBFILTERS\fR
+section for details about
+\fIfilters\fR\&.
+.RE
+.PP
+\-v
+.RS 4
+be verbose and print balance filter arguments
+.RE
+.PP
+\-f
+.RS 4
+force reducing of metadata integrity, eg\&. when going from
+\fIraid1\fR
+to
+\fIsingle\fR
+.RE
+.PP
+\-\-background|\-\-bg
+.RS 4
+run the balance operation asynchronously in the background, uses
+\fBfork\fR(2) to start the process that calls the kernel ioctl
+.RE
+.RE
+.PP
+\fBstatus\fR [\-v] \fI<path>\fR
+.RS 4
+Show status of running or paused balance\&.
+.sp
+If
+\fI\-v\fR
+option is given, output will be verbose\&.
+.RE
+.SH "FILTERS"
+.sp
+From kernel 3\&.3 onwards, btrfs balance can limit its action to a subset of the whole filesystem, and can be used to change the replication configuration (e\&.g\&. moving data from single to RAID1)\&. This functionality is accessed through the \fI\-d\fR, \fI\-m\fR or \fI\-s\fR options to btrfs balance start, which filter on data, metadata and system blocks respectively\&.
+.sp
+A filter has the following structure: \fItype\fR[=\fIparams\fR][,\fItype\fR=\&...]
+.sp
+The available types are:
+.PP
+\fBprofiles=\fR\fB\fI<profiles>\fR\fR
+.RS 4
+Balances only block groups with the given profiles\&. Parameters are a list of profile names separated by "\fI|\fR" (pipe)\&.
+.RE
+.PP
+\fBusage=\fR\fB\fI<percent>\fR\fR, \fBusage=\fR\fB\fI<range>\fR\fR
+.RS 4
+Balances only block groups with usage under the given percentage\&. The value of 0 is allowed and will clean up completely unused block groups, this should not require any new work space allocated\&. You may want to use
+\fIusage=0\fR
+in case balance is returning ENOSPC and your filesystem is not too full\&.
+.sp
+The argument may be a single value or a range\&. The single value
+\fIN\fR
+means
+\fIat most N percent used\fR, equivalent to
+\fI\&.\&.N\fR
+range syntax\&. Kernels prior to 4\&.4 accept only the single value format\&. The minimum range boundary is inclusive, maximum is exclusive\&.
+.RE
+.PP
+\fBdevid=\fR\fB\fI<id>\fR\fR
+.RS 4
+Balances only block groups which have at least one chunk on the given device\&. To list devices with ids use
+\fBbtrfs filesystem show\fR\&.
+.RE
+.PP
+\fBdrange=\fR\fB\fI<range>\fR\fR
+.RS 4
+Balance only block groups which overlap with the given byte range on any device\&. Use in conjunction with
+\fIdevid\fR
+to filter on a specific device\&. The parameter is a range specified as
+\fIstart\&.\&.end\fR\&.
+.RE
+.PP
+\fBvrange=\fR\fB\fI<range>\fR\fR
+.RS 4
+Balance only block groups which overlap with the given byte range in the filesystem\(cqs internal virtual address space\&. This is the address space that most reports from btrfs in the kernel log use\&. The parameter is a range specified as
+\fIstart\&.\&.end\fR\&.
+.RE
+.PP
+\fBconvert=\fR\fB\fI<profile>\fR\fR
+.RS 4
+Convert each selected block group to the given profile name identified by parameters\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+starting with kernel 4\&.5, the
+\fIdata\fR
+chunks can be converted to/from the
+\fIDUP\fR
+profile on a single device\&.
+.sp .5v
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+starting with kernel 4\&.6, all profiles can be converted to/from
+\fIDUP\fR
+on multi\-device filesystems\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBlimit=\fR\fB\fI<number>\fR\fR, \fBlimit=\fR\fB\fI<range>\fR\fR
+.RS 4
+Process only given number of chunks, after all filters are applied\&. This can be used to specifically target a chunk in connection with other filters (\fIdrange\fR,
+\fIvrange\fR) or just simply limit the amount of work done by a single balance run\&.
+.sp
+The argument may be a single value or a range\&. The single value
+\fIN\fR
+means
+\fIat most N chunks\fR, equivalent to
+\fI\&.\&.N\fR
+range syntax\&. Kernels prior to 4\&.4 accept only the single value format\&. The range minimum and maximum are inclusive\&.
+.RE
+.PP
+\fBstripes=\fR\fB\fI<range>\fR\fR
+.RS 4
+Balance only block groups which have the given number of stripes\&. The parameter is a range specified as
+\fIstart\&.\&.end\fR\&. Makes sense for block group profiles that utilize striping, ie\&. RAID0/10/5/6\&. The range minimum and maximum are inclusive\&.
+.RE
+.PP
+\fBsoft\fR
+.RS 4
+Takes no parameters\&. Only has meaning when converting between profiles\&. When doing convert from one profile to another and soft mode is on, chunks that already have the target profile are left untouched\&. This is useful e\&.g\&. when half of the filesystem was converted earlier but got cancelled\&.
+.sp
+The soft mode switch is (like every other filter) per\-type\&. For example, this means that we can convert metadata chunks the "hard" way while converting data chunks selectively with soft switch\&.
+.RE
+.sp
+Profile names, used in \fIprofiles\fR and \fIconvert\fR, are one of: \fIraid0\fR, \fIraid1\fR, \fIraid10\fR, \fIraid5\fR, \fIraid6\fR, \fIdup\fR, \fIsingle\fR\&. The mixed data/metadata profiles can be converted in the same way, but conversion between mixed and non\-mixed is not implemented\&. For the constraints of the profiles please refer to \fBmkfs\&.btrfs\fR(8), section \fIPROFILES\fR\&.
+.SH "ENOSPC"
+.sp
+The way balance operates, it usually needs to temporarily create a new block group and move the old data there, before the old block group can be removed\&. For that it needs the work space, otherwise it fails for ENOSPC reasons\&. This is not the same ENOSPC as if the free space is exhausted\&. This refers to the space on the level of block groups, which are bigger parts of the filesystem that contain many file extents\&.
+.sp
+The free work space can be calculated from the output of the \fBbtrfs filesystem show\fR command:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+   Label: \*(AqBTRFS\*(Aq  uuid: 8a9d72cd\-ead3\-469d\-b371\-9c7203276265
+           Total devices 2 FS bytes used 77\&.03GiB
+           devid    1 size 53\&.90GiB used 51\&.90GiB path /dev/sdc2
+           devid    2 size 53\&.90GiB used 51\&.90GiB path /dev/sde1
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+\fIsize\fR \- \fIused\fR = \fIfree work space\fR \fI53\&.90GiB\fR \- \fI51\&.90GiB\fR = \fI2\&.00GiB\fR
+.sp
+An example of a filter that does not require workspace is \fIusage=0\fR\&. This will scan through all unused block groups of a given type and will reclaim the space\&. After that it might be possible to run other filters\&.
+.sp
+\fBCONVERSIONS ON MULTIPLE DEVICES\fR
+.sp
+Conversion to profiles based on striping (RAID0, RAID5/6) requires work space on each device\&. An interrupted balance may leave partially filled block groups that consume the work space\&.
+.SH "EXAMPLES"
+.sp
+A more comprehensive example when going from one to multiple devices, and back, can be found in section \fITYPICAL USECASES\fR of \fBbtrfs\-device\fR(8)\&.
+.SS "MAKING BLOCK GROUP LAYOUT MORE COMPACT"
+.sp
+The layout of block groups is not normally visible; most tools report only summarized numbers of free or used space, but there are still some hints provided\&.
+.sp
+Let\(cqs use the following real life example and start with the output:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs filesystem df /path
+Data, single: total=75\&.81GiB, used=64\&.44GiB
+System, RAID1: total=32\&.00MiB, used=20\&.00KiB
+Metadata, RAID1: total=15\&.87GiB, used=8\&.84GiB
+GlobalReserve, single: total=512\&.00MiB, used=0\&.00B
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+Roughly calculating for data, \fI75G \- 64G = 11G\fR, the used/total ratio is about \fI85%\fR\&. How can we interpret that:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+chunks are filled by 85% on average, ie\&. the
+\fIusage\fR
+filter with anything smaller than 85 will likely not affect anything
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+in a more realistic scenario, the space is distributed unevenly, we can assume there are completely used chunks and the remaining are partially filled
+.RE
+.sp
+Compacting the layout could be used on both\&. In the former case it would spread data of a given chunk to the others and remove it\&. Here we can estimate that roughly 850 MiB of data have to be moved (85% of a 1 GiB chunk)\&.
+.sp
+In the latter case, targeting the partially used chunks will have to move less data and thus will be faster\&. A typical filter command would look like:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# btrfs balance start \-dusage=50 /path
+Done, had to relocate 2 out of 97 chunks
+
+$ btrfs filesystem df /path
+Data, single: total=74\&.03GiB, used=64\&.43GiB
+System, RAID1: total=32\&.00MiB, used=20\&.00KiB
+Metadata, RAID1: total=15\&.87GiB, used=8\&.84GiB
+GlobalReserve, single: total=512\&.00MiB, used=0\&.00B
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+As you can see, the \fItotal\fR amount of data is decreased by just 1 GiB, which is an expected result\&. Let\(cqs see what will happen when we increase the estimated usage filter\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# btrfs balance start \-dusage=85 /path
+Done, had to relocate 13 out of 95 chunks
+
+$ btrfs filesystem df /path
+Data, single: total=68\&.03GiB, used=64\&.43GiB
+System, RAID1: total=32\&.00MiB, used=20\&.00KiB
+Metadata, RAID1: total=15\&.87GiB, used=8\&.85GiB
+GlobalReserve, single: total=512\&.00MiB, used=0\&.00B
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+Now the used/total ratio is about 94% and we moved about \fI74G \- 68G = 6G\fR of data to the remaining blockgroups, ie\&. the 6GiB are now free of filesystem structures, and can be reused for new data or metadata block groups\&.
+.sp
+We can do a similar exercise with the metadata block groups, but this should not typically be necessary, unless the used/total ratio is really off\&. Here the ratio is roughly 50% but the difference as an absolute number is "a few gigabytes", which can be considered normal for a workload with snapshots or reflinks updated frequently\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# btrfs balance start \-musage=50 /path
+Done, had to relocate 4 out of 89 chunks
+
+$ btrfs filesystem df /path
+Data, single: total=68\&.03GiB, used=64\&.43GiB
+System, RAID1: total=32\&.00MiB, used=20\&.00KiB
+Metadata, RAID1: total=14\&.87GiB, used=8\&.85GiB
+GlobalReserve, single: total=512\&.00MiB, used=0\&.00B
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+Just 1 GiB decrease, which possibly means there are block groups with good utilization\&. Making the metadata layout more compact would in turn require updating more metadata structures, ie\&. lots of IO\&. As running out of metadata space is a more severe problem, it\(cqs not necessary to keep the utilization ratio too high\&. For the purpose of this example, let\(cqs see the effects of further compaction:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# btrfs balance start \-musage=70 /path
+Done, had to relocate 13 out of 88 chunks
+
+$ btrfs filesystem df \&.
+Data, single: total=68\&.03GiB, used=64\&.43GiB
+System, RAID1: total=32\&.00MiB, used=20\&.00KiB
+Metadata, RAID1: total=11\&.97GiB, used=8\&.83GiB
+GlobalReserve, single: total=512\&.00MiB, used=0\&.00B
+.fi
+.if n \{\
+.RE
+.\}
+.SS "GETTING RID OF COMPLETELY UNUSED BLOCK GROUPS"
+.sp
+Normally the balance operation needs a work space, to temporarily move the data before the old block groups get removed\&. If there\(cqs no work space, it ends with \fIno space left\fR\&.
+.sp
+There\(cqs a special case when the block groups are completely unused, possibly left after removing lots of files or deleting snapshots\&. Removing empty block groups is automatic since 3\&.18\&. The same can be achieved manually with a notable exception that this operation does not require the work space\&. Thus it can be used to reclaim unused block groups to make their space available\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# btrfs balance start \-dusage=0 /path
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+This should lead to a decrease in the \fItotal\fR numbers in the \fBbtrfs filesystem df\fR output\&.
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs balance\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-device\fR(8)
diff --git a/Documentation/btrfs-balance.8.gz b/Documentation/btrfs-balance.8.gz
deleted file mode 100644
index 02771775..00000000
--- a/Documentation/btrfs-balance.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-check.8 b/Documentation/btrfs-check.8
new file mode 100644
index 00000000..da562a57
--- /dev/null
+++ b/Documentation/btrfs-check.8
@@ -0,0 +1,230 @@
+'\" t
+.\"     Title: btrfs-check
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-CHECK" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-check \- check or repair a btrfs filesystem
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs check\fR [options] \fI<device>\fR
+.SH "DESCRIPTION"
+.sp
+The filesystem checker is used to verify structural integrity of a filesystem and attempt to repair it if requested\&. It is recommended to unmount the filesystem prior to running the check, but it is possible to start checking a mounted filesystem (see \fI\-\-force\fR)\&.
+.sp
+By default, \fBbtrfs check\fR will not modify the device but you can reaffirm that by the option \fI\-\-readonly\fR\&.
+.sp
+\fBbtrfsck\fR is an alias of \fBbtrfs check\fR command and is now deprecated\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+.sp
+Do not use \fI\-\-repair\fR unless you are advised to do so by a developer or an experienced user, and then only after having accepted that no \fIfsck\fR can successfully repair all types of filesystem corruption\&. Eg\&. some other software or hardware bugs can fatally damage a volume\&.
+.sp .5v
+.RE
+.sp
+The structural integrity check verifies if internal filesystem objects or data structures satisfy the constraints, point to the right objects or are correctly connected together\&.
+.sp
+There are several cross checks that can detect wrong reference counts of shared extents, backreferences, missing extents of inodes, directory and inode connectivity etc\&.
+.sp
+The amount of memory required can be high, depending on the size of the filesystem, similarly the run time\&.
+.SH "SAFE OR ADVISORY OPTIONS"
+.PP
+\-b|\-\-backup
+.RS 4
+use the first valid set of backup roots stored in the superblock
+.sp
+This can be combined with
+\fI\-\-super\fR
+if some of the superblocks are damaged\&.
+.RE
+.PP
+\-\-check\-data\-csum
+.RS 4
+verify checksums of data blocks
+.sp
+This expects that the filesystem is otherwise OK, and is basically an offline
+\fIscrub\fR
+but does not repair data from spare copies\&.
+.RE
+.PP
+\-\-chunk\-root \fI<bytenr>\fR
+.RS 4
+use the given offset
+\fIbytenr\fR
+for the chunk tree root
+.RE
+.PP
+\-E|\-\-subvol\-extents \fI<subvolid>\fR
+.RS 4
+show extent state for the given subvolume
+.RE
+.PP
+\-p|\-\-progress
+.RS 4
+indicate progress at various checking phases
+.RE
+.PP
+\-Q|\-\-qgroup\-report
+.RS 4
+verify qgroup accounting and compare against filesystem accounting
+.RE
+.PP
+\-r|\-\-tree\-root \fI<bytenr>\fR
+.RS 4
+use the given offset
+\fIbytenr\fR
+for the tree root
+.RE
+.PP
+\-\-readonly
+.RS 4
+(default) run in read\-only mode, this option exists to calm potential panic when users are going to run the checker
+.RE
+.PP
+\-s|\-\-super \fI<superblock>\fR
+.RS 4
+use \*(Aqsuperblock\(cqth superblock copy, valid values are 0, 1 or 2 if the respective superblock offset is within the device size
+.sp
+This can be used to use a different starting point if the primary superblock is damaged\&.
+.RE
+.PP
+\-\-clear\-space\-cache v1|v2
+.RS 4
+completely wipe all free space cache of given type
+.sp
+For free space cache
+\fIv1\fR, the
+\fIclear_cache\fR
+kernel mount option only rebuilds the free space cache for block groups that are modified while the filesystem is mounted with that option\&. Thus, using this option with
+\fIv1\fR
+makes it possible to actually clear the entire free space cache\&.
+.sp
+For free space cache
+\fIv2\fR, the
+\fIclear_cache\fR
+kernel mount option destroys the entire free space cache\&. This option, with
+\fIv2\fR
+provides an alternative method of clearing the free space cache that doesn\(cqt require mounting the filesystem\&.
+.RE
+.SH "DANGEROUS OPTIONS"
+.PP
+\-\-repair
+.RS 4
+enable the repair mode and attempt to fix problems where possible
+.RE
+.PP
+\-\-init\-csum\-tree
+.RS 4
+create a new checksum tree and recalculate checksums in all files
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+Do not blindly use this option to fix checksum mismatch problems\&.
+.sp .5v
+.RE
+.RE
+.PP
+\-\-init\-extent\-tree
+.RS 4
+build the extent tree from scratch
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+Do not use unless you know what you\(cqre doing\&.
+.sp .5v
+.RE
+.RE
+.PP
+\-\-mode=MODE
+.RS 4
+select mode of operation regarding memory and IO
+.sp
+The
+\fIMODE\fR
+can be one of
+\fIoriginal\fR
+and
+\fIlowmem\fR\&. The original mode is mostly unoptimized regarding memory consumption and can lead to out\-of\-memory conditions on large filesystems\&. The possible workaround is to export the block device over network to a machine with enough memory\&. The low memory mode is supposed to address the memory consumption, at the cost of increased IO when it needs to re\-read blocks when needed\&. This may increase run time\&.
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+\fIlowmem\fR mode does not work with \fI\-\-repair\fR yet, and is still considered experimental\&.
+.sp .5v
+.RE
+.PP
+\-\-force
+.RS 4
+allow work on a mounted filesystem\&. Note that this should work fine on a quiescent or read\-only mounted filesystem but may crash if the device is changed externally, eg\&. by the kernel module\&. Repair without mount checks is not supported right now\&.
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs check\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-scrub\fR(8), \fBbtrfs\-rescue\fR(8)
diff --git a/Documentation/btrfs-check.8.gz b/Documentation/btrfs-check.8.gz
deleted file mode 100644
index 7a4b5d6b..00000000
--- a/Documentation/btrfs-check.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-convert.8 b/Documentation/btrfs-convert.8
new file mode 100644
index 00000000..7b5239ab
--- /dev/null
+++ b/Documentation/btrfs-convert.8
@@ -0,0 +1,217 @@
+'\" t
+.\"     Title: btrfs-convert
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-CONVERT" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-convert \- convert from ext2/3/4 or reiserfs filesystem to btrfs in\-place
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs\-convert\fR [options] \fI<device>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs\-convert\fR is used to convert existing source filesystem image to a btrfs filesystem in\-place\&. The original filesystem image is accessible in subvolume named like \fIext2_saved\fR as file \fIimage\fR\&.
+.sp
+Supported filesystems:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+ext2, ext3, ext4 \(em original feature, always built in
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+reiserfs \(em since version 4\&.13, optionally built, requires libreiserfscore 3\&.6\&.27
+.RE
+.sp
+The source filesystems supported by a given binary are listed at the end of help (option \fI\-\-help\fR)\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+.sp
+If you are going to perform rollback to the original filesystem, you should not execute \fBbtrfs balance\fR command on the converted filesystem\&. This will change the extent layout and make \fBbtrfs\-convert\fR unable to rollback\&.
+.sp .5v
+.RE
+.sp
+The conversion utilizes free space of the original filesystem\&. The exact estimate of the required space cannot be foretold\&. The final btrfs metadata might occupy several gigabytes on a hundreds\-gigabyte filesystem\&.
+.sp
+If the ability to rollback is no longer important, then it is recommended to perform a few more steps to transition the btrfs filesystem to a more compact layout\&. This is because the conversion inherits the original data blocks\*(Aq fragmentation, and also because the metadata blocks are bound to the original free space layout\&.
+.sp
+Due to different constraints, it is only possible to convert filesystems that have a supported data block size (ie\&. the same that would be valid for \fImkfs\&.btrfs\fR)\&. This is typically the system page size (4KiB on x86_64 machines)\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+The source filesystem should be clean, you are encouraged to run the \fIfsck\fR tool if you\(cqre not sure\&.
+.sp .5v
+.RE
+.sp
+\fBREMOVE THE ORIGINAL FILESYSTEM METADATA\fR
+.sp
+By removing the subvolume named like \fIext2_saved\fR or \fIreiserfs_saved\fR, all metadata of the original filesystem will be removed:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# btrfs subvolume delete /mnt/ext2_saved
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+At this point it is not possible to do a rollback\&. The filesystem is usable but may be impacted by the fragmentation inherited from the original filesystem\&.
+.sp
+\fBMAKE FILE DATA MORE CONTIGUOUS\fR
+.sp
+An optional but recommended step is to run defragmentation on the entire filesystem\&. This will attempt to make file extents more contiguous\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# btrfs filesystem defrag \-v \-r \-f \-t 32M /mnt/btrfs
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+Verbose recursive defragmentation (\fI\-v\fR, \fI\-r\fR), flush data per\-file (\fI\-f\fR) with target extent size 32MiB (\fI\-t\fR)\&.
+.sp
+\fBATTEMPT TO MAKE BTRFS METADATA MORE COMPACT\fR
+.sp
+Optional but recommended step\&.
+.sp
+The metadata block groups after conversion may be smaller than the default size (256MiB or 1GiB)\&. Running a balance will attempt to merge the block groups\&. This depends on the free space layout (and fragmentation) and may fail due to lack of enough work space\&. This is a soft error leaving the filesystem usable but the block group layout may remain unchanged\&.
+.sp
+Note that balance operation takes a lot of time, please see also \fBbtrfs\-balance\fR(8)\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# btrfs balance start \-m /mnt/btrfs
+.fi
+.if n \{\
+.RE
+.\}
+.SH "OPTIONS"
+.PP
+\-d|\-\-no\-datasum
+.RS 4
+disable data checksum calculations and set the NODATASUM file flag, this can speed up the conversion
+.RE
+.PP
+\-i|\-\-no\-xattr
+.RS 4
+ignore xattrs and ACLs of files
+.RE
+.PP
+\-n|\-\-no\-inline
+.RS 4
+disable inlining of small files to metadata blocks, this will decrease the metadata consumption and may help to convert a filesystem with low free space
+.RE
+.PP
+\-N|\-\-nodesize \fI<SIZE>\fR
+.RS 4
+set filesystem nodesize, the tree block size in which btrfs stores its metadata\&. The default value is 16KB (16384) or the page size, whichever is bigger\&. Must be a multiple of the sectorsize, but not larger than 65536\&. See
+\fBmkfs\&.btrfs\fR(8) for more details\&.
+.RE
+.PP
+\-r|\-\-rollback
+.RS 4
+rollback to the original ext2/3/4 filesystem if possible
+.RE
+.PP
+\-l|\-\-label \fI<LABEL>\fR
+.RS 4
+set filesystem label during conversion
+.RE
+.PP
+\-L|\-\-copy\-label
+.RS 4
+use label from the converted filesystem
+.RE
+.PP
+\-O|\-\-features \fI<feature1>\fR[,\fI<feature2>\fR\&...]
+.RS 4
+A list of filesystem features enabled at the time of conversion\&. Not all features are supported by old kernels\&. To disable a feature, prefix it with
+\fI^\fR\&. Description of the features is in section
+\fIFILESYSTEM FEATURES\fR
+of
+\fBmkfs\&.btrfs\fR(8)\&.
+.sp
+To see all available features that btrfs\-convert supports run:
+.sp
+\fBbtrfs\-convert \-O list\-all\fR
+.RE
+.PP
+\-p|\-\-progress
+.RS 4
+show progress of conversion (a heartbeat indicator and number of inodes processed), on by default
+.RE
+.PP
+\-\-no\-progress
+.RS 4
+disable progress and show only the main phases of conversion
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs\-convert\fR will return 0 if no error happened\&. If any problems happened, 1 will be returned\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8)
diff --git a/Documentation/btrfs-convert.8.gz b/Documentation/btrfs-convert.8.gz
deleted file mode 100644
index af0b9b13..00000000
--- a/Documentation/btrfs-convert.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-device.8 b/Documentation/btrfs-device.8
new file mode 100644
index 00000000..082e66a5
--- /dev/null
+++ b/Documentation/btrfs-device.8
@@ -0,0 +1,410 @@
+'\" t
+.\"     Title: btrfs-device
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-DEVICE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-device \- manage devices of btrfs filesystems
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs device\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+The \fBbtrfs device\fR command group is used to manage devices of the btrfs filesystems\&.
+.SH "DEVICE MANAGEMENT"
+.sp
+Btrfs filesystem can be created on top of single or multiple block devices\&. Data and metadata are organized in allocation profiles with various redundancy policies\&. There\(cqs some similarity with traditional RAID levels, but this could be confusing to users familiar with the traditional meaning\&. Due to the similarity, the RAID terminology is widely used in the documentation\&. See \fBmkfs\&.btrfs\fR(8) for more details and the exact profile capabilities and constraints\&.
+.sp
+The device management works on a mounted filesystem\&. Devices can be added, removed or replaced, by commands provided by \fBbtrfs device\fR and \fBbtrfs replace\fR\&.
+.sp
+The profiles can be also changed, provided there\(cqs enough workspace to do the conversion, using the \fBbtrfs balance\fR command and namely the filter \fIconvert\fR\&.
+.PP
+Profile
+.RS 4
+A profile describes an allocation policy based on the redundancy/replication constraints in connection with the number of devices\&. The profile applies to data and metadata block groups separately\&.
+.RE
+.PP
+RAID level
+.RS 4
+Where applicable, the level refers to a profile that matches constraints of the standard RAID levels\&. At the moment the supported ones are: RAID0, RAID1, RAID10, RAID5 and RAID6\&.
+.RE
+.sp
+See the section \fBTYPICAL USECASES\fR for some examples\&.
+.SH "SUBCOMMAND"
+.PP
+\fBadd\fR [\-Kf] \fI<device>\fR [\fI<device>\fR\&...] \fI<path>\fR
+.RS 4
+Add device(s) to the filesystem identified by
+\fI<path>\fR\&.
+.sp
+If applicable, a whole device discard (TRIM) operation is performed prior to adding the device\&. A device with existing filesystem detected by
+\fBblkid\fR(8) will prevent device addition and has to be forced\&. Alternatively the filesystem can be wiped from the device using eg\&. the
+\fBwipefs\fR(8) tool\&.
+.sp
+The operation is instant and does not affect existing data\&. The operation merely adds the device to the filesystem structures and creates some block group headers\&.
+.sp
+\fBOptions\fR
+.PP
+\-K|\-\-nodiscard
+.RS 4
+do not perform discard (TRIM) by default
+.RE
+.PP
+\-f|\-\-force
+.RS 4
+force overwrite of existing filesystem on the given disk(s)
+.RE
+.RE
+.PP
+\fBremove\fR \fI<device>\fR|\fI<devid>\fR [\fI<device>\fR|\fI<devid>\fR\&...] \fI<path>\fR
+.RS 4
+Remove device(s) from a filesystem identified by
+\fI<path>\fR
+.sp
+Device removal must satisfy the profile constraints, otherwise the command fails\&. The filesystem must be converted to profile(s) that would allow the removal\&. This can typically happen when going down from 2 devices to 1 and using the RAID1 profile\&. See the
+\fBTYPICAL USECASES\fR
+section below\&.
+.sp
+The operation can take long as it needs to move all data from the device\&.
+.sp
+It is possible to delete the device that was used to mount the filesystem\&. The device entry in the mount table will be replaced by another device name with the lowest device id\&.
+.sp
+If the filesystem is mounted in degraded mode (\-o degraded), special term
+\fImissing\fR
+can be used for
+\fIdevice\fR\&. In that case, the first device that is described by the filesystem metadata, but not present at the mount time will be removed\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+In most cases, there is only one missing device in degraded mode, otherwise mount fails\&. If there are two or more devices missing (e\&.g\&. possible in RAID6), you need to specify
+\fImissing\fR
+as many times as the number of missing devices to remove all of them\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBdelete\fR \fI<device>\fR|\fI<devid>\fR [\fI<device>\fR|\fI<devid>\fR\&...] \fI<path>\fR
+.RS 4
+Alias of remove kept for backward compatibility
+.RE
+.PP
+\fBready\fR \fI<device>\fR
+.RS 4
+Wait until all devices of a multiple\-device filesystem are scanned and registered within the kernel module\&. This is to provide a way for automatic filesystem mounting tools to wait before the mount can start\&. The device scan is only one of the preconditions and the mount can fail for other reasons\&. Normal users usually do not need this command and may safely ignore it\&.
+.RE
+.PP
+\fBscan\fR [(\-\-all\-devices|\-d)|\fI<device>\fR [\fI<device>\fR\&...]]
+.RS 4
+Scan devices for a btrfs filesystem and register them with the kernel module\&. This allows mounting multiple\-device filesystem by specifying just one from the whole group\&.
+.sp
+If no devices are passed, all block devices that blkid reports to contain btrfs are scanned\&.
+.sp
+The options
+\fI\-\-all\-devices\fR
+or
+\fI\-d\fR
+are deprecated and kept for backward compatibility\&. If used, behavior is the same as if no devices are passed\&.
+.sp
+The command can be run repeatedly\&. Devices that have been already registered remain as such\&. Reloading the kernel module will drop this information\&. There\(cqs an alternative way of mounting multiple\-device filesystem without the need for prior scanning\&. See the mount option
+\fIdevice\fR\&.
+.RE
+.PP
+\fBstats\fR [options] \fI<path>\fR|\fI<device>\fR
+.RS 4
+Read and print the device IO error statistics for all devices of the given filesystem identified by
+\fI<path>\fR
+or for a single
+\fI<device>\fR\&. The filesystem must be mounted\&. See section
+\fBDEVICE STATS\fR
+for more information about the reported statistics and the meaning\&.
+.sp
+\fBOptions\fR
+.PP
+\-z|\-\-reset
+.RS 4
+Print the stats and reset the values to zero afterwards\&.
+.RE
+.PP
+\-c|\-\-check
+.RS 4
+Check if the stats are all zeros and return 0 if it is so\&. Set bit 6 of the return code if any of the statistics is non\-zero\&. The error value is 65 if reading stats from at least one device failed, otherwise it\(cqs 64\&.
+.RE
+.RE
+.PP
+\fBusage\fR [options] \fI<path>\fR [\fI<path>\fR\&...]
+.RS 4
+Show detailed information about internal allocations in devices\&.
+.sp
+\fBOptions\fR
+.PP
+\-b|\-\-raw
+.RS 4
+raw numbers in bytes, without the
+\fIB\fR
+suffix
+.RE
+.PP
+\-h|\-\-human\-readable
+.RS 4
+print human friendly numbers, base 1024, this is the default
+.RE
+.PP
+\-H
+.RS 4
+print human friendly numbers, base 1000
+.RE
+.PP
+\-\-iec
+.RS 4
+select the 1024 base for the following options, according to the IEC standard
+.RE
+.PP
+\-\-si
+.RS 4
+select the 1000 base for the following options, according to the SI standard
+.RE
+.PP
+\-k|\-\-kbytes
+.RS 4
+show sizes in KiB, or kB with \-\-si
+.RE
+.PP
+\-m|\-\-mbytes
+.RS 4
+show sizes in MiB, or MB with \-\-si
+.RE
+.PP
+\-g|\-\-gbytes
+.RS 4
+show sizes in GiB, or GB with \-\-si
+.RE
+.PP
+\-t|\-\-tbytes
+.RS 4
+show sizes in TiB, or TB with \-\-si
+.RE
+.RE
+.sp
+If conflicting options are passed, the last one takes precedence\&.
+.SH "TYPICAL USECASES"
+.SS "STARTING WITH A SINGLE\-DEVICE FILESYSTEM"
+.sp
+Assume we\(cqve created a filesystem on a block device \fI/dev/sda\fR with profile \fIsingle/single\fR (data/metadata), the device size is 50GiB and we\(cqve used the whole device for the filesystem\&. The mount point is \fI/mnt\fR\&.
+.sp
+The amount of data stored is 16GiB, metadata have allocated 2GiB\&.
+.sp
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBADD NEW DEVICE\fR
+.RS 4
+.sp
+We want to increase the total size of the filesystem and keep the profiles\&. The size of the new device \fI/dev/sdb\fR is 100GiB\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs device add /dev/sdb /mnt
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The amount of free data space increases by less than 100GiB, some space is allocated for metadata\&.
+.RE
+.sp
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBCONVERT TO RAID1\fR
+.RS 4
+.sp
+Now we want to increase the redundancy level of both data and metadata, but we\(cqll do that in steps\&. Note, that the device sizes are not equal and we\(cqll use that to show the capabilities of split data/metadata and independent profiles\&.
+.sp
+The constraint for RAID1 gives us at most 50GiB of usable space and exactly 2 copies will be stored on the devices\&.
+.sp
+First we\(cqll convert the metadata\&. As the metadata occupy less than 50GiB and there\(cqs enough workspace for the conversion process, we can do:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs balance start \-mconvert=raid1 /mnt
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+This operation can take a while, because all metadata have to be moved and all block pointers updated\&. Depending on the physical locations of the old and new blocks, the disk seeking is the key factor affecting performance\&.
+.sp
+You\(cqll note that the system block group has been also converted to RAID1, this normally happens as the system block group also holds metadata (the physical to logical mappings)\&.
+.sp
+What changed:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+available data space decreased by 3GiB, usable roughly (50 \- 3) + (100 \- 3) = 144 GiB
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+metadata redundancy increased
+.RE
+.sp
+IOW, the unequal device sizes allow for combined space for data yet improved redundancy for metadata\&. If we decide to increase redundancy of data as well, we\(cqre going to lose 50GiB of the second device for obvious reasons\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs balance start \-dconvert=raid1 /mnt
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The balance process needs some workspace (ie\&. a free device space without any data or metadata block groups) so the command could fail if there\(cqs too much data or the block groups occupy the whole first device\&.
+.sp
+The device size of \fI/dev/sdb\fR as seen by the filesystem remains unchanged, but the logical space from 50\-100GiB will be unused\&.
+.RE
+.sp
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBREMOVE DEVICE\fR
+.RS 4
+.sp
+Device removal must satisfy the profile constraints, otherwise the command fails\&. For example:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs device remove /dev/sda /mnt
+ERROR: error removing device \*(Aq/dev/sda\*(Aq: unable to go below two devices on raid1
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+In order to remove a device, you need to convert the profile in this case:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs balance start \-mconvert=dup \-dconvert=single /mnt
+$ btrfs device remove /dev/sda /mnt
+.fi
+.if n \{\
+.RE
+.\}
+.RE
+.SH "DEVICE STATS"
+.sp
+The device stats keep persistent record of several error classes related to doing IO\&. The current values are printed at mount time and updated during filesystem lifetime or from a scrub run\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs device stats /dev/sda3
+[/dev/sda3]\&.write_io_errs   0
+[/dev/sda3]\&.read_io_errs    0
+[/dev/sda3]\&.flush_io_errs   0
+[/dev/sda3]\&.corruption_errs 0
+[/dev/sda3]\&.generation_errs 0
+.fi
+.if n \{\
+.RE
+.\}
+.PP
+write_io_errs
+.RS 4
+Failed writes to the block devices, means that the layers beneath the filesystem were not able to satisfy the write request\&.
+.RE
+.PP
+read_io_errs
+.RS 4
+Read request analogy to write_io_errs\&.
+.RE
+.PP
+flush_io_errs
+.RS 4
+Number of failed writes with the
+\fIFLUSH\fR
+flag set\&. The flushing is a method of forcing a particular order between write requests and is crucial for implementing crash consistency\&. In case of btrfs, all the metadata blocks must be permanently stored on the block device before the superblock is written\&.
+.RE
+.PP
+corruption_errs
+.RS 4
+A block checksum mismatched or a corrupted metadata header was found\&.
+.RE
+.PP
+generation_errs
+.RS 4
+The block generation does not match the expected value (eg\&. stored in the parent node)\&.
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs device\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.sp
+If the \fI\-c\fR option is used, \fBbtrfs device stats\fR will add 64 to the exit status if any of the error counters is non\-zero\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-replace\fR(8), \fBbtrfs\-balance\fR(8)
diff --git a/Documentation/btrfs-device.8.gz b/Documentation/btrfs-device.8.gz
deleted file mode 100644
index 668dbc67..00000000
--- a/Documentation/btrfs-device.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-filesystem.8 b/Documentation/btrfs-filesystem.8
new file mode 100644
index 00000000..94e4647d
--- /dev/null
+++ b/Documentation/btrfs-filesystem.8
@@ -0,0 +1,686 @@
+'\" t
+.\"     Title: btrfs-filesystem
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-FILESYSTEM" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-filesystem \- command group that primarily does work on the whole filesystems
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs filesystem\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs filesystem\fR is used to perform several whole filesystem level tasks, including all the regular filesystem operations like resizing, space stats, label setting/getting, and defragmentation\&. There are other whole filesystem tasks like scrub or balance that are grouped in separate commands\&.
+.SH "SUBCOMMAND"
+.PP
+\fBdf\fR [options] \fI<path>\fR
+.RS 4
+Show a terse summary information about allocation of block group types of a given mount point\&. The original purpose of this command was a debugging helper\&. The output needs to be further interpreted and is not suitable for quick overview\&.
+.sp
+An example with description:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+device size:
+\fI1\&.9TiB\fR, one device, no RAID
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+filesystem size:
+\fI1\&.9TiB\fR
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+created with:
+\fImkfs\&.btrfs \-d single \-m single\fR
+.RE
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs filesystem df /path
+Data, single: total=1\&.15TiB, used=1\&.13TiB
+System, single: total=32\&.00MiB, used=144\&.00KiB
+Metadata, single: total=12\&.00GiB, used=6\&.45GiB
+GlobalReserve, single: total=512\&.00MiB, used=0\&.00B
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+\fIData\fR,
+\fISystem\fR
+and
+\fIMetadata\fR
+are separate block group types\&.
+\fIGlobalReserve\fR
+is an artificial and internal emergency space, see below\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+\fIsingle\fR \(em the allocation profile, defined at mkfs time
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+\fItotal\fR \(em sum of space reserved for all allocation profiles of the given type, ie\&. all Data/single\&. Note that it\(cqs not total size of filesystem\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+\fIused\fR \(em sum of used space of the above, ie\&. file extents, metadata blocks
+.RE
+.sp
+\fIGlobalReserve\fR
+is an artificial and internal emergency space\&. It is used eg\&. when the filesystem is full\&. Its
+\fItotal\fR
+size is dynamic based on the filesystem size, usually not larger than 512MiB,
+\fIused\fR
+may fluctuate\&.
+.sp
+The GlobalReserve is a portion of Metadata\&. In case the filesystem metadata is exhausted,
+\fIGlobalReserve/total + Metadata/used = Metadata/total\fR\&. Otherwise there appears to be some unused space of Metadata\&.
+.sp
+\fBOptions\fR
+.PP
+\-b|\-\-raw
+.RS 4
+raw numbers in bytes, without the
+\fIB\fR
+suffix
+.RE
+.PP
+\-h|\-\-human\-readable
+.RS 4
+print human friendly numbers, base 1024, this is the default
+.RE
+.PP
+\-H
+.RS 4
+print human friendly numbers, base 1000
+.RE
+.PP
+\-\-iec
+.RS 4
+select the 1024 base for the following options, according to the IEC standard
+.RE
+.PP
+\-\-si
+.RS 4
+select the 1000 base for the following options, according to the SI standard
+.RE
+.PP
+\-k|\-\-kbytes
+.RS 4
+show sizes in KiB, or kB with \-\-si
+.RE
+.PP
+\-m|\-\-mbytes
+.RS 4
+show sizes in MiB, or MB with \-\-si
+.RE
+.PP
+\-g|\-\-gbytes
+.RS 4
+show sizes in GiB, or GB with \-\-si
+.RE
+.PP
+\-t|\-\-tbytes
+.RS 4
+show sizes in TiB, or TB with \-\-si
+.sp
+If conflicting options are passed, the last one takes precedence\&.
+.RE
+.RE
+.PP
+\fBdefragment\fR [options] \fI<file>\fR|\fI<dir>\fR [\fI<file>\fR|\fI<dir>\fR\&...]
+.RS 4
+Defragment file data on a mounted filesystem\&. Requires kernel 2\&.6\&.33 and newer\&.
+.sp
+If
+\fI\-r\fR
+is passed, files in dir will be defragmented recursively\&. The start position and the number of bytes to defragment can be specified by start and length using
+\fI\-s\fR
+and
+\fI\-l\fR
+options below\&. Extents bigger than value given by
+\fI\-t\fR
+will be skipped, otherwise this value is used as a target extent size, but is only advisory and may not be reached if the free space is too fragmented\&. Use 0 to take the kernel default, which is 256kB but may change in the future\&. You can also turn on compression in defragment operations\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+Defragmenting with Linux kernel versions < 3\&.9 or \(>= 3\&.14\-rc2 as well as with Linux stable kernel versions \(>= 3\&.10\&.31, \(>= 3\&.12\&.12 or \(>= 3\&.13\&.4 will break up the reflinks of COW data (for example files copied with
+\fBcp \-\-reflink\fR, snapshots or de\-duplicated data)\&. This may cause considerable increase of space usage depending on the broken up reflinks\&.
+.sp .5v
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+Directory arguments without
+\fI\-r\fR
+do not defragment files recursively but will defragment certain internal trees (extent tree and the subvolume tree)\&. This has been confusing and could be removed in the future\&.
+.sp .5v
+.RE
+For
+\fIstart\fR,
+\fIlen\fR,
+\fIsize\fR
+it is possible to append units designator: \*(AqK\*(Aq, \*(AqM\*(Aq, \*(AqG\*(Aq, \*(AqT\*(Aq, \*(AqP\*(Aq, or \*(AqE\*(Aq, which represent KiB, MiB, GiB, TiB, PiB, or EiB, respectively (case does not matter)\&.
+.sp
+\fBOptions\fR
+.PP
+\-v
+.RS 4
+be verbose, print file names as they\(cqre submitted for defragmentation
+.RE
+.PP
+\-c[\fI<algo>\fR]
+.RS 4
+compress file contents while defragmenting\&. Optional argument selects the compression algorithm,
+\fIzlib\fR
+(default),
+\fIlzo\fR
+or
+\fIzstd\fR\&. Currently it\(cqs not possible to select no compression\&. See also section
+\fIEXAMPLES\fR\&.
+.RE
+.PP
+\-r
+.RS 4
+defragment files recursively in given directories
+.RE
+.PP
+\-f
+.RS 4
+flush data for each file before going to the next file\&.
+.sp
+This will limit the amount of dirty data to current file, otherwise the amount accumulates from several files and will increase system load\&. This can also lead to ENOSPC if there\(cqs too much dirty data to write and it\(cqs not possible to make the reservations for the new data (ie\&. how the COW design works)\&.
+.RE
+.PP
+\-s \fI<start>\fR[kKmMgGtTpPeE]
+.RS 4
+defragmentation will start from the given offset, default is beginning of a file
+.RE
+.PP
+\-l \fI<len>\fR[kKmMgGtTpPeE]
+.RS 4
+defragment only up to
+\fIlen\fR
+bytes, default is the file size
+.RE
+.PP
+\-t \fI<size>\fR[kKmMgGtTpPeE]
+.RS 4
+target extent size, do not touch extents bigger than
+\fIsize\fR, default: 32M
+.sp
+The value is only advisory and the final size of the extents may differ, depending on the state of the free space and fragmentation or other internal logic\&. Reasonable values are from tens to hundreds of megabytes\&.
+.RE
+.RE
+.PP
+\fBdu\fR [options] \fI<path>\fR [\fI<path>\fR\&.\&.]
+.RS 4
+Calculate disk usage of the target files using FIEMAP\&. For individual files, it will report a count of total bytes, and exclusive (not shared) bytes\&. We also calculate a
+\fIset shared\fR
+value which is described below\&.
+.sp
+Each argument to
+\fIbtrfs filesystem du\fR
+will have a
+\fIset shared\fR
+value calculated for it\&. We define each
+\fIset\fR
+as those files found by a recursive search of an argument\&. The
+\fIset shared\fR
+value then is a sum of all shared space referenced by the set\&.
+.sp
+\fIset shared\fR
+takes into account overlapping shared extents, hence it isn\(cqt as simple as adding up shared extents\&.
+.sp
+\fBOptions\fR
+.PP
+\-s|\-\-summarize
+.RS 4
+display only a total for each argument
+.RE
+.PP
+\-\-raw
+.RS 4
+raw numbers in bytes, without the
+\fIB\fR
+suffix\&.
+.RE
+.PP
+\-\-human\-readable
+.RS 4
+print human friendly numbers, base 1024, this is the default
+.RE
+.PP
+\-\-iec
+.RS 4
+select the 1024 base for the following options, according to the IEC standard\&.
+.RE
+.PP
+\-\-si
+.RS 4
+select the 1000 base for the following options, according to the SI standard\&.
+.RE
+.PP
+\-\-kbytes
+.RS 4
+show sizes in KiB, or kB with \-\-si\&.
+.RE
+.PP
+\-\-mbytes
+.RS 4
+show sizes in MiB, or MB with \-\-si\&.
+.RE
+.PP
+\-\-gbytes
+.RS 4
+show sizes in GiB, or GB with \-\-si\&.
+.RE
+.PP
+\-\-tbytes
+.RS 4
+show sizes in TiB, or TB with \-\-si\&.
+.RE
+.RE
+.PP
+\fBlabel\fR [\fI<device>\fR|\fI<mountpoint>\fR] [\fI<newlabel>\fR]
+.RS 4
+Show or update the label of a filesystem\&. This works on a mounted filesystem or a filesystem image\&.
+.sp
+The
+\fInewlabel\fR
+argument is optional\&. Current label is printed if the argument is omitted\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+the maximum allowable length shall be less than 256 chars and must not contain a newline\&. The trailing newline is stripped automatically\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBresize\fR [\fI<devid>\fR:][+/\-]\fI<size>\fR[kKmMgGtTpPeE]|[\fI<devid>\fR:]max \fI<path>\fR
+.RS 4
+Resize a mounted filesystem identified by
+\fIpath\fR\&. A particular device can be resized by specifying a
+\fIdevid\fR\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+If
+\fIpath\fR
+is a file containing a BTRFS image then resize does not work as expected and does not resize the image\&. This would resize the underlying filesystem instead\&.
+.sp .5v
+.RE
+The
+\fIdevid\fR
+can be found in the output of
+\fBbtrfs filesystem show\fR
+and defaults to 1 if not specified\&. The
+\fIsize\fR
+parameter specifies the new size of the filesystem\&. If the prefix
+\fI+\fR
+or
+\fI\-\fR
+is present the size is increased or decreased by the quantity
+\fIsize\fR\&. If no units are specified, bytes are assumed for
+\fIsize\fR\&. Optionally, the size parameter may be suffixed by one of the following unit designators: \*(AqK\*(Aq, \*(AqM\*(Aq, \*(AqG\*(Aq, \*(AqT\*(Aq, \*(AqP\*(Aq, or \*(AqE\*(Aq, which represent KiB, MiB, GiB, TiB, PiB, or EiB, respectively (case does not matter)\&.
+.sp
+If
+\fImax\fR
+is passed, the filesystem will occupy all available space on the device respecting
+\fIdevid\fR
+(remember, devid 1 by default)\&.
+.sp
+The resize command does not manipulate the size of underlying partition\&. If you wish to enlarge/reduce a filesystem, you must make sure you can expand the partition before enlarging the filesystem and shrink the partition after reducing the size of the filesystem\&. This can be done using
+\fBfdisk\fR(8) or
+\fBparted\fR(8) to delete the existing partition and recreate it with the new desired size\&. When recreating the partition make sure to use the same starting partition offset as before\&.
+.sp
+Growing is usually instant as it only updates the size\&. However, shrinking could take a long time if there are data in the device area that\(cqs beyond the new end\&. Relocation of the data takes time\&.
+.sp
+See also section
+\fIEXAMPLES\fR\&.
+.RE
+.PP
+\fBshow\fR [options] [\fI<path>\fR|\fI<uuid>\fR|\fI<device>\fR|\fI<label>\fR]
+.RS 4
+Show the btrfs filesystem with some additional info about devices and space allocation\&.
+.sp
+If none of
+\fIpath\fR/\fIuuid\fR/\fIdevice\fR/\fIlabel\fR
+is passed, information about all the BTRFS filesystems is shown, both mounted and unmounted\&.
+.sp
+\fBOptions\fR
+.PP
+\-m|\-\-mounted
+.RS 4
+probe kernel for mounted BTRFS filesystems
+.RE
+.PP
+\-d|\-\-all\-devices
+.RS 4
+scan all devices under /dev, otherwise the devices list is extracted from the /proc/partitions file\&. This is a fallback option if there\(cqs no device node manager (like udev) available in the system\&.
+.RE
+.PP
+\-\-raw
+.RS 4
+raw numbers in bytes, without the
+\fIB\fR
+suffix
+.RE
+.PP
+\-\-human\-readable
+.RS 4
+print human friendly numbers, base 1024, this is the default
+.RE
+.PP
+\-\-iec
+.RS 4
+select the 1024 base for the following options, according to the IEC standard
+.RE
+.PP
+\-\-si
+.RS 4
+select the 1000 base for the following options, according to the SI standard
+.RE
+.PP
+\-\-kbytes
+.RS 4
+show sizes in KiB, or kB with \-\-si
+.RE
+.PP
+\-\-mbytes
+.RS 4
+show sizes in MiB, or MB with \-\-si
+.RE
+.PP
+\-\-gbytes
+.RS 4
+show sizes in GiB, or GB with \-\-si
+.RE
+.PP
+\-\-tbytes
+.RS 4
+show sizes in TiB, or TB with \-\-si
+.RE
+.RE
+.PP
+\fBsync\fR \fI<path>\fR
+.RS 4
+Force a sync of the filesystem at
+\fIpath\fR\&. This is done via a special ioctl and will also trigger cleaning of deleted subvolumes\&. Besides that it\(cqs equivalent to the
+\fBsync\fR(1) command\&.
+.RE
+.PP
+\fBusage\fR [options] \fI<path>\fR [\fI<path>\fR\&...]
+.RS 4
+Show detailed information about internal filesystem usage\&. This is supposed to replace the
+\fBbtrfs filesystem df\fR
+command in the long run\&.
+.sp
+The level of detail can differ if the command is run under a regular or the root user (due to use of restricted ioctl)\&. For both there\(cqs a summary section with information about space usage:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ btrfs filesystem usage /path
+WARNING: cannot read detailed chunk info, RAID5/6 numbers will be incorrect, run as root
+Overall:
+    Device size:                   1\&.82TiB
+    Device allocated:              1\&.17TiB
+    Device unallocated:          669\&.99GiB
+    Device missing:                  0\&.00B
+    Used:                          1\&.14TiB
+    Free (estimated):            692\&.57GiB      (min: 692\&.57GiB)
+    Data ratio:                       1\&.00
+    Metadata ratio:                   1\&.00
+    Global reserve:              512\&.00MiB      (used: 0\&.00B)
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The root user will also see stats broken down by block group types:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+Data,single: Size:1\&.15TiB, Used:1\&.13TiB
+   /dev/sdb        1\&.15TiB
+
+Metadata,single: Size:12\&.00GiB, Used:6\&.45GiB
+   /dev/sdb       12\&.00GiB
+
+System,single: Size:32\&.00MiB, Used:144\&.00KiB
+   /dev/sdb       32\&.00MiB
+
+Unallocated:
+   /dev/sdb      669\&.99GiB
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+\fBOptions\fR
+.PP
+\-b|\-\-raw
+.RS 4
+raw numbers in bytes, without the
+\fIB\fR
+suffix
+.RE
+.PP
+\-h|\-\-human\-readable
+.RS 4
+print human friendly numbers, base 1024, this is the default
+.RE
+.PP
+\-H
+.RS 4
+print human friendly numbers, base 1000
+.RE
+.PP
+\-\-iec
+.RS 4
+select the 1024 base for the following options, according to the IEC standard
+.RE
+.PP
+\-\-si
+.RS 4
+select the 1000 base for the following options, according to the SI standard
+.RE
+.PP
+\-k|\-\-kbytes
+.RS 4
+show sizes in KiB, or kB with \-\-si
+.RE
+.PP
+\-m|\-\-mbytes
+.RS 4
+show sizes in MiB, or MB with \-\-si
+.RE
+.PP
+\-g|\-\-gbytes
+.RS 4
+show sizes in GiB, or GB with \-\-si
+.RE
+.PP
+\-t|\-\-tbytes
+.RS 4
+show sizes in TiB, or TB with \-\-si
+.RE
+.PP
+\-T
+.RS 4
+show data in tabular format
+.sp
+If conflicting options are passed, the last one takes precedence\&.
+.RE
+.RE
+.SH "EXAMPLES"
+.sp
+\fB$ btrfs filesystem defrag \-v \-r dir/\fR
+.sp
+Recursively defragment files under \fIdir/\fR, print files as they are processed\&. The file names will be printed in batches, similarly the amount of data triggered by defragmentation will be proportional to last N printed files\&. The system dirty memory throttling will slow down the defragmentation but there can still be a lot of IO load and the system may stall for a moment\&.
+.sp
+\fB$ btrfs filesystem defrag \-v \-r \-f dir/\fR
+.sp
+Recursively defragment files under \fIdir/\fR, be verbose and wait until all blocks are flushed before processing next file\&. You can note slower progress of the output and lower IO load (proportional to currently defragmented file)\&.
+.sp
+\fB$ btrfs filesystem defrag \-v \-r \-f \-clzo dir/\fR
+.sp
+Recursively defragment files under \fIdir/\fR, be verbose, wait until all blocks are flushed and force file compression\&.
+.sp
+\fB$ btrfs filesystem defrag \-v \-r \-t 64M dir/\fR
+.sp
+Recursively defragment files under \fIdir/\fR, be verbose and try to merge extents to be about 64MiB\&. As stated above, the success rate depends on actual free space fragmentation and the final result is not guaranteed to meet the target even if run repeatedly\&.
+.sp
+\fB$ btrfs filesystem resize \-1G /path\fR
+.sp
+\fB$ btrfs filesystem resize 1:\-1G /path\fR
+.sp
+Shrink size of the filesystem\(cqs device id 1 by 1GiB\&. The first syntax expects a device with id 1 to exist, otherwise fails\&. The second is equivalent and more explicit\&. For a single\-device filesystem it\(cqs typically not necessary to specify the devid though\&.
+.sp
+\fB$ btrfs filesystem resize max /path\fR
+.sp
+\fB$ btrfs filesystem resize 1:max /path\fR
+.sp
+Let\(cqs assume that devid 1 exists and the filesystem does not occupy the whole block device, eg\&. it has been enlarged and we want to grow the filesystem\&. By simply using \fImax\fR as size we will achieve that\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+There are two ways to minimize the filesystem on a given device\&. The \fBbtrfs inspect\-internal min\-dev\-size\fR command, or iteratively shrink in steps\&.
+.sp .5v
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs filesystem\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8),
diff --git a/Documentation/btrfs-filesystem.8.gz b/Documentation/btrfs-filesystem.8.gz
deleted file mode 100644
index 5da186b2..00000000
--- a/Documentation/btrfs-filesystem.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-find-root.8 b/Documentation/btrfs-find-root.8
new file mode 100644
index 00000000..bda5d689
--- /dev/null
+++ b/Documentation/btrfs-find-root.8
@@ -0,0 +1,64 @@
+'\" t
+.\"     Title: btrfs-find-root
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-FIND\-ROOT" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-find-root \- filter to find btrfs root
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs\-find\-root\fR [options] \fI<device>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs\-find\-root\fR is used to find a root that satisfies the given criteria; you can filter by root tree\(cqs objectid, generation, level\&.
+.SH "OPTIONS"
+.PP
+\-a
+.RS 4
+Search through all metadata extents, even if the root has already been found\&.
+.RE
+.PP
+\-g \fI<generation>\fR
+.RS 4
+Filter root tree by its original transaction id, tree root\(cqs generation by default\&.
+.RE
+.PP
+\-o \fI<objectid>\fR
+.RS 4
+Filter root tree by its objectid, tree root\(cqs objectid by default\&.
+.RE
+.PP
+\-l \fI<level>\fR
+.RS 4
+Filter root tree by B\-+ tree\(cqs level, level 0 by default\&.
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs\-find\-root\fR will return 0 if no error happened\&. If any problems happened, 1 will be returned\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8)
diff --git a/Documentation/btrfs-find-root.8.gz b/Documentation/btrfs-find-root.8.gz
deleted file mode 100644
index 9143c5a7..00000000
--- a/Documentation/btrfs-find-root.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-image.8 b/Documentation/btrfs-image.8
new file mode 100644
index 00000000..b97b846f
--- /dev/null
+++ b/Documentation/btrfs-image.8
@@ -0,0 +1,83 @@
+'\" t
+.\"     Title: btrfs-image
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-IMAGE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-image \- create/restore an image of the filesystem
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs\-image\fR [options] \fI<source>\fR \fI<target>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs\-image\fR is used to create an image of a btrfs filesystem\&. All data will be zeroed, but metadata and the like is preserved\&. Mainly used for debugging purposes\&.
+.sp
+In the dump mode, source is the btrfs device/file and target is the output file (use \fI\-\fR for stdout)\&.
+.sp
+In the restore mode (option \-r), source is the dumped image and target is the btrfs device/file\&.
+.SH "OPTIONS"
+.PP
+\-r
+.RS 4
+Restore metadump image\&. By default, this fixes super\(cqs chunk tree, by using 1 stripe pointing to primary device, so that file system can be restored by running tree log replay if possible\&. To restore without changing number of stripes in chunk tree check \-o option\&.
+.RE
+.PP
+\-c \fI<value>\fR
+.RS 4
+Compression level (0 ~ 9)\&.
+.RE
+.PP
+\-t \fI<value>\fR
+.RS 4
+Number of threads (1 ~ 32) to be used to process the image dump or restore\&.
+.RE
+.PP
+\-o
+.RS 4
+Use the old restore method, this does not fixup the chunk tree so the restored file system will not be able to be mounted\&.
+.RE
+.PP
+\-s
+.RS 4
+Sanitize the file names when generating the image\&. One \-s means just generate random garbage, which means that the directory indexes won\(cqt match up since the hashes won\(cqt match with the garbage filenames\&. Using \-ss will calculate a collision for the filename so that the hashes match, and if it can\(cqt calculate a collision then it will just generate garbage\&. The collision calculator is very time and CPU intensive so only use it if you are having problems with your file system tree and need to have it mostly working\&.
+.RE
+.PP
+\-w
+.RS 4
+Walk all the trees manually and copy any blocks that are referenced\&. Use this option if your extent tree is corrupted to make sure that all of the metadata is captured\&.
+.RE
+.PP
+\-m
+.RS 4
+Restore for multiple devices, more than 1 device should be provided\&.
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs\-image\fR will return 0 if no error happened\&. If any problems happened, 1 will be returned\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8)
diff --git a/Documentation/btrfs-image.8.gz b/Documentation/btrfs-image.8.gz
deleted file mode 100644
index 7b9b34e7..00000000
--- a/Documentation/btrfs-image.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-inspect-internal.8 b/Documentation/btrfs-inspect-internal.8
new file mode 100644
index 00000000..3cd467ac
--- /dev/null
+++ b/Documentation/btrfs-inspect-internal.8
@@ -0,0 +1,355 @@
+'\" t
+.\"     Title: btrfs-inspect-internal
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-INSPECT\-INTE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-inspect-internal \- query various internal information
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs inspect\-internal\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+This command group provides an interface to query internal information\&. The functionality ranges from a simple UI to an ioctl or a more complex query that assembles the result from several internal structures\&. The latter usually requires calls to privileged ioctls\&.
+.SH "SUBCOMMAND"
+.PP
+\fBdump\-super\fR [options] \fI<device>\fR [device\&...]
+.RS 4
+(replaces the standalone tool
+\fBbtrfs\-show\-super\fR)
+.sp
+Show btrfs superblock information stored on given devices in textual form\&. By default the first superblock is printed, more details about all copies or additional backup data can be printed\&.
+.sp
+Besides verification of the filesystem signature, there are no other sanity checks\&. The superblock checksum status is reported, the device item and filesystem UUIDs are checked and reported\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+the meaning of option
+\fI\-s\fR
+has changed in version 4\&.8 to be consistent with other tools to specify superblock copy rather than the offset\&. The old way still works, but prints a warning\&. Please update your scripts to use
+\fI\-\-bytenr\fR
+instead\&. The option
+\fI\-i\fR
+has been deprecated\&.
+.sp .5v
+.RE
+\fBOptions\fR
+.PP
+\-f|\-\-full
+.RS 4
+print full superblock information, including the system chunk array and backup roots
+.RE
+.PP
+\-a|\-\-all
+.RS 4
+print information about all present superblock copies (cannot be used together with
+\fI\-s\fR
+option)
+.RE
+.PP
+\-i \fI<super>\fR
+.RS 4
+(deprecated since 4\&.8, same behaviour as
+\fI\-\-super\fR)
+.RE
+.PP
+\-\-bytenr \fI<bytenr>\fR
+.RS 4
+specify offset to a superblock in a non\-standard location at
+\fIbytenr\fR, useful for debugging (disables the
+\fI\-f\fR
+option)
+.sp
+If there are multiple options specified, only the last one applies\&.
+.RE
+.PP
+\-F|\-\-force
+.RS 4
+attempt to print the superblock even if a valid BTRFS signature is not found; the result may be completely wrong if the data does not resemble a superblock
+.RE
+.PP
+\-s|\-\-super \fI<bytenr>\fR
+.RS 4
+(see compatibility note above)
+.sp
+specify which mirror to print, valid values are 0, 1 and 2 and the superblock must be present on the device with a valid signature, can be used together with
+\fI\-\-force\fR
+.RE
+.RE
+.PP
+\fBdump\-tree\fR [options] \fI<device>\fR
+.RS 4
+(replaces the standalone tool
+\fBbtrfs\-debug\-tree\fR)
+.sp
+Dump tree structures from a given device in textual form, expand keys to human readable equivalents where possible\&. This is useful for analyzing filesystem state or inconsistencies and has a positive educational effect on understanding the internal filesystem structure\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+contains file names, consider that if you\(cqre asked to send the dump for analysis\&. Does not contain file data\&.
+.sp .5v
+.RE
+\fBOptions\fR
+.PP
+\-e|\-\-extents
+.RS 4
+print only extent\-related information: extent and device trees
+.RE
+.PP
+\-d|\-\-device
+.RS 4
+print only device\-related information: tree root, chunk and device trees
+.RE
+.PP
+\-r|\-\-roots
+.RS 4
+print only short root node information, ie\&. the root tree keys
+.RE
+.PP
+\-R|\-\-backups
+.RS 4
+same as \-\-roots plus print backup root info, ie\&. the backup root keys and the respective tree root block offset
+.RE
+.PP
+\-u|\-\-uuid
+.RS 4
+print only the uuid tree information, empty output if the tree does not exist
+.RE
+.PP
+\-b \fI<block_num>\fR
+.RS 4
+print info of the specified block only
+.RE
+.PP
+\-\-follow
+.RS 4
+use with
+\fI\-b\fR, print all children tree blocks of
+\fI\fI<block_num>\fR\fR
+.RE
+.PP
+\-\-dfs
+.RS 4
+use depth\-first search to print trees\&. (default) the nodes and leaves are intermixed in the output
+.RE
+.PP
+\-\-bfs
+.RS 4
+use breadth\-first search to print trees\&. the nodes are printed before all leaves
+.RE
+.PP
+\-t \fI<tree_id>\fR
+.RS 4
+print only the tree with the specified ID, where the ID can be numerical or common name in a flexible human readable form
+.sp
+The tree id name recognition rules:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+case does not matter
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+the C source definition, eg\&. BTRFS_ROOT_TREE_OBJECTID
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+short forms without BTRFS_ prefix, without _TREE and _OBJECTID suffix, eg\&. ROOT_TREE, ROOT
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+convenience aliases, eg\&. DEVICE for the DEV tree, CHECKSUM for CSUM
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+unrecognized ID is an error
+.RE
+.RE
+.RE
+.PP
+\fBinode\-resolve\fR [\-v] \fI<ino>\fR \fI<path>\fR
+.RS 4
+(needs root privileges)
+.sp
+resolve paths to all files with given inode number
+\fIino\fR
+in a given subvolume at
+\fIpath\fR, ie\&. all hardlinks
+.sp
+\fBOptions\fR
+.PP
+\-v
+.RS 4
+verbose mode, print count of returned paths and ioctl() return value
+.RE
+.RE
+.PP
+\fBlogical\-resolve\fR [\-Pv] [\-s \fI<bufsize>\fR] \fI<logical>\fR \fI<path>\fR
+.RS 4
+(needs root privileges)
+.sp
+resolve paths to all files at given
+\fIlogical\fR
+address in the linear filesystem space
+.sp
+\fBOptions\fR
+.PP
+\-P
+.RS 4
+skip the path resolving and print the inodes instead
+.RE
+.PP
+\-v
+.RS 4
+verbose mode, print count of returned paths and all ioctl() return values
+.RE
+.PP
+\-s \fI<bufsize>\fR
+.RS 4
+set internal buffer for storing the file names to
+\fIbufsize\fR, default is 4096, maximum 64k
+.RE
+.RE
+.PP
+\fBmin\-dev\-size\fR [options] \fI<path>\fR
+.RS 4
+(needs root privileges)
+.sp
+return the minimum size the device can be shrunk to, without performing any resize operation, this may be useful before executing the actual resize operation
+.sp
+\fBOptions\fR
+.PP
+\-\-id \fI<id>\fR
+.RS 4
+specify the device
+\fIid\fR
+to query, default is 1 if this option is not used
+.RE
+.RE
+.PP
+\fBrootid\fR \fI<path>\fR
+.RS 4
+for a given file or directory, return the containing tree root id, but for a subvolume itself return its own tree id (ie\&. subvol id)
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+The result is undefined for the so\-called empty subvolumes (identified by inode number 2), but such a subvolume does not contain any files anyway
+.sp .5v
+.RE
+.RE
+.PP
+\fBsubvolid\-resolve\fR \fI<subvolid>\fR \fI<path>\fR
+.RS 4
+(needs root privileges)
+.sp
+resolve the absolute path of the subvolume id
+\fIsubvolid\fR
+.RE
+.PP
+\fBtree\-stats\fR [options] \fI<device>\fR
+.RS 4
+(needs root privileges)
+.sp
+Print sizes and statistics of trees\&.
+.sp
+\fBOptions\fR
+.PP
+\-b
+.RS 4
+Print raw numbers in bytes\&.
+.RE
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs inspect\-internal\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8)
diff --git a/Documentation/btrfs-inspect-internal.8.gz b/Documentation/btrfs-inspect-internal.8.gz
deleted file mode 100644
index ea67330c..00000000
--- a/Documentation/btrfs-inspect-internal.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-inspect-internal.asciidoc b/Documentation/btrfs-inspect-internal.asciidoc
index e2db6466..381497d2 100644
--- a/Documentation/btrfs-inspect-internal.asciidoc
+++ b/Documentation/btrfs-inspect-internal.asciidoc
@@ -89,6 +89,12 @@ print only the uuid tree information, empty output if the tree does not exist
 print info of the specified block only
 --follow::::
 use with '-b', print all children tree blocks of '<block_num>'
+--dfs::::
+use depth-first search to print trees. (default) the nodes and leaves are
+intermixed in the output
+--bfs::::
+use breadth-first search to print trees. the nodes are printed before all
+leaves
 -t <tree_id>::::
 print only the tree with the specified ID, where the ID can be numerical or
 common name in a flexible human readable form
diff --git a/Documentation/btrfs-man5.asciidoc b/Documentation/btrfs-man5.asciidoc
index 0529496a..c358cef1 100644
--- a/Documentation/btrfs-man5.asciidoc
+++ b/Documentation/btrfs-man5.asciidoc
@@ -138,8 +138,10 @@ Otherwise some simple heuristics are applied to detect an incompressible file.
 If the first blocks written to a file are not compressible, the whole file is
 permanently marked to skip compression. As this is too simple, the
 'compress-force' is a workaround that will compress most of the files at the
-cost of some wasted CPU cycles on failed attempts. The heuristics of 'compress'
-will improve in the future so this will not be necessary.
+cost of some wasted CPU cycles on failed attempts.
+Since kernel 4.15, a set of heuristic algorithms have been improved by using
+frequency sampling, repeated pattern detection and Shannon entropy calculation
+to avoid that.
 +
 NOTE: If compression is enabled, 'nodatacow' and 'nodatasum' are disabled.
 
@@ -154,7 +156,7 @@ under 'nodatacow' are also set the NOCOW file attribute (see `chattr`(1)).
 NOTE: If 'nodatacow' or 'nodatasum' are enabled, compression is disabled.
 +
 Updates in-place improve performance for workloads that do frequent overwrites,
-at the cost of potential partial writes, in case the write is interruted
+at the cost of potential partial writes, in case the write is interrupted
 (system crash, device failure).
 
 *datasum*::
@@ -169,7 +171,7 @@ corresponding file attribute (see `chattr`(1)).
 NOTE: If 'nodatacow' or 'nodatasum' are enabled, compression is disabled.
 +
 There is a slight performance gain when checksums are turned off, the
-correspoinding metadata blocks holding the checksums do not need to updated.
+corresponding metadata blocks holding the checksums do not need to be updated.
 The cost of checksumming of the blocks in memory is much lower than the IO,
 modern CPUs feature hardware support of the checksumming algorithm.
 
@@ -183,7 +185,7 @@ missing, for example if a stripe member is completely missing from RAID0.
 Since 4.14, the constraint checks have been improved and are verified on the
 chunk level, not an the device level. This allows degraded mounts of
 filesystems with mixed RAID profiles for data and metadata, even if the
-device number constraints would not be satisfied for some of the prifles.
+device number constraints would not be satisfied for some of the profiles.
 +
 Example: metadata -- raid1, data -- single, devices -- /dev/sda, /dev/sdb
 +
@@ -300,6 +302,7 @@ which will effectively start the inode numbers from the beginning again.
 (default: on, even read-only)
 +
 Enable/disable log replay at mount time. See also 'treelog'.
+Note that 'nologreplay' is the same as 'norecovery'.
 +
 WARNING: currently, the tree log is replayed even with a read-only mount! To
 disable that behaviour, mount also with 'nologreplay'.
@@ -336,7 +339,8 @@ inlined files).
 (since: 4.5, default: off)
 +
 Do not attempt any data recovery at mount time. This will disable 'logreplay'
-and avoids other write operations.
+and avoids other write operations. Note that this option is the same as
+'nologreplay'.
 +
 NOTE: The opposite option 'recovery' used to have different meaning but was
 changed for consistency with other filesystems, where 'norecovery' is used for
@@ -388,12 +392,13 @@ chosen, which is 'v1'.
 *ssd*::
 *ssd_spread*::
 *nossd*::
+*nossd_spread*::
 (default: SSD autodetected)
 +
 Options to control SSD allocation schemes.  By default, BTRFS will
 enable or disable SSD optimizations depending on status of a device with
 respect to rotational or non-rotational type. This is determined by the
-contents of '/sys/block/DEV/queue/rotational'). If it is 1, the 'ssd' option is
+contents of '/sys/block/DEV/queue/rotational'). If it is 0, the 'ssd' option is
 turned on.  The option 'nossd' will disable the autodetection.
 +
 The optimizations make use of the absence of the seek penalty that's inherent
@@ -411,7 +416,7 @@ fragmentation. The layout tuning has been kept intact for the option
 The 'ssd_spread' mount option attempts to allocate into bigger and aligned
 chunks of unused space, and may perform better on low-end SSDs.  'ssd_spread'
 implies 'ssd', enabling all other SSD heuristics as well. The option 'nossd'
-will disable all SSD options.
+will disable all SSD options while 'nossd_spread' only disables 'ssd_spread'.
 
 *subvol='path'*::
 Mount subvolume from 'path' rather than the toplevel subvolume. The
@@ -470,7 +475,10 @@ root user can do that.
 NOTE: historically, any user could create a snapshot even if he was not owner
 of the source subvolume, the subvolume deletion has been restricted for that
 reason. The subvolume creation has been restricted but this mount option is
-still required. This is a usability issue and will be addressed in the future.
+still required. This is a usability issue.
+Since 4.18, the `rmdir`(2) syscall can delete an empty subvolume just like an
+ordinary directory. Whether this is possible can be detected at runtime, see
+'rmdir_subvol' feature in 'FILESYSTEM FEATURES'.
 
 DEPRECATED MOUNT OPTIONS
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -605,12 +613,17 @@ stored as an extent, saves a few percent of metadata if sparse files are used
 +
 the filesystem contains or contained a raid56 profile of block groups
 
+*rmdir_subvol*::
+(since: 4.18)
++
+indicate that `rmdir`(2) syscall can delete an empty subvolume just like an
+ordinary directory. Note that this feature only depends on the kernel version.
+
 *skinny_metadata*::
 (since: 3.10)
 +
 reduced-size metadata for extent references, saves a few percent of metadata
 
-
 FILESYSTEM LIMITS
 -----------------
 
@@ -636,7 +649,7 @@ inherent limit of btrfs is 2^64^ (16 EiB) but the linux VFS limit is 2^63^ (8 Ei
 
 maximum number of subvolumes::
 2^64^ but depends on the available metadata space, the space consumed by all
-subvolume metadata includes bookeeping of the shared extents can be large (MiB,
+subvolume metadata includes bookkeeping of the shared extents can be large (MiB,
 GiB)
 
 maximum number of hardlinks of a file in a directory::
diff --git a/Documentation/btrfs-map-logical.8 b/Documentation/btrfs-map-logical.8
new file mode 100644
index 00000000..75f7fd3a
--- /dev/null
+++ b/Documentation/btrfs-map-logical.8
@@ -0,0 +1,66 @@
+'\" t
+.\"     Title: btrfs-map-logical
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-MAP\-LOGICAL" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-map-logical \- map btrfs logical extent to physical extent
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs\-map\-logical\fR \fI<options>\fR \fI<device>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs\-map\-logical\fR can be used to find out what the physical offsets are on the mirrors, the result is dumped to stdout by default\&.
+.sp
+Mainly used for debug purpose\&.
+.SH "OPTIONS"
+.PP
+\-l|\-\-logical \fI<logical_num>\fR
+.RS 4
+Logical extent to map\&.
+.RE
+.PP
+\-c|\-\-copy \fI<copy>\fR
+.RS 4
+Copy of the extent to read (usually 1 or 2)\&.
+.RE
+.PP
+\-o|\-\-output \fI<filename>\fR
+.RS 4
+Output file to hold the extent\&.
+.RE
+.PP
+\-b|\-\-bytes \fI<bytes>\fR
+.RS 4
+Number of bytes to read\&.
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs\-map\-logical\fR will return 0 if no error happened\&. If any problems happened, 1 will be returned\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8)
diff --git a/Documentation/btrfs-map-logical.8.gz b/Documentation/btrfs-map-logical.8.gz
deleted file mode 100644
index 89875909..00000000
--- a/Documentation/btrfs-map-logical.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-property.8 b/Documentation/btrfs-property.8
new file mode 100644
index 00000000..b8df629e
--- /dev/null
+++ b/Documentation/btrfs-property.8
@@ -0,0 +1,117 @@
+'\" t
+.\"     Title: btrfs-property
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-PROPERTY" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-property \- get/set/list properties for given filesystem object
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs property\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs property\fR is used to get/set/list property for given filesystem object\&. The object can be an inode (file or directory), subvolume or the whole filesystem\&. See the description of \fBget\fR subcommand for more information about both btrfs object and property\&.
+.sp
+\fBbtrfs property\fR provides a unified and user\-friendly method to tune different btrfs properties instead of using the traditional method like \fBchattr\fR(1) or \fBlsattr\fR(1)\&.
+.SH "SUBCOMMAND"
+.PP
+\fBget\fR [\-t \fI<type>\fR] \fI<object>\fR [\fI<name>\fR]
+.RS 4
+get property from a btrfs
+\fI<object>\fR
+of given
+\fI<type>\fR
+.sp
+A btrfs object, which is set by
+\fI<object>\fR, can be a btrfs filesystem itself, a btrfs subvolume, an inode (file or directory) inside btrfs, or a device on which a btrfs exists\&.
+.sp
+The option
+\fI\-t\fR
+can be used to explicitly specify what type of object you meant\&. This is only needed when a property could be set for more than one object type\&.
+.sp
+Possible types are
+\fIs[ubvol]\fR,
+\fIf[ilesystem]\fR,
+\fIi[node]\fR
+and
+\fId[evice]\fR, where the first letter is a shortcut\&.
+.sp
+Set the name of property by
+\fIname\fR\&. If no
+\fIname\fR
+is specified, all properties for the given object are printed\&.
+\fIname\fR
+is one of the following:
+.PP
+ro
+.RS 4
+read\-only flag of subvolume: true or false
+.RE
+.PP
+label
+.RS 4
+label of device
+.RE
+.PP
+compression
+.RS 4
+compression algorithm set for an inode, possible values:
+\fIlzo\fR,
+\fIzlib\fR,
+\fIzstd\fR\&. To disable compression use "" (empty string),
+\fIno\fR
+or
+\fInone\fR\&.
+.RE
+.RE
+.PP
+\fBlist\fR [\-t \fI<type>\fR] \fI<object>\fR
+.RS 4
+Lists available properties with their descriptions for the given object\&.
+.sp
+See the description of
+\fBget\fR
+subcommand for the meaning of each option\&.
+.RE
+.PP
+\fBset\fR [\-t \fI<type>\fR] \fI<object>\fR \fI<name>\fR \fI<value>\fR
+.RS 4
+Sets a property on a btrfs object\&.
+.sp
+See the description of
+\fBget\fR
+subcommand for the meaning of each option\&.
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs property\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBlsattr\fR(1), \fBchattr\fR(1)
diff --git a/Documentation/btrfs-property.8.gz b/Documentation/btrfs-property.8.gz
deleted file mode 100644
index 95e8f69b..00000000
--- a/Documentation/btrfs-property.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-qgroup.8 b/Documentation/btrfs-qgroup.8
new file mode 100644
index 00000000..ed01649e
--- /dev/null
+++ b/Documentation/btrfs-qgroup.8
@@ -0,0 +1,295 @@
+'\" t
+.\"     Title: btrfs-qgroup
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-QGROUP" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-qgroup \- control the quota group of a btrfs filesystem
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs qgroup\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs qgroup\fR is used to control quota group (qgroup) of a btrfs filesystem\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+To use qgroup you need to enable quota first using \fBbtrfs quota enable\fR command\&.
+.sp .5v
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+.sp
+Qgroup is not stable yet and will impact performance in current mainline kernel (v4\&.14)\&.
+.sp .5v
+.RE
+.SH "QGROUP"
+.sp
+Quota groups or qgroup in btrfs make a tree hierarchy, the leaf qgroups are attached to subvolumes\&. The size limits are set per qgroup and apply when any limit is reached in tree that contains a given subvolume\&.
+.sp
+The limits are separated between shared and exclusive and reflect the extent ownership\&. For example a fresh snapshot shares almost all the blocks with the original subvolume, new writes to either subvolume will raise towards the exclusive limit\&.
+.sp
+The qgroup identifiers conform to \fIlevel/id\fR where level 0 is reserved to the qgroups associated with subvolumes\&. Such qgroups are created automatically\&.
+.sp
+The qgroup hierarchy is built by commands \fBcreate\fR and \fBassign\fR\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+If the qgroup of a subvolume is destroyed, quota about the subvolume will not be functional until qgroup \fI0/\fR\fI\fI<subvolume id>\fR\fR is created again\&.
+.sp .5v
+.RE
+.SH "SUBCOMMAND"
+.PP
+\fBassign\fR [options] \fI<src>\fR \fI<dst>\fR \fI<path>\fR
+.RS 4
+Assign qgroup
+\fI<src>\fR
+as the child qgroup of
+\fI<dst>\fR
+in the btrfs filesystem identified by
+\fI<path>\fR\&.
+.sp
+\fBOptions\fR
+.PP
+\-\-rescan
+.RS 4
+(default since: 4\&.19) Automatically schedule quota rescan if the new qgroup assignment would lead to quota inconsistency\&. See
+\fIQUOTA RESCAN\fR
+for more information\&.
+.RE
+.PP
+\-\-no\-rescan
+.RS 4
+Explicitly ask not to do a rescan, even if the assignment will make the quotas inconsistent\&. This may be useful for repeated calls where the rescan would add unnecessary overhead\&.
+.RE
+.RE
+.PP
+\fBcreate\fR \fI<qgroupid>\fR \fI<path>\fR
+.RS 4
+Create a subvolume quota group\&.
+.sp
+For the
+\fI0/\fR\fI\fI<subvolume id>\fR\fR
+qgroup, a qgroup can be created even before the subvolume is created\&.
+.RE
+.PP
+\fBdestroy\fR \fI<qgroupid>\fR \fI<path>\fR
+.RS 4
+Destroy a qgroup\&.
+.sp
+If a qgroup is not isolated, meaning it is a parent or child qgroup, then it can only be destroyed after the relationship is removed\&.
+.RE
+.PP
+\fBlimit\fR [options] \fI<size>\fR|none [\fI<qgroupid>\fR] \fI<path>\fR
+.RS 4
+Limit the size of a qgroup to
+\fI<size>\fR
+or no limit in the btrfs filesystem identified by
+\fI<path>\fR\&.
+.sp
+If
+\fI<qgroupid>\fR
+is not given, qgroup of the subvolume identified by
+\fI<path>\fR
+is used if possible\&.
+.sp
+\fBOptions\fR
+.PP
+\-c
+.RS 4
+limit amount of data after compression\&. This is the default, it is currently not possible to turn off this option\&.
+.RE
+.PP
+\-e
+.RS 4
+limit space exclusively assigned to this qgroup\&.
+.RE
+.RE
+.PP
+\fBremove\fR \fI<src>\fR \fI<dst>\fR \fI<path>\fR
+.RS 4
+Remove the relationship between child qgroup
+\fI<src>\fR
+and parent qgroup
+\fI<dst>\fR
+in the btrfs filesystem identified by
+\fI<path>\fR\&.
+.sp
+\fBOptions\fR
+.sp
+The same as
+\fBassign\fR
+subcommand\&.
+.RE
+.PP
+\fBshow\fR [options] \fI<path>\fR
+.RS 4
+Show all qgroups in the btrfs filesystem identified by
+\fI<path>\fR\&.
+.sp
+\fBOptions\fR
+.PP
+\-p
+.RS 4
+print parent qgroup id\&.
+.RE
+.PP
+\-c
+.RS 4
+print child qgroup id\&.
+.RE
+.PP
+\-r
+.RS 4
+print limit of referenced size of qgroup\&.
+.RE
+.PP
+\-e
+.RS 4
+print limit of exclusive size of qgroup\&.
+.RE
+.PP
+\-F
+.RS 4
+list all qgroups which impact the given path (include ancestral qgroups)
+.RE
+.PP
+\-f
+.RS 4
+list all qgroups which impact the given path (exclude ancestral qgroups)
+.RE
+.PP
+\-\-raw
+.RS 4
+raw numbers in bytes, without the
+\fIB\fR
+suffix\&.
+.RE
+.PP
+\-\-human\-readable
+.RS 4
+print human friendly numbers, base 1024, this is the default
+.RE
+.PP
+\-\-iec
+.RS 4
+select the 1024 base for the following options, according to the IEC standard\&.
+.RE
+.PP
+\-\-si
+.RS 4
+select the 1000 base for the following options, according to the SI standard\&.
+.RE
+.PP
+\-\-kbytes
+.RS 4
+show sizes in KiB, or kB with \-\-si\&.
+.RE
+.PP
+\-\-mbytes
+.RS 4
+show sizes in MiB, or MB with \-\-si\&.
+.RE
+.PP
+\-\-gbytes
+.RS 4
+show sizes in GiB, or GB with \-\-si\&.
+.RE
+.PP
+\-\-tbytes
+.RS 4
+show sizes in TiB, or TB with \-\-si\&.
+.RE
+.PP
+\-\-sort=[+/\-]\fI<attr>\fR[,[+/\-]\fI<attr>\fR]\&...
+.RS 4
+list qgroups in order of
+\fI<attr>\fR\&.
+.sp
+\fI<attr>\fR
+can be one or more of qgroupid,rfer,excl,max_rfer,max_excl\&.
+.sp
+Prefix \*(Aq+\*(Aq means ascending order and \*(Aq\-\*(Aq means descending order of
+\fI<attr>\fR\&. If no prefix is given, use ascending order by default\&.
+.sp
+If multiple
+\fI<attr>\fRs are given, use a comma to separate them\&.
+.RE
+.PP
+\-\-sync
+.RS 4
+To retrieve information after updating the state of qgroups, force sync of the filesystem identified by
+\fI<path>\fR
+before getting information\&.
+.RE
+.RE
+.SH "QUOTA RESCAN"
+.sp
+The rescan reads all extent sharing metadata and updates the respective qgroups accordingly\&.
+.sp
+The information consists of bytes owned exclusively (\fIexcl\fR) or shared/referred to (\fIrfer\fR)\&. There\(cqs no explicit information about which extents are shared or owned exclusively\&. This means when qgroup relationship changes, extent owners change and qgroup numbers are no longer consistent unless we do a full rescan\&.
+.sp
+However there are cases where we can avoid a full rescan, if a subvolume whose \fIrfer\fR number equals its \fIexcl\fR number, which means all bytes are exclusively owned, then assigning/removing this subvolume only needs to add/subtract \fIrfer\fR number from its parent qgroup\&. This can speed up the rescan\&.
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs qgroup\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-subvolume\fR(8), \fBbtrfs\-quota\fR(8)
diff --git a/Documentation/btrfs-qgroup.8.gz b/Documentation/btrfs-qgroup.8.gz
deleted file mode 100644
index b52ee493..00000000
--- a/Documentation/btrfs-qgroup.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-qgroup.asciidoc b/Documentation/btrfs-qgroup.asciidoc
index 3108457c..0c9f5940 100644
--- a/Documentation/btrfs-qgroup.asciidoc
+++ b/Documentation/btrfs-qgroup.asciidoc
@@ -47,10 +47,13 @@ identified by <path>.
 `Options`
 +
 --rescan::::
-Automatically schedule quota rescan if the new qgroup assignment leads to
-quota inconsistency.
+(default since: 4.19) Automatically schedule quota rescan if the new qgroup
+assignment would lead to quota inconsistency. See 'QUOTA RESCAN' for more
+information.
 --no-rescan::::
-Explicitly ask not to do a rescan.
+Explicitly ask not to do a rescan, even if the assignment will make the quotas
+inconsistent. This may be useful for repeated calls where the rescan would add
+unnecessary overhead.
 
 *create* <qgroupid> <path>::
 Create a subvolume quota group.
@@ -83,6 +86,10 @@ limit space exclusively assigned to this qgroup.
 *remove* <src> <dst> <path>::
 Remove the relationship between child qgroup <src> and parent qgroup <dst> in
 the btrfs filesystem identified by <path>.
++
+`Options`
++
+The same as *assign* subcommand.
 
 *show* [options] <path>::
 Show all qgroups in the btrfs filesystem identified by <path>.
@@ -131,6 +138,21 @@ If multiple <attr>s is given, use comma to separate.
 To retrieve information after updating the state of qgroups,
 force sync of the filesystem identified by <path> before getting information.
 
+QUOTA RESCAN
+------------
+The rescan reads all extent sharing metadata and updates the respective qgroups
+accordingly.
+
+The information consists of bytes owned exclusively ('excl') or shared/referred
+to ('rfer'). There's no explicit information about which extents are shared or
+owned exclusively.  This means when qgroup relationship changes, extent owners
+change and qgroup numbers are no longer consistent unless we do a full rescan.
+
+However there are cases where we can avoid a full rescan, if a subvolume whose
+'rfer' number equals its 'excl' number, which means all bytes are exclusively
+owned, then assigning/removing this subvolume only needs to add/subtract 'rfer'
+number from its parent qgroup. This can speed up the rescan.
+
 EXIT STATUS
 -----------
 *btrfs qgroup* returns a zero exit status if it succeeds. Non zero is
diff --git a/Documentation/btrfs-quota.8 b/Documentation/btrfs-quota.8
new file mode 100644
index 00000000..bc2dc239
--- /dev/null
+++ b/Documentation/btrfs-quota.8
@@ -0,0 +1,216 @@
+'\" t
+.\"     Title: btrfs-quota
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-QUOTA" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-quota \- control the global quota status of a btrfs filesystem
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs quota\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+The commands under \fBbtrfs quota\fR are used to affect the global status of quotas of a btrfs filesystem\&. The quota groups (qgroups) are managed by the subcommand \fBbtrfs qgroup\fR(8)\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+Qgroups are different than the traditional user quotas and designed to track shared and exclusive data per\-subvolume\&. Please refer to the section \fIHIERARCHICAL QUOTA GROUP CONCEPTS\fR for a detailed description\&.
+.sp .5v
+.RE
+.SS "PERFORMANCE IMPLICATIONS"
+.sp
+When quotas are activated, they affect all extent processing, which takes a performance hit\&. Activation of qgroups is not recommended unless the user intends to actually use them\&.
+.SS "STABILITY STATUS"
+.sp
+The qgroup implementation has turned out to be quite difficult as it affects the core of the filesystem operation\&. Qgroup users have hit various corner cases over time, such as incorrect accounting or system instability\&. The situation is gradually improving as issues are found and fixed\&.
+.SH "HIERARCHICAL QUOTA GROUP CONCEPTS"
+.sp
+The concept of quota has a long\-standing tradition in the Unix world\&. Ever since computers allow multiple users to work simultaneously in one filesystem, there is the need to prevent one user from using up the entire space\&. Every user should get his fair share of the available resources\&.
+.sp
+In case of files, the solution is quite straightforward\&. Each file has an \fIowner\fR recorded along with it, and it has a size\&. Traditional quota just restricts the total size of all files that are owned by a user\&. The concept is quite flexible: if a user hits his quota limit, the administrator can raise it on the fly\&.
+.sp
+On the other hand, the traditional approach has only a poor solution to restrict directories\&. At installation time, the harddisk can be partitioned so that every directory (eg\&. /usr, /var/, \&...) that needs a limit gets its own partition\&. The obvious problem is that those limits cannot be changed without a reinstallation\&. The btrfs subvolume feature builds a bridge\&. Subvolumes correspond in many ways to partitions, as every subvolume looks like its own filesystem\&. With subvolume quota, it is now possible to restrict each subvolume like a partition, but keep the flexibility of quota\&. The space for each subvolume can be expanded or restricted on the fly\&.
+.sp
+As subvolumes are the basis for snapshots, interesting questions arise as to how to account used space in the presence of snapshots\&. If you have a file shared between a subvolume and a snapshot, whom to account the file to? The creator? Both? What if the file gets modified in the snapshot, should only these changes be accounted to it? But wait, both the snapshot and the subvolume belong to the same user home\&. I just want to limit the total space used by both! But somebody else might not want to charge the snapshots to the users\&.
+.sp
+Btrfs subvolume quota solves these problems by introducing groups of subvolumes and letting the user put limits on them\&. It is even possible to have groups of groups\&. In the following, we refer to them as \fIqgroups\fR\&.
+.sp
+Each qgroup primarily tracks two numbers, the amount of total referenced space and the amount of exclusively referenced space\&.
+.PP
+referenced
+.RS 4
+space is the amount of data that can be reached from any of the subvolumes contained in the qgroup, while
+.RE
+.PP
+exclusive
+.RS 4
+is the amount of data where all references to this data can be reached from within this qgroup\&.
+.RE
+.SS "SUBVOLUME QUOTA GROUPS"
+.sp
+The basic notion of the Subvolume Quota feature is the quota group, short qgroup\&. Qgroups are notated as \fIlevel/id\fR, eg\&. the qgroup 3/2 is a qgroup of level 3\&. For level 0, the leading \fI0/\fR can be omitted\&. Qgroups of level 0 get created automatically when a subvolume/snapshot gets created\&. The ID of the qgroup corresponds to the ID of the subvolume, so 0/5 is the qgroup for the root subvolume\&. For the \fBbtrfs qgroup\fR command, the path to the subvolume can also be used instead of \fI0/ID\fR\&. For all higher levels, the ID can be chosen freely\&.
+.sp
+Each qgroup can contain a set of lower level qgroups, thus creating a hierarchy of qgroups\&. Figure 1 shows an example qgroup tree\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+                          +\-\-\-+
+                          |2/1|
+                          +\-\-\-+
+                         /     \e
+                   +\-\-\-+/       \e+\-\-\-+
+                   |1/1|         |1/2|
+                   +\-\-\-+         +\-\-\-+
+                  /     \e       /     \e
+            +\-\-\-+/       \e+\-\-\-+/       \e+\-\-\-+
+qgroups     |0/1|         |0/2|         |0/3|
+            +\-+\-+         +\-\-\-+         +\-\-\-+
+              |          /     \e       /     \e
+              |         /       \e     /       \e
+              |        /         \e   /         \e
+extents       1       2            3            4
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+Figure 1: Sample qgroup hierarchy
+.sp
+At the bottom, some extents are depicted showing which qgroups reference which extents\&. It is important to understand the notion of \fIreferenced\fR vs \fIexclusive\fR\&. In the example, qgroup 0/2 references extents 2 and 3, while 1/2 references extents 2\-4, 2/1 references all extents\&.
+.sp
+On the other hand, extent 1 is exclusive to 0/1, extent 2 is exclusive to 0/2, while extent 3 is neither exclusive to 0/2 nor to 0/3\&. But because both references can be reached from 1/2, extent 3 is exclusive to 1/2\&. All extents are exclusive to 2/1\&.
+.sp
+So exclusive does not mean there is no other way to reach the extent, but it does mean that if you delete all subvolumes contained in a qgroup, the extent will get deleted\&.
+.sp
+Exclusive of a qgroup conveys the useful information how much space will be freed in case all subvolumes of the qgroup get deleted\&.
+.sp
+All data extents are accounted this way\&. Metadata that belongs to a specific subvolume (i\&.e\&. its filesystem tree) is also accounted\&. Checksums and extent allocation information are not accounted\&.
+.sp
+In turn, the referenced count of a qgroup can be limited\&. All writes beyond this limit will lead to a \fIQuota Exceeded\fR error\&.
+.SS "INHERITANCE"
+.sp
+Things get a bit more complicated when new subvolumes or snapshots are created\&. The case of (empty) subvolumes is still quite easy\&. If a subvolume should be part of a qgroup, it has to be added to the qgroup at creation time\&. To add it at a later time, it would be necessary to at least rescan the full subvolume for a proper accounting\&.
+.sp
+Creation of a snapshot is the hard case\&. Obviously, the snapshot will reference exactly the same amount of space as its source, and both source and destination now have an exclusive count of 0 (the filesystem nodesize to be precise, as the roots of the trees are not shared)\&. But what about qgroups of higher levels? If the qgroup contains both the source and the destination, nothing changes\&. If the qgroup contains only the source, it might lose some exclusive\&.
+.sp
+But how much? The tempting answer is, subtract all exclusive of the source from the qgroup, but that is wrong, or at least not enough\&. There could have been an extent that is referenced from the source and another subvolume from that qgroup\&. This extent would have been exclusive to the qgroup, but not to the source subvolume\&. With the creation of the snapshot, the qgroup would also lose this extent from its exclusive set\&.
+.sp
+So how can this problem be solved? In the instant the snapshot gets created, we already have to know the correct exclusive count\&. We need to have a second qgroup that contains all the subvolumes as the first qgroup, except the subvolume we want to snapshot\&. The moment we create the snapshot, the exclusive count from the second qgroup needs to be copied to the first qgroup, as it represents the correct value\&. The second qgroup is called a tracking qgroup\&. It is only there in case a snapshot is needed\&.
+.SS "USE CASES"
+.sp
+Below are some use cases; the list is not meant to be exhaustive\&. You can find your own way to integrate qgroups\&.
+.sp
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBSINGLE-USER MACHINE\fR
+.RS 4
+.sp
+\fBReplacement for partitions\fR
+.sp
+The simplest use case is to use qgroups as a simple replacement for partitions\&. Btrfs takes the disk as a whole, and /, /usr, /var, etc\&. are created as subvolumes\&. As each subvolume gets its own qgroup automatically, they can simply be restricted\&. No hierarchy is needed for that\&.
+.sp
+\fBTrack usage of snapshots\fR
+.sp
+When a snapshot is taken, a qgroup for it will automatically be created with the correct values\&. \fIReferenced\fR will show how much is in it, possibly shared with other subvolumes\&. \fIExclusive\fR will be the amount of space that gets freed when the subvolume is deleted\&.
+.RE
+.sp
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBMULTI-USER MACHINE\fR
+.RS 4
+.sp
+\fBRestricting homes\fR
+.sp
+When you have several users on a machine, with home directories probably under /home, you might want to restrict /home as a whole, while restricting every user to an individual limit as well\&. This is easily accomplished by creating a qgroup for /home, eg\&. 1/1, and assigning all user subvolumes to it\&. Restricting this qgroup will limit /home, while every user subvolume can get its own (lower) limit\&.
+.sp
+\fBAccounting snapshots to the user\fR
+.sp
+Let\(cqs say the user is allowed to create snapshots via some mechanism\&. It would only be fair to account space used by the snapshots to the user\&. This does not mean the user doubles his usage as soon as he takes a snapshot\&. Of course, files that are present in his home and the snapshot should only be accounted once\&. This can be accomplished by creating a qgroup for each user, say \fI1/UID\fR\&. The user home and all snapshots are assigned to this qgroup\&. Limiting it will extend the limit to all snapshots, counting files only once\&. To limit /home as a whole, a higher level group 2/1 replacing 1/1 from the previous example is needed, with all user qgroups assigned to it\&.
+.sp
+\fBDo not account snapshots\fR
+.sp
+On the other hand, when the snapshots get created automatically, the user has no chance to control them, so the space used by them should not be accounted to him\&. This is already the case when creating snapshots in the example from the previous section\&.
+.sp
+\fBSnapshots for backup purposes\fR
+.sp
+This scenario is a mixture of the previous two\&. The user can create snapshots, but some snapshots for backup purposes are being created by the system\&. The user\(cqs snapshots should be accounted to the user, not the system\&. The solution is similar to the one from section \fIAccounting snapshots to the user\fR, but do not assign system snapshots to user\(cqs qgroup\&.
+.RE
+.SH "SUBCOMMAND"
+.PP
+\fBdisable\fR \fI<path>\fR
+.RS 4
+Disable subvolume quota support for a filesystem\&.
+.RE
+.PP
+\fBenable\fR \fI<path>\fR
+.RS 4
+Enable subvolume quota support for a filesystem\&.
+.RE
+.PP
+\fBrescan\fR [\-sw] \fI<path>\fR
+.RS 4
+Trash all qgroup numbers and scan the metadata again with the current config\&.
+.sp
+\fBOptions\fR
+.PP
+\-s
+.RS 4
+show status of a running rescan operation\&.
+.RE
+.PP
+\-w
+.RS 4
+wait for rescan operation to finish (can be already in progress)\&.
+.RE
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs quota\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-subvolume\fR(8), \fBbtrfs\-qgroup\fR(8)
diff --git a/Documentation/btrfs-quota.8.gz b/Documentation/btrfs-quota.8.gz
deleted file mode 100644
index a1be6bed..00000000
--- a/Documentation/btrfs-quota.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-receive.8 b/Documentation/btrfs-receive.8
new file mode 100644
index 00000000..acc0dd52
--- /dev/null
+++ b/Documentation/btrfs-receive.8
@@ -0,0 +1,153 @@
+'\" t
+.\"     Title: btrfs-receive
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-RECEIVE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-receive \- receive subvolumes from send stream
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs receive\fR [options] \fI<path>\fR
+.sp
+or
+.sp
+\fBbtrfs receive\fR \-\-dump [options]
+.SH "DESCRIPTION"
+.sp
+Receive a stream of changes and replicate one or more subvolumes that were previously generated by \fBbtrfs send\fR\&. The received subvolumes are stored to \fIpath\fR, unless \fI\-\-dump\fR option is given\&.
+.sp
+If \fI\-\-dump\fR option is specified, \fBbtrfs receive\fR will only do the validation of the stream, and print the stream metadata, one operation per line\&.
+.sp
+\fBbtrfs receive\fR will fail in the following cases:
+.sp
+.RS 4
+.ie n \{\
+\h'-04' 1.\h'+01'\c
+.\}
+.el \{\
+.sp -1
+.IP "  1." 4.2
+.\}
+receiving subvolume already exists
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04' 2.\h'+01'\c
+.\}
+.el \{\
+.sp -1
+.IP "  2." 4.2
+.\}
+previously received subvolume has been changed after it was received
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04' 3.\h'+01'\c
+.\}
+.el \{\
+.sp -1
+.IP "  3." 4.2
+.\}
+default subvolume has changed or you didn\(cqt mount the filesystem at the toplevel subvolume
+.RE
+.sp
+A subvolume is made read\-only after the receiving process finishes successfully (see BUGS below)\&.
+.sp
+\fBOptions\fR
+.PP
+\-v
+.RS 4
+increase verbosity about performed actions, print details about each operation
+.RE
+.PP
+\-f \fI<FILE>\fR
+.RS 4
+read the stream from
+\fI<FILE>\fR
+instead of stdin
+.RE
+.PP
+\-C|\-\-chroot
+.RS 4
+confine the process to
+\fIpath\fR
+using
+\fBchroot\fR(1)
+.RE
+.PP
+\-e
+.RS 4
+terminate after receiving an
+\fIend cmd\fR
+marker in the stream\&.
+.sp
+Without this option the receiver side terminates only in case of an error on end of file\&.
+.RE
+.PP
+\-E|\-\-max\-errors \fI<NERR>\fR
+.RS 4
+terminate as soon as NERR errors occur while processing commands from the stream
+.sp
+Default value is 1\&. A value of 0 means no limit\&.
+.RE
+.PP
+\-m \fI<ROOTMOUNT>\fR
+.RS 4
+the root mount point of the destination filesystem
+.sp
+By default the mountpoint is searched in
+\fI/proc/self/mounts\fR\&. If
+\fI/proc\fR
+is not accessible, eg\&. in a chroot environment, use this option to tell us where this filesystem is mounted\&.
+.RE
+.PP
+\-\-dump
+.RS 4
+dump the stream metadata, one line per operation
+.sp
+Does not require the
+\fIpath\fR
+parameter\&. The filesystem remains unchanged\&.
+.RE
+.SH "BUGS"
+.sp
+\fBbtrfs receive\fR sets the subvolume read\-only after it completes successfully\&. However, while the receive is in progress, users who have write access to files or directories in the receiving \fIpath\fR can add, remove, or modify files, in which case the resulting read\-only subvolume will not be an exact copy of the sent subvolume\&.
+.sp
+If the intention is to create an exact copy, the receiving \fIpath\fR should be protected from access by users until the receive operation has completed and the subvolume is set to read\-only\&.
+.sp
+Additionally, receive does not currently do a very good job of validating that an incremental send stream actually makes sense, and it is thus possible for a specially crafted send stream to create a subvolume with reflinks to arbitrary files in the same filesystem\&. Because of this, users are advised to not use \fBbtrfs receive\fR on send streams from untrusted sources, and to protect trusted streams when sending them across untrusted networks\&.
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs receive\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-send\fR(8)
diff --git a/Documentation/btrfs-receive.8.gz b/Documentation/btrfs-receive.8.gz
deleted file mode 100644
index 0bdac0ab..00000000
--- a/Documentation/btrfs-receive.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-replace.8 b/Documentation/btrfs-replace.8
new file mode 100644
index 00000000..95baf9c6
--- /dev/null
+++ b/Documentation/btrfs-replace.8
@@ -0,0 +1,116 @@
+'\" t
+.\"     Title: btrfs-replace
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-REPLACE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-replace \- replace devices managed by btrfs with other devices\&.
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs replace\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs replace\fR is used to replace btrfs managed devices with other devices\&.
+.SH "SUBCOMMAND"
+.PP
+\fBcancel\fR \fI<mount_point>\fR
+.RS 4
+Cancel a running device replace operation\&.
+.RE
+.PP
+\fBstart\fR [\-Bfr] \fI<srcdev>\fR|\fI<devid>\fR \fI<targetdev>\fR \fI<path>\fR
+.RS 4
+Replace device of a btrfs filesystem\&.
+.sp
+On a live filesystem, duplicate the data to the target device which is currently stored on the source device\&. If the source device is not available anymore, or if the \-r option is set, the data is built only using the RAID redundancy mechanisms\&. After completion of the operation, the source device is removed from the filesystem\&. If the
+\fI<srcdev>\fR
+is a numerical value, it is assumed to be the device id of the filesystem which is mounted at
+\fI<path>\fR, otherwise it is the path to the source device\&. If the source device is disconnected from the system, you have to use the devid parameter format\&. The
+\fI<targetdev>\fR
+needs to be the same size or larger than the
+\fI<srcdev>\fR\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+the filesystem has to be resized to fully take advantage of a larger target device; this can be achieved with
+\fBbtrfs filesystem resize <devid>:max /path\fR
+.sp .5v
+.RE
+\fBOptions\fR
+.PP
+\-r
+.RS 4
+only read from
+\fI<srcdev>\fR
+if no other zero\-defect mirror exists\&. (enable this if your drive has lots of read errors, the access would be very slow)
+.RE
+.PP
+\-f
+.RS 4
+force using and overwriting
+\fI<targetdev>\fR
+even if it looks like it contains a valid btrfs filesystem\&.
+.sp
+A valid filesystem is assumed if a btrfs superblock is found which contains a correct checksum\&. Devices that are currently mounted are never allowed to be used as the
+\fI<targetdev>\fR\&.
+.RE
+.PP
+\-B
+.RS 4
+no background replace\&.
+.RE
+.RE
+.PP
+\fBstatus\fR [\-1] \fI<mount_point>\fR
+.RS 4
+Print status and progress information of a running device replace operation\&.
+.sp
+\fBOptions\fR
+.PP
+\-1
+.RS 4
+print once instead of print continuously until the replace operation finishes (or is cancelled)
+.RE
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs replace\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-device\fR(8)
diff --git a/Documentation/btrfs-replace.8.gz b/Documentation/btrfs-replace.8.gz
deleted file mode 100644
index 754aa8b3..00000000
--- a/Documentation/btrfs-replace.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-rescue.8 b/Documentation/btrfs-rescue.8
new file mode 100644
index 00000000..ccf542a7
--- /dev/null
+++ b/Documentation/btrfs-rescue.8
@@ -0,0 +1,188 @@
+'\" t
+.\"     Title: btrfs-rescue
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-RESCUE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-rescue \- Recover a damaged btrfs filesystem
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs rescue\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs rescue\fR is used to try to recover a damaged btrfs filesystem\&.
+.SH "SUBCOMMAND"
+.PP
+\fBchunk\-recover\fR [options] \fI<device>\fR
+.RS 4
+Recover the chunk tree by scanning the devices
+.sp
+\fBOptions\fR
+.PP
+\-y
+.RS 4
+assume an answer of
+\fIyes\fR
+to all questions\&.
+.RE
+.PP
+\-v
+.RS 4
+verbose mode\&.
+.RE
+.PP
+\-h
+.RS 4
+help\&.
+.RE
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+Since \fBchunk\-recover\fR will scan the whole device, it will be \fBVERY\fR slow, especially when executed on a large device\&.
+.sp .5v
+.RE
+.PP
+\fBfix\-device\-size\fR \fI<device>\fR
+.RS 4
+fix device size and super block total bytes values that do not match
+.sp
+Kernel 4\&.11 starts to check the device size more strictly and this might mismatch the stored value of total bytes\&. See the exact error message below\&. Newer kernels will refuse to mount the filesystem where the values do not match\&. This error is not fatal and can be fixed\&. This command will fix the device size values if possible\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+BTRFS error (device sdb): super_total_bytes 92017859088384 mismatch with fs_devices total_rw_bytes 92017859094528
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The mismatch may also exhibit as a kernel warning:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+WARNING: CPU: 3 PID: 439 at fs/btrfs/ctree\&.h:1559 btrfs_update_device+0x1c5/0x1d0 [btrfs]
+.fi
+.if n \{\
+.RE
+.\}
+.RE
+.PP
+\fBsuper\-recover\fR [options] \fI<device>\fR
+.RS 4
+Recover bad superblocks from good copies\&.
+.sp
+\fBOptions\fR
+.PP
+\-y
+.RS 4
+assume an answer of
+\fIyes\fR
+to all questions\&.
+.RE
+.PP
+\-v
+.RS 4
+verbose mode\&.
+.RE
+.RE
+.PP
+\fBzero\-log\fR \fI<device>\fR
+.RS 4
+clear the filesystem log tree
+.sp
+This command will clear the filesystem log tree\&. This may fix a specific set of problems when the filesystem mount fails due to the log replay\&. See below for sample stacktraces that may show up in system log\&.
+.sp
+The common case where this happens was fixed a long time ago, so it is unlikely that you will see this particular problem, but the command is kept around\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+clearing the log may lead to loss of changes that were made since the last transaction commit\&. This may be up to 30 seconds (default commit period) or less if the commit was implied by other filesystem activity\&.
+.sp .5v
+.RE
+One can determine whether
+\fBzero\-log\fR
+is needed according to the kernel backtrace:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+? replay_one_dir_item+0xb5/0xb5 [btrfs]
+? walk_log_tree+0x9c/0x19d [btrfs]
+? btrfs_read_fs_root_no_radix+0x169/0x1a1 [btrfs]
+? btrfs_recover_log_trees+0x195/0x29c [btrfs]
+? replay_one_dir_item+0xb5/0xb5 [btrfs]
+? btree_read_extent_buffer_pages+0x76/0xbc [btrfs]
+? open_ctree+0xff6/0x132c [btrfs]
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+If the errors are like above, then
+\fBzero\-log\fR
+should be used to clear the log and the filesystem may be mounted normally again\&. The keywords to look for are
+\fIopen_ctree\fR
+which says that it\(cqs during mount and function names that contain
+\fIreplay\fR,
+\fIrecover\fR
+or
+\fIlog_tree\fR\&.
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs rescue\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-scrub\fR(8), \fBbtrfs\-check\fR(8)
diff --git a/Documentation/btrfs-rescue.8.gz b/Documentation/btrfs-rescue.8.gz
deleted file mode 100644
index e0100dbf..00000000
--- a/Documentation/btrfs-rescue.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-restore.8 b/Documentation/btrfs-restore.8
new file mode 100644
index 00000000..db635693
--- /dev/null
+++ b/Documentation/btrfs-restore.8
@@ -0,0 +1,160 @@
+'\" t
+.\"     Title: btrfs-restore
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-RESTORE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-restore \- try to restore files from a damaged btrfs filesystem image
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs restore\fR [options] \fI<device>\fR \fI<path>\fR | \-l \fI<device>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs restore\fR is used to try to salvage files from a damaged filesystem and restore them into \fI<path>\fR or just list the subvolume tree roots\&. The filesystem image is not modified\&.
+.sp
+If the filesystem is damaged and cannot be repaired by the other tools (\fBbtrfs\-check\fR(8) or \fBbtrfs\-rescue\fR(8)), \fBbtrfs restore\fR could be used to retrieve file data, as far as the metadata are readable\&. The checks done by restore are less strict and the process is usually able to get far enough to retrieve data from the whole filesystem\&. This comes at a cost that some data might be incomplete or from older versions if they\(cqre available\&.
+.sp
+There are several options to attempt restoration of various file metadata types\&. You can try a dry run first to see how well the process goes and use further options to extend the set of restored metadata\&.
+.sp
+For images with damaged tree structures, there are several options to point the process to some spare copy\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+It is recommended to read the following btrfs wiki page if your data is not salvaged with the default options: \m[blue]\fBhttps://btrfs\&.wiki\&.kernel\&.org/index\&.php/Restore\fR\m[]
+.sp .5v
+.RE
+.SH "OPTIONS"
+.PP
+\-s|\-\-snapshots
+.RS 4
+get also snapshots that are skipped by default
+.RE
+.PP
+\-x|\-\-xattr
+.RS 4
+get extended attributes
+.RE
+.PP
+\-m|\-\-metadata
+.RS 4
+restore owner, mode and times for files and directories
+.RE
+.PP
+\-S|\-\-symlinks
+.RS 4
+restore symbolic links as well as normal files
+.RE
+.PP
+\-v|\-\-verbose
+.RS 4
+be verbose and print what is being restored
+.RE
+.PP
+\-i|\-\-ignore\-errors
+.RS 4
+ignore errors during restoration and continue
+.RE
+.PP
+\-o|\-\-overwrite
+.RS 4
+overwrite directories/files in
+\fI<path>\fR, eg\&. for repeated runs
+.RE
+.PP
+\-t \fI<bytenr>\fR
+.RS 4
+use
+\fI<bytenr>\fR
+to read the root tree
+.RE
+.PP
+\-f \fI<bytenr>\fR
+.RS 4
+only restore files that are under specified subvolume root pointed by
+\fI<bytenr>\fR
+.RE
+.PP
+\-u|\-\-super \fI<mirror>\fR
+.RS 4
+use given superblock mirror identified by
+\fI<mirror>\fR, it can be 0, 1 or 2
+.RE
+.PP
+\-r|\-\-root \fI<rootid>\fR
+.RS 4
+only restore files that are under a specified subvolume whose objectid is
+\fI<rootid>\fR
+.RE
+.PP
+\-d
+.RS 4
+find directory
+.RE
+.PP
+\-l|\-\-list\-roots
+.RS 4
+list subvolume tree roots, can be used as argument for
+\fI\-r\fR
+.RE
+.PP
+\-D|\-\-dry\-run
+.RS 4
+dry run (only list files that would be recovered)
+.RE
+.PP
+\-\-path\-regex \fI<regex>\fR
+.RS 4
+restore only filenames matching a regular expression (\fBregex\fR(7)) with a mandatory format
+.sp
+\fB^/(|home(|/username(|/Desktop(|/\&.*))))$\fR
+.sp
+The format is not very comfortable and restores all files in the directories in the whole path, so this is not useful for restoring a single file in a deep hierarchy\&.
+.RE
+.PP
+\-c
+.RS 4
+ignore case (\-\-path\-regex only)
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs restore\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-rescue\fR(8), \fBbtrfs\-check\fR(8)
diff --git a/Documentation/btrfs-restore.8.gz b/Documentation/btrfs-restore.8.gz
deleted file mode 100644
index 65eaf98f..00000000
--- a/Documentation/btrfs-restore.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-scrub.8 b/Documentation/btrfs-scrub.8
new file mode 100644
index 00000000..a393a553
--- /dev/null
+++ b/Documentation/btrfs-scrub.8
@@ -0,0 +1,189 @@
+'\" t
+.\"     Title: btrfs-scrub
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-SCRUB" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-scrub \- scrub btrfs filesystem, verify block checksums
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs scrub\fR \fI<subcommand>\fR \fI<args>\fR
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs scrub\fR is used to scrub a btrfs filesystem, which will read all data and metadata blocks from all devices and verify checksums\&. Automatically repair corrupted blocks if there\(cqs a correct copy available\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+Scrub is not a filesystem checker (fsck) and does not verify nor repair structural damage in the filesystem\&.
+.sp .5v
+.RE
+.sp
+The user is supposed to run it manually or via a periodic system service\&. The recommended period is a month but could be less\&. The estimated device bandwidth utilization is about 80% on an idle filesystem\&. The IO priority class is by default \fIidle\fR so background scrub should not significantly interfere with normal filesystem operation\&.
+.sp
+The scrubbing status is recorded in \fI/var/lib/btrfs/\fR in textual files named \fIscrub\&.status\&.UUID\fR for a filesystem identified by the given UUID\&. (Progress state is communicated through a named pipe in file \fIscrub\&.progress\&.UUID\fR in the same directory\&.) The status file is updated every 5 seconds\&. A resumed scrub will continue from the last saved position\&.
+.SH "SUBCOMMAND"
+.PP
+\fBcancel\fR \fI<path>\fR|\fI<device>\fR
+.RS 4
+If a scrub is running on the filesystem identified by
+\fIpath\fR
+cancel it\&.
+.sp
+If a
+\fIdevice\fR
+is specified, the corresponding filesystem is found and
+\fBbtrfs scrub cancel\fR
+behaves as if it was called on that filesystem\&.
+.RE
+.PP
+\fBresume\fR [\-BdqrR] [\-c \fI<ioprio_class>\fR \-n \fI<ioprio_classdata>\fR] \fI<path>\fR|\fI<device>\fR
+.RS 4
+Resume a cancelled or interrupted scrub on the filesystem identified by
+\fIpath\fR
+or on a given
+\fIdevice\fR\&.
+.sp
+Does not start a new scrub if the last scrub finished successfully\&.
+.sp
+\fBOptions\fR
+.sp
+see
+\fBscrub start\fR\&.
+.RE
+.PP
+\fBstart\fR [\-BdqrRf] [\-c \fI<ioprio_class>\fR \-n \fI<ioprio_classdata>\fR] \fI<path>\fR|\fI<device>\fR
+.RS 4
+Start a scrub on all devices of the filesystem identified by
+\fIpath\fR
+or on a single
+\fIdevice\fR\&. If a scrub is already running, the new one fails\&.
+.sp
+Without options, scrub is started as a background process\&.
+.sp
+The default IO priority of scrub is the idle class\&. The priority can be configured similar to the
+\fBionice\fR(1) syntax using
+\fI\-c\fR
+and
+\fI\-n\fR
+options\&.
+.sp
+\fBOptions\fR
+.PP
+\-B
+.RS 4
+do not background and print scrub statistics when finished
+.RE
+.PP
+\-d
+.RS 4
+print separate statistics for each device of the filesystem (\fI\-B\fR
+only) at the end
+.RE
+.PP
+\-q
+.RS 4
+be quiet, omit error messages and statistics
+.RE
+.PP
+\-r
+.RS 4
+run in read\-only mode, do not attempt to correct anything, can be run on a read\-only filesystem
+.RE
+.PP
+\-R
+.RS 4
+raw print mode, print full data instead of summary
+.RE
+.PP
+\-c \fI<ioprio_class>\fR
+.RS 4
+set IO priority class (see
+\fBionice\fR(1) manpage)
+.RE
+.PP
+\-n \fI<ioprio_classdata>\fR
+.RS 4
+set IO priority classdata (see
+\fBionice\fR(1) manpage)
+.RE
+.PP
+\-f
+.RS 4
+force starting new scrub even if a scrub is already running, this can be useful when the scrub status file is damaged and reports a running scrub although it is not, but should not normally be necessary
+.RE
+.RE
+.PP
+\fBstatus\fR [\-d] \fI<path>\fR|\fI<device>\fR
+.RS 4
+Show status of a running scrub for the filesystem identified by
+\fIpath\fR
+or for the specified
+\fIdevice\fR\&.
+.sp
+If no scrub is running, show statistics of the last finished or cancelled scrub for that filesystem or device\&.
+.sp
+\fBOptions\fR
+.PP
+\-d
+.RS 4
+print separate statistics for each device of the filesystem
+.RE
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs scrub\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure:
+.PP
+1
+.RS 4
+scrub couldn\(cqt be performed
+.RE
+.PP
+2
+.RS 4
+there is nothing to resume
+.RE
+.PP
+3
+.RS 4
+scrub found uncorrectable errors
+.RE
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBionice\fR(1)
diff --git a/Documentation/btrfs-scrub.8.gz b/Documentation/btrfs-scrub.8.gz
deleted file mode 100644
index 4a44a430..00000000
--- a/Documentation/btrfs-scrub.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-scrub.asciidoc b/Documentation/btrfs-scrub.asciidoc
index d2d20627..4c49269e 100644
--- a/Documentation/btrfs-scrub.asciidoc
+++ b/Documentation/btrfs-scrub.asciidoc
@@ -69,7 +69,7 @@ be quiet, omit error messages and statistics
 run in read-only mode, do not attempt to correct anything, can be run on a read-only
 filesystem
 -R::::
-print raw statistics per-device instead of a summary
+raw print mode, print full data instead of summary
 -c <ioprio_class>::::
 set IO priority class (see `ionice`(1) manpage)
 -n <ioprio_classdata>::::
diff --git a/Documentation/btrfs-select-super.8 b/Documentation/btrfs-select-super.8
new file mode 100644
index 00000000..e56b290a
--- /dev/null
+++ b/Documentation/btrfs-select-super.8
@@ -0,0 +1,111 @@
+'\" t
+.\"     Title: btrfs-select-super
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-SELECT\-SUPER" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-select-super \- overwrite primary superblock with a backup copy
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs\-select\-super\fR \-s number \fI<device>\fR
+.SH "DESCRIPTION"
+.sp
+Destructively overwrite all copies of the superblock with a specified copy\&. This helps in certain cases, for example when write barriers were disabled during a power failure and not all superblocks were written, or if the primary superblock is damaged, e\&.g\&. accidentally overwritten\&.
+.sp
+The filesystem specified by \fIdevice\fR must not be mounted\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+\fBPrior to overwriting the primary superblock, please make sure that the backup copies are valid!\fR
+.sp .5v
+.RE
+.sp
+To dump a superblock use the \fBbtrfs inspect\-internal dump\-super\fR command\&.
+.sp
+Then run the check (in the non\-repair mode) using the command \fBbtrfs check \-s\fR where \fI\-s\fR specifies the superblock copy to use\&.
+.sp
+Superblock copies exist in the following offsets on the device:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+primary:
+\fI64KiB\fR
+(65536)
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+1st copy:
+\fI64MiB\fR
+(67108864)
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+2nd copy:
+\fI256GiB\fR
+(274877906944)
+.RE
+.sp
+A superblock size is \fI4KiB\fR (4096)\&.
+.SH "OPTIONS"
+.PP
+\-s|\-\-super \fI<superblock>\fR
+.RS 4
+use the \*(Aqsuperblock\*(Aqth superblock copy, valid values are 0, 1 or 2 if the respective superblock offset is within the device size
+.RE
+.SH "SEE ALSO"
+.sp
+\fBbtrfs\-inspect\-internal\fR(8), \fBbtrfs\-check\fR(8)
diff --git a/Documentation/btrfs-select-super.8.gz b/Documentation/btrfs-select-super.8.gz
deleted file mode 100644
index 0e025c10..00000000
--- a/Documentation/btrfs-select-super.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-send.8 b/Documentation/btrfs-send.8
new file mode 100644
index 00000000..1a29bfa0
--- /dev/null
+++ b/Documentation/btrfs-send.8
@@ -0,0 +1,102 @@
+'\" t
+.\"     Title: btrfs-send
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-SEND" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-send \- generate a stream of changes between two subvolume snapshots
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs send\fR [\-ve] [\-p \fI<parent>\fR] [\-c \fI<clone\-src>\fR] [\-f \fI<outfile>\fR] \fI<subvol>\fR [\fI<subvol>\fR\&...]
+.SH "DESCRIPTION"
+.sp
+This command will generate a stream of instructions that describe changes between two subvolume snapshots\&. The stream can be consumed by the \fBbtrfs receive\fR command to replicate the sent snapshot on a different filesystem\&. The command operates in two modes: full and incremental\&.
+.sp
+All snapshots involved in one send command must be read\-only, and this status cannot be changed as long as there\(cqs a running send operation that uses the snapshot\&.
+.sp
+In the full mode, the entire snapshot data and metadata will end up in the stream\&.
+.sp
+In the incremental mode (options \fI\-p\fR and \fI\-c\fR), previously sent snapshots that are available on both the sending and receiving side can be used to reduce the amount of information that has to be sent to reconstruct the sent snapshot on a different filesystem\&.
+.sp
+The \fI\-p \fR\fI\fI<parent>\fR\fR option can be omitted when \fI\-c \fR\fI\fI<clone\-src>\fR\fR options are given, in which case \fBbtrfs send\fR will determine a suitable parent from among the clone sources\&.
+.sp
+You must not specify clone sources unless you guarantee that these snapshots are exactly in the same state on both sides\(emboth for the sender and the receiver\&.
+.sp
+\fBOptions\fR
+.PP
+\-e
+.RS 4
+if sending multiple subvolumes at once, use the new format and omit the
+\fIend cmd\fR
+marker in the stream separating the subvolumes
+.RE
+.PP
+\-p \fI<parent>\fR
+.RS 4
+send an incremental stream from
+\fIparent\fR
+to
+\fIsubvol\fR
+.RE
+.PP
+\-c \fI<clone\-src>\fR
+.RS 4
+use this snapshot as a clone source for an incremental send (multiple allowed)
+.RE
+.PP
+\-f \fI<outfile>\fR
+.RS 4
+output is normally written to standard output so it can be, for example, piped to btrfs receive\&. Use this option to write it to a file instead\&.
+.RE
+.PP
+\-\-no\-data
+.RS 4
+send in
+\fINO_FILE_DATA\fR
+mode
+.sp
+The output stream does not contain any file data and thus cannot be used to transfer changes\&. This mode is faster and is useful to show the differences in metadata\&.
+.RE
+.PP
+\-v|\-\-verbose
+.RS 4
+enable verbose output, print generated commands in a readable form, (each occurrence of this option increases the verbosity level)
+.RE
+.PP
+\-q|\-\-quiet
+.RS 4
+suppress all messages except errors
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs send\fR returns a zero exit status if it succeeds\&. Non zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBbtrfs\-receive\fR(8)
diff --git a/Documentation/btrfs-send.8.gz b/Documentation/btrfs-send.8.gz
deleted file mode 100644
index 36fba40f..00000000
--- a/Documentation/btrfs-send.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-subvolume.8 b/Documentation/btrfs-subvolume.8
new file mode 100644
index 00000000..76688bd2
--- /dev/null
+++ b/Documentation/btrfs-subvolume.8
@@ -0,0 +1,382 @@
+'\" t
+.\"     Title: btrfs-subvolume
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-SUBVOLUME" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-subvolume \- manage btrfs subvolumes
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs subvolume\fR \fI<subcommand>\fR [\fI<args>\fR]
+.SH "DESCRIPTION"
+.sp
+\fBbtrfs subvolume\fR is used to create/delete/list/show btrfs subvolumes and snapshots\&.
+.SH "SUBVOLUME AND SNAPSHOT"
+.sp
+A subvolume is a part of a filesystem with its own independent file/directory hierarchy\&. Subvolumes can share file extents\&. A snapshot is also a subvolume, but with a given initial content of the original subvolume\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+A subvolume in btrfs is not like an LVM logical volume, which is a block\-level snapshot, while btrfs subvolumes are file extent\-based\&.
+.sp .5v
+.RE
+.sp
+A subvolume looks like a normal directory, with some additional operations described below\&. Subvolumes can be renamed or moved, nesting subvolumes is not restricted but has some implications regarding snapshotting\&.
+.sp
+A subvolume in btrfs can be accessed in two ways:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+like any other directory that is accessible to the user
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+like a separately mounted filesystem (options
+\fIsubvol\fR
+or
+\fIsubvolid\fR)
+.RE
+.sp
+In the latter case the parent directory is not visible and accessible\&. This is similar to a bind mount, and in fact the subvolume mount does exactly that\&.
+.sp
+A freshly created filesystem is also a subvolume, called \fItop\-level\fR, internally has an id 5\&. This subvolume cannot be removed or replaced by another subvolume\&. This is also the subvolume that will be mounted by default, unless the default subvolume has been changed (see subcommand \fIset\-default\fR)\&.
+.sp
+A snapshot is a subvolume like any other, with given initial content\&. By default, snapshots are created read\-write\&. File modifications in a snapshot do not affect the files in the original subvolume\&.
+.SH "SUBCOMMAND"
+.PP
+\fBcreate\fR [\-i \fI<qgroupid>\fR] [<dest>/]\fI<name>\fR
+.RS 4
+Create a subvolume
+\fI<name>\fR
+in
+\fI<dest>\fR\&.
+.sp
+If
+\fI<dest>\fR
+is not given, subvolume
+\fI<name>\fR
+will be created in the current directory\&.
+.sp
+\fBOptions\fR
+.PP
+\-i \fI<qgroupid>\fR
+.RS 4
+Add the newly created subvolume to a qgroup\&. This option can be given multiple times\&.
+.RE
+.RE
+.PP
+\fBdelete\fR [options] \fI<subvolume>\fR [\fI<subvolume>\fR\&...]
+.RS 4
+Delete the subvolume(s) from the filesystem\&.
+.sp
+If
+\fI<subvolume>\fR
+is not a subvolume, btrfs returns an error but continues if there are more arguments to process\&.
+.sp
+The corresponding directory is removed instantly but the data blocks are removed later in the background\&. The command returns immediately\&. See
+\fBbtrfs subvolume sync\fR
+for how to wait until the subvolume gets completely removed\&.
+.sp
+The deletion does not involve full transaction commit by default due to performance reasons\&. As a consequence, the subvolume may appear again after a crash\&. Use one of the
+\fI\-\-commit\fR
+options to wait until the operation is safely stored on the device\&.
+.sp
+\fBOptions\fR
+.PP
+\-c|\-\-commit\-after
+.RS 4
+wait for transaction commit at the end of the operation\&.
+.RE
+.PP
+\-C|\-\-commit\-each
+.RS 4
+wait for transaction commit after deleting each subvolume\&.
+.RE
+.PP
+\-v|\-\-verbose
+.RS 4
+verbose output of operations\&.
+.RE
+.RE
+.PP
+\fBfind\-new\fR \fI<subvolume>\fR \fI<last_gen>\fR
+.RS 4
+List the recently modified files in a subvolume, after
+\fI<last_gen>\fR
+generation\&.
+.RE
+.PP
+\fBget\-default\fR \fI<path>\fR
+.RS 4
+Get the default subvolume of the filesystem
+\fI<path>\fR\&.
+.sp
+The output format is similar to
+\fBsubvolume list\fR
+command\&.
+.RE
+.PP
+\fBlist\fR [options] [\-G [+|\-]\fI<value>\fR] [\-C [+|\-]\fI<value>\fR] [\-\-sort=rootid,gen,ogen,path] \fI<path>\fR
+.RS 4
+List the subvolumes present in the filesystem
+\fI<path>\fR\&.
+.sp
+For every subvolume the following information is shown by default:
+.sp
+ID
+\fI<ID>\fR
+gen
+\fI<generation>\fR
+top level
+\fI<ID>\fR
+path
+\fI<path>\fR
+.sp
+where ID is subvolume\(cqs id, gen is an internal counter which is updated every transaction, top level is the same as parent subvolume\(cqs id, and path is the relative path of the subvolume to the top level subvolume\&. The subvolume\(cqs ID may be used by the subvolume set\-default command, or at mount time via the subvolid= option\&.
+.sp
+\fBOptions\fR
+.PP
+Path filtering
+.RS 4
+.PP
+\-o
+.RS 4
+print only subvolumes below specified
+\fI<path>\fR\&.
+.RE
+.PP
+\-a
+.RS 4
+print all the subvolumes in the filesystem and distinguish between absolute and relative path with respect to the given
+\fI<path>\fR\&.
+.RE
+.RE
+.PP
+Field selection
+.RS 4
+.PP
+\-p
+.RS 4
+print the parent ID (\fIparent\fR
+here means the subvolume which contains this subvolume)\&.
+.RE
+.PP
+\-c
+.RS 4
+print the ogeneration of the subvolume, aliases: ogen or origin generation\&.
+.RE
+.PP
+\-g
+.RS 4
+print the generation of the subvolume (default)\&.
+.RE
+.PP
+\-u
+.RS 4
+print the UUID of the subvolume\&.
+.RE
+.PP
+\-q
+.RS 4
+print the parent UUID of the subvolume (\fIparent\fR
+here means subvolume of which this subvolume is a snapshot)\&.
+.RE
+.PP
+\-R
+.RS 4
+print the UUID of the sent subvolume, where the subvolume is the result of a receive operation\&.
+.RE
+.RE
+.PP
+Type filtering
+.RS 4
+.PP
+\-s
+.RS 4
+only snapshot subvolumes in the filesystem will be listed\&.
+.RE
+.PP
+\-r
+.RS 4
+only readonly subvolumes in the filesystem will be listed\&.
+.RE
+.PP
+\-d
+.RS 4
+list deleted subvolumes that are not yet cleaned\&.
+.RE
+.RE
+.PP
+Other
+.RS 4
+.PP
+\-t
+.RS 4
+print the result as a table\&.
+.RE
+.RE
+.PP
+Sorting
+.RS 4
+.PP
+\-G [+|\-]\fI<value>\fR
+.RS 4
+list subvolumes in the filesystem whose generation is >=, <= or = value\&. \*(Aq+\*(Aq means >= value, \*(Aq\-\*(Aq means <= value\&. If there is neither \*(Aq+\*(Aq nor \*(Aq\-\*(Aq, it means = value\&.
+.RE
+.PP
+\-C [+|\-]\fI<value>\fR
+.RS 4
+list subvolumes in the filesystem whose ogeneration is >=, <= or = value\&. The usage is the same as for the
+\fI\-G\fR
+option\&.
+.RE
+.PP
+\-\-sort=rootid,gen,ogen,path
+.RS 4
+list subvolumes in order by specified items\&. You can add \*(Aq+\*(Aq or \*(Aq\-\*(Aq in front of each item, \*(Aq+\*(Aq means ascending, \*(Aq\-\*(Aq means descending\&. The default is ascending\&.
+.sp
+For \-\-sort you can combine some items together by \*(Aq,\*(Aq, just like \-\-sort=+ogen,\-gen,path,rootid\&.
+.RE
+.RE
+.RE
+.PP
+\fBset\-default\fR [\fI<subvolume>\fR|\fI<id>\fR \fI<path>\fR]
+.RS 4
+Set the default subvolume for the (mounted) filesystem\&.
+.sp
+Set the default subvolume for the (mounted) filesystem at
+\fI<path>\fR\&. This will hide the top\-level subvolume (i\&.e\&. the one mounted with
+\fIsubvol=/\fR
+or
+\fIsubvolid=5\fR)\&. Takes action on next mount\&.
+.sp
+There are two ways to specify the subvolume, by
+\fI<id>\fR
+or by the
+\fI<subvolume>\fR
+path\&. The id can be obtained from
+\fBbtrfs subvolume list\fR,
+\fBbtrfs subvolume show\fR
+or
+\fBbtrfs inspect\-internal rootid\fR\&.
+.RE
+.PP
+\fBshow\fR [options] \fI<path>\fR|\fI<mnt>\fR
+.RS 4
+Show information of a given subvolume in the
+\fI<path>\fR\&.
+.sp
+\fBOptions\fR
+.PP
+\-r|\-\-rootid
+.RS 4
+rootid of the subvolume\&.
+.RE
+.PP
+\-u|\-\-uuid
+.RS 4
+UUID of the subvolume\&.
+.RE
+.sp
+If no option is specified, subvolume information of
+\fI<path>\fR
+is shown, otherwise the subvolume information of rootid or UUID in the filesystem is shown\&.
+.RE
+.PP
+\fBsnapshot\fR [\-r|\-i \fI<qgroupid>\fR] \fI<source>\fR \fI<dest>\fR|[<dest>/]\fI<name>\fR
+.RS 4
+Create a snapshot of the subvolume
+\fI<source>\fR
+with the name
+\fI<name>\fR
+in the
+\fI<dest>\fR
+directory\&.
+.sp
+If only
+\fI<dest>\fR
+is given, the subvolume will be named the basename of
+\fI<source>\fR\&. If
+\fI<source>\fR
+is not a subvolume, btrfs returns an error\&.
+.sp
+\fBOptions\fR
+.PP
+\-r
+.RS 4
+Make the new snapshot read only\&.
+.RE
+.PP
+\-i \fI<qgroupid>\fR
+.RS 4
+Add the newly created subvolume to a qgroup\&. This option can be given multiple times\&.
+.RE
+.RE
+.PP
+\fBsync\fR \fI<path>\fR [subvolid\&...]
+.RS 4
+Wait until given subvolume(s) are completely removed from the filesystem after deletion\&. If no subvolume id is given, wait until all current deletion requests are completed, but do not wait for subvolumes deleted in the meantime\&.
+.sp
+\fBOptions\fR
+.PP
+\-s \fI<N>\fR
+.RS 4
+sleep N seconds between checks (default: 1)
+.RE
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs subvolume\fR returns a zero exit status if it succeeds\&. A non\-zero value is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBmkfs\&.btrfs\fR(8), \fBmount\fR(8), \fBbtrfs\-quota\fR(8), \fBbtrfs\-qgroup\fR(8)
diff --git a/Documentation/btrfs-subvolume.8.gz b/Documentation/btrfs-subvolume.8.gz
deleted file mode 100644
index 48cd18c7..00000000
--- a/Documentation/btrfs-subvolume.8.gz
+++ /dev/null
diff --git a/Documentation/btrfs-subvolume.asciidoc b/Documentation/btrfs-subvolume.asciidoc
index a8c4af4b..f3eb4e26 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -77,13 +77,16 @@ safely stored on the device.
 `Options`
 +
 -c|--commit-after::::
-wait for transaction commit at the end of the operation
+wait for transaction commit at the end of the operation.
 +
 -C|--commit-each::::
-wait for transaction commit after deleting each subvolume
+wait for transaction commit after deleting each subvolume.
++
+-v|--verbose::::
+verbose output of operations.
 
 *find-new* <subvolume> <last_gen>::
-List the recently modified files in a subvolume, after <last_gen> ID.
+List the recently modified files in a subvolume, after <last_gen> generation.
 
 *get-default* <path>::
 Get the default subvolume of the filesystem <path>.
@@ -93,40 +96,54 @@ The output format is similar to *subvolume list* command.
 *list* [options] [-G [\+|-]<value>] [-C [+|-]<value>] [--sort=rootid,gen,ogen,path] <path>::
 List the subvolumes present in the filesystem <path>.
 +
-For every subvolume the following information is shown by default. +
-ID <ID> top level <ID> path <path> +
-where path is the relative path of the subvolume to the top level subvolume.
+For every subvolume the following information is shown by default:
++
+ID <ID> gen <generation> top level <ID> path <path>
++
+where ID is subvolume's id, gen is an internal counter which is updated
+every transaction, top level is the same as parent subvolume's id, and
+path is the relative path of the subvolume to the top level subvolume.
 The subvolume's ID may be used by the subvolume set-default command,
 or at mount time via the subvolid= option.
-If `-p` is given, then parent <ID> is added to the output between ID
-and top level. The parent's ID may be used at mount time via the
-`subvolrootid=` option.
 +
 `Options`
 +
--p::::
-print parent ID.
+Path filtering;;
+-o::::
+print only subvolumes below specified <path>.
 -a::::
 print all the subvolumes in the filesystem and distinguish between
 absolute and relative path with respect to the given <path>.
+
+Field selection;;
+-p::::
+print the parent ID
+('parent' here means the subvolume which contains this subvolume).
 -c::::
 print the ogeneration of the subvolume, aliases: ogen or origin generation.
 -g::::
-print the generation of the subvolume.
--o::::
-print only subvolumes below specified <path>.
+print the generation of the subvolume (default).
 -u::::
 print the UUID of the subvolume.
 -q::::
-print the parent uuid of subvolumes (and snapshots).
+print the parent UUID of the subvolume
+('parent' here means subvolume of which this subvolume is a snapshot).
 -R::::
-print the UUID of the sent subvolume, where the subvolume is the result of a receive operation
--t::::
-print the result as a table.
+print the UUID of the sent subvolume, where the subvolume is the result of a receive operation.
+
+Type filtering;;
 -s::::
 only snapshot subvolumes in the filesystem will be listed.
 -r::::
 only readonly subvolumes in the filesystem will be listed.
+-d::::
+list deleted subvolumes that are not yet cleaned.
+
+Other;;
+-t::::
+print the result as a table.
+
+Sorting;;
 -G [+|-]<value>::::
 list subvolumes in the filesystem that its generation is
 >=, \<= or = value. \'\+' means >= value, \'-' means \<= value, If there is
@@ -144,9 +161,9 @@ for --sort you can combine some items together by \',', just like
 
 *set-default* [<subvolume>|<id> <path>]::
 Set the default subvolume for the (mounted) filesystem.
-
++
 Set the default subvolume for the (mounted) filesystem at <path>. This will hide
-the top-level subvolume (ie. the one mounted with 'subvol=/' or 'subvolid=5').
+the top-level subvolume (i.e. the one mounted with 'subvol=/' or 'subvolid=5').
 Takes action on next mount.
 +
 There are two ways how to specify the subvolume, by <id> or by the <subvolume>
@@ -154,10 +171,22 @@ path.
 The id can be obtained from *btrfs subvolume list*, *btrfs subvolume show* or
 *btrfs inspect-internal rootid*.
 
-*show* <path>::
+*show* [options] <path>|<mnt>::
 Show information of a given subvolume in the <path>.
++
+`Options`
++
+-r|--rootid::::
+rootid of the subvolume.
+-u|--uuid::::
+UUID of the subvolume.
+
++
+If no option is specified, subvolume information of <path> is shown,
+otherwise the subvolume information of rootid or UUID in the filesystem
+is shown.
 
-*snapshot* [-r] <source> <dest>|[<dest>/]<name>::
+*snapshot* [-r|-i <qgroupid>] <source> <dest>|[<dest>/]<name>::
 Create a snapshot of the subvolume <source> with the
 name <name> in the <dest> directory.
 +
@@ -168,6 +197,10 @@ If <source> is not a subvolume, btrfs returns an error.
 +
 -r::::
 Make the new snapshot read only.
++
+-i <qgroupid>::::
+Add the newly created subvolume to a qgroup. This option can be given multiple
+times.
 
 *sync* <path> [subvolid...]::
 Wait until given subvolume(s) are completely removed from the filesystem after
diff --git a/Documentation/btrfs.5 b/Documentation/btrfs.5
new file mode 100644
index 00000000..433e6dd6
--- /dev/null
+++ b/Documentation/btrfs.5
@@ -0,0 +1,1131 @@
+'\" t
+.\"     Title: btrfs-man5
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS\-MAN5" "5" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs-man5 \- topics about the BTRFS filesystem (mount options, supported file attributes and other)
+.SH "DESCRIPTION"
+.sp
+This document describes topics related to BTRFS that are not specific to the tools\&. Currently covers:
+.sp
+.RS 4
+.ie n \{\
+\h'-04' 1.\h'+01'\c
+.\}
+.el \{\
+.sp -1
+.IP "  1." 4.2
+.\}
+mount options
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04' 2.\h'+01'\c
+.\}
+.el \{\
+.sp -1
+.IP "  2." 4.2
+.\}
+filesystem features
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04' 3.\h'+01'\c
+.\}
+.el \{\
+.sp -1
+.IP "  3." 4.2
+.\}
+filesystem limits
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04' 4.\h'+01'\c
+.\}
+.el \{\
+.sp -1
+.IP "  4." 4.2
+.\}
+file attributes
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04' 5.\h'+01'\c
+.\}
+.el \{\
+.sp -1
+.IP "  5." 4.2
+.\}
+control device
+.RE
+.SH "MOUNT OPTIONS"
+.sp
+This section describes mount options specific to BTRFS\&. For the generic mount options please refer to \fBmount\fR(8) manpage\&. The options are sorted alphabetically (discarding the \fIno\fR prefix)\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+Most mount options apply to the whole filesystem and only options in the first mounted subvolume will take effect\&. This is due to lack of implementation and may change in the future\&. This means that (for example) you can\(cqt set per\-subvolume \fInodatacow\fR, \fInodatasum\fR, or \fIcompress\fR using mount options\&. This should eventually be fixed, but it has proved to be difficult to implement correctly within the Linux VFS framework\&.
+.sp .5v
+.RE
+.PP
+\fBacl\fR, \fBnoacl\fR
+.RS 4
+(default: on)
+.sp
+Enable/disable support for Posix Access Control Lists (ACLs)\&. See the
+\fBacl\fR(5) manual page for more information about ACLs\&.
+.sp
+The support for ACL is build\-time configurable (BTRFS_FS_POSIX_ACL) and mount fails if
+\fIacl\fR
+is requested but the feature is not compiled in\&.
+.RE
+.PP
+\fBautodefrag\fR, \fBnoautodefrag\fR
+.RS 4
+(since: 3\&.0, default: off)
+.sp
+Enable automatic file defragmentation\&. When enabled, small random writes into files (in a range of tens of kilobytes, currently it\(cqs 64K) are detected and queued up for the defragmentation process\&. Not well suited for large database workloads\&.
+.sp
+The read latency may increase due to reading the adjacent blocks that make up the range for defragmentation, successive write will merge the blocks in the new location\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+Defragmenting with Linux kernel versions < 3\&.9 or \(>= 3\&.14\-rc2 as well as with Linux stable kernel versions \(>= 3\&.10\&.31, \(>= 3\&.12\&.12 or \(>= 3\&.13\&.4 will break up the reflinks of COW data (for example files copied with
+\fBcp \-\-reflink\fR, snapshots or de\-duplicated data)\&. This may cause considerable increase of space usage depending on the broken up reflinks\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBbarrier\fR, \fBnobarrier\fR
+.RS 4
+(default: on)
+.sp
+Ensure that all IO write operations make it through the device cache and are stored permanently when the filesystem is at its consistency checkpoint\&. This typically means that a flush command is sent to the device that will synchronize all pending data and ordinary metadata blocks, then writes the superblock and issues another flush\&.
+.sp
+The write flushes incur a slight hit and also prevent the IO block scheduler from reordering requests in a more effective way\&. Disabling barriers gets rid of that penalty but will most certainly lead to a corrupted filesystem in case of a crash or power loss\&. The ordinary metadata blocks could be yet unwritten at the time the new superblock is stored permanently, expecting that the block pointers to metadata were stored permanently before\&.
+.sp
+On a device with a volatile battery\-backed write\-back cache, the
+\fInobarrier\fR
+option will not lead to filesystem corruption as the pending blocks are supposed to make it to the permanent storage\&.
+.RE
+.PP
+\fBcheck_int\fR, \fBcheck_int_data\fR, \fBcheck_int_print_mask=\fR\fB\fIvalue\fR\fR
+.RS 4
+(since: 3\&.0, default: off)
+.sp
+These debugging options control the behavior of the integrity checking module (the BTRFS_FS_CHECK_INTEGRITY config option required)\&. The main goal is to verify that all blocks from a given transaction period are properly linked\&.
+.sp
+\fIcheck_int\fR
+enables the integrity checker module, which examines all block write requests to ensure on\-disk consistency, at a large memory and CPU cost\&.
+.sp
+\fIcheck_int_data\fR
+includes extent data in the integrity checks, and implies the
+\fIcheck_int\fR
+option\&.
+.sp
+\fIcheck_int_print_mask\fR
+takes a bitmask of BTRFSIC_PRINT_MASK_* values as defined in
+\fIfs/btrfs/check\-integrity\&.c\fR, to control the integrity checker module behavior\&.
+.sp
+See comments at the top of
+\fIfs/btrfs/check\-integrity\&.c\fR
+for more information\&.
+.RE
+.PP
+\fBclear_cache\fR
+.RS 4
+Force clearing and rebuilding of the disk space cache if something has gone wrong\&. See also:
+\fIspace_cache\fR\&.
+.RE
+.PP
+\fBcommit=\fR\fB\fIseconds\fR\fR
+.RS 4
+(since: 3\&.12, default: 30)
+.sp
+Set the interval of periodic transaction commit when data are synchronized to permanent storage\&. Higher interval values lead to larger amount of unwritten data, which has obvious consequences when the system crashes\&. The upper bound is not forced, but a warning is printed if it\(cqs more than 300 seconds (5 minutes)\&. Use with care\&.
+.RE
+.PP
+\fBcompress\fR, \fBcompress=\fR\fB\fItype\fR\fR, \fBcompress\-force\fR, \fBcompress\-force=\fR\fB\fItype\fR\fR
+.RS 4
+(default: off)
+.sp
+Control BTRFS file data compression\&. Type may be specified as
+\fIzlib\fR,
+\fIlzo\fR,
+\fIzstd\fR
+or
+\fIno\fR
+(for no compression, used for remounting)\&. If no type is specified,
+\fIzlib\fR
+is used\&. If
+\fIcompress\-force\fR
+is specified, then compression will always be attempted, but the data may end up uncompressed if the compression would make them larger\&.
+.sp
+Otherwise some simple heuristics are applied to detect an incompressible file\&. If the first blocks written to a file are not compressible, the whole file is permanently marked to skip compression\&. As this is too simple, the
+\fIcompress\-force\fR
+is a workaround that will compress most of the files at the cost of some wasted CPU cycles on failed attempts\&. Since kernel 4\&.15, a set of heuristic algorithms have been improved by using frequency sampling, repeated pattern detection and Shannon entropy calculation to avoid that\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+If compression is enabled,
+\fInodatacow\fR
+and
+\fInodatasum\fR
+are disabled\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBdatacow\fR, \fBnodatacow\fR
+.RS 4
+(default: on)
+.sp
+Enable data copy\-on\-write for newly created files\&.
+\fINodatacow\fR
+implies
+\fInodatasum\fR, and disables
+\fIcompression\fR\&. All files created under
+\fInodatacow\fR
+are also set the NOCOW file attribute (see
+\fBchattr\fR(1))\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+If
+\fInodatacow\fR
+or
+\fInodatasum\fR
+are enabled, compression is disabled\&.
+.sp .5v
+.RE
+Updates in\-place improve performance for workloads that do frequent overwrites, at the cost of potential partial writes, in case the write is interrupted (system crash, device failure)\&.
+.RE
+.PP
+\fBdatasum\fR, \fBnodatasum\fR
+.RS 4
+(default: on)
+.sp
+Enable data checksumming for newly created files\&.
+\fIDatasum\fR
+implies
+\fIdatacow\fR, ie\&. the normal mode of operation\&. All files created under
+\fInodatasum\fR
+inherit the "no checksums" property, however there\(cqs no corresponding file attribute (see
+\fBchattr\fR(1))\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+If
+\fInodatacow\fR
+or
+\fInodatasum\fR
+are enabled, compression is disabled\&.
+.sp .5v
+.RE
+There is a slight performance gain when checksums are turned off, the corresponding metadata blocks holding the checksums do not need to be updated\&. The cost of checksumming of the blocks in memory is much lower than the IO, modern CPUs feature hardware support of the checksumming algorithm\&.
+.RE
+.PP
+\fBdegraded\fR
+.RS 4
+(default: off)
+.sp
+Allow mounts with fewer devices than the RAID profile constraints require\&. A read\-write mount (or remount) may fail when there are too many devices missing, for example if a stripe member is completely missing from RAID0\&.
+.sp
+Since 4\&.14, the constraint checks have been improved and are verified on the chunk level, not on the device level\&. This allows degraded mounts of filesystems with mixed RAID profiles for data and metadata, even if the device number constraints would not be satisfied for some of the profiles\&.
+.sp
+Example: metadata \(em raid1, data \(em single, devices \(em /dev/sda, /dev/sdb
+.sp
+Suppose the data are completely stored on
+\fIsda\fR, then missing
+\fIsdb\fR
+will not prevent the mount, even if 1 missing device would normally prevent (any)
+\fIsingle\fR
+profile to mount\&. In case some of the data chunks are stored on
+\fIsdb\fR, then the constraint of single/data is not satisfied and the filesystem cannot be mounted\&.
+.RE
+.PP
+\fBdevice=\fR\fB\fIdevicepath\fR\fR
+.RS 4
+Specify a path to a device that will be scanned for BTRFS filesystem during mount\&. This is usually done automatically by a device manager (like udev) or using the
+\fBbtrfs device scan\fR
+command (eg\&. run from the initial ramdisk)\&. In cases where this is not possible the
+\fIdevice\fR
+mount option can help\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+booting eg\&. a RAID1 system may fail even if all filesystem\(cqs
+\fIdevice\fR
+paths are provided as the actual device nodes may not be discovered by the system at that point\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBdiscard\fR, \fBnodiscard\fR
+.RS 4
+(default: off)
+.sp
+Enable discarding of freed file blocks\&. This is useful for SSD devices, thinly provisioned LUNs, or virtual machine images; however, every storage layer must support discard for it to work\&. If the backing device does not support asynchronous queued TRIM, then this operation can severely degrade performance, because a synchronous TRIM operation will be attempted instead\&. Queued TRIM requires newer than SATA revision 3\&.1 chipsets and devices\&.
+.RE
+.sp
+If it is not necessary to immediately discard freed blocks, then the \fBfstrim\fR tool can be used to discard all free blocks in a batch\&. Scheduling a TRIM during a period of low system activity will prevent latent interference with the performance of other operations\&. Also, a device may ignore the TRIM command if the range is too small, so running a batch discard has a greater probability of actually discarding the blocks\&.
+.sp
+If discarding is not necessary to be done at the block freeing time, there\(cqs \fBfstrim\fR(8) tool that lets the filesystem discard all free blocks in a batch, possibly not much interfering with other operations\&. Also, the device may ignore the TRIM command if the range is too small, so running the batch discard can actually discard the blocks\&.
+.PP
+\fBenospc_debug\fR, \fBnoenospc_debug\fR
+.RS 4
+(default: off)
+.sp
+Enable verbose output for some ENOSPC conditions\&. It\(cqs safe to use but can be noisy if the system reaches near\-full state\&.
+.RE
+.PP
+\fBfatal_errors=\fR\fB\fIaction\fR\fR
+.RS 4
+(since: 3\&.4, default: bug)
+.sp
+Action to take when encountering a fatal error\&.
+.PP
+\fBbug\fR
+.RS 4
+\fIBUG()\fR
+on a fatal error, the system will stay in the crashed state and may be still partially usable, but reboot is required for full operation
+.RE
+.PP
+\fBpanic\fR
+.RS 4
+\fIpanic()\fR
+on a fatal error, depending on other system configuration, this may be followed by a reboot\&. Please refer to the documentation of kernel boot parameters, eg\&.
+\fIpanic\fR,
+\fIoops\fR
+or
+\fIcrashkernel\fR\&.
+.RE
+.RE
+.PP
+\fBflushoncommit\fR, \fBnoflushoncommit\fR
+.RS 4
+(default: off)
+.sp
+This option forces any data dirtied by a write in a prior transaction to commit as part of the current commit, effectively a full filesystem sync\&.
+.sp
+This makes the committed state a fully consistent view of the file system from the application\(cqs perspective (i\&.e\&. it includes all completed file system operations)\&. This was previously the behavior only when a snapshot was created\&.
+.sp
+When off, the filesystem is consistent but buffered writes may last more than one transaction commit\&.
+.RE
+.PP
+\fBfragment=\fR\fB\fItype\fR\fR
+.RS 4
+(depends on compile\-time option BTRFS_DEBUG, since: 4\&.4, default: off)
+.sp
+A debugging helper to intentionally fragment given
+\fItype\fR
+of block groups\&. The type can be
+\fIdata\fR,
+\fImetadata\fR
+or
+\fIall\fR\&. This mount option should not be used outside of debugging environments and is not recognized if the kernel config option
+\fIBTRFS_DEBUG\fR
+is not enabled\&.
+.RE
+.PP
+\fBinode_cache\fR, \fBnoinode_cache\fR
+.RS 4
+(since: 3\&.0, default: off)
+.sp
+Enable free inode number caching\&. Not recommended to use unless files on your filesystem get assigned inode numbers that are approaching 2^64\&. Normally, new files in each subvolume get assigned incrementally (plus one from the last time) and are not reused\&. The mount option turns on caching of the existing inode numbers and reuse of inode numbers of deleted files\&.
+.sp
+This option may slow down your system at first run, or after mounting without the option\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+Defaults to off due to a potential overflow problem when the free space checksums don\(cqt fit inside a single page\&.
+.sp .5v
+.RE
+Don\(cqt use this option unless you really need it\&. The inode number limit on 64bit system is 2^64, which is practically enough for the whole filesystem lifetime\&. Due to implementation of linux VFS layer, the inode numbers on 32bit systems are only 32 bits wide\&. This lowers the limit significantly and makes it possible to reach it\&. In such case, this mount option will help\&. Alternatively, files with high inode numbers can be copied to a new subvolume which will effectively start the inode numbers from the beginning again\&.
+.RE
+.PP
+\fBlogreplay\fR, \fBnologreplay\fR
+.RS 4
+(default: on, even read\-only)
+.sp
+Enable/disable log replay at mount time\&. See also
+\fItreelog\fR\&. Note that
+\fInologreplay\fR
+is the same as
+\fInorecovery\fR\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+currently, the tree log is replayed even with a read\-only mount! To disable that behaviour, mount also with
+\fInologreplay\fR\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBmax_inline=\fR\fB\fIbytes\fR\fR
+.RS 4
+(default: min(2048, page size) )
+.sp
+Specify the maximum amount of space, that can be inlined in a metadata B\-tree leaf\&. The value is specified in bytes, optionally with a K suffix (case insensitive)\&. In practice, this value is limited by the filesystem block size (named
+\fIsectorsize\fR
+at mkfs time), and memory page size of the system\&. In case of sectorsize limit, there\(cqs some space unavailable due to leaf headers\&. For example, with a 4k sectorsize, the maximum size of inline data is about 3900 bytes\&.
+.sp
+Inlining can be completely turned off by specifying 0\&. This will increase data block slack if file sizes are much smaller than block size but will reduce metadata consumption in return\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+the default value has changed to 2048 in kernel 4\&.6\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBmetadata_ratio=\fR\fB\fIvalue\fR\fR
+.RS 4
+(default: 0, internal logic)
+.sp
+Specifies that 1 metadata chunk should be allocated after every
+\fIvalue\fR
+data chunks\&. Default behaviour depends on internal logic, some percent of unused metadata space is attempted to be maintained but is not always possible if there\(cqs not enough space left for chunk allocation\&. The option could be useful to override the internal logic in favor of the metadata allocation if the expected workload is supposed to be metadata intense (snapshots, reflinks, xattrs, inlined files)\&.
+.RE
+.PP
+\fBnorecovery\fR
+.RS 4
+(since: 4\&.5, default: off)
+.sp
+Do not attempt any data recovery at mount time\&. This will disable
+\fIlogreplay\fR
+and avoids other write operations\&. Note that this option is the same as
+\fInologreplay\fR\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+The opposite option
+\fIrecovery\fR
+used to have different meaning but was changed for consistency with other filesystems, where
+\fInorecovery\fR
+is used for skipping log replay\&. BTRFS does the same and in general will try to avoid any write operations\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBrescan_uuid_tree\fR
+.RS 4
+(since: 3\&.12, default: off)
+.sp
+Force check and rebuild procedure of the UUID tree\&. This should not normally be needed\&.
+.RE
+.PP
+\fBskip_balance\fR
+.RS 4
+(since: 3\&.3, default: off)
+.sp
+Skip automatic resume of an interrupted balance operation\&. The operation can later be resumed with
+\fBbtrfs balance resume\fR, or the paused state can be removed with
+\fBbtrfs balance cancel\fR\&. The default behaviour is to resume an interrupted balance immediately after a volume is mounted\&.
+.RE
+.PP
+\fBspace_cache\fR, \fBspace_cache=\fR\fB\fIversion\fR\fR, \fBnospace_cache\fR
+.RS 4
+(\fInospace_cache\fR
+since: 3\&.2,
+\fIspace_cache=v1\fR
+and
+\fIspace_cache=v2\fR
+since 4\&.5, default:
+\fIspace_cache=v1\fR)
+.sp
+Options to control the free space cache\&. The free space cache greatly improves performance when reading block group free space into memory\&. However, managing the space cache consumes some resources, including a small amount of disk space\&.
+.sp
+There are two implementations of the free space cache\&. The original one, referred to as
+\fIv1\fR, is the safe default\&. The
+\fIv1\fR
+space cache can be disabled at mount time with
+\fInospace_cache\fR
+without clearing\&.
+.sp
+On very large filesystems (many terabytes) and certain workloads, the performance of the
+\fIv1\fR
+space cache may degrade drastically\&. The
+\fIv2\fR
+implementation, which adds a new B\-tree called the free space tree, addresses this issue\&. Once enabled, the
+\fIv2\fR
+space cache will always be used and cannot be disabled unless it is cleared\&. Use
+\fIclear_cache,space_cache=v1\fR
+or
+\fIclear_cache,nospace_cache\fR
+to do so\&. If
+\fIv2\fR
+is enabled, kernels without
+\fIv2\fR
+support will only be able to mount the filesystem in read\-only mode\&. The
+\fBbtrfs\fR(8) command currently only has read\-only support for
+\fIv2\fR\&. A read\-write command may be run on a
+\fIv2\fR
+filesystem by clearing the cache, running the command, and then remounting with
+\fIspace_cache=v2\fR\&.
+.sp
+If a version is not explicitly specified, the default implementation will be chosen, which is
+\fIv1\fR\&.
+.RE
+.PP
+\fBssd\fR, \fBssd_spread\fR, \fBnossd\fR, \fBnossd_spread\fR
+.RS 4
+(default: SSD autodetected)
+.sp
+Options to control SSD allocation schemes\&. By default, BTRFS will enable or disable SSD optimizations depending on status of a device with respect to rotational or non\-rotational type\&. This is determined by the contents of
+\fI/sys/block/DEV/queue/rotational\fR\&. If it is 0, the
+\fIssd\fR
+option is turned on\&. The option
+\fInossd\fR
+will disable the autodetection\&.
+.sp
+The optimizations make use of the absence of the seek penalty that\(cqs inherent for the rotational devices\&. The blocks can be typically written faster and are not offloaded to separate threads\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+Since 4\&.14, the block layout optimizations have been dropped\&. This used to help with first generations of SSD devices\&. Their FTL (flash translation layer) was not effective and the optimization was supposed to improve the wear by better aligning blocks\&. This is no longer true with modern SSD devices and the optimization had no real benefit\&. Furthermore it caused increased fragmentation\&. The layout tuning has been kept intact for the option
+\fIssd_spread\fR\&.
+.sp .5v
+.RE
+The
+\fIssd_spread\fR
+mount option attempts to allocate into bigger and aligned chunks of unused space, and may perform better on low\-end SSDs\&.
+\fIssd_spread\fR
+implies
+\fIssd\fR, enabling all other SSD heuristics as well\&. The option
+\fInossd\fR
+will disable all SSD options while
+\fInossd_spread\fR
+only disables
+\fIssd_spread\fR\&.
+.RE
+.PP
+\fBsubvol=\fR\fB\fIpath\fR\fR
+.RS 4
+Mount subvolume from
+\fIpath\fR
+rather than the toplevel subvolume\&. The
+\fIpath\fR
+is always treated as relative to the toplevel subvolume\&. This mount option overrides the default subvolume set for the given filesystem\&.
+.RE
+.PP
+\fBsubvolid=\fR\fB\fIsubvolid\fR\fR
+.RS 4
+Mount subvolume specified by a
+\fIsubvolid\fR
+number rather than the toplevel subvolume\&. You can use
+\fBbtrfs subvolume list\fR
+or
+\fBbtrfs subvolume show\fR
+to see subvolume ID numbers\&. This mount option overrides the default subvolume set for the given filesystem\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+if both
+\fIsubvolid\fR
+and
+\fIsubvol\fR
+are specified, they must point at the same subvolume, otherwise the mount will fail\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBthread_pool=\fR\fB\fInumber\fR\fR
+.RS 4
+(default: min(NRCPUS + 2, 8) )
+.sp
+The number of worker threads to start\&. NRCPUS is number of on\-line CPUs detected at the time of mount\&. Small number leads to less parallelism in processing data and metadata, higher numbers could lead to a performance hit due to increased locking contention, process scheduling, cache\-line bouncing or costly data transfers between local CPU memories\&.
+.RE
+.PP
+\fBtreelog\fR, \fBnotreelog\fR
+.RS 4
+(default: on)
+.sp
+Enable the tree logging used for
+\fIfsync\fR
+and
+\fIO_SYNC\fR
+writes\&. The tree log stores changes without the need of a full filesystem sync\&. The log operations are flushed at sync and transaction commit\&. If the system crashes between two such syncs, the pending tree log operations are replayed during mount\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+currently, the tree log is replayed even with a read\-only mount! To disable that behaviour, also mount with
+\fInologreplay\fR\&.
+.sp .5v
+.RE
+The tree log could contain new files/directories, these would not exist on a mounted filesystem if the log is not replayed\&.
+.RE
+.PP
+\fBusebackuproot\fR, \fBnousebackuproot\fR
+.RS 4
+(since: 4\&.6, default: off)
+.sp
+Enable autorecovery attempts if a bad tree root is found at mount time\&. Currently this scans a backup list of several previous tree roots and tries to use the first readable\&. This can be used with read\-only mounts as well\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+This option has replaced
+\fIrecovery\fR\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBuser_subvol_rm_allowed\fR
+.RS 4
+(default: off)
+.sp
+Allow subvolumes to be deleted by their respective owner\&. Otherwise, only the root user can do that\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+historically, any user could create a snapshot even if he was not owner of the source subvolume, the subvolume deletion has been restricted for that reason\&. The subvolume creation has been restricted but this mount option is still required\&. This is a usability issue\&. Since 4\&.18, the
+\fBrmdir\fR(2) syscall can delete an empty subvolume just like an ordinary directory\&. Whether this is possible can be detected at runtime, see
+\fIrmdir_subvol\fR
+feature in
+\fIFILESYSTEM FEATURES\fR\&.
+.sp .5v
+.RE
+.RE
+.SS "DEPRECATED MOUNT OPTIONS"
+.sp
+List of mount options that have been removed, kept for backward compatibility\&.
+.PP
+\fBalloc_start=\fR\fB\fIbytes\fR\fR
+.RS 4
+(default: 1M, minimum: 1M, deprecated since: 4\&.13)
+.sp
+Debugging option to force all block allocations above a certain byte threshold on each block device\&. The value is specified in bytes, optionally with a K, M, or G suffix (case insensitive)\&.
+.RE
+.PP
+\fBrecovery\fR
+.RS 4
+(since: 3\&.2, default: off, deprecated since: 4\&.5)
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+this option has been replaced by
+\fIusebackuproot\fR
+and should not be used but will work on 4\&.5+ kernels\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBsubvolrootid=\fR\fB\fIobjectid\fR\fR
+.RS 4
+(irrelevant since: 3\&.2, formally deprecated since: 3\&.10)
+.sp
+A workaround option from times (pre 3\&.2) when it was not possible to mount a subvolume that did not reside directly under the toplevel subvolume\&.
+.RE
+.SS "NOTES ON GENERIC MOUNT OPTIONS"
+.sp
+Some of the general mount options from \fBmount\fR(8) that affect BTRFS and are worth mentioning\&.
+.PP
+\fBnoatime\fR
+.RS 4
+under read intensive work\-loads, specifying
+\fInoatime\fR
+significantly improves performance because no new access time information needs to be written\&. Without this option, the default is
+\fIrelatime\fR, which only reduces the number of inode atime updates in comparison to the traditional
+\fIstrictatime\fR\&. The worst case for atime updates under
+\fIrelatime\fR
+occurs when many files are read whose atime is older than 24 h and which are freshly snapshotted\&. In that case the atime is updated
+\fIand\fR
+COW happens \- for each file \- in bulk\&. See also
+\m[blue]\fBhttps://lwn\&.net/Articles/499293/\fR\m[]
+\-
+\fIAtime and btrfs: a bad combination? (LWN, 2012\-05\-31)\fR\&.
+.sp
+Note that
+\fInoatime\fR
+may break applications that rely on atime updates like the venerable Mutt (unless you use maildir mailboxes)\&.
+.RE
+.SH "FILESYSTEM FEATURES"
+.sp
+The basic set of filesystem features gets extended over time\&. The backward compatibility is maintained and the features are optional, need to be explicitly asked for so accidental use will not create incompatibilities\&.
+.sp
+There are several classes and the respective tools to manage the features:
+.PP
+at mkfs time only
+.RS 4
+This is namely for core structures, like the b\-tree nodesize, see
+\fBmkfs\&.btrfs\fR(8) for more details\&.
+.RE
+.PP
+after mkfs, on an unmounted filesystem
+.RS 4
+Features that may optimize internal structures or add new structures to support new functionality, see
+\fBbtrfstune\fR(8)\&. The command
+\fBbtrfs inspect\-internal dump\-super device\fR
+will dump a superblock, you can map the value of
+\fIincompat_flags\fR
+to the features listed below
+.RE
+.PP
+after mkfs, on a mounted filesystem
+.RS 4
+The features of a filesystem (with a given UUID) are listed in
+\fB/sys/fs/btrfs/UUID/features/\fR, one file per feature\&. The status is stored inside the file\&. The value
+\fI1\fR
+is for enabled and active, while
+\fI0\fR
+means the feature was enabled at mount time but turned off afterwards\&.
+.sp
+Whether a particular feature can be turned on a mounted filesystem can be found in the directory
+\fB/sys/fs/btrfs/features/\fR, one file per feature\&. The value
+\fI1\fR
+means the feature can be enabled\&.
+.RE
+.sp
+List of features (see also \fBmkfs\&.btrfs\fR(8) section \fIFILESYSTEM FEATURES\fR):
+.PP
+\fBbig_metadata\fR
+.RS 4
+(since: 3\&.4)
+.sp
+the filesystem uses
+\fInodesize\fR
+for metadata blocks, this can be bigger than the page size
+.RE
+.PP
+\fBcompress_lzo\fR
+.RS 4
+(since: 2\&.6\&.38)
+.sp
+the
+\fIlzo\fR
+compression has been used on the filesystem, either as a mount option or via
+\fBbtrfs filesystem defrag\fR\&.
+.RE
+.PP
+\fBcompress_zstd\fR
+.RS 4
+(since: 4\&.14)
+.sp
+the
+\fIzstd\fR
+compression has been used on the filesystem, either as a mount option or via
+\fBbtrfs filesystem defrag\fR\&.
+.RE
+.PP
+\fBdefault_subvol\fR
+.RS 4
+(since: 2\&.6\&.34)
+.sp
+the default subvolume has been set on the filesystem
+.RE
+.PP
+\fBextended_iref\fR
+.RS 4
+(since: 3\&.7)
+.sp
+increased hardlink limit per file in a directory to 65536, older kernels supported a varying number of hardlinks depending on the sum of all file name sizes that can be stored into one metadata block
+.RE
+.PP
+\fBmixed_backref\fR
+.RS 4
+(since: 2\&.6\&.31)
+.sp
+the last major disk format change, improved backreferences, now default
+.RE
+.PP
+\fBmixed_groups\fR
+.RS 4
+(since: 2\&.6\&.37)
+.sp
+mixed data and metadata block groups, ie\&. the data and metadata are not separated and occupy the same block groups, this mode is suitable for small volumes as there are no constraints how the remaining space should be used (compared to the split mode, where empty metadata space cannot be used for data and vice versa)
+.sp
+on the other hand, the final layout is quite unpredictable and possibly highly fragmented, which means worse performance
+.RE
+.PP
+\fBno_holes\fR
+.RS 4
+(since: 3\&.14)
+.sp
+improved representation of file extents where holes are not explicitly stored as an extent, saves a few percent of metadata if sparse files are used
+.RE
+.PP
+\fBraid56\fR
+.RS 4
+(since: 3\&.9)
+.sp
+the filesystem contains or contained a raid56 profile of block groups
+.RE
+.PP
+\fBrmdir_subvol\fR
+.RS 4
+(since: 4\&.18)
+.sp
+indicate that
+\fBrmdir\fR(2) syscall can delete an empty subvolume just like an ordinary directory\&. Note that this feature only depends on the kernel version\&.
+.RE
+.PP
+\fBskinny_metadata\fR
+.RS 4
+(since: 3\&.10)
+.sp
+reduced\-size metadata for extent references, saves a few percent of metadata
+.RE
+.SH "FILESYSTEM LIMITS"
+.PP
+maximum file name length
+.RS 4
+255
+.RE
+.PP
+maximum symlink target length
+.RS 4
+depends on the
+\fInodesize\fR
+value, for 4k it\(cqs 3949 bytes, for larger nodesize it\(cqs 4095
+.sp
+The symlink target may not be a valid path, ie the path name components can exceed the limits, there\(cqs no content validation at
+\fBsymlink\fR(3) creation\&.
+.RE
+.PP
+maximum number of inodes
+.RS 4
+2^64 but depends on the available metadata space as the inodes are created dynamically
+.RE
+.PP
+inode numbers
+.RS 4
+minimum number: 256 (for subvolumes), regular files and directories: 257
+.RE
+.PP
+maximum file length
+.RS 4
+inherent limit of btrfs is 2^64 (16 EiB) but the linux VFS limit is 2^63 (8 EiB)
+.RE
+.PP
+maximum number of subvolumes
+.RS 4
+2^64 but depends on the available metadata space; the space consumed by all subvolume metadata, which includes bookkeeping of the shared extents, can be large (MiB, GiB)
+.RE
+.PP
+maximum number of hardlinks of a file in a directory
+.RS 4
+65536 when the
+\fBextref\fR
+feature is turned on during mkfs (default), roughly 100 otherwise
+.RE
+.SH "FILE ATTRIBUTES"
+.sp
+The btrfs filesystem supports setting the following file attributes using the \fBchattr\fR(1) utility:
+.PP
+\fBa\fR
+.RS 4
+\fIappend only\fR, new writes are always written at the end of the file
+.RE
+.PP
+\fBA\fR
+.RS 4
+\fIno atime updates\fR
+.RE
+.PP
+\fBc\fR
+.RS 4
+\fIcompress data\fR, all data written after this attribute is set will be compressed\&. Please note that compression is also affected by the mount options or the parent directory attributes\&.
+.sp
+When set on a directory, all newly created files will inherit this attribute\&.
+.RE
+.PP
+\fBC\fR
+.RS 4
+\fIno copy\-on\-write\fR, file modifications are done in\-place
+.sp
+When set on a directory, all newly created files will inherit this attribute\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+due to implementation limitations, this flag can be set/unset only on empty files\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fBd\fR
+.RS 4
+\fIno dump\fR, makes sense with 3rd party tools like
+\fBdump\fR(8), on BTRFS the attribute can be set/unset but no other special handling is done
+.RE
+.PP
+\fBD\fR
+.RS 4
+\fIsynchronous directory updates\fR, for more details search
+\fBopen\fR(2) for
+\fIO_SYNC\fR
+and
+\fIO_DSYNC\fR
+.RE
+.PP
+\fBi\fR
+.RS 4
+\fIimmutable\fR, no file data and metadata changes allowed even to the root user as long as this attribute is set (obviously the exception is unsetting the attribute)
+.RE
+.PP
+\fBS\fR
+.RS 4
+\fIsynchronous updates\fR, for more details search
+\fBopen\fR(2) for
+\fIO_SYNC\fR
+and
+\fIO_DSYNC\fR
+.RE
+.PP
+\fBX\fR
+.RS 4
+\fIno compression\fR, permanently turn off compression on the given file\&. Any compression mount options will not affect this file\&.
+.sp
+When set on a directory, all newly created files will inherit this attribute\&.
+.RE
+.sp
+No other attributes are supported\&. For the complete list please refer to the \fBchattr\fR(1) manual page\&.
+.SH "CONTROL DEVICE"
+.sp
+There\(cqs a character special device \fB/dev/btrfs\-control\fR with major and minor numbers 10 and 234 (the device can be found under the \fImisc\fR category)\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+$ ls \-l /dev/btrfs\-control
+crw\-\-\-\-\-\-\- 1 root root 10, 234 Jan  1 12:00 /dev/btrfs\-control
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The device accepts some ioctl calls that can perform following actions on the filesystem module:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+scan devices for btrfs filesystem (ie\&. to let multi\-device filesystems mount automatically) and register them with the kernel module
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+similar to scan, but also wait until the device scanning process is finished for a given filesystem
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+get the supported features (can be also found under
+\fI/sys/fs/btrfs/features\fR)
+.RE
+.sp
+The device is usually created by a system device node manager (eg\&. udev), but can be created manually:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# mknod \-\-mode=600 /dev/btrfs\-control c 10 234
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The control device is not strictly required but the device scanning will not work and a workaround would need to be used to mount a multi\-device filesystem\&. The mount option \fIdevice\fR can trigger the device scanning during mount\&.
+.SH "SEE ALSO"
+.sp
+\fBacl\fR(5), \fBbtrfs\fR(8), \fBchattr\fR(1), \fBfstrim\fR(8), \fBioctl\fR(2), \fBmkfs\&.btrfs\fR(8), \fBmount\fR(8)
diff --git a/Documentation/btrfs.5.gz b/Documentation/btrfs.5.gz
deleted file mode 100644
index 87783a00..00000000
--- a/Documentation/btrfs.5.gz
+++ /dev/null
diff --git a/Documentation/btrfs.8 b/Documentation/btrfs.8
new file mode 100644
index 00000000..3fc6b0e2
--- /dev/null
+++ b/Documentation/btrfs.8
@@ -0,0 +1,226 @@
+'\" t
+.\"     Title: btrfs
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFS" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfs \- a toolbox to manage btrfs filesystems
+.SH "SYNOPSIS"
+.sp
+\fBbtrfs\fR \fI<command>\fR [\fI<args>\fR]
+.SH "DESCRIPTION"
+.sp
+The \fBbtrfs\fR utility is a toolbox for managing btrfs filesystems\&. There are command groups to work with subvolumes, devices, for whole filesystem or other specific actions\&. See section \fBCOMMANDS\fR\&.
+.sp
+There are also standalone tools for some tasks like \fBbtrfs\-convert\fR or \fBbtrfstune\fR that were separate historically and/or haven\(cqt been merged to the main utility\&. See section \fISTANDALONE TOOLS\fR for more details\&.
+.sp
+For other topics (mount options, etc) please refer to the separate manual page \fBbtrfs\fR(5)\&.
+.SH "COMMAND SYNTAX"
+.sp
+Any command name can be shortened so long as the shortened form is unambiguous, however, it is recommended to use full command names in scripts\&. All command groups have their manual page named \fBbtrfs\-\fR\fB\fI<group>\fR\fR\&.
+.sp
+For example: it is possible to run \fBbtrfs sub snaps\fR instead of \fBbtrfs subvolume snapshot\fR\&. But \fBbtrfs file s\fR is not allowed, because \fBfile s\fR may be interpreted both as \fBfilesystem show\fR and as \fBfilesystem sync\fR\&.
+.sp
+If the command name is ambiguous, the list of conflicting options is printed\&.
+.sp
+For an overview of a given command use \fIbtrfs command \-\-help\fR or \fIbtrfs [command\&...] \-\-help \-\-full\fR to print all available options\&.
+.SH "COMMANDS"
+.PP
+\fBbalance\fR
+.RS 4
+Balance btrfs filesystem chunks across single or several devices\&.
+
+See
+\fBbtrfs\-balance\fR(8) for details\&.
+.RE
+.PP
+\fBcheck\fR
+.RS 4
+Do off\-line check on a btrfs filesystem\&.
+
+See
+\fBbtrfs\-check\fR(8) for details\&.
+.RE
+.PP
+\fBdevice\fR
+.RS 4
+Manage devices managed by btrfs, including add/delete/scan and so on\&.
+
+See
+\fBbtrfs\-device\fR(8) for details\&.
+.RE
+.PP
+\fBfilesystem\fR
+.RS 4
+Manage a btrfs filesystem, including label setting/sync and so on\&.
+
+See
+\fBbtrfs\-filesystem\fR(8) for details\&.
+.RE
+.PP
+\fBinspect\-internal\fR
+.RS 4
+Debug tools for developers/hackers\&.
+
+See
+\fBbtrfs\-inspect\-internal\fR(8) for details\&.
+.RE
+.PP
+\fBproperty\fR
+.RS 4
+Get/set a property from/to a btrfs object\&.
+
+See
+\fBbtrfs\-property\fR(8) for details\&.
+.RE
+.PP
+\fBqgroup\fR
+.RS 4
+Manage quota groups (qgroups) for a btrfs filesystem\&.
+
+See
+\fBbtrfs\-qgroup\fR(8) for details\&.
+.RE
+.PP
+\fBquota\fR
+.RS 4
+Manage quota on a btrfs filesystem, like enabling/rescan etc\&.
+
+See
+\fBbtrfs\-quota\fR(8) and
+\fBbtrfs\-qgroup\fR(8) for details\&.
+.RE
+.PP
+\fBreceive\fR
+.RS 4
+Receive subvolume data from stdin/file for restore etc\&.
+
+See
+\fBbtrfs\-receive\fR(8) for details\&.
+.RE
+.PP
+\fBreplace\fR
+.RS 4
+Replace btrfs devices\&.
+
+See
+\fBbtrfs\-replace\fR(8) for details\&.
+.RE
+.PP
+\fBrescue\fR
+.RS 4
+Try to rescue damaged btrfs filesystem\&.
+
+See
+\fBbtrfs\-rescue\fR(8) for details\&.
+.RE
+.PP
+\fBrestore\fR
+.RS 4
+Try to restore files from a damaged btrfs filesystem\&.
+
+See
+\fBbtrfs\-restore\fR(8) for details\&.
+.RE
+.PP
+\fBscrub\fR
+.RS 4
+Scrub a btrfs filesystem\&.
+
+See
+\fBbtrfs\-scrub\fR(8) for details\&.
+.RE
+.PP
+\fBsend\fR
+.RS 4
+Send subvolume data to stdout/file for backup etc\&.
+
+See
+\fBbtrfs\-send\fR(8) for details\&.
+.RE
+.PP
+\fBsubvolume\fR
+.RS 4
+Create/delete/list/manage btrfs subvolume\&.
+
+See
+\fBbtrfs\-subvolume\fR(8) for details\&.
+.RE
+.SH "STANDALONE TOOLS"
+.sp
+New functionality could be provided using a standalone tool\&. If the functionality proves to be useful, then the standalone tool is declared obsolete and its functionality is copied to the main tool\&. Obsolete tools are removed after a long (years) deprecation period\&.
+.sp
+Tools that are still in active use without an equivalent in \fBbtrfs\fR:
+.PP
+\fBbtrfs\-convert\fR
+.RS 4
+in\-place conversion from ext2/3/4 filesystems to btrfs
+.RE
+.PP
+\fBbtrfstune\fR
+.RS 4
+tweak some filesystem properties on an unmounted filesystem
+.RE
+.PP
+\fBbtrfs\-select\-super\fR
+.RS 4
+rescue tool to overwrite primary superblock from a spare copy
+.RE
+.PP
+\fBbtrfs\-find\-root\fR
+.RS 4
+rescue helper to find tree roots in a filesystem
+.RE
+.sp
+Deprecated and obsolete tools:
+.PP
+\fBbtrfs\-debug\-tree\fR
+.RS 4
+moved to
+\fBbtrfs inspect\-internal dump\-tree\fR\&. Removed from source distribution\&.
+.RE
+.PP
+\fBbtrfs\-show\-super\fR
+.RS 4
+moved to
+\fBbtrfs inspect\-internal dump\-super\fR, standalone removed\&.
+.RE
+.PP
+\fBbtrfs\-zero\-log\fR
+.RS 4
+moved to
+\fBbtrfs rescue zero\-log\fR, standalone removed\&.
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfs\fR returns a zero exit status if it succeeds\&. Non\-zero is returned in case of failure\&.
+.SH "AVAILABILITY"
+.sp
+\fBbtrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBbtrfs\-balance\fR(8), \fBbtrfs\-check\fR(8), \fBbtrfs\-convert\fR(8), \fBbtrfs\-device\fR(8), \fBbtrfs\-filesystem\fR(8), \fBbtrfs\-inspect\-internal\fR(8), \fBbtrfs\-property\fR(8), \fBbtrfs\-qgroup\fR(8), \fBbtrfs\-quota\fR(8), \fBbtrfs\-receive\fR(8), \fBbtrfs\-replace\fR(8), \fBbtrfs\-rescue\fR(8), \fBbtrfs\-restore\fR(8), \fBbtrfs\-scrub\fR(8), \fBbtrfs\-send\fR(8), \fBbtrfs\-subvolume\fR(8), \fBbtrfstune\fR(8), \fBmkfs\&.btrfs\fR(8)
diff --git a/Documentation/btrfs.8.gz b/Documentation/btrfs.8.gz
deleted file mode 100644
index 64d35f83..00000000
--- a/Documentation/btrfs.8.gz
+++ /dev/null
diff --git a/Documentation/btrfsck.8 b/Documentation/btrfsck.8
new file mode 100644
index 00000000..393903d8
--- /dev/null
+++ b/Documentation/btrfsck.8
@@ -0,0 +1 @@
+.so man8/btrfs-check.8
diff --git a/Documentation/btrfsck.8.gz b/Documentation/btrfsck.8.gz
deleted file mode 120000
index 453925c7..00000000
--- a/Documentation/btrfsck.8.gz
+++ /dev/null
@@ -1 +0,0 @@
-btrfs-check.8.gz
-\ No newline at end of file
diff --git a/Documentation/btrfstune.8 b/Documentation/btrfstune.8
new file mode 100644
index 00000000..b83d9e9d
--- /dev/null
+++ b/Documentation/btrfstune.8
@@ -0,0 +1,142 @@
+'\" t
+.\"     Title: btrfstune
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "BTRFSTUNE" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+btrfstune \- tune various filesystem parameters
+.SH "SYNOPSIS"
+.sp
+\fBbtrfstune\fR [options] \fI<device>\fR [\fI<device>\fR\&...]
+.SH "DESCRIPTION"
+.sp
+\fBbtrfstune\fR can be used to enable, disable, or set various filesystem parameters\&. The filesystem must be unmounted\&.
+.sp
+The common use case is to enable features that were not enabled at mkfs time\&. Please make sure that you have kernel support for the features\&. You can find a complete list of features and the kernel versions in which they were introduced at \m[blue]\fBhttps://btrfs\&.wiki\&.kernel\&.org/index\&.php/Changelog#By_feature\fR\m[] \&. Also, the manual page \fBmkfs\&.btrfs\fR(8) contains more details about the features\&.
+.sp
+Some of the features could also be enabled on a mounted filesystem by other means\&. Please refer to the \fIFILESYSTEM FEATURES\fR in \fBbtrfs\fR(5)\&.
+.SH "OPTIONS"
+.PP
+\-S \fI<0|1>\fR
+.RS 4
+Enable seeding on a given device\&. Value 1 will enable seeding, 0 will disable it\&.
+
+A seeding filesystem is forced to be mounted read\-only\&. A new device can be added to the filesystem and will capture all writes keeping the seeding device intact\&.
+.RE
+.PP
+\-r
+.RS 4
+(since kernel: 3\&.7)
+.sp
+Enable extended inode refs (hardlink limit per file in a directory is 65536), enabled by mkfs feature
+\fIextref\fR\&.
+.RE
+.PP
+\-x
+.RS 4
+(since kernel: 3\&.10)
+.sp
+Enable skinny metadata extent refs (more efficient representation of extents), enabled by mkfs feature
+\fIskinny\-metadata\fR\&.
+.sp
+All newly created extents will use the new representation\&. To completely switch the entire filesystem, run a full balance of the metadata\&. Please refer to
+\fBbtrfs\-balance\fR(8)\&.
+.RE
+.PP
+\-n
+.RS 4
+(since kernel: 3\&.14)
+.sp
+Enable no\-holes feature (more efficient representation of file holes), enabled by mkfs feature
+\fIno\-holes\fR\&.
+.RE
+.PP
+\-f
+.RS 4
+Allow dangerous changes, e\&.g\&. clear the seeding flag or change fsid\&. Make sure that you are aware of the dangers\&.
+.RE
+.PP
+\-u
+.RS 4
+Change fsid to a randomly generated UUID or continue previous fsid change operation in case it was interrupted\&.
+.RE
+.PP
+\-U \fI<UUID>\fR
+.RS 4
+Change fsid to
+\fIUUID\fR\&.
+.sp
+The
+\fIUUID\fR
+should be a 36\-byte string in
+\fBprintf\fR(3) format
+\fI"%08x\-%04x\-%04x\-%04x\-%012x"\fR\&. If there is a previous unfinished fsid change, it will continue only if the
+\fIUUID\fR
+matches the unfinished one or if you use the option
+\fI\-u\fR\&.
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+.sp
+Cancelling or interrupting a UUID change operation will make the filesystem temporarily unmountable\&. To fix it, rerun \fIbtrfstune \-u\fR to restore the UUID and let it complete\&.
+.sp .5v
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+.sp
+Clearing the seeding flag on a device may be dangerous\&. If a previously\-seeding device is changed, all filesystems that used that device will become unmountable\&. Setting the seeding flag back will not fix that\&. A valid use case is \fIseeding device as a base image\fR\&. Clear the seeding flag, update the filesystem and make it seeding again, provided that it\(cqs OK to throw away all filesystems built on top of the previous base\&.
+.sp .5v
+.RE
+.SH "EXIT STATUS"
+.sp
+\fBbtrfstune\fR returns 0 if no error happened, 1 otherwise\&.
+.SH "COMPATIBILITY NOTE"
+.sp
+This deprecated tool exists for historical reasons but is still in use today\&. Its functionality will be merged to the main tool, at which time \fBbtrfstune\fR will be declared obsolete and scheduled for removal\&.
+.SH "SEE ALSO"
+.sp
+\fBbtrfs\fR(5), \fBbtrfs\-balance\fR(8), \fBmkfs\&.btrfs\fR(8)
diff --git a/Documentation/btrfstune.8.gz b/Documentation/btrfstune.8.gz
deleted file mode 100644
index d9eb5bd5..00000000
--- a/Documentation/btrfstune.8.gz
+++ /dev/null
diff --git a/Documentation/fsck.btrfs.8 b/Documentation/fsck.btrfs.8
new file mode 100644
index 00000000..c5bbcc23
--- /dev/null
+++ b/Documentation/fsck.btrfs.8
@@ -0,0 +1,63 @@
+'\" t
+.\"     Title: fsck.btrfs
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "FSCK\&.BTRFS" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+fsck.btrfs \- do nothing, successfully
+.SH "SYNOPSIS"
+.sp
+\fBfsck\&.btrfs\fR [\-aApy] [\fI<device>\fR\&...]
+.SH "DESCRIPTION"
+.sp
+\fBfsck\&.btrfs\fR is a type of utility that should exist for any filesystem and is called during system setup when the corresponding \fB/etc/fstab\fR entries contain non\-zero value for \fBfs_passno\fR, see \fBfstab\fR(5) for more\&.
+.sp
+Traditional filesystems need to run their respective fsck utility in case the filesystem was not unmounted cleanly and the log needs to be replayed before mount\&. This is not needed for BTRFS\&. You should set fs_passno to 0\&.
+.sp
+If you wish to check the consistency of a BTRFS filesystem or repair a damaged filesystem, see \fBbtrfs\-check\fR(8)\&. By default filesystem consistency is checked, the repair mode is enabled via the \fI\-\-repair\fR option (use with care!)\&.
+.SH "OPTIONS"
+.sp
+The options are all the same and detect if \fBfsck\&.btrfs\fR is executed in non\-interactive mode, in which case it exits with success; otherwise it prints a message about btrfs check\&.
+.SH "EXIT STATUS"
+.sp
+There are two possible exit codes returned:
+.PP
+0
+.RS 4
+No error
+.RE
+.PP
+8
+.RS 4
+Operational error, eg\&. device does not exist
+.RE
+.SH "FILES"
+.sp
+\fB/etc/fstab\fR
+.SH "SEE ALSO"
+.sp
+\fBbtrfs\fR(8), \fBfsck\fR(8), \fBfstab\fR(5)
diff --git a/Documentation/fsck.btrfs.8.gz b/Documentation/fsck.btrfs.8.gz
deleted file mode 100644
index ac34255c..00000000
--- a/Documentation/fsck.btrfs.8.gz
+++ /dev/null
diff --git a/Documentation/mkfs.btrfs.8 b/Documentation/mkfs.btrfs.8
new file mode 100644
index 00000000..fbcd1203
--- /dev/null
+++ b/Documentation/mkfs.btrfs.8
@@ -0,0 +1,626 @@
+'\" t
+.\"     Title: mkfs.btrfs
+.\"    Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author]
+.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
+.\"      Date: 12/05/2018
+.\"    Manual: Btrfs Manual
+.\"    Source: Btrfs v4.19.1
+.\"  Language: English
+.\"
+.TH "MKFS\&.BTRFS" "8" "12/05/2018" "Btrfs v4\&.19\&.1" "Btrfs Manual"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+mkfs.btrfs \- create a btrfs filesystem
+.SH "SYNOPSIS"
+.sp
+\fBmkfs\&.btrfs\fR [options] \fI<device>\fR [\fI<device>\fR\&...]
+.SH "DESCRIPTION"
+.sp
+\fBmkfs\&.btrfs\fR is used to create the btrfs filesystem on a single or multiple devices\&. \fI<device>\fR is typically a block device but can be a file\-backed image as well\&. Multiple devices are grouped by UUID of the filesystem\&.
+.sp
+Before mounting such filesystem, the kernel module must know all the devices either via preceding execution of \fBbtrfs device scan\fR or using the \fBdevice\fR mount option\&. See section \fBMULTIPLE DEVICES\fR for more details\&.
+.SH "OPTIONS"
+.PP
+\fB\-b|\-\-byte\-count \fR\fB\fI<size>\fR\fR
+.RS 4
+Specify the size of the filesystem\&. If this option is not used, then mkfs\&.btrfs uses the entire device space for the filesystem\&.
+.RE
+.PP
+\fB\-d|\-\-data \fR\fB\fI<profile>\fR\fR
+.RS 4
+Specify the profile for the data block groups\&. Valid values are
+\fIraid0\fR,
+\fIraid1\fR,
+\fIraid5\fR,
+\fIraid6\fR,
+\fIraid10\fR,
+\fIsingle\fR
+or
+\fIdup\fR (case does not matter)\&.
+.sp
+See
+\fIDUP PROFILES ON A SINGLE DEVICE\fR
+for more\&.
+.RE
+.PP
+\fB\-m|\-\-metadata \fR\fB\fI<profile>\fR\fR
+.RS 4
+Specify the profile for the metadata block groups\&. Valid values are
+\fIraid0\fR,
+\fIraid1\fR,
+\fIraid5\fR,
+\fIraid6\fR,
+\fIraid10\fR,
+\fIsingle\fR
+or
+\fIdup\fR (case does not matter)\&.
+.sp
+A single device filesystem will default to
+\fIDUP\fR, unless an SSD is detected\&. Then it will default to
+\fIsingle\fR\&. The detection is based on the value of
+\fB/sys/block/DEV/queue/rotational\fR, where
+\fIDEV\fR
+is the short name of the device\&.
+.sp
+Note that the rotational status can be arbitrarily set by the underlying block device driver and may not reflect the true status (network block device, memory\-backed SCSI devices etc)\&. Use the options
+\fI\-\-data/\-\-metadata\fR
+to avoid confusion\&.
+.sp
+See
+\fIDUP PROFILES ON A SINGLE DEVICE\fR
+for more details\&.
+.RE
+.PP
+\fB\-M|\-\-mixed\fR
+.RS 4
+Normally the data and metadata block groups are isolated\&. The
+\fImixed\fR
+mode will remove the isolation and store both types in the same block group type\&. This helps to utilize the free space regardless of the purpose and is suitable for small devices\&. The separate allocation of block groups leads to a situation where the space is reserved for the other block group type, is not available for allocation and can lead to ENOSPC state\&.
+.sp
+The recommended size for the mixed mode is for filesystems less than 1GiB\&. The soft recommendation is to use it for filesystems smaller than 5GiB\&. The mixed mode may lead to degraded performance on larger filesystems, but is otherwise usable, even on multiple devices\&.
+.sp
+The
+\fInodesize\fR
+and
+\fIsectorsize\fR
+must be equal, and the block group types must match\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+versions up to 4\&.2\&.x forced the mixed mode for devices smaller than 1GiB\&. This has been removed in 4\&.3+ as it caused some usability issues\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fB\-l|\-\-leafsize \fR\fB\fI<size>\fR\fR
+.RS 4
+Alias for \-\-nodesize\&. Deprecated\&.
+.RE
+.PP
+\fB\-n|\-\-nodesize \fR\fB\fI<size>\fR\fR
+.RS 4
+Specify the nodesize, the tree block size in which btrfs stores metadata\&. The default value is 16KiB (16384) or the page size, whichever is bigger\&. Must be a multiple of the sectorsize and a power of 2, but not larger than 64KiB (65536)\&. Leafsize always equals nodesize and the options are aliases\&.
+.sp
+Smaller node size increases fragmentation but leads to taller b\-trees which in turn leads to lower locking contention\&. Higher node sizes give better packing and less fragmentation at the cost of more expensive memory operations while updating the metadata blocks\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+versions up to 3\&.11 set the nodesize to 4k\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fB\-s|\-\-sectorsize \fR\fB\fI<size>\fR\fR
+.RS 4
+Specify the sectorsize, the minimum data block allocation unit\&.
+.sp
+The default value is the page size and is autodetected\&. If the sectorsize differs from the page size, the created filesystem may not be mountable by the kernel\&. Therefore it is not recommended to use this option unless you are going to mount it on a system with the appropriate page size\&.
+.RE
+.PP
+\fB\-L|\-\-label \fR\fB\fI<string>\fR\fR
+.RS 4
+Specify a label for the filesystem\&. The
+\fIstring\fR
+should be less than 256 bytes and must not contain newline characters\&.
+.RE
+.PP
+\fB\-K|\-\-nodiscard\fR
+.RS 4
+Do not perform whole device TRIM operation on devices that are capable of that\&. This does not affect discard/trim operation when the filesystem is mounted\&. Please see the mount option
+\fIdiscard\fR
+for that in
+\fBbtrfs\fR(5)\&.
+.RE
+.PP
+\fB\-r|\-\-rootdir \fR\fB\fI<rootdir>\fR\fR
+.RS 4
+Populate the toplevel subvolume with files from
+\fIrootdir\fR\&. This does not require root permissions to write the new files or to mount the filesystem\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+This option may enlarge the image or file to ensure it\(cqs big enough to contain the files from
+\fIrootdir\fR\&. Since version 4\&.14\&.1 the filesystem size is not minimized\&. Please see option
+\fI\-\-shrink\fR
+if you need that functionality\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fB\-\-shrink\fR
+.RS 4
+Shrink the filesystem to its minimal size, only works with
+\fI\-\-rootdir\fR
+option\&.
+.sp
+If the destination is a regular file, this option will also truncate the file to the minimal size\&. Otherwise it will reduce the filesystem available space\&. Extra space will not be usable unless the filesystem is mounted and resized using
+\fIbtrfs filesystem resize\fR\&.
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+prior to version 4\&.14\&.1, the shrinking was done automatically\&.
+.sp .5v
+.RE
+.RE
+.PP
+\fB\-O|\-\-features \fR\fB\fI<feature1>\fR\fR\fB[,\fR\fB\fI<feature2>\fR\fR\fB\&...]\fR
+.RS 4
+A list of filesystem features turned on at mkfs time\&. Not all features are supported by old kernels\&. To disable a feature, prefix it with
+\fI^\fR\&.
+.sp
+See section
+\fBFILESYSTEM FEATURES\fR
+for more details\&. To see all available features that mkfs\&.btrfs supports run:
+.sp
+\fBmkfs\&.btrfs \-O list\-all\fR
+.RE
+.PP
+\fB\-f|\-\-force\fR
+.RS 4
+Forcibly overwrite the block devices when an existing filesystem is detected\&. By default, mkfs\&.btrfs will utilize
+\fIlibblkid\fR
+to check for any known filesystem on the devices\&. Alternatively you can use the
+\fBwipefs\fR
+utility to clear the devices\&.
+.RE
+.PP
+\fB\-q|\-\-quiet\fR
+.RS 4
+Print only error or warning messages\&. Options \-\-features or \-\-help are unaffected\&.
+.RE
+.PP
+\fB\-U|\-\-uuid \fR\fB\fI<UUID>\fR\fR
+.RS 4
+Create the filesystem with the given
+\fIUUID\fR\&. The UUID must not exist on any filesystem currently present\&.
+.RE
+.PP
+\fB\-V|\-\-version\fR
+.RS 4
+Print the
+\fBmkfs\&.btrfs\fR
+version and exit\&.
+.RE
+.PP
+\fB\-\-help\fR
+.RS 4
+Print help\&.
+.RE
+.PP
+\fB\-A|\-\-alloc\-start \fR\fB\fI<offset>\fR\fR
+.RS 4
+\fBdeprecated, will be removed\fR
+(An option to help debugging chunk allocator\&.) Specify the (physical) offset from the start of the device at which allocations start\&. The default value is zero\&.
+.RE
+.SH "SIZE UNITS"
+.sp
+The default unit is \fIbyte\fR\&. All size parameters accept suffixes in the 1024 base\&. The recognized suffixes are: \fIk\fR, \fIm\fR, \fIg\fR, \fIt\fR, \fIp\fR, \fIe\fR, both uppercase and lowercase\&.
+.SH "MULTIPLE DEVICES"
+.sp
+Before mounting a multiple device filesystem, the kernel module must know the association of the block devices that are attached to the filesystem UUID\&.
+.sp
+There is typically no action needed from the user\&. On a system that utilizes a udev\-like daemon, any new block device is automatically registered\&. The rules call \fBbtrfs device scan\fR\&.
+.sp
+The same command can be used to trigger the device scanning if the btrfs kernel module is reloaded (naturally all previous information about the device registration is lost)\&.
+.sp
+Another possibility is to use the mount option \fBdevice\fR to specify the list of devices to scan at the time of mount\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# mount \-o device=/dev/sdb,device=/dev/sdc /dev/sda /mnt
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+that this means only scanning; if the devices do not exist in the system, mount will fail anyway\&. This can happen on systems without initramfs/initrd and with a root partition created with RAID1/10/5/6 profiles\&. The mount action can happen before all block devices are discovered\&. The waiting is usually done on the initramfs/initrd systems\&.
+.sp .5v
+.RE
+.sp
+As of kernel 4\&.14, RAID5/6 is still considered experimental and shouldn\(cqt be employed for production use\&.
+.SH "FILESYSTEM FEATURES"
+.sp
+Features that can be enabled during creation time\&. See also \fBbtrfs\fR(5) section \fIFILESYSTEM FEATURES\fR\&.
+.PP
+\fBmixed\-bg\fR
+.RS 4
+(kernel support since 2\&.6\&.37)
+.sp
+mixed data and metadata block groups, also set by option
+\fI\-\-mixed\fR
+.RE
+.PP
+\fBextref\fR
+.RS 4
+(default since btrfs\-progs 3\&.12, kernel support since 3\&.7)
+.sp
+increased hardlink limit per file in a directory to 65536, older kernels supported a varying number of hardlinks depending on the sum of all file name sizes that can be stored into one metadata block
+.RE
+.PP
+\fBraid56\fR
+.RS 4
+(kernel support since 3\&.9)
+.sp
+extended format for RAID5/6, also enabled if raid5 or raid6 block groups are selected
+.RE
+.PP
+\fBskinny\-metadata\fR
+.RS 4
+(default since btrfs\-progs 3\&.18, kernel support since 3\&.10)
+.sp
+reduced\-size metadata for extent references, saves a few percent of metadata
+.RE
+.PP
+\fBno\-holes\fR
+.RS 4
+(kernel support since 3\&.14)
+.sp
+improved representation of file extents where holes are not explicitly stored as an extent, saves a few percent of metadata if sparse files are used
+.RE
+.SH "BLOCK GROUPS, CHUNKS, RAID"
+.sp
+The high\-level organizational units of a filesystem are block groups of three types: data, metadata and system\&.
+.PP
+\fBDATA\fR
+.RS 4
+store data blocks and nothing else
+.RE
+.PP
+\fBMETADATA\fR
+.RS 4
+store internal metadata in b\-trees, can store file data if they fit into the inline limit
+.RE
+.PP
+\fBSYSTEM\fR
+.RS 4
+store structures that describe the mapping between the physical devices and the linear logical space representing the filesystem
+.RE
+.sp
+Other terms commonly used:
+.PP
+\fBblock group\fR, \fBchunk\fR
+.RS 4
+a logical range of space of a given profile, stores data, metadata or both; sometimes the terms are used interchangeably
+.sp
+A typical size of metadata block group is 256MiB (filesystem smaller than 50GiB) and 1GiB (larger than 50GiB), for data it\(cqs 1GiB\&. The system block group size is a few megabytes\&.
+.RE
+.PP
+\fBRAID\fR
+.RS 4
+a block group profile type that utilizes RAID\-like features on multiple devices: striping, mirroring, parity
+.RE
+.PP
+\fBprofile\fR
+.RS 4
+when used in connection with block groups refers to the allocation strategy and constraints, see the section
+\fIPROFILES\fR
+for more details
+.RE
+.SH "PROFILES"
+.sp
+There are the following block group types available:
+.TS
+allbox tab(:);
+ct c s s ct
+^ c c ct ^
+ct ct ct ct ct
+ct ct ct ct ct
+ct ct ct ct ct
+ct ct ct ct ct
+ct ct ct ct ct
+ct ct ct ct ct
+ct ct ct ct ct.
+T{
+.sp
+\fBProfile\fR
+T}:T{
+.sp
+\fBRedundancy\fR
+T}:T{
+.sp
+\fBMin/max devices\fR
+T}
+:T{
+.sp
+\fBCopies\fR
+T}:T{
+.sp
+\fBParity\fR
+T}:T{
+.sp
+\fBStriping\fR
+T}:
+T{
+.sp
+single
+T}:T{
+.sp
+1
+T}:T{
+.sp
+T}:T{
+.sp
+T}:T{
+.sp
+1/any
+T}
+T{
+.sp
+DUP
+T}:T{
+.sp
+2 / 1 device
+T}:T{
+.sp
+T}:T{
+.sp
+T}:T{
+.sp
+1/any ^(see note 1)
+T}
+T{
+.sp
+RAID0
+T}:T{
+.sp
+T}:T{
+.sp
+T}:T{
+.sp
+1 to N
+T}:T{
+.sp
+2/any
+T}
+T{
+.sp
+RAID1
+T}:T{
+.sp
+2
+T}:T{
+.sp
+T}:T{
+.sp
+T}:T{
+.sp
+2/any
+T}
+T{
+.sp
+RAID10
+T}:T{
+.sp
+2
+T}:T{
+.sp
+T}:T{
+.sp
+1 to N
+T}:T{
+.sp
+4/any
+T}
+T{
+.sp
+RAID5
+T}:T{
+.sp
+1
+T}:T{
+.sp
+1
+T}:T{
+.sp
+2 to N \- 1
+T}:T{
+.sp
+2/any ^(see note 2)
+T}
+T{
+.sp
+RAID6
+T}:T{
+.sp
+1
+T}:T{
+.sp
+2
+T}:T{
+.sp
+3 to N \- 2
+T}:T{
+.sp
+3/any ^(see note 3)
+T}
+.TE
+.sp 1
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBWarning\fR
+.ps -1
+.br
+.sp
+It\(cqs not recommended to build btrfs with RAID0/1/10/5/6 profiles on partitions from the same device\&. Neither redundancy nor performance will be improved\&.
+.sp .5v
+.RE
+.sp
+\fINote 1:\fR DUP may exist on more than 1 device if it starts on a single device and another one is added\&. Since version 4\&.5\&.1, \fBmkfs\&.btrfs\fR will let you create DUP on multiple devices\&.
+.sp
+\fINote 2:\fR It\(cqs not recommended to use 2 devices with RAID5\&. In that case, parity stripe will contain the same data as the data stripe, making RAID5 degraded to RAID1 with more overhead\&.
+.sp
+\fINote 3:\fR It\(cqs also not recommended to use 3 devices with RAID6, unless you want to get effectively 3 copies in a RAID1\-like manner (but not exactly that)\&. N\-copies RAID1 is not implemented\&.
+.SH "DUP PROFILES ON A SINGLE DEVICE"
+.sp
+The mkfs utility will let the user create a filesystem with profiles that write the logical blocks to 2 physical locations\&. Whether there are really 2 physical copies highly depends on the underlying device type\&.
+.sp
+For example, an SSD drive can remap the blocks internally to a single copy\(emthus deduplicating them\&. This negates the purpose of increased redundancy and just wastes filesystem space without providing the expected level of redundancy\&.
+.sp
+The duplicated data/metadata may still be useful to statistically improve the chances on a device that might perform some internal optimizations\&. The actual details are not usually disclosed by vendors\&. For example we could expect that not all blocks get deduplicated\&. This will provide a non\-zero probability of recovery compared to a zero chance if the single profile is used\&. The user should make the tradeoff decision\&. The deduplication in SSDs is thought to be widely available so the reason behind the mkfs default is to not give a false sense of redundancy\&.
+.sp
+As another example, the widely used USB flash or SD cards use a translation layer between the logical and physical view of the device\&. The data lifetime may be affected by frequent plugging\&. The memory cells could get damaged, hopefully not destroying both copies of particular data in case of DUP\&.
+.sp
+The wear levelling techniques can also lead to reduced redundancy, even if the device does not do any deduplication\&. The controllers may put data written in a short timespan into the same physical storage unit (cell, block etc)\&. In case this unit dies, both copies are lost\&. BTRFS does not add any artificial delay between metadata writes\&.
+.sp
+The traditional rotational hard drives usually fail at the sector level\&.
+.sp
+In any case, a device that starts to misbehave and repairs from the DUP copy should be replaced! \fBDUP is not backup\fR\&.
+.SH "KNOWN ISSUES"
+.sp
+\fBSMALL FILESYSTEMS AND LARGE NODESIZE\fR
+.sp
+The combination of small filesystem size and large nodesize is not recommended in general and can lead to various ENOSPC\-related issues during mount time or runtime\&.
+.sp
+Since mixed block group creation is optional, we allow small filesystem instances with differing values for \fIsectorsize\fR and \fInodesize\fR to be created and could end up in the following situation:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# mkfs\&.btrfs \-f \-n 65536 /dev/loop0
+btrfs\-progs v3\&.19\-rc2\-405\-g976307c
+See http://btrfs\&.wiki\&.kernel\&.org for more information\&.
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+Performing full device TRIM (512\&.00MiB) \&.\&.\&.
+Label:              (null)
+UUID:               49fab72e\-0c8b\-466b\-a3ca\-d1bfe56475f0
+Node size:          65536
+Sector size:        4096
+Filesystem size:    512\&.00MiB
+Block group profiles:
+  Data:             single            8\&.00MiB
+  Metadata:         DUP              40\&.00MiB
+  System:           DUP              12\&.00MiB
+SSD detected:       no
+Incompat features:  extref, skinny\-metadata
+Number of devices:  1
+Devices:
+  ID        SIZE  PATH
+   1   512\&.00MiB  /dev/loop0
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# mount /dev/loop0 /mnt/
+mount: mount /dev/loop0 on /mnt failed: No space left on device
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The ENOSPC occurs during the creation of the UUID tree\&. This is caused by large metadata blocks and space reservation strategy that allocates more than can fit into the filesystem\&.
+.SH "AVAILABILITY"
+.sp
+\fBmkfs\&.btrfs\fR is part of btrfs\-progs\&. Please refer to the btrfs wiki \m[blue]\fBhttp://btrfs\&.wiki\&.kernel\&.org\fR\m[] for further details\&.
+.SH "SEE ALSO"
+.sp
+\fBbtrfs\fR(5), \fBbtrfs\fR(8), \fBwipefs\fR(8)
diff --git a/Documentation/mkfs.btrfs.8.gz b/Documentation/mkfs.btrfs.8.gz
deleted file mode 100644
index e2fb07e8..00000000
--- a/Documentation/mkfs.btrfs.8.gz
+++ /dev/null
diff --git a/INSTALL b/INSTALL
index 24d6e24f..1cb01e87 100644
--- a/INSTALL
+++ b/INSTALL
@@ -62,7 +62,7 @@ documented in the Makefile.
 The build utilizes autotools, dependencies for generating the configure
 scripts are:
 
-* autconf, autoheader
+* autoconf, autoheader
 * automake, aclocal
 * pkg-config
 
diff --git a/Makefile b/Makefile
index 958bc546..ee1fc139 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 #
 # Basic build targets:
 #   all		all main tools and the shared library
-#   static      build static bnaries, requires static version of the libraries
+#   static      build static binaries, requires static version of the libraries
 #   test        run the full testsuite
 #   install     install to default location (/usr/local)
 #   clean       clean built binaries (not the documentation)
@@ -116,7 +116,8 @@ objects = ctree.o disk-io.o kernel-lib/radix-tree.o extent-tree.o print-tree.o \
 	  qgroup.o free-space-cache.o kernel-lib/list_sort.o props.o \
 	  kernel-shared/ulist.o qgroup-verify.o backref.o string-table.o task-utils.o \
 	  inode.o file.o find-root.o free-space-tree.o help.o send-dump.o \
-	  fsfeatures.o kernel-lib/tables.o kernel-lib/raid56.o transaction.o
+	  fsfeatures.o kernel-lib/tables.o kernel-lib/raid56.o transaction.o \
+	  delayed-ref.o
 cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \
 	       cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \
 	       cmds-quota.o cmds-qgroup.o cmds-replace.o check/main.o \
@@ -206,23 +207,31 @@ endif
 
 MAKEOPTS = --no-print-directory Q=$(Q)
 
-# build all by default
-progs = $(progs_install) btrfsck btrfs-corrupt-block
 
-# install only selected
+# Programs to install.
 progs_install = btrfs mkfs.btrfs btrfs-map-logical btrfs-image \
-	btrfs-find-root btrfstune \
-	btrfs-select-super
+		btrfs-find-root btrfstune btrfs-select-super
 
-# other tools, not built by default
-progs_extra = btrfs-fragments
+# Programs to build.
+progs_build = $(progs_install) btrfsck btrfs-corrupt-block
 
-progs_static = $(foreach p,$(progs),$(p).static)
+# All programs. Use := instead of = so that this is expanded before we reassign
+# progs_build below.
+progs := $(progs_build) btrfs-convert btrfs-fragments
 
 ifneq ($(DISABLE_BTRFSCONVERT),1)
 progs_install += btrfs-convert
 endif
 
+# Static programs to build. Use := instead of = because `make static` should
+# still build everything even if --disable-programs was passed to ./configure.
+progs_static := $(foreach p,$(progs_build),$(p).static)
+
+ifneq ($(BUILD_PROGRAMS),1)
+progs_install =
+progs_build =
+endif
+
 # external libs required by various binaries; for btrfs-foo,
 # specify btrfs_foo_libs = <list of libs>; see $($(subst...)) rules below
 btrfs_convert_cflags = -DBTRFSCONVERT_EXT2=$(BTRFSCONVERT_EXT2)
@@ -233,7 +242,7 @@ cmds_restore_cflags = -DBTRFSRESTORE_ZSTD=$(BTRFSRESTORE_ZSTD)
 CHECKER_FLAGS += $(btrfs_convert_cflags)
 
 # collect values of the variables above
-standalone_deps = $(foreach dep,$(patsubst %,%_objects,$(subst -,_,$(filter btrfs-%, $(progs) $(progs_extra)))),$($(dep)))
+standalone_deps = $(foreach dep,$(patsubst %,%_objects,$(subst -,_,$(filter btrfs-%, $(progs)))),$($(dep)))
 
 SUBDIRS =
 BUILDDIRS = $(patsubst %,build-%,$(SUBDIRS))
@@ -266,6 +275,13 @@ libs_shared = libbtrfs.so.0.1 libbtrfsutil.so.$(libbtrfsutil_version)
 libs_static = libbtrfs.a libbtrfsutil.a
 libs = $(libs_shared) $(libs_static)
 lib_links = libbtrfs.so.0 libbtrfs.so libbtrfsutil.so.$(libbtrfsutil_major) libbtrfsutil.so
+libs_build =
+ifeq ($(BUILD_SHARED_LIBRARIES),1)
+libs_build += $(libs_shared) $(lib_links)
+endif
+ifeq ($(BUILD_STATIC_LIBRARIES),1)
+libs_build += $(libs_static)
+endif
 
 # make C=1 to enable sparse
 ifdef C
@@ -302,7 +318,7 @@ endif
 	$(Q)$(CC) $(STATIC_CFLAGS) -c $< -o $@ $($(subst -,_,$(@:%.static.o=%)-cflags)) \
 		$($(subst -,_,btrfs-$(@:%/$(notdir $@)=%)-cflags))
 
-all: $(progs) $(libs) $(lib_links) $(BUILDDIRS)
+all: $(progs_build) $(libs_build) $(BUILDDIRS)
 ifeq ($(PYTHON_BINDINGS),1)
 all: libbtrfsutil_python
 endif
@@ -495,10 +511,6 @@ btrfs-convert.static: $(static_convert_objects) $(static_objects) $(static_libbt
 	@echo "    [LD]     $@"
 	$(Q)$(CC) -o $@ $^ $(STATIC_LDFLAGS) $(btrfs_convert_libs) $(STATIC_LIBS)
 
-dir-test: dir-test.o $(objects) $(libs)
-	@echo "    [LD]     $@"
-	$(Q)$(CC) -o $@ $^ $(LDFLAGS) $(LIBS)
-
 quick-test: quick-test.o $(objects) $(libs)
 	@echo "    [LD]     $@"
 	$(Q)$(CC) -o $@ $^ $(LDFLAGS) $(LIBS)
@@ -573,9 +585,8 @@ test-build-pre:
 test-build-real:
 	$(MAKE) $(MAKEOPTS) library-test
 	-$(MAKE) $(MAKEOPTS) library-test.static
-	$(MAKE) $(MAKEOPTS) -j 8 all
+	$(MAKE) $(MAKEOPTS) -j 8 $(progs) $(libs) $(lib_links) $(BUILDDIRS)
 	-$(MAKE) $(MAKEOPTS) -j 8 static
-	$(MAKE) $(MAKEOPTS) -j 8 $(progs_extra)
 
 manpages:
 	$(Q)$(MAKE) $(MAKEOPTS) -C Documentation
@@ -603,11 +614,11 @@ clean: $(CLEANDIRS)
 		image/*.o image/*.o.d \
 		convert/*.o convert/*.o.d \
 		mkfs/*.o mkfs/*.o.d check/*.o check/*.o.d \
-	      dir-test ioctl-test quick-test library-test library-test-static \
+	      ioctl-test quick-test library-test library-test-static \
               mktables btrfs.static mkfs.btrfs.static fssum \
 	      $(check_defs) \
 	      $(libs) $(lib_links) \
-	      $(progs_static) $(progs_extra) \
+	      $(progs_static) \
 	      libbtrfsutil/*.o libbtrfsutil/*.o.d
 ifeq ($(PYTHON_BINDINGS),1)
 	$(Q)cd libbtrfsutil/python; \
@@ -620,7 +631,7 @@ clean-doc:
 
 clean-gen:
 	@echo "Cleaning Generated Files"
-	$(Q)$(RM) -rf -- version.h config.status config.cache connfig.log \
+	$(Q)$(RM) -rf -- version.h config.status config.cache config.log \
 		configure.lineno config.status.lineno Makefile.inc \
 		Documentation/Makefile tags \
 		cscope.files cscope.out cscope.in.out cscope.po.out \
@@ -631,27 +642,33 @@ $(CLEANDIRS):
 	@echo "Cleaning $(patsubst clean-%,%,$@)"
 	$(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst clean-%,%,$@) clean
 
-install: $(libs) $(progs_install) $(INSTALLDIRS)
+install: $(libs_build) $(progs_install) $(INSTALLDIRS)
+ifeq ($(BUILD_PROGRAMS),1)
 	$(INSTALL) -m755 -d $(DESTDIR)$(bindir)
 	$(INSTALL) $(progs_install) $(DESTDIR)$(bindir)
 	$(INSTALL) fsck.btrfs $(DESTDIR)$(bindir)
 	# btrfsck is a link to btrfs in the src tree, make it so for installed file as well
 	$(LN_S) -f btrfs $(DESTDIR)$(bindir)/btrfsck
+ifneq ($(udevdir),)
+	$(INSTALL) -m755 -d $(DESTDIR)$(udevruledir)
+	$(INSTALL) -m644 $(udev_rules) $(DESTDIR)$(udevruledir)
+endif
+endif
+ifneq ($(libs_build),)
 	$(INSTALL) -m755 -d $(DESTDIR)$(libdir)
-	$(INSTALL) $(libs) $(DESTDIR)$(libdir)
+	$(INSTALL) $(libs_build) $(DESTDIR)$(libdir)
+ifeq ($(BUILD_SHARED_LIBRARIES),1)
 	cp -d $(lib_links) $(DESTDIR)$(libdir)
+endif
 	$(INSTALL) -m755 -d $(DESTDIR)$(incdir)/btrfs
 	$(INSTALL) -m644 $(libbtrfs_headers) $(DESTDIR)$(incdir)/btrfs
 	$(INSTALL) -m644 libbtrfsutil/btrfsutil.h $(DESTDIR)$(incdir)
-ifneq ($(udevdir),)
-	$(INSTALL) -m755 -d $(DESTDIR)$(udevruledir)
-	$(INSTALL) -m644 $(udev_rules) $(DESTDIR)$(udevruledir)
 endif
 
 ifeq ($(PYTHON_BINDINGS),1)
 install_python: libbtrfsutil_python
 	$(Q)cd libbtrfsutil/python; \
-		$(PYTHON) setup.py install --install-layout=deb --skip-build $(if $(DESTDIR),--root $(DESTDIR)) --prefix $(prefix)
+		$(PYTHON) setup.py install --skip-build $(if $(DESTDIR),--root $(DESTDIR)) --prefix $(prefix)
 
 .PHONY: install_python
 endif
diff --git a/Makefile.inc.in b/Makefile.inc.in
index fb324614..a86c528e 100644
--- a/Makefile.inc.in
+++ b/Makefile.inc.in
@@ -12,6 +12,9 @@ RMDIR = @RMDIR@
 INSTALL = @INSTALL@
 DISABLE_DOCUMENTATION = @DISABLE_DOCUMENTATION@
 DISABLE_BTRFSCONVERT = @DISABLE_BTRFSCONVERT@
+BUILD_PROGRAMS = @BUILD_PROGRAMS@
+BUILD_SHARED_LIBRARIES = @BUILD_SHARED_LIBRARIES@
+BUILD_STATIC_LIBRARIES = @BUILD_STATIC_LIBRARIES@
 BTRFSCONVERT_EXT2 = @BTRFSCONVERT_EXT2@
 BTRFSCONVERT_REISERFS = @BTRFSCONVERT_REISERFS@
 BTRFSRESTORE_ZSTD = @BTRFSRESTORE_ZSTD@
diff --git a/README.md b/README.md
index 7534ccb5..c1e4fca0 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,10 @@ This repository hosts following utilities:
 * **btrfs** &mdash; the main administration tool ([manual page](https://btrfs.wiki.kernel.org/index.php/Manpage/btrfs))
 * **mkfs.btrfs** &mdash; utility to create the filesystem ([manual page](https://btrfs.wiki.kernel.org/index.php/Manpage/mkfs.btrfs))
 
-See INSTALL for build instructions and [tests/README.md](tests/README.md) for
+The C and Python 3 bindings are provided by an LGPL library, **libbtrfsutil**; see
+[libbtrfsutil/README.md](libbtrfsutil/README.md) for more.
+
+See [INSTALL](INSTALL) for build instructions and [tests/README.md](tests/README.md) for
 testing information.
 
 Release cycle
@@ -82,7 +85,7 @@ Documentation updates
 ---------------------
 
 Documentation fixes or updates do not need much explanation so sticking to the
-code rules in the previous section is not necessary. Github pull requests are
+code rules in the previous section is not necessary. GitHub pull requests are
 OK, patches could be sent to me directly and not required to be also in the
 mailinglist. Pointing out typos via IRC also works, although might get
 accidentally lost in the noise.
diff --git a/VERSION b/VERSION
index 7171442b..9b56abd2 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v4.17
+v4.19.1
diff --git a/btrfs-completion b/btrfs-completion
index ae683f4e..6ae57d1b 100644
--- a/btrfs-completion
+++ b/btrfs-completion
@@ -4,13 +4,6 @@
 # (http://lists.alioth.debian.org/pipermail/bash-completion-devel/2013-June/004868.html)
 # edited by John C F <john.ch.fr at gmail.com> on 2015-02-02
 
-_btrfs_devs()
-{
-	local DEVS
-	DEVS=''; while read dev; do DEVS+="$dev "; done < <(lsblk -pnro name)
-	COMPREPLY+=( $( compgen -W "$DEVS" -- "$cur" ) )
-}
-
 _btrfs_mnts()
 {
 	local MNTS
@@ -68,14 +61,14 @@ _btrfs()
 				opts="$commands_scrub"
 				;;
 			check)
-				_btrfs_devs
+				_filedir
 				return 0
 				;;
 			rescue)
 				opts="$commands_rescue"
 				;;
 			restore)
-				_btrfs_devs
+				_filedir
 				return 0
 				;;
 			inspect-internal)
@@ -120,13 +113,13 @@ _btrfs()
 						;;
 					label)
 						_btrfs_mnts
-						_btrfs_devs
+						_filedir
 						return 0
 						;;
 				esac
 				;;
 			device|rescue)
-				_btrfs_devs
+				_filedir
 				return 0
 				;;
 			inspect-internal)
@@ -135,7 +128,7 @@ _btrfs()
 						_btrfs_mnts
 						return 0
 						;;
-					rootid)
+					dump-tree|dump-super|rootid|inode-resolve)
 						_filedir
 						return 0
 						;;
@@ -156,7 +149,7 @@ _btrfs()
 						return 0
 						;;
 					start)
-						_btrfs_devs
+						_filedir
 						return 0
 						;;
 				esac
diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c
index 4fbea26c..ba467921 100644
--- a/btrfs-corrupt-block.c
+++ b/btrfs-corrupt-block.c
@@ -70,9 +70,9 @@ static int debug_corrupt_block(struct extent_buffer *eb,
 		if (!copy || mirror_num == copy) {
 			ret = read_extent_from_disk(eb, 0, eb->len);
 			if (ret < 0) {
-				error("cannot read eb bytenr %llu: %s",
-						(unsigned long long)eb->dev_bytenr,
-						strerror(-ret));
+				errno = -ret;
+				error("cannot read eb bytenr %llu: %m",
+					(unsigned long long)eb->dev_bytenr);
 				return ret;
 			}
 			printf("corrupting %llu copy %d\n", eb->start,
@@ -80,9 +80,9 @@ static int debug_corrupt_block(struct extent_buffer *eb,
 			memset(eb->data, 0, eb->len);
 			ret = write_extent_to_disk(eb);
 			if (ret < 0) {
-				error("cannot write eb bytenr %llu: %s",
-						(unsigned long long)eb->dev_bytenr,
-						strerror(-ret));
+				errno = -ret;
+				error("cannot write eb bytenr %llu: %m",
+					(unsigned long long)eb->dev_bytenr);
 				return ret;
 			}
 			fsync(eb->fd);
@@ -926,7 +926,7 @@ static int delete_csum(struct btrfs_root *root, u64 bytenr, u64 bytes)
 		return PTR_ERR(trans);
 	}
 
-	ret = btrfs_del_csums(trans, root, bytenr, bytes);
+	ret = btrfs_del_csums(trans, bytenr, bytes);
 	if (ret)
 		fprintf(stderr, "Error deleting csums %d\n", ret);
 	btrfs_commit_transaction(trans, root);
diff --git a/btrfs-find-root.c b/btrfs-find-root.c
index e2d2e70c..4419873a 100644
--- a/btrfs-find-root.c
+++ b/btrfs-find-root.c
@@ -204,8 +204,8 @@ int main(int argc, char **argv)
 			       &filter.match_gen, &filter.match_level);
 	ret = btrfs_find_root_search(fs_info, &filter, &result, &found);
 	if (ret < 0) {
-		fprintf(stderr, "Fail to search the tree root: %s\n",
-			strerror(-ret));
+		errno = -ret;
+		fprintf(stderr, "Fail to search the tree root: %m\n");
 		goto out;
 	}
 	if (ret > 0) {
diff --git a/btrfs-list.c b/btrfs-list.c
index e01c5899..adda45ab 100644
--- a/btrfs-list.c
+++ b/btrfs-list.c
@@ -44,6 +44,16 @@ struct root_lookup {
 	struct rb_root root;
 };
 
+static inline struct root_info *to_root_info(struct rb_node *node)
+{
+	return rb_entry(node, struct root_info, rb_node);
+}
+
+static inline struct root_info *to_root_info_sorted(struct rb_node *node)
+{
+	return rb_entry(node, struct root_info, sort_node);
+}
+
 static struct {
 	char	*name;
 	char	*column_name;
@@ -309,7 +319,7 @@ static int sort_tree_insert(struct root_lookup *sort_tree,
 
 	while (*p) {
 		parent = *p;
-		curr = rb_entry(parent, struct root_info, sort_node);
+		curr = to_root_info_sorted(parent);
 
 		ret = sort_comp(ins, curr, comp_set);
 		if (ret < 0)
@@ -340,7 +350,7 @@ static int root_tree_insert(struct root_lookup *root_tree,
 
 	while(*p) {
 		parent = *p;
-		curr = rb_entry(parent, struct root_info, rb_node);
+		curr = to_root_info(parent);
 
 		ret = comp_entry_with_rootid(ins, curr, 0);
 		if (ret < 0)
@@ -371,7 +381,7 @@ static struct root_info *root_tree_search(struct root_lookup *root_tree,
 	tmp.root_id = root_id;
 
 	while(n) {
-		entry = rb_entry(n, struct root_info, rb_node);
+		entry = to_root_info(n);
 
 		ret = comp_entry_with_rootid(&tmp, entry, 0);
 		if (ret < 0)
@@ -505,8 +515,9 @@ static int add_root(struct root_lookup *root_lookup,
 
 	ret = root_tree_insert(root_lookup, ri);
 	if (ret < 0) {
-		error("failed to insert subvolume %llu to tree: %s",
-				(unsigned long long)root_id, strerror(-ret));
+		errno = -ret;
+		error("failed to insert subvolume %llu to tree: %m",
+				(unsigned long long)root_id);
 		exit(1);
 	}
 	return 0;
@@ -528,7 +539,7 @@ static void free_root_info(struct rb_node *node)
 {
 	struct root_info *ri;
 
-	ri = rb_entry(node, struct root_info, rb_node);
+	ri = to_root_info(node);
 	free(ri->name);
 	free(ri->path);
 	free(ri->full_path);
@@ -1268,7 +1279,7 @@ static void filter_and_sort_subvol(struct root_lookup *all_subvols,
 
 	n = rb_last(&all_subvols->root);
 	while (n) {
-		entry = rb_entry(n, struct root_info, rb_node);
+		entry = to_root_info(n);
 
 		ret = resolve_root(all_subvols, entry, top_id);
 		if (ret == -ENOENT) {
@@ -1300,7 +1311,7 @@ static int list_subvol_fill_paths(int fd, struct root_lookup *root_lookup)
 	while (n) {
 		struct root_info *entry;
 		int ret;
-		entry = rb_entry(n, struct root_info, rb_node);
+		entry = to_root_info(n);
 		ret = lookup_ino_path(fd, entry);
 		if (ret && ret != -ENOENT)
 			return ret;
@@ -1467,7 +1478,7 @@ static void print_all_subvol_info(struct root_lookup *sorted_tree,
 
 	n = rb_first(&sorted_tree->root);
 	while (n) {
-		entry = rb_entry(n, struct root_info, sort_node);
+		entry = to_root_info_sorted(n);
 
 		/* The toplevel subvolume is not listed by default */
 		if (entry->root_id == BTRFS_FS_TREE_OBJECTID)
@@ -1558,7 +1569,7 @@ int btrfs_get_toplevel_subvol(int fd, struct root_info *the_ri)
 		return ret;
 
 	rbn = rb_first(&rl.root);
-	ri = rb_entry(rbn, struct root_info, rb_node);
+	ri = to_root_info(rbn);
 
 	if (ri->root_id != BTRFS_FS_TREE_OBJECTID)
 		return -ENOENT;
@@ -1590,7 +1601,7 @@ int btrfs_get_subvol(int fd, struct root_info *the_ri)
 
 	rbn = rb_first(&rl.root);
 	while(rbn) {
-		ri = rb_entry(rbn, struct root_info, rb_node);
+		ri = to_root_info(rbn);
 		rr = resolve_root(&rl, ri, root_id);
 		if (rr == -ENOENT) {
 			ret = -ENOENT;
@@ -1814,7 +1825,7 @@ char *btrfs_list_path_for_root(int fd, u64 root)
 	while (n) {
 		struct root_info *entry;
 
-		entry = rb_entry(n, struct root_info, rb_node);
+		entry = to_root_info(n);
 		ret = resolve_root(&root_lookup, entry, top_id);
 		if (ret == -ENOENT && entry->root_id == root) {
 			ret_path = NULL;
diff --git a/btrfs-list.h b/btrfs-list.h
index 6e5fc778..3c0954e5 100644
--- a/btrfs-list.h
+++ b/btrfs-list.h
@@ -168,7 +168,7 @@ struct btrfs_list_comparer_set *btrfs_list_alloc_comparer_set(void);
 
 int btrfs_list_subvols_print(int fd, struct btrfs_list_filter_set *filter_set,
 		       struct btrfs_list_comparer_set *comp_set,
-		       enum btrfs_list_layout layot, int full_path,
+		       enum btrfs_list_layout layout, int full_path,
 		       const char *raw_prefix);
 int btrfs_list_find_updated_files(int fd, u64 root_id, u64 oldest_gen);
 int btrfs_list_get_default_subvolume(int fd, u64 *default_id);
diff --git a/btrfs-map-logical.c b/btrfs-map-logical.c
index 4508f365..57278fe1 100644
--- a/btrfs-map-logical.c
+++ b/btrfs-map-logical.c
@@ -75,7 +75,8 @@ again:
 			ret = btrfs_previous_extent_item(fs_info->extent_root,
 							 path, 0);
 		else
-			ret = btrfs_next_item(fs_info->extent_root, path);
+			ret = btrfs_next_extent_item(fs_info->extent_root,
+						     path, 0);
 		if (ret)
 			goto out;
 		goto again;
@@ -112,9 +113,10 @@ static int __print_mapping_info(struct btrfs_fs_info *fs_info, u64 logical,
 		ret = btrfs_map_block(fs_info, READ, logical + cur_offset,
 				      &cur_len, &multi, mirror_num, NULL);
 		if (ret) {
+			errno = -ret;
 			fprintf(info_file,
-				"Error: fails to map mirror%d logical %llu: %s\n",
-				mirror_num, logical, strerror(-ret));
+				"Error: fails to map mirror%d logical %llu: %m\n",
+				mirror_num, logical);
 			return ret;
 		}
 		for (i = 0; i < multi->num_stripes; i++) {
@@ -172,17 +174,18 @@ static int write_extent_content(struct btrfs_fs_info *fs_info, int out_fd,
 		ret = read_extent_data(fs_info, buffer,
 				       logical + cur_offset, &cur_len, mirror);
 		if (ret < 0) {
+			errno = -ret;
 			fprintf(stderr,
-				"Failed to read extent at [%llu, %llu]: %s\n",
-				logical, logical + length, strerror(-ret));
+				"Failed to read extent at [%llu, %llu]: %m\n",
+				logical, logical + length);
 			return ret;
 		}
 		ret = write(out_fd, buffer, cur_len);
 		if (ret < 0 || ret != cur_len) {
 			if (ret > 0)
 				ret = -EINTR;
-			fprintf(stderr, "output file write failed: %s\n",
-				strerror(-ret));
+			errno = -ret;
+			fprintf(stderr, "output file write failed: %m\n");
 			return ret;
 		}
 		cur_offset += cur_len;
@@ -292,8 +295,9 @@ int main(int argc, char **argv)
 	/* First find the nearest extent */
 	ret = map_one_extent(root->fs_info, &cur_logical, &cur_len, 0);
 	if (ret < 0) {
-		fprintf(stderr, "Failed to find extent at [%llu,%llu): %s\n",
-			cur_logical, cur_logical + cur_len, strerror(-ret));
+		errno = -ret;
+		fprintf(stderr, "Failed to find extent at [%llu,%llu): %m\n",
+			cur_logical, cur_logical + cur_len);
 		goto out_close_fd;
 	}
 	/*
@@ -304,10 +308,10 @@ int main(int argc, char **argv)
 	if (ret > 0) {
 		ret = map_one_extent(root->fs_info, &cur_logical, &cur_len, 1);
 		if (ret < 0) {
+			errno = -ret;
 			fprintf(stderr,
-				"Failed to find extent at [%llu,%llu): %s\n",
-				cur_logical, cur_logical + cur_len,
-				strerror(-ret));
+				"Failed to find extent at [%llu,%llu): %m\n",
+				cur_logical, cur_logical + cur_len);
 			goto out_close_fd;
 		}
 		if (ret > 0) {
diff --git a/btrfs-select-super.c b/btrfs-select-super.c
index e021221e..67b3958d 100644
--- a/btrfs-select-super.c
+++ b/btrfs-select-super.c
@@ -34,7 +34,7 @@
 static void print_usage(void)
 {
 	printf("usage: btrfs-select-super -s number dev\n");
-	printf("\t-s super   copy of superbloc to overwrite the primary one (values: 1, 2)\n");
+	printf("\t-s super   copy of superblock to overwrite the primary one (values: 1, 2)\n");
 	exit(1);
 }
 
@@ -77,7 +77,8 @@ int main(int argc, char **argv)
 	radix_tree_init();
 
 	if((ret = check_mounted(argv[optind])) < 0) {
-		error("cannot check mount status: %s", strerror(-ret));
+		errno = -ret;
+		error("cannot check mount status: %m");
 		return ret;
 	} else if(ret) {
 		error("%s is currently mounted, aborting", argv[optind]);
diff --git a/btrfs.c b/btrfs.c
index 2d39f2ce..ec919a88 100644
--- a/btrfs.c
+++ b/btrfs.c
@@ -169,7 +169,7 @@ static int cmd_version(int argc, char **argv)
  * Parse global options, between binary name and first non-option argument
  * after processing all valid options (including those with arguments).
  *
- * Returns index to argv where parsting stopped, optind is reset to 1
+ * Returns index to argv where parsing stopped, optind is reset to 1
  */
 static int handle_global_options(int argc, char **argv)
 {
@@ -210,7 +210,7 @@ static int handle_global_options(int argc, char **argv)
 	return shift;
 }
 
-void handle_special_globals(int shift, int argc, char **argv)
+static void handle_special_globals(int shift, int argc, char **argv)
 {
 	int has_help = 0;
 	int has_full = 0;
diff --git a/btrfsck.h b/btrfsck.h
index e16f52f5..020593c9 100644
--- a/btrfsck.h
+++ b/btrfsck.h
@@ -113,7 +113,7 @@ struct device_extent_record {
 	u8  type;
 	u64 offset;
 
-	u64 chunk_objecteid;
+	u64 chunk_objectid;
 	u64 chunk_offset;
 	u64 length;
 };
diff --git a/btrfstune.c b/btrfstune.c
index eccedf79..1e378ba1 100644
--- a/btrfstune.c
+++ b/btrfstune.c
@@ -91,9 +91,9 @@ static int set_super_incompat_flags(struct btrfs_root *root, u64 flags)
 	return ret;
 }
 
-static int change_header_uuid(struct btrfs_root *root, struct extent_buffer *eb)
+static int change_buffer_header_uuid(struct extent_buffer *eb)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_fs_info *fs_info = eb->fs_info;
 	int same_fsid = 1;
 	int same_chunk_tree_uuid = 1;
 	int ret;
@@ -157,7 +157,7 @@ static int change_extents_uuid(struct btrfs_fs_info *fs_info)
 			ret = PTR_ERR(eb);
 			goto out;
 		}
-		ret = change_header_uuid(root, eb);
+		ret = change_buffer_header_uuid(eb);
 		free_extent_buffer(eb);
 		if (ret < 0) {
 			error("failed to change uuid of tree block: %llu",
@@ -179,10 +179,10 @@ out:
 	return ret;
 }
 
-static int change_device_uuid(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
-			      int slot)
+static int change_device_uuid(struct extent_buffer *eb, int slot)
 {
 	struct btrfs_dev_item *di;
+	struct btrfs_fs_info *fs_info = eb->fs_info;
 	int ret = 0;
 
 	di = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
@@ -217,7 +217,7 @@ static int change_devices_uuid(struct btrfs_fs_info *fs_info)
 		if (key.type != BTRFS_DEV_ITEM_KEY ||
 		    key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
 			goto next;
-		ret = change_device_uuid(fs_info, path.nodes[0], path.slots[0]);
+		ret = change_device_uuid(path.nodes[0], path.slots[0]);
 		if (ret < 0)
 			goto out;
 next:
@@ -394,6 +394,7 @@ int main(int argc, char *argv[])
 	char *new_fsid_str = NULL;
 	int ret;
 	u64 super_flags = 0;
+	int fd = -1;
 
 	while(1) {
 		static const struct option long_options[] = {
@@ -467,17 +468,26 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	ret = check_mounted(device);
+	fd = open(device, O_RDWR);
+	if (fd < 0) {
+		error("mount check: cannot open %s: %m", device);
+		return 1;
+	}
+
+	ret = check_mounted_where(fd, device, NULL, 0, NULL,
+			SBREAD_IGNORE_FSID_MISMATCH);
 	if (ret < 0) {
-		error("could not check mount status of %s: %s", device,
-			strerror(-ret));
+		errno = -ret;
+		error("could not check mount status of %s: %m", device);
+		close(fd);
 		return 1;
 	} else if (ret) {
 		error("%s is mounted", device);
+		close(fd);
 		return 1;
 	}
 
-	root = open_ctree(device, 0, ctree_flags);
+	root = open_ctree_fd(fd, device, 0, ctree_flags);
 
 	if (!root) {
 		error("open ctree failed");
@@ -512,9 +522,9 @@ int main(int argc, char *argv[])
 	if (random_fsid || new_fsid_str) {
 		if (!force) {
 			warning(
-	"it's highly recommended to run 'btrfs check' before this operation");
-			warning(
-	"also canceling running UUID change progress may cause corruption");
+"it's recommended to run 'btrfs check --readonly' before this operation.\n"
+"\tThe whole operation must finish before the filesystem can be mounted again.\n"
+"\tIf cancelled or interrupted, run 'btrfstune -u' to restart.");
 			ret = ask_user("We are going to change UUID, are your sure?");
 			if (!ret) {
 				fprintf(stderr, "UUID change canceled\n");
diff --git a/check/main.c b/check/main.c
index 3190b5d4..db18827b 100644
--- a/check/main.c
+++ b/check/main.c
@@ -25,6 +25,7 @@
 #include <unistd.h>
 #include <getopt.h>
 #include <uuid/uuid.h>
+#include <time.h>
 #include "ctree.h"
 #include "volumes.h"
 #include "repair.h"
@@ -47,20 +48,6 @@
 #include "check/mode-original.h"
 #include "check/mode-lowmem.h"
 
-enum task_position {
-	TASK_EXTENTS,
-	TASK_FREE_SPACE,
-	TASK_FS_ROOTS,
-	TASK_NOTHING, /* have to be the last element */
-};
-
-struct task_ctx {
-	int progress_enabled;
-	enum task_position tp;
-
-	struct task_info *info;
-};
-
 u64 bytes_used = 0;
 u64 total_csum_bytes = 0;
 u64 total_btree_bytes = 0;
@@ -72,6 +59,7 @@ u64 data_bytes_referenced = 0;
 LIST_HEAD(duplicate_extents);
 LIST_HEAD(delete_items);
 int no_holes = 0;
+static int is_free_space_tree = 0;
 int init_extent_tree = 0;
 int check_data_csum = 0;
 struct btrfs_fs_info *global_info;
@@ -173,28 +161,55 @@ static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
 		return compare_tree_backref(node1, node2);
 }
 
+static void print_status_check_line(void *p)
+{
+	struct task_ctx *priv = p;
+	const char *task_position_string[] = {
+		"[1/7] checking root items                     ",
+		"[2/7] checking extents                        ",
+		is_free_space_tree ?
+		"[3/7] checking free space tree                " :
+		"[3/7] checking free space cache               ",
+		"[4/7] checking fs roots                       ",
+		check_data_csum ?
+		"[5/7] checking csums against data             " :
+		"[5/7] checking csums (without verifying data) ",
+		"[6/7] checking root refs                      ",
+		"[7/7] checking quota groups                   ",
+	};
+	time_t elapsed;
+	int hours;
+	int minutes;
+	int seconds;
+
+	elapsed = time(NULL) - priv->start_time;
+	hours   = elapsed  / 3600;
+	elapsed -= hours   * 3600;
+	minutes = elapsed  / 60;
+	elapsed -= minutes * 60;
+	seconds = elapsed;
+
+	printf("%s (%d:%02d:%02d elapsed", task_position_string[priv->tp],
+			hours, minutes, seconds);
+	if (priv->item_count > 0)
+		printf(", %llu items checked)\r", priv->item_count);
+	else
+		printf(")\r");
+	fflush(stdout);
+}
 
 static void *print_status_check(void *p)
 {
 	struct task_ctx *priv = p;
-	const char work_indicator[] = { '.', 'o', 'O', 'o' };
-	uint32_t count = 0;
-	static char *task_position_string[] = {
-		"checking extents",
-		"checking free space cache",
-		"checking fs roots",
-	};
 
-	task_period_start(priv->info, 1000 /* 1s */);
+	/* 1 second */
+	task_period_start(priv->info, 1000);
 
 	if (priv->tp == TASK_NOTHING)
 		return NULL;
 
 	while (1) {
-		printf("%s [%c]\r", task_position_string[priv->tp],
-				work_indicator[count % 4]);
-		count++;
-		fflush(stdout);
+		print_status_check_line(p);
 		task_period_wait(priv->info);
 	}
 	return NULL;
@@ -202,6 +217,7 @@ static void *print_status_check(void *p)
 
 static int print_status_return(void *p)
 {
+	print_status_check_line(p);
 	printf("\n");
 	fflush(stdout);
 
@@ -578,6 +594,8 @@ static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
 		fprintf(stderr, ", orphan file extent");
 	if (errors & I_ERR_ODD_INODE_FLAGS)
 		fprintf(stderr, ", odd inode flags");
+	if (errors & I_ERR_INLINE_RAM_BYTES_WRONG)
+		fprintf(stderr, ", invalid inline ram bytes");
 	fprintf(stderr, "\n");
 	/* Print the orphan extents if needed */
 	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
@@ -1472,7 +1490,7 @@ static int process_file_extent(struct btrfs_root *root,
 		u8 compression = btrfs_file_extent_compression(eb, fi);
 		struct btrfs_item *item = btrfs_item_nr(slot);
 
-		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
+		num_bytes = btrfs_file_extent_ram_bytes(eb, fi);
 		if (num_bytes == 0)
 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
 		if (compression) {
@@ -1483,6 +1501,9 @@ static int process_file_extent(struct btrfs_root *root,
 		} else {
 			if (num_bytes > max_inline_size)
 				rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
+			if (btrfs_file_extent_inline_item_len(eb, item) !=
+			    num_bytes)
+				rec->errors |= I_ERR_INLINE_RAM_BYTES_WRONG;
 		}
 		rec->found_size += num_bytes;
 		num_bytes = (num_bytes + mask) & ~mask;
@@ -2270,9 +2291,9 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
 
 	ret = reset_nlink(trans, root, path, rec);
 	if (ret < 0) {
+		errno = -ret;
 		fprintf(stderr,
-			"Failed to reset nlink for inode %llu: %s\n",
-			rec->ino, strerror(-ret));
+			"Failed to reset nlink for inode %llu: %m\n", rec->ino);
 		goto out;
 	}
 
@@ -2534,6 +2555,41 @@ out:
 	return ret;
 }
 
+static int repair_inline_ram_bytes(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct inode_record *rec)
+{
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_item *i;
+	u64 on_disk_item_len;
+	int ret;
+
+	key.objectid = rec->ino;
+	key.offset = 0;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		goto out;
+
+	i = btrfs_item_nr(path->slots[0]);
+	on_disk_item_len = btrfs_file_extent_inline_item_len(path->nodes[0], i);
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	btrfs_set_file_extent_ram_bytes(path->nodes[0], fi, on_disk_item_len);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+	printf("Repaired inline ram_bytes for root %llu ino %llu\n",
+		root->objectid, rec->ino);
+	rec->errors &= ~I_ERR_INLINE_RAM_BYTES_WRONG;
+out:
+	btrfs_release_path(path);
+	return ret;
+}
+
 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
 {
 	struct btrfs_trans_handle *trans;
@@ -2545,8 +2601,9 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
 			     I_ERR_LINK_COUNT_WRONG |
 			     I_ERR_NO_INODE_ITEM |
 			     I_ERR_FILE_EXTENT_ORPHAN |
-			     I_ERR_FILE_EXTENT_DISCOUNT|
-			     I_ERR_FILE_NBYTES_WRONG)))
+			     I_ERR_FILE_EXTENT_DISCOUNT |
+			     I_ERR_FILE_NBYTES_WRONG |
+			     I_ERR_INLINE_RAM_BYTES_WRONG)))
 		return rec->errors;
 
 	/*
@@ -2575,6 +2632,8 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
 		ret = repair_inode_nlinks(trans, root, &path, rec);
 	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
 		ret = repair_inode_nbytes(trans, root, &path, rec);
+	if (!ret && rec->errors & I_ERR_INLINE_RAM_BYTES_WRONG)
+		ret = repair_inline_ram_bytes(trans, root, &path, rec);
 	btrfs_commit_transaction(trans, root);
 	btrfs_release_path(&path);
 	return ret;
@@ -2676,7 +2735,10 @@ static int check_inode_recs(struct btrfs_root *root,
 				(unsigned long long)root->objectid);
 
 			ret = btrfs_make_root_dir(trans, root, root_dirid);
-			BUG_ON(ret);
+			if (ret < 0) {
+				btrfs_abort_transaction(trans, ret);
+				return ret;
+			}
 
 			btrfs_commit_transaction(trans, root);
 			return -EAGAIN;
@@ -2721,7 +2783,6 @@ static int check_inode_recs(struct btrfs_root *root,
 				free_inode_rec(rec);
 				continue;
 			}
-			ret = 0;
 		}
 
 		if (!(repair && ret == 0))
@@ -2942,6 +3003,7 @@ static int check_root_refs(struct btrfs_root *root,
 		loop = 0;
 		cache = search_cache_extent(root_cache, 0);
 		while (1) {
+			ctx.item_count++;
 			if (!cache)
 				break;
 			rec = container_of(cache, struct root_record, cache);
@@ -3108,8 +3170,8 @@ static int repair_btree(struct btrfs_root *root,
 	trans = btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		fprintf(stderr, "Error starting transaction: %s\n",
-			strerror(-ret));
+		errno = -ret;
+		fprintf(stderr, "Error starting transaction: %m\n");
 		return ret;
 	}
 	btrfs_init_path(&path);
@@ -3263,6 +3325,7 @@ static int check_fs_root(struct btrfs_root *root,
 	}
 
 	while (1) {
+		ctx.item_count++;
 		wret = walk_down_tree(root, &path, wc, &level, &nrefs);
 		if (wret < 0)
 			ret = wret;
@@ -3300,8 +3363,8 @@ skip_walking:
 			       root->root_key.objectid);
 			ret = repair_btree(root, &corrupt_blocks);
 			if (ret < 0)
-				fprintf(stderr, "Failed to repair btree: %s\n",
-					strerror(-ret));
+				errno = -ret;
+				fprintf(stderr, "Failed to repair btree: %m\n");
 			if (!ret)
 				printf("Btree for root %llu is fixed\n",
 				       root->root_key.objectid);
@@ -3337,14 +3400,10 @@ static int check_fs_roots(struct btrfs_fs_info *fs_info,
 	struct extent_buffer *leaf, *tree_node;
 	struct btrfs_root *tmp_root;
 	struct btrfs_root *tree_root = fs_info->tree_root;
+	u64 skip_root = 0;
 	int ret;
 	int err = 0;
 
-	if (ctx.progress_enabled) {
-		ctx.tp = TASK_FS_ROOTS;
-		task_start(ctx.info);
-	}
-
 	/*
 	 * Just in case we made any changes to the extent tree that weren't
 	 * reflected into the free space cache yet.
@@ -3357,7 +3416,10 @@ static int check_fs_roots(struct btrfs_fs_info *fs_info,
 
 again:
 	key.offset = 0;
-	key.objectid = 0;
+	if (skip_root)
+		key.objectid = skip_root + 1;
+	else
+		key.objectid = 0;
 	key.type = BTRFS_ROOT_ITEM_KEY;
 	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
 	if (ret < 0) {
@@ -3366,6 +3428,7 @@ again:
 	}
 	tree_node = tree_root->node;
 	while (1) {
+
 		if (tree_node != tree_root->node) {
 			free_root_recs_tree(root_cache);
 			btrfs_release_path(&path);
@@ -3402,8 +3465,18 @@ again:
 				btrfs_release_path(&path);
 				goto again;
 			}
-			if (ret)
+			if (ret) {
 				err = 1;
+
+				/*
+				 * We failed to repair this root but modified
+				 * tree root, after again: label we will still
+				 * hit this root and fail to repair, so we must
+				 * skip it to avoid infinite loop.
+				 */
+				if (repair)
+					skip_root = key.objectid;
+			}
 			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
 				btrfs_free_fs_root(tmp_root);
 		} else if (key.type == BTRFS_ROOT_REF_KEY ||
@@ -3421,8 +3494,6 @@ out:
 	if (!cache_tree_empty(&wc.shared))
 		fprintf(stderr, "warning line %d\n", __LINE__);
 
-	task_stop(ctx.info);
-
 	return err;
 }
 
@@ -3491,8 +3562,6 @@ static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
 {
 	int ret;
 
-	if (!ctx.progress_enabled)
-		fprintf(stderr, "checking fs roots\n");
 	if (check_mode == CHECK_MODE_LOWMEM)
 		ret = check_fs_roots_lowmem(fs_info);
 	else
@@ -3681,7 +3750,12 @@ static int check_owner_ref(struct btrfs_root *root,
 		if (btrfs_header_owner(buf) == back->root)
 			return 0;
 	}
-	BUG_ON(rec->is_root);
+	/*
+	 * Some unexpected root item referring to this one, return 1 to
+	 * indicate owner not found
+	 */
+	if (rec->is_root)
+		return 1;
 
 	/* try to find the block by search corresponding fs tree */
 	key.objectid = btrfs_header_owner(buf);
@@ -3977,6 +4051,11 @@ static int try_to_fix_bad_block(struct btrfs_root *root,
 
 	btrfs_init_path(&path);
 	ULIST_ITER_INIT(&iter);
+	/*
+	 * If we found no roots referencing to this tree block, there is no
+	 * chance to fix. So our default ret is -EIO.
+	 */
+	ret = -EIO;
 	while ((node = ulist_next(roots, &iter))) {
 		root_key.objectid = node->val;
 		root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -4964,7 +5043,7 @@ btrfs_new_device_extent_record(struct extent_buffer *leaf,
 	rec->offset = key->offset;
 
 	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
-	rec->chunk_objecteid =
+	rec->chunk_objectid =
 		btrfs_dev_extent_chunk_objectid(leaf, ptr);
 	rec->chunk_offset =
 		btrfs_dev_extent_chunk_offset(leaf, ptr);
@@ -5098,18 +5177,20 @@ static int process_extent_item(struct btrfs_root *root,
 		case BTRFS_TREE_BLOCK_REF_KEY:
 			ret = add_tree_backref(extent_cache, key.objectid,
 					0, offset, 0);
-			if (ret < 0)
+			if (ret < 0) {
+				errno = -ret;
 				error(
-			"add_tree_backref failed (extent items tree block): %s",
-				      strerror(-ret));
+			"add_tree_backref failed (extent items tree block): %m");
+			}
 			break;
 		case BTRFS_SHARED_BLOCK_REF_KEY:
 			ret = add_tree_backref(extent_cache, key.objectid,
 					offset, 0, 0);
-			if (ret < 0)
+			if (ret < 0) {
+				errno = -ret;
 				error(
-			"add_tree_backref failed (extent items shared block): %s",
-				      strerror(-ret));
+		"add_tree_backref failed (extent items shared block): %m");
+			}
 			break;
 		case BTRFS_EXTENT_DATA_REF_KEY:
 			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -5321,20 +5402,8 @@ static int check_space_cache(struct btrfs_root *root)
 	int ret;
 	int error = 0;
 
-	if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
-	    btrfs_super_generation(root->fs_info->super_copy) !=
-	    btrfs_super_cache_generation(root->fs_info->super_copy)) {
-		printf("cache and super generation don't match, space cache "
-		       "will be invalidated\n");
-		return 0;
-	}
-
-	if (ctx.progress_enabled) {
-		ctx.tp = TASK_FREE_SPACE;
-		task_start(ctx.info);
-	}
-
 	while (1) {
+		ctx.item_count++;
 		cache = btrfs_lookup_first_block_group(root->fs_info, start);
 		if (!cache)
 			break;
@@ -5353,16 +5422,18 @@ static int check_space_cache(struct btrfs_root *root)
 		if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
 			ret = exclude_super_stripes(root, cache);
 			if (ret) {
-				fprintf(stderr, "could not exclude super stripes: %s\n",
-					strerror(-ret));
+				errno = -ret;
+				fprintf(stderr,
+					"could not exclude super stripes: %m\n");
 				error++;
 				continue;
 			}
 			ret = load_free_space_tree(root->fs_info, cache);
 			free_excluded_extents(root, cache);
 			if (ret < 0) {
-				fprintf(stderr, "could not load free space tree: %s\n",
-					strerror(-ret));
+				errno = -ret;
+				fprintf(stderr,
+					"could not load free space tree: %m\n");
 				error++;
 				continue;
 			}
@@ -5383,8 +5454,6 @@ static int check_space_cache(struct btrfs_root *root)
 		}
 	}
 
-	task_stop(ctx.info);
-
 	return error ? -EINVAL : 0;
 }
 
@@ -5654,6 +5723,7 @@ static int check_csums(struct btrfs_root *root)
 	}
 
 	while (1) {
+		ctx.item_count++;
 		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
 			ret = btrfs_next_leaf(root, &path);
 			if (ret < 0) {
@@ -6056,19 +6126,21 @@ static int run_next_block(struct btrfs_root *root,
 			if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
 				ret = add_tree_backref(extent_cache,
 						key.objectid, 0, key.offset, 0);
-				if (ret < 0)
+				if (ret < 0) {
+					errno = -ret;
 					error(
-				"add_tree_backref failed (leaf tree block): %s",
-					      strerror(-ret));
+				"add_tree_backref failed (leaf tree block): %m");
+				}
 				continue;
 			}
 			if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
 				ret = add_tree_backref(extent_cache,
 						key.objectid, key.offset, 0, 0);
-				if (ret < 0)
+				if (ret < 0) {
+					errno = -ret;
 					error(
-				"add_tree_backref failed (leaf shared block): %s",
-					      strerror(-ret));
+			"add_tree_backref failed (leaf shared block): %m");
+				}
 				continue;
 			}
 			if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
@@ -6170,9 +6242,9 @@ static int run_next_block(struct btrfs_root *root,
 			ret = add_tree_backref(extent_cache, ptr, parent,
 					owner, 1);
 			if (ret < 0) {
+				errno = -ret;
 				error(
-				"add_tree_backref failed (non-leaf block): %s",
-				      strerror(-ret));
+				"add_tree_backref failed (non-leaf block): %m");
 				continue;
 			}
 
@@ -6232,8 +6304,7 @@ static int add_root_to_pending(struct extent_buffer *buf,
  * we're tracking for repair.  This hook makes sure we
  * remove any backrefs for blocks as we are fixing them.
  */
-static int free_extent_hook(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
+static int free_extent_hook(struct btrfs_fs_info *fs_info,
 			    u64 bytenr, u64 num_bytes, u64 parent,
 			    u64 root_objectid, u64 owner, u64 offset,
 			    int refs_to_drop)
@@ -6241,7 +6312,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
 	struct extent_record *rec;
 	struct cache_extent *cache;
 	int is_data;
-	struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
+	struct cache_tree *extent_cache = fs_info->fsck_extent_cache;
 
 	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
 	cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
@@ -6800,7 +6871,7 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 		goto out;
 
 	fprintf(stderr,
-		"attempting to repair backref discrepency for bytenr %llu\n",
+		"attempting to repair backref discrepancy for bytenr %llu\n",
 		rec->start);
 
 	/*
@@ -7780,7 +7851,7 @@ static int check_chunk_refs(struct chunk_record *chunk_rec,
 			    dev_extent_rec->length != length) {
 				if (!silent)
 					fprintf(stderr,
-						"Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
+"Chunk[%llu, %u, %llu] stripe[%llu, %llu] mismatch dev extent[%llu, %llu, %llu]\n",
 						chunk_rec->objectid,
 						chunk_rec->type,
 						chunk_rec->offset,
@@ -8047,6 +8118,7 @@ static int deal_root_from_list(struct list_head *list,
 		 * can maximize readahead.
 		 */
 		while (1) {
+			ctx.item_count++;
 			ret = run_next_block(root, bits, bits_nr, &last,
 					     pending, seen, reada, nodes,
 					     extent_cache, chunk_cache,
@@ -8075,6 +8147,89 @@ static int deal_root_from_list(struct list_head *list,
 	return ret;
 }
 
+/**
+ * parse_tree_roots - Go over all roots in the tree root and add each one to
+ *		      a list.
+ *
+ * @fs_info        - pointer to fs_info struct of the file system.
+ * @normal_trees   - list to contain all roots which don't have a drop
+ *		     operation in progress
+ * @dropping_trees - list containing all roots which have a drop operation
+ *		     pending
+ *
+ * Returns 0 on success or a negative value indicating an error.  The walk
+ * ends when btrfs_next_leaf() returns non-zero; a positive value there is
+ * the loop's only non-error exit and is reported to the caller as 0.
+ */
+static int parse_tree_roots(struct btrfs_fs_info *fs_info,
+			   struct list_head *normal_trees,
+			   struct list_head *dropping_trees)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_root_item ri;
+	struct extent_buffer *leaf;
+	int slot;
+	int ret = 0;
+
+	btrfs_init_path(&path);
+	key.offset = 0;
+	key.objectid = 0;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+	while (1) {
+		leaf = path.nodes[0];
+		slot = path.slots[0];
+		if (slot >= btrfs_header_nritems(path.nodes[0])) {
+			ret = btrfs_next_leaf(fs_info->tree_root, &path);
+			if (ret != 0)
+				break;
+			leaf = path.nodes[0];
+			slot = path.slots[0];
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
+		if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
+			unsigned long offset;
+			u64 last_snapshot;
+			u8 level;
+
+			offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
+			read_extent_buffer(leaf, &ri, offset, sizeof(ri));
+			last_snapshot = btrfs_root_last_snapshot(&ri);
+			level = btrfs_root_level(&ri);
+			if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
+				ret = add_root_item_to_list(normal_trees,
+						found_key.objectid,
+						btrfs_root_bytenr(&ri),
+						last_snapshot, level,
+						0, NULL);
+				if (ret < 0)
+					break;
+			} else {
+				u64 objectid = found_key.objectid;
+
+				btrfs_disk_key_to_cpu(&found_key,
+						      &ri.drop_progress);
+				ret = add_root_item_to_list(dropping_trees,
+						objectid,
+						btrfs_root_bytenr(&ri),
+						last_snapshot, level,
+						ri.drop_level, &found_key);
+				if (ret < 0)
+					break;
+			}
+		}
+		path.slots[0]++;
+	}
+
+out:
+	btrfs_release_path(&path);
+	return ret < 0 ? ret : 0;
+}
+
 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
 {
 	struct rb_root dev_cache;
@@ -8088,20 +8243,13 @@ static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
 	struct cache_tree nodes;
 	struct extent_io_tree excluded_extents;
 	struct cache_tree corrupt_blocks;
-	struct btrfs_path path;
-	struct btrfs_key key;
-	struct btrfs_key found_key;
 	int ret, err = 0;
 	struct block_info *bits;
 	int bits_nr;
-	struct extent_buffer *leaf;
-	int slot;
-	struct btrfs_root_item ri;
 	struct list_head dropping_trees;
 	struct list_head normal_trees;
 	struct btrfs_root *root1;
 	struct btrfs_root *root;
-	u64 objectid;
 	u8 level;
 
 	root = fs_info->fs_root;
@@ -8134,11 +8282,6 @@ static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
 		exit(1);
 	}
 
-	if (ctx.progress_enabled) {
-		ctx.tp = TASK_EXTENTS;
-		task_start(ctx.info);
-	}
-
 again:
 	root1 = fs_info->tree_root;
 	level = btrfs_header_level(root1->node);
@@ -8152,57 +8295,10 @@ again:
 				    root1->node->start, 0, level, 0, NULL);
 	if (ret < 0)
 		goto out;
-	btrfs_init_path(&path);
-	key.offset = 0;
-	key.objectid = 0;
-	key.type = BTRFS_ROOT_ITEM_KEY;
-	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
+
+	ret = parse_tree_roots(fs_info, &normal_trees, &dropping_trees);
 	if (ret < 0)
 		goto out;
-	while (1) {
-		leaf = path.nodes[0];
-		slot = path.slots[0];
-		if (slot >= btrfs_header_nritems(path.nodes[0])) {
-			ret = btrfs_next_leaf(root, &path);
-			if (ret != 0)
-				break;
-			leaf = path.nodes[0];
-			slot = path.slots[0];
-		}
-		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
-		if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
-			unsigned long offset;
-			u64 last_snapshot;
-
-			offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
-			read_extent_buffer(leaf, &ri, offset, sizeof(ri));
-			last_snapshot = btrfs_root_last_snapshot(&ri);
-			if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
-				level = btrfs_root_level(&ri);
-				ret = add_root_item_to_list(&normal_trees,
-						found_key.objectid,
-						btrfs_root_bytenr(&ri),
-						last_snapshot, level,
-						0, NULL);
-				if (ret < 0)
-					goto out;
-			} else {
-				level = btrfs_root_level(&ri);
-				objectid = found_key.objectid;
-				btrfs_disk_key_to_cpu(&found_key,
-						      &ri.drop_progress);
-				ret = add_root_item_to_list(&dropping_trees,
-						objectid,
-						btrfs_root_bytenr(&ri),
-						last_snapshot, level,
-						ri.drop_level, &found_key);
-				if (ret < 0)
-					goto out;
-			}
-		}
-		path.slots[0]++;
-	}
-	btrfs_release_path(&path);
 
 	/*
 	 * check_block can return -EAGAIN if it fixes something, please keep
@@ -8248,7 +8344,6 @@ again:
 		ret = err;
 
 out:
-	task_stop(ctx.info);
 	if (repair) {
 		free_corrupt_blocks_tree(fs_info->corrupt_blocks);
 		extent_io_tree_cleanup(&excluded_extents);
@@ -8290,8 +8385,6 @@ static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
 {
 	int ret;
 
-	if (!ctx.progress_enabled)
-		fprintf(stderr, "checking extents\n");
 	if (check_mode == CHECK_MODE_LOWMEM)
 		ret = check_chunks_and_extents_lowmem(fs_info);
 	else
@@ -8307,7 +8400,7 @@ static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
 }
 
 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, int overwrite)
+				  struct btrfs_root *root)
 {
 	struct extent_buffer *c;
 	struct extent_buffer *old = root->node;
@@ -8317,21 +8410,13 @@ static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
 
 	level = 0;
 
-	if (overwrite) {
-		c = old;
-		extent_buffer_get(c);
-		goto init;
-	}
 	c = btrfs_alloc_free_block(trans, root,
 				   root->fs_info->nodesize,
 				   root->root_key.objectid,
 				   &disk_key, level, 0, 0);
-	if (IS_ERR(c)) {
-		c = old;
-		extent_buffer_get(c);
-		overwrite = 1;
-	}
-init:
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+
 	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_level(c, level);
 	btrfs_set_header_bytenr(c, c->start);
@@ -8350,9 +8435,7 @@ init:
 	/*
 	 * this case can happen in the following case:
 	 *
-	 * 1.overwrite previous root.
-	 *
-	 * 2.reinit reloc data root, this is because we skip pin
+	 * reinit reloc data root, this is because we skip pin
 	 * down reloc data tree before which means we can allocate
 	 * same block bytenr here.
 	 */
@@ -8537,7 +8620,7 @@ reinit_data_reloc:
 		goto out;
 	}
 	record_root_in_trans(trans, root);
-	ret = btrfs_fsck_reinit_root(trans, root, 0);
+	ret = btrfs_fsck_reinit_root(trans, root);
 	if (ret)
 		goto out;
 	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
@@ -8571,7 +8654,7 @@ static int reinit_extent_tree(struct btrfs_trans_handle *trans,
 	 * first we need to walk all of the trees except the extent tree and pin
 	 * down/exclude the bytes that are in use so we don't overwrite any
 	 * existing metadata.
-	 * If pinnned, unpin will be done in the end of transaction.
+	 * If pinned, unpin will be done in the end of transaction.
 	 * If excluded, cleanup will be done in check_chunks_and_extents_lowmem.
 	 */
 again:
@@ -8603,7 +8686,7 @@ again:
 	}
 
 	/* Ok we can allocate now, reinit the extent root */
-	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
+	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root);
 	if (ret) {
 		fprintf(stderr, "extent root initialization failed\n");
 		/*
@@ -8633,7 +8716,7 @@ again:
 			fprintf(stderr, "Error adding block group\n");
 			return ret;
 		}
-		btrfs_extent_post_op(trans);
+		btrfs_run_delayed_refs(trans, -1);
 	}
 
 	ret = reset_balance(trans, fs_info);
@@ -9021,6 +9104,7 @@ static int build_roots_info_cache(struct btrfs_fs_info *info)
 		struct cache_extent *entry;
 		struct root_item_info *rii;
 
+		ctx.item_count++;
 		if (slot >= btrfs_header_nritems(leaf)) {
 			ret = btrfs_next_leaf(info->extent_root, &path);
 			if (ret < 0) {
@@ -9062,7 +9146,7 @@ static int build_roots_info_cache(struct btrfs_fs_info *info)
 		 * It's a valid extent/metadata item that has no inline ref,
 		 * but SHARED_BLOCK_REF or other shared references.
 		 * So we need to do extra check to avoid reading beyond leaf
-		 * boudnary.
+		 * boundary.
 		 */
 		if ((unsigned long)iref >= item_end)
 			goto next;
@@ -9339,7 +9423,6 @@ static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
 			ret = 1;
 			goto close_out;
 		}
-		printf("Clearing free space cache\n");
 		ret = clear_free_space_cache(fs_info);
 		if (ret) {
 			error("failed to clear free space cache");
@@ -9366,35 +9449,80 @@ close_out:
 	return ret;
 }
 
+/*
+ * Check the free space cache or free space tree and, in repair mode, try to
+ * recreate a free space tree that failed the check.
+ *
+ * Nothing is checked when the superblock carries a cache generation other
+ * than -1ULL that differs from the super generation; a message is printed
+ * and 0 is returned.
+ *
+ * Returns 0 on success, -EINVAL if check_space_cache() reported a problem
+ * that was not repaired or if clearing/recreating the tree failed.
+ */
+static int validate_free_space_cache(struct btrfs_root *root)
+{
+	int ret;
+
+	if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
+	    btrfs_super_generation(root->fs_info->super_copy) !=
+	    btrfs_super_cache_generation(root->fs_info->super_copy)) {
+		printf(
+"cache and super generation don't match, space cache will be invalidated\n");
+		return 0;
+	}
+
+	ret = check_space_cache(root);
+	if (ret && btrfs_fs_compat_ro(global_info, FREE_SPACE_TREE) &&
+	    repair) {
+		ret = do_clear_free_space_cache(global_info, 2);
+		if (ret)
+			goto out;
+
+		ret = btrfs_create_free_space_tree(global_info);
+		if (ret)
+			error("couldn't repair freespace tree");
+	}
+
+out:
+	return ret ? -EINVAL : 0;
+}
+
 const char * const cmd_check_usage[] = {
 	"btrfs check [options] <device>",
 	"Check structural integrity of a filesystem (unmounted).",
 	"Check structural integrity of an unmounted filesystem. Verify internal",
 	"trees' consistency and item connectivity. In the repair mode try to",
 	"fix the problems found. ",
-	"WARNING: the repair mode is considered dangerous",
+	"WARNING: the repair mode is considered dangerous and should not be used",
+	"         without prior analysis of problems found on the filesystem.",
 	"",
-	"-s|--super <superblock>     use this superblock copy",
-	"-b|--backup                 use the first valid backup root copy",
-	"--force                     skip mount checks, repair is not possible",
-	"--repair                    try to repair the filesystem",
-	"--readonly                  run in read-only mode (default)",
-	"--init-csum-tree            create a new CRC tree",
-	"--init-extent-tree          create a new extent tree",
-	"--mode <MODE>               allows choice of memory/IO trade-offs",
-	"                            where MODE is one of:",
-	"                            original - read inodes and extents to memory (requires",
-	"                                       more memory, does less IO)",
-	"                            lowmem   - try to use less memory but read blocks again",
-	"                                       when needed",
-	"--check-data-csum           verify checksums of data blocks",
-	"-Q|--qgroup-report          print a report on qgroup consistency",
-	"-E|--subvol-extents <subvolid>",
-	"                            print subvolume extents and sharing state",
-	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
-	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
-	"-p|--progress               indicate progress",
-	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
+	"Options:",
+	"  starting point selection:",
+	"       -s|--super <superblock>     use this superblock copy",
+	"       -b|--backup                 use the first valid backup root copy",
+	"       -r|--tree-root <bytenr>     use the given bytenr for the tree root",
+	"       --chunk-root <bytenr>       use the given bytenr for the chunk tree root",
+	"  operation modes:",
+	"       --readonly                  run in read-only mode (default)",
+	"       --repair                    try to repair the filesystem",
+	"       --force                     skip mount checks, repair is not possible",
+	"       --mode <MODE>               allows choice of memory/IO trade-offs",
+	"                                   where MODE is one of:",
+	"                                   original - read inodes and extents to memory (requires",
+	"                                              more memory, does less IO)",
+	"                                   lowmem   - try to use less memory but read blocks again",
+	"                                              when needed (experimental)",
+	"  repair options:",
+	"       --init-csum-tree            create a new CRC tree",
+	"       --init-extent-tree          create a new extent tree",
+	"       --clear-space-cache v1|v2   clear space cache for v1 or v2",
+	"  check and reporting options:",
+	"       --check-data-csum           verify checksums of data blocks",
+	"       -Q|--qgroup-report          print a report on qgroup consistency",
+	"       -E|--subvol-extents <subvolid>",
+	"                                   print subvolume extents and sharing state",
+	"       -p|--progress               indicate progress",
 	NULL
 };
 
@@ -9559,14 +9676,16 @@ int cmd_check(int argc, char **argv)
 	if (repair && check_mode == CHECK_MODE_LOWMEM)
 		warning("low-memory mode repair support is only partial");
 
+	printf("Opening filesystem to check...\n");
+
 	radix_tree_init();
 	cache_tree_init(&root_cache);
 
 	ret = check_mounted(argv[optind]);
 	if (!force) {
 		if (ret < 0) {
-			error("could not check mount status: %s",
-					strerror(-ret));
+			errno = -ret;
+			error("could not check mount status: %m");
 			err |= !!ret;
 			goto err_out;
 		} else if (ret) {
@@ -9681,6 +9800,7 @@ int cmd_check(int argc, char **argv)
 			goto close_out;
 		}
 
+		trans->reinit_extent_tree = true;
 		if (init_extent_tree) {
 			printf("Creating a new extent tree\n");
 			ret = reinit_extent_tree(trans, info,
@@ -9692,7 +9812,7 @@ int cmd_check(int argc, char **argv)
 
 		if (init_csum_tree) {
 			printf("Reinitialize checksum tree\n");
-			ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
+			ret = btrfs_fsck_reinit_root(trans, info->csum_root);
 			if (ret) {
 				error("checksum tree initialization failed: %d",
 						ret);
@@ -9732,10 +9852,18 @@ int cmd_check(int argc, char **argv)
 	}
 
 	if (!init_extent_tree) {
+		if (!ctx.progress_enabled) {
+			fprintf(stderr, "[1/7] checking root items\n");
+		} else {
+			ctx.tp = TASK_ROOT_ITEMS;
+			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
+		}
 		ret = repair_root_items(info);
+		task_stop(ctx.info);
 		if (ret < 0) {
 			err = !!ret;
-			error("failed to repair root items: %s", strerror(-ret));
+			errno = -ret;
+			error("failed to repair root items: %m");
 			goto close_out;
 		}
 		if (repair) {
@@ -9751,9 +9879,18 @@ int cmd_check(int argc, char **argv)
 			err |= ret;
 			goto close_out;
 		}
+	} else {
+		fprintf(stderr, "[1/7] checking root items... skipped\n");
 	}
 
+	if (!ctx.progress_enabled) {
+		fprintf(stderr, "[2/7] checking extents\n");
+	} else {
+		ctx.tp = TASK_EXTENTS;
+		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
+	}
 	ret = do_check_chunks_and_extents(info);
+	task_stop(ctx.info);
 	err |= !!ret;
 	if (ret)
 		error(
@@ -9762,21 +9899,21 @@ int cmd_check(int argc, char **argv)
 	/* Only re-check super size after we checked and repaired the fs */
 	err |= !is_super_size_valid(info);
 
+	is_free_space_tree = btrfs_fs_compat_ro(info, FREE_SPACE_TREE);
+
 	if (!ctx.progress_enabled) {
-		if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
-			fprintf(stderr, "checking free space tree\n");
+		if (is_free_space_tree)
+			fprintf(stderr, "[3/7] checking free space tree\n");
 		else
-			fprintf(stderr, "checking free space cache\n");
+			fprintf(stderr, "[3/7] checking free space cache\n");
+	} else {
+		ctx.tp = TASK_FREE_SPACE;
+		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
 	}
-	ret = check_space_cache(root);
+
+	ret = validate_free_space_cache(root);
+	task_stop(ctx.info);
 	err |= !!ret;
-	if (ret) {
-		if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
-			error("errors found in free space tree");
-		else
-			error("errors found in free space cache");
-		goto out;
-	}
 
 	/*
 	 * We used to have to have these hole extents in between our real
@@ -9785,19 +9922,34 @@ int cmd_check(int argc, char **argv)
 	 * ignore it when this happens.
 	 */
 	no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
+	if (!ctx.progress_enabled) {
+		fprintf(stderr, "[4/7] checking fs roots\n");
+	} else {
+		ctx.tp = TASK_FS_ROOTS;
+		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
+	}
+
 	ret = do_check_fs_roots(info, &root_cache);
+	task_stop(ctx.info);
 	err |= !!ret;
 	if (ret) {
 		error("errors found in fs roots");
 		goto out;
 	}
 
-	if (check_data_csum)
-		fprintf(stderr, "checking csums against data\n");
-	else
-		fprintf(stderr,
-			"checking only csum items (without verifying data)\n");
+	if (!ctx.progress_enabled) {
+		if (check_data_csum)
+			fprintf(stderr, "[5/7] checking csums against data\n");
+		else
+			fprintf(stderr,
+		"[5/7] checking only csums items (without verifying data)\n");
+	} else {
+		ctx.tp = TASK_CSUMS;
+		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
+	}
+
 	ret = check_csums(root);
+	task_stop(ctx.info);
 	/*
 	 * Data csum error is not fatal, and it may indicate more serious
 	 * corruption, continue checking.
@@ -9806,15 +9958,25 @@ int cmd_check(int argc, char **argv)
 		error("errors found in csum tree");
 	err |= !!ret;
 
-	fprintf(stderr, "checking root refs\n");
 	/* For low memory mode, check_fs_roots_v2 handles root refs */
-	if (check_mode != CHECK_MODE_LOWMEM) {
+        if (check_mode != CHECK_MODE_LOWMEM) {
+		if (!ctx.progress_enabled) {
+			fprintf(stderr, "[6/7] checking root refs\n");
+		} else {
+			ctx.tp = TASK_ROOT_REFS;
+			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
+		}
+
 		ret = check_root_refs(root, &root_cache);
+		task_stop(ctx.info);
 		err |= !!ret;
 		if (ret) {
 			error("errors found in root refs");
 			goto out;
 		}
+	} else {
+		fprintf(stderr,
+	"[6/7] checking root refs done with fs roots in lowmem mode, skipping\n");
 	}
 
 	while (repair && !list_empty(&root->fs_info->recow_ebs)) {
@@ -9844,8 +10006,15 @@ int cmd_check(int argc, char **argv)
 	}
 
 	if (info->quota_enabled) {
-		fprintf(stderr, "checking quota groups\n");
+		qgroup_set_item_count_ptr(&ctx.item_count);
+		if (!ctx.progress_enabled) {
+			fprintf(stderr, "[7/7] checking quota groups\n");
+		} else {
+			ctx.tp = TASK_QGROUPS;
+			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
+		}
 		ret = qgroup_verify_all(info);
+		task_stop(ctx.info);
 		err |= !!ret;
 		if (ret) {
 			error("failed to check quota groups");
@@ -9860,6 +10029,9 @@ int cmd_check(int argc, char **argv)
 		if (qgroup_report_ret && (!qgroups_repaired || ret))
 			err |= qgroup_report_ret;
 		ret = 0;
+	} else {
+		fprintf(stderr,
+		"[7/7] checking quota groups skipped (not enabled on this FS)\n");
 	}
 
 	if (!list_empty(&root->fs_info->recow_ebs)) {
diff --git a/check/mode-common.c b/check/mode-common.c
index 15e2bbd1..2efa4dc5 100644
--- a/check/mode-common.c
+++ b/check/mode-common.c
@@ -450,7 +450,8 @@ int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
 			  BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
 			  mode);
 	if (ret < 0) {
-		error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
+		errno = -ret;
+		error("failed to create '%s' dir: %m", dir_name);
 		goto out;
 	}
 	ret = btrfs_add_link(trans, root, ino, lost_found_ino,
@@ -474,8 +475,9 @@ int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
 				     name_len, filetype, NULL, 1, 0);
 	}
 	if (ret < 0) {
-		error("failed to link the inode %llu to %s dir: %s",
-		      ino, dir_name, strerror(-ret));
+		errno = -ret;
+		error("failed to link the inode %llu to %s dir: %m",
+		      ino, dir_name);
 		goto out;
 	}
 
@@ -491,7 +493,7 @@ out:
 }
 
 /*
- * Extra (optional) check for dev_item size to report possbile problem on a new
+ * Extra (optional) check for dev_item size to report possible problem on a new
  * kernel.
  */
 void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
diff --git a/check/mode-common.h b/check/mode-common.h
index a4748578..6b05f8ba 100644
--- a/check/mode-common.h
+++ b/check/mode-common.h
@@ -38,6 +38,26 @@ struct node_refs {
 	int full_backref[BTRFS_MAX_LEVEL];
 };
 
+enum task_position {
+	TASK_ROOT_ITEMS,	/* [1/7] checking root items */
+	TASK_EXTENTS,		/* [2/7] checking extents */
+	TASK_FREE_SPACE,	/* [3/7] checking free space tree or cache */
+	TASK_FS_ROOTS,		/* [4/7] checking fs roots */
+	TASK_CSUMS,		/* [5/7] checking csums */
+	TASK_ROOT_REFS,		/* [6/7] checking root refs */
+	TASK_QGROUPS,		/* [7/7] checking quota groups */
+	TASK_NOTHING, /* has to be the last element */
+};
+
+struct task_ctx {
+	int progress_enabled;	/* if set, cmd_check starts a task per phase */
+	enum task_position tp;	/* phase set just before task_start() */
+	time_t start_time;	/* passed to task_start() by address */
+	u64 item_count;		/* incremented inside the check loops */
+
+	struct task_info *info;	/* passed to task_start()/task_stop() */
+};
+
 extern u64 bytes_used;
 extern u64 total_csum_bytes;
 extern u64 total_btree_bytes;
diff --git a/check/mode-lowmem.c b/check/mode-lowmem.c
index 66da4531..6fb397ab 100644
--- a/check/mode-lowmem.c
+++ b/check/mode-lowmem.c
@@ -284,7 +284,8 @@ static int modify_block_groups_cache(struct btrfs_fs_info *fs_info, u64 flags,
 	btrfs_init_path(&path);
 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
 	if (ret < 0) {
-		error("fail to search block groups due to %s", strerror(-ret));
+		errno = -ret;
+		error("fail to search block groups due to %m");
 		goto out;
 	}
 
@@ -341,19 +342,22 @@ static int create_chunk_and_block_group(struct btrfs_fs_info *fs_info,
 	trans = btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		error("error starting transaction %s", strerror(-ret));
+		errno = -ret;
+		error("error starting transaction %m");
 		return ret;
 	}
 	ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
 	if (ret) {
-		error("fail to allocate new chunk %s", strerror(-ret));
+		errno = -ret;
+		error("fail to allocate new chunk %m");
 		goto out;
 	}
 	ret = btrfs_make_block_group(trans, fs_info, 0, flags, *start,
 				     *nbytes);
 	if (ret) {
-		error("fail to make block group for chunk %llu %llu %s",
-		      *start, *nbytes, strerror(-ret));
+		errno = -ret;
+		error("fail to make block group for chunk %llu %llu %m",
+		      *start, *nbytes);
 		goto out;
 	}
 out:
@@ -521,8 +525,10 @@ static int avoid_extents_overwrite(struct btrfs_fs_info *fs_info)
 	"Try to exclude all metadata blcoks and extents, it may be slow\n");
 	ret = exclude_metadata_blocks(fs_info);
 out:
-	if (ret)
-		error("failed to avoid extents overwrite %s", strerror(-ret));
+	if (ret) {
+		errno = -ret;
+		error("failed to avoid extents overwrite %m");
+	}
 	return ret;
 }
 
@@ -552,7 +558,8 @@ static int repair_block_accounting(struct btrfs_fs_info *fs_info)
 	trans = btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		error("fail to start transaction %s", strerror(-ret));
+		errno = -ret;
+		error("fail to start transaction: %m");
 		return ret;
 	}
 
@@ -629,7 +636,8 @@ static int repair_tree_block_ref(struct btrfs_root *root,
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
 		trans = NULL;
-		error("fail to start transaction %s", strerror(-ret));
+		errno = -ret;
+		error("fail to start transaction: %m");
 		goto out;
 	}
 	/* insert an extent item */
@@ -701,9 +709,10 @@ out:
 		btrfs_commit_transaction(trans, extent_root);
 	btrfs_release_path(&path);
 	if (ret) {
+		errno = -ret;
 		error(
-	"failed to repair tree block ref start %llu root %llu due to %s",
-		      bytenr, root->objectid, strerror(-ret));
+	"failed to repair tree block ref start %llu root %llu due to %m",
+		      bytenr, root->objectid);
 	} else {
 		printf("Added one tree block ref start %llu %s %llu\n",
 		       bytenr, parent ? "parent" : "root",
@@ -944,7 +953,7 @@ out:
  * returns 0 means success.
  * returns not 0 means on error;
  */
-int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
+static int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
 			  u64 index, char *name, int name_len, u8 filetype,
 			  int err)
 {
@@ -1735,23 +1744,163 @@ static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
 	return ret;
 }
 
+/*
+ * Set ram_bytes of an uncompressed inline file extent to the inline data
+ * length of its item.  @path points at that item on entry and on return;
+ * @ram_bytes_ret is only written on success.  Return 0 on success.
+ */
+static int repair_inline_ram_bytes(struct btrfs_root *root,
+				   struct btrfs_path *path, u64 *ram_bytes_ret)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	u32 on_disk_data_len;
+	int ret;
+	int recover_ret;
+
+	/* Reject bad items first: no transaction exists yet to clean up */
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	if (btrfs_file_extent_type(path->nodes[0], fi) !=
+			BTRFS_FILE_EXTENT_INLINE ||
+	    btrfs_file_extent_compression(path->nodes[0], fi) !=
+			BTRFS_COMPRESS_NONE)
+		return -EINVAL;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	btrfs_release_path(path);
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	/* Not really possible */
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		goto recover;
+
+	on_disk_data_len = btrfs_file_extent_inline_item_len(path->nodes[0],
+			btrfs_item_nr(path->slots[0]));
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	btrfs_set_file_extent_ram_bytes(path->nodes[0], fi, on_disk_data_len);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+	ret = btrfs_commit_transaction(trans, root);
+	if (!ret) {
+		printf(
+	"Successfully repaired inline ram_bytes for root %llu ino %llu\n",
+			root->objectid, key.objectid);
+		*ram_bytes_ret = on_disk_data_len;
+	}
+	return ret;
+
+recover:
+	/*
+	 * COW search failed, mostly due to the extra COW work (extent
+	 * allocation, etc).  Commit so that no transaction stays running,
+	 * then search read-only so that the caller's path is not left empty.
+	 */
+	btrfs_release_path(path);
+	btrfs_commit_transaction(trans, root);
+	recover_ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	/* This really shouldn't happen, or we have a big problem */
+	ASSERT(recover_ret == 0);
+	return ret;
+}
+
+/*
+ * Check the inline file extent item that @path points at.  Its ram_bytes (the
+ * repaired value if a repair succeeded) is added to both @size and @end, even
+ * when an error is reported.  Return 0 or FILE_EXTENT_ERROR.
+ */
+static int check_file_extent_inline(struct btrfs_root *root,
+				    struct btrfs_path *path, u64 *size,
+				    u64 *end)
+{
+	u32 max_inline_extent_size = min_t(u32, root->fs_info->sectorsize - 1,
+				BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info));
+	struct extent_buffer *node = path->nodes[0];
+	struct btrfs_item *e = btrfs_item_nr(path->slots[0]);
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key fkey;
+	u64 extent_num_bytes;
+	u32 item_inline_len;
+	int compressed = 0;
+	int err = 0;
+
+	fi = btrfs_item_ptr(node, path->slots[0], struct btrfs_file_extent_item);
+	item_inline_len = btrfs_file_extent_inline_item_len(node, e);
+	extent_num_bytes = btrfs_file_extent_ram_bytes(node, fi);
+	compressed = btrfs_file_extent_compression(node, fi);
+	btrfs_item_key_to_cpu(node, &fkey, path->slots[0]);
+
+	if (extent_num_bytes == 0) {
+		error(
+"root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
+				root->objectid, fkey.objectid, fkey.offset);
+		err |= FILE_EXTENT_ERROR;
+	}
+
+	if (compressed) {
+		/* A full sector is still accepted, so report that as max */
+		if (extent_num_bytes > root->fs_info->sectorsize) {
+			error(
+"root %llu EXTENT_DATA[%llu %llu] too large inline extent ram size, have %llu, max: %u",
+				root->objectid, fkey.objectid, fkey.offset,
+				extent_num_bytes, root->fs_info->sectorsize);
+			err |= FILE_EXTENT_ERROR;
+		}
+
+		if (item_inline_len > max_inline_extent_size) {
+			error(
+"root %llu EXTENT_DATA[%llu %llu] too large inline extent on-disk size, have %u, max: %u",
+				root->objectid, fkey.objectid, fkey.offset,
+				item_inline_len, max_inline_extent_size);
+			err |= FILE_EXTENT_ERROR;
+		}
+	} else {
+		if (extent_num_bytes > max_inline_extent_size) {
+			error(
+"root %llu EXTENT_DATA[%llu %llu] too large inline extent size, have %llu, max: %u",
+				root->objectid, fkey.objectid, fkey.offset,
+				extent_num_bytes, max_inline_extent_size);
+			err |= FILE_EXTENT_ERROR;
+		}
+
+		if (extent_num_bytes != item_inline_len) {
+			error(
+"root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
+				root->objectid, fkey.objectid, fkey.offset,
+				extent_num_bytes, item_inline_len);
+			if (!repair || repair_inline_ram_bytes(root, path,
+							&extent_num_bytes))
+				err |= FILE_EXTENT_ERROR;
+		}
+	}
+	*end += extent_num_bytes;
+	*size += extent_num_bytes;
+
+	return err;
+}
+
 /*
  * Check file extent datasum/hole, update the size of the file extents,
  * check and update the last offset of the file extent.
  *
  * @root:	the root of fs/file tree.
- * @fkey:	the key of the file extent.
  * @nodatasum:	INODE_NODATASUM feature.
  * @size:	the sum of all EXTENT_DATA items size for this inode.
  * @end:	the offset of the last extent.
  *
  * Return 0 if no error occurred.
  */
-static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
-			     struct extent_buffer *node, int slot,
+static int check_file_extent(struct btrfs_root *root, struct btrfs_path *path,
 			     unsigned int nodatasum, u64 *size, u64 *end)
 {
 	struct btrfs_file_extent_item *fi;
+	struct btrfs_key fkey;
+	struct extent_buffer *node = path->nodes[0];
 	u64 disk_bytenr;
 	u64 disk_num_bytes;
 	u64 extent_num_bytes;
@@ -1759,78 +1908,31 @@ static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
 	u64 csum_found;		/* In byte size, sectorsize aligned */
 	u64 search_start;	/* Logical range start we search for csum */
 	u64 search_len;		/* Logical range len we search for csum */
-	u32 max_inline_extent_size = min_t(u32, root->fs_info->sectorsize - 1,
-				BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info));
 	unsigned int extent_type;
 	unsigned int is_hole;
+	int slot = path->slots[0];
 	int compressed = 0;
 	int ret;
 	int err = 0;
 
+	btrfs_item_key_to_cpu(node, &fkey, slot);
 	fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
-
-	/* Check inline extent */
 	extent_type = btrfs_file_extent_type(node, fi);
-	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-		struct btrfs_item *e = btrfs_item_nr(slot);
-		u32 item_inline_len;
-
-		item_inline_len = btrfs_file_extent_inline_item_len(node, e);
-		extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
-		compressed = btrfs_file_extent_compression(node, fi);
-		if (extent_num_bytes == 0) {
-			error(
-		"root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
-				root->objectid, fkey->objectid, fkey->offset);
-			err |= FILE_EXTENT_ERROR;
-		}
-		if (compressed) {
-			if (extent_num_bytes > root->fs_info->sectorsize) {
-				error(
-"root %llu EXTENT_DATA[%llu %llu] too large inline extent ram size, have %llu, max: %u",
-					root->objectid, fkey->objectid,
-					fkey->offset, extent_num_bytes,
-					root->fs_info->sectorsize - 1);
-				err |= FILE_EXTENT_ERROR;
-			}
-			if (item_inline_len > max_inline_extent_size) {
-				error(
-"root %llu EXTENT_DATA[%llu %llu] too large inline extent on-disk size, have %u, max: %u",
-					root->objectid, fkey->objectid,
-					fkey->offset, item_inline_len,
-					max_inline_extent_size);
-				err |= FILE_EXTENT_ERROR;
-			}
-		} else {
-			if (extent_num_bytes > max_inline_extent_size) {
- 			error(
- "root %llu EXTENT_DATA[%llu %llu] too large inline extent size, have %llu, max: %u",
- 				root->objectid, fkey->objectid, fkey->offset,
- 				extent_num_bytes, max_inline_extent_size);
-				err |= FILE_EXTENT_ERROR;
-			}
-		}
-		if (!compressed && extent_num_bytes != item_inline_len) {
-			error(
-		"root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
-				root->objectid, fkey->objectid, fkey->offset,
-				extent_num_bytes, item_inline_len);
-			err |= FILE_EXTENT_ERROR;
-		}
-		*end += extent_num_bytes;
-		*size += extent_num_bytes;
-		return err;
-	}
 
 	/* Check extent type */
 	if (extent_type != BTRFS_FILE_EXTENT_REG &&
-			extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
+	    extent_type != BTRFS_FILE_EXTENT_PREALLOC &&
+	    extent_type != BTRFS_FILE_EXTENT_INLINE) {
 		err |= FILE_EXTENT_ERROR;
 		error("root %llu EXTENT_DATA[%llu %llu] type bad",
-		      root->objectid, fkey->objectid, fkey->offset);
+		      root->objectid, fkey.objectid, fkey.offset);
 		return err;
 	}
 
+	/* Check inline extent */
+	if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+		return check_file_extent_inline(root, path, size, end);
+
 	/* Check REG_EXTENT/PREALLOC_EXTENT */
 	disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
 	disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
@@ -1864,12 +1966,12 @@ static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
 	if (csum_found > 0 && nodatasum) {
 		err |= ODD_CSUM_ITEM;
 		error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
-		      root->objectid, fkey->objectid, fkey->offset);
+		      root->objectid, fkey.objectid, fkey.offset);
 	} else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
 		   !is_hole && (ret < 0 || csum_found < search_len)) {
 		err |= CSUM_ITEM_MISSING;
 		error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
-		      root->objectid, fkey->objectid, fkey->offset,
+		      root->objectid, fkey.objectid, fkey.offset,
 		      csum_found, search_len);
 	} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
 		   csum_found > 0) {
@@ -1881,22 +1983,22 @@ static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
 			err |= ODD_CSUM_ITEM;
 			error(
 "root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
-			      root->objectid, fkey->objectid, fkey->offset,
+			      root->objectid, fkey.objectid, fkey.offset,
 			      csum_found);
 		}
 	}
 
 	/* Check EXTENT_DATA hole */
-	if (!no_holes && *end != fkey->offset) {
+	if (!no_holes && *end != fkey.offset) {
 		if (repair)
-			ret = punch_extent_hole(root, fkey->objectid,
-						*end, fkey->offset - *end);
+			ret = punch_extent_hole(root, fkey.objectid,
+						*end, fkey.offset - *end);
 		if (!repair || ret) {
 			err |= FILE_EXTENT_ERROR;
 			error(
 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
-				root->objectid, fkey->objectid, fkey->offset,
-				fkey->objectid, *end);
+				root->objectid, fkey.objectid, fkey.offset,
+				fkey.objectid, *end);
 		}
 	}
 
@@ -2374,9 +2476,8 @@ static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path)
 					root->objectid, inode_id, key.objectid,
 					key.offset);
 			}
-			ret = check_file_extent(root, &key, node, slot,
-						nodatasum, &extent_size,
-						&extent_end);
+			ret = check_file_extent(root, path, nodatasum,
+						&extent_size, &extent_end);
 			err |= ret;
 			break;
 		case BTRFS_XATTR_ITEM_KEY:
@@ -2535,7 +2636,7 @@ again:
 	if (err & LAST_ITEM)
 		goto out;
 
-	/* still have inode items in thie leaf */
+	/* still have inode items in this leaf */
 	if (cur->start == cur_bytenr)
 		goto again;
 
@@ -2573,7 +2674,7 @@ out:
 
 /*
  * @level           if @level == -1 means extent data item
- *                  else normal treeblocl.
+ *                  else normal treeblock.
  */
 static int should_check_extent_strictly(struct btrfs_root *root,
 					struct node_refs *nrefs, int level)
@@ -2914,7 +3015,8 @@ static int repair_extent_data_item(struct btrfs_root *root,
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
 		trans = NULL;
-		error("fail to start transaction %s", strerror(-ret));
+		errno = -ret;
+		error("fail to start transaction: %m");
 		goto out;
 	}
 	/* insert an extent item */
@@ -3604,6 +3706,18 @@ static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
 		if (slot >= btrfs_header_nritems(leaf) ||
 		    btrfs_header_owner(leaf) != root_id)
 			goto next;
+		/*
+		 * For tree blocks that have been relocated, data backrefs are
+		 * shared instead of keyed. Do not account them.
+		 */
+		if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+			/*
+			 * Skip the rest of the leaf to speed things up.
+			 */
+			slot = btrfs_header_nritems(leaf);
+			goto next;
+		}
+
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		if (key.objectid != objectid ||
 		    key.type != BTRFS_EXTENT_DATA_KEY)
@@ -3716,7 +3830,8 @@ static int repair_extent_item(struct btrfs_root *root, struct btrfs_path *path,
 	trans = btrfs_start_transaction(extent_root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		error("fail to start transaction %s", strerror(-ret));
+		errno = -ret;
+		error("fail to start transaction: %m");
 		/* nothing happened */
 		ret = 0;
 		goto out;
@@ -4177,7 +4292,8 @@ static int repair_chunk_item(struct btrfs_root *chunk_root,
 	trans = btrfs_start_transaction(extent_root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		error("fail to start transaction %s", strerror(-ret));
+		errno = -ret;
+		error("fail to start transaction: %m");
 		return ret;
 	}
 
@@ -4212,7 +4328,8 @@ static int delete_extent_tree_item(struct btrfs_root *root,
 	trans = btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		error("fail to start transaction %s", strerror(-ret));
+		errno = -ret;
+		error("fail to start transaction: %m");
 		goto out;
 	}
 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -4719,6 +4836,7 @@ static int check_btrfs_root(struct btrfs_root *root, int check_all)
 	}
 
 	while (1) {
+		ctx.item_count++;
 		ret = walk_down_tree(root, &path, &level, &nrefs, check_all);
 
 		if (ret > 0)
diff --git a/check/mode-lowmem.h b/check/mode-lowmem.h
index 91f7b6b1..0ad2a9e3 100644
--- a/check/mode-lowmem.h
+++ b/check/mode-lowmem.h
@@ -56,7 +56,7 @@
 #define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
 #define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
 #define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
-#define REFERENCER_MISMATCH	(1 << 4) /* Referenceer found but does not match */
+#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
 #define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
 #define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
diff --git a/check/mode-original.h b/check/mode-original.h
index 13cfa5b9..ec2842e0 100644
--- a/check/mode-original.h
+++ b/check/mode-original.h
@@ -187,6 +187,7 @@ struct file_extent_hole {
 #define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
 #define I_ERR_FILE_EXTENT_TOO_LARGE	(1 << 15)
 #define I_ERR_ODD_INODE_FLAGS		(1 << 16)
+#define I_ERR_INLINE_RAM_BYTES_WRONG	(1 << 17)
 
 struct inode_record {
 	struct list_head backrefs;
diff --git a/chunk-recover.c b/chunk-recover.c
index 705bcf52..1d30db51 100644
--- a/chunk-recover.c
+++ b/chunk-recover.c
@@ -1492,7 +1492,7 @@ out:
 	return ERR_PTR(ret);
 }
 
-static int recover_prepare(struct recover_control *rc, char *path)
+static int recover_prepare(struct recover_control *rc, const char *path)
 {
 	int ret;
 	int fd;
@@ -2296,7 +2296,7 @@ static void validate_rebuild_chunks(struct recover_control *rc)
 /*
  * Return 0 when successful, < 0 on error and > 0 if aborted by user
  */
-int btrfs_recover_chunk_tree(char *path, int verbose, int yes)
+int btrfs_recover_chunk_tree(const char *path, int verbose, int yes)
 {
 	int ret = 0;
 	struct btrfs_root *root = NULL;
diff --git a/cmds-balance.c b/cmds-balance.c
index 0c91bdf1..6cc26c35 100644
--- a/cmds-balance.c
+++ b/cmds-balance.c
@@ -528,6 +528,7 @@ static int cmd_balance_start(int argc, char **argv)
 
 	memset(&args, 0, sizeof(args));
 
+	optind = 0;
 	while (1) {
 		enum { GETOPT_VAL_FULL_BALANCE = 256,
 			GETOPT_VAL_BACKGROUND = 257 };
@@ -831,6 +832,7 @@ static int cmd_balance_status(int argc, char **argv)
 	int verbose = 0;
 	int ret;
 
+	optind = 0;
 	while (1) {
 		int opt;
 		static const struct option longopts[] = {
diff --git a/cmds-device.c b/cmds-device.c
index 86459d1b..d3102ba7 100644
--- a/cmds-device.c
+++ b/cmds-device.c
@@ -57,6 +57,7 @@ static int cmd_device_add(int argc, char **argv)
 	int force = 0;
 	int last_dev;
 
+	optind = 0;
 	while (1) {
 		int c;
 		static const struct option long_options[] = {
@@ -267,6 +268,7 @@ static int cmd_device_scan(int argc, char **argv)
 	int all = 0;
 	int ret = 0;
 
+	optind = 0;
 	while (1) {
 		int c;
 		static const struct option long_options[] = {
@@ -403,6 +405,7 @@ static int cmd_device_stats(int argc, char **argv)
 	__u64 flags = 0;
 	DIR *dirstream = NULL;
 
+	optind = 0;
 	while (1) {
 		int c;
 		static const struct option long_options[] = {
@@ -439,8 +442,8 @@ static int cmd_device_stats(int argc, char **argv)
 
 	ret = get_fs_info(dev_path, &fi_args, &di_args);
 	if (ret) {
-		error("getting device info for %s failed: %s", dev_path,
-			strerror(-ret));
+		errno = -ret;
+		error("getting device info for %s failed: %m", dev_path);
 		err = 1;
 		goto out;
 	}
@@ -526,7 +529,7 @@ static const char * const cmd_device_usage_usage[] = {
 	NULL
 };
 
-static int _cmd_device_usage(int fd, char *path, unsigned unit_mode)
+static int _cmd_device_usage(int fd, const char *path, unsigned unit_mode)
 {
 	int i;
 	int ret = 0;
diff --git a/cmds-fi-du.c b/cmds-fi-du.c
index 7e6bb7f6..496a55b8 100644
--- a/cmds-fi-du.c
+++ b/cmds-fi-du.c
@@ -406,9 +406,10 @@ static int du_walk_dir(struct du_dir_ctxt *ctxt, struct rb_root *shared_extents)
 					ret = 0;
 					continue;
 				} else if (ret) {
+					errno = -ret;
 					fprintf(stderr,
-						"failed to walk dir/file: %s :%s\n",
-						entry->d_name, strerror(-ret));
+					"failed to walk dir/file: %s : %m\n",
+						entry->d_name);
 					break;
 				}
 
@@ -565,6 +566,7 @@ int cmd_filesystem_du(int argc, char **argv)
 
 	unit_mode = get_unit_mode_from_arg(&argc, argv, 1);
 
+	optind = 0;
 	while (1) {
 		static const struct option long_options[] = {
 			{ "summarize", no_argument, NULL, 's'},
@@ -600,8 +602,8 @@ int cmd_filesystem_du(int argc, char **argv)
 	for (i = optind; i < argc; i++) {
 		ret = du_add_file(argv[i], AT_FDCWD, NULL, NULL, NULL, 1);
 		if (ret) {
-			error("cannot check space of '%s': %s", argv[i],
-					strerror(-ret));
+			errno = -ret;
+			error("cannot check space of '%s': %m", argv[i]);
 			err = 1;
 		}
 
diff --git a/cmds-fi-usage.c b/cmds-fi-usage.c
index 3bd2ccdf..dca2e8d0 100644
--- a/cmds-fi-usage.c
+++ b/cmds-fi-usage.c
@@ -227,7 +227,7 @@ static int cmp_btrfs_ioctl_space_info(const void *a, const void *b)
 /*
  * This function load all the information about the space usage
  */
-static struct btrfs_ioctl_space_args *load_space_info(int fd, char *path)
+static struct btrfs_ioctl_space_args *load_space_info(int fd, const char *path)
 {
 	struct btrfs_ioctl_space_args *sargs = NULL, *sargs_orig = NULL;
 	int ret, count;
@@ -305,7 +305,7 @@ static void get_raid56_used(struct chunk_info *chunks, int chunkcount,
 #define	MIN_UNALOCATED_THRESH	SZ_16M
 static int print_filesystem_usage_overall(int fd, struct chunk_info *chunkinfo,
 		int chunkcount, struct device_info *devinfo, int devcount,
-		char *path, unsigned unit_mode)
+		const char *path, unsigned unit_mode)
 {
 	struct btrfs_ioctl_space_args *sargs = NULL;
 	int i;
@@ -933,7 +933,7 @@ static void _cmd_filesystem_usage_linear(unsigned unit_mode,
 static int print_filesystem_usage_by_chunk(int fd,
 		struct chunk_info *chunkinfo, int chunkcount,
 		struct device_info *devinfo, int devcount,
-		char *path, unsigned unit_mode, int tabular)
+		const char *path, unsigned unit_mode, int tabular)
 {
 	struct btrfs_ioctl_space_args *sargs;
 	int ret = 0;
@@ -974,6 +974,7 @@ int cmd_filesystem_usage(int argc, char **argv)
 
 	unit_mode = get_unit_mode_from_arg(&argc, argv, 1);
 
+	optind = 0;
 	while (1) {
 		int c;
 
diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 30a50bf5..d1af21ee 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -145,7 +145,8 @@ static int cmd_filesystem_df(int argc, char **argv)
 		print_df(sargs, unit_mode);
 		free(sargs);
 	} else {
-		error("get_df failed %s", strerror(-ret));
+		errno = -ret;
+		error("get_df failed: %m");
 	}
 
 	close_file_or_dir(fd, dirstream);
@@ -685,6 +686,7 @@ static int cmd_filesystem_show(int argc, char **argv)
 
 	unit_mode = get_unit_mode_from_arg(&argc, argv, 0);
 
+	optind = 0;
 	while (1) {
 		int c;
 		static const struct option long_options[] = {
@@ -924,6 +926,7 @@ static int cmd_filesystem_defrag(int argc, char **argv)
 	defrag_global_errors = 0;
 	defrag_global_verbose = 0;
 	defrag_global_errors = 0;
+	optind = 0;
 	while(1) {
 		int c = getopt(argc, argv, "vrc::fs:l:t:");
 		if (c < 0)
@@ -1052,8 +1055,8 @@ static int cmd_filesystem_defrag(int argc, char **argv)
 				break;
 			}
 			if (ret) {
-				error("defrag failed on %s: %s", argv[i],
-				      strerror(defrag_err));
+				errno = defrag_err;
+				error("defrag failed on %s: %m", argv[i]);
 				goto next;
 			}
 		}
diff --git a/cmds-inspect-dump-tree.c b/cmds-inspect-dump-tree.c
index 92a2a45b..ad5345b4 100644
--- a/cmds-inspect-dump-tree.c
+++ b/cmds-inspect-dump-tree.c
@@ -221,6 +221,7 @@ int cmd_inspect_dump_tree(int argc, char **argv)
 	int uuid_tree_only = 0;
 	int roots_only = 0;
 	int root_backups = 0;
+	int traverse = BTRFS_PRINT_TREE_DEFAULT;
 	unsigned open_ctree_flags;
 	u64 block_only = 0;
 	struct btrfs_root *tree_root_scan;
@@ -235,9 +236,11 @@ int cmd_inspect_dump_tree(int argc, char **argv)
 	 * tree blocks as possible.
 	 */
 	open_ctree_flags = OPEN_CTREE_PARTIAL | OPEN_CTREE_NO_BLOCK_GROUPS;
+	optind = 0;
 	while (1) {
 		int c;
-		enum { GETOPT_VAL_FOLLOW = 256 };
+		enum { GETOPT_VAL_FOLLOW = 256, GETOPT_VAL_DFS,
+		       GETOPT_VAL_BFS };
 		static const struct option long_options[] = {
 			{ "extents", no_argument, NULL, 'e'},
 			{ "device", no_argument, NULL, 'd'},
@@ -247,6 +250,8 @@ int cmd_inspect_dump_tree(int argc, char **argv)
 			{ "block", required_argument, NULL, 'b'},
 			{ "tree", required_argument, NULL, 't'},
 			{ "follow", no_argument, NULL, GETOPT_VAL_FOLLOW },
+			{ "bfs", no_argument, NULL, GETOPT_VAL_BFS },
+			{ "dfs", no_argument, NULL, GETOPT_VAL_DFS },
 			{ NULL, 0, NULL, 0 }
 		};
 
@@ -302,6 +307,12 @@ int cmd_inspect_dump_tree(int argc, char **argv)
 		case GETOPT_VAL_FOLLOW:
 			follow = true;
 			break;
+		case GETOPT_VAL_DFS:
+			traverse = BTRFS_PRINT_TREE_DFS;
+			break;
+		case GETOPT_VAL_BFS:
+			traverse = BTRFS_PRINT_TREE_BFS;
+			break;
 		default:
 			usage(cmd_inspect_dump_tree_usage);
 		}
@@ -312,7 +323,13 @@ int cmd_inspect_dump_tree(int argc, char **argv)
 
 	ret = check_arg_type(argv[optind]);
 	if (ret != BTRFS_ARG_BLKDEV && ret != BTRFS_ARG_REG) {
-		error("not a block device or regular file: %s", argv[optind]);
+		if (ret < 0) {
+			errno = -ret;
+			error("invalid argument %s: %m", argv[optind]);
+		} else {
+			error("not a block device or regular file: %s",
+			      argv[optind]);
+		}
 		goto out;
 	}
 
@@ -340,7 +357,7 @@ int cmd_inspect_dump_tree(int argc, char **argv)
 				(unsigned long long)block_only);
 			goto close_root;
 		}
-		btrfs_print_tree(leaf, follow);
+		btrfs_print_tree(leaf, follow, BTRFS_PRINT_TREE_DEFAULT);
 		free_extent_buffer(leaf);
 		goto close_root;
 	}
@@ -367,17 +384,20 @@ int cmd_inspect_dump_tree(int argc, char **argv)
 		} else {
 			if (info->tree_root->node) {
 				printf("root tree\n");
-				btrfs_print_tree(info->tree_root->node, 1);
+				btrfs_print_tree(info->tree_root->node, true,
+						 traverse);
 			}
 
 			if (info->chunk_root->node) {
 				printf("chunk tree\n");
-				btrfs_print_tree(info->chunk_root->node, 1);
+				btrfs_print_tree(info->chunk_root->node, true,
+						 traverse);
 			}
 
 			if (info->log_root_tree) {
 				printf("log root tree\n");
-				btrfs_print_tree(info->log_root_tree->node, 1);
+				btrfs_print_tree(info->log_root_tree->node,
+						 true, traverse);
 			}
 		}
 	}
@@ -397,7 +417,7 @@ again:
 			goto close_root;
 		}
 		printf("root tree\n");
-		btrfs_print_tree(info->tree_root->node, 1);
+		btrfs_print_tree(info->tree_root->node, true, traverse);
 		goto close_root;
 	}
 
@@ -407,7 +427,7 @@ again:
 			goto close_root;
 		}
 		printf("chunk tree\n");
-		btrfs_print_tree(info->chunk_root->node, 1);
+		btrfs_print_tree(info->chunk_root->node, true, traverse);
 		goto close_root;
 	}
 
@@ -417,7 +437,7 @@ again:
 			goto close_root;
 		}
 		printf("log root tree\n");
-		btrfs_print_tree(info->log_root_tree->node, 1);
+		btrfs_print_tree(info->log_root_tree->node, true, traverse);
 		goto close_root;
 	}
 
@@ -426,9 +446,9 @@ again:
 	key.type = BTRFS_ROOT_ITEM_KEY;
 	ret = btrfs_search_slot(NULL, tree_root_scan, &key, &path, 0, 0);
 	if (ret < 0) {
-		error("cannot read ROOT_ITEM from tree %llu: %s",
-			(unsigned long long)tree_root_scan->root_key.objectid,
-			strerror(-ret));
+		errno = -ret;
+		error("cannot read ROOT_ITEM from tree %llu: %m",
+			(unsigned long long)tree_root_scan->root_key.objectid);
 		goto close_root;
 	}
 	while (1) {
@@ -563,7 +583,7 @@ again:
 					       btrfs_header_level(buf));
 				} else {
 					printf(" \n");
-					btrfs_print_tree(buf, 1);
+					btrfs_print_tree(buf, true, traverse);
 				}
 			}
 			free_extent_buffer(buf);
diff --git a/cmds-inspect-tree-stats.c b/cmds-inspect-tree-stats.c
index eced0db9..0921a8b0 100644
--- a/cmds-inspect-tree-stats.c
+++ b/cmds-inspect-tree-stats.c
@@ -434,6 +434,7 @@ int cmd_inspect_tree_stats(int argc, char **argv)
 	int opt;
 	int ret = 0;
 
+	optind = 0;
 	while ((opt = getopt(argc, argv, "vb")) != -1) {
 		switch (opt) {
 		case 'v':
@@ -453,8 +454,8 @@ int cmd_inspect_tree_stats(int argc, char **argv)
 
 	ret = check_mounted(argv[optind]);
 	if (ret < 0) {
-		warning("unable to check mount status of: %s",
-				strerror(-ret));
+		errno = -ret;
+		warning("unable to check mount status of: %m");
 	} else if (ret) {
 		warning("%s already mounted, results may be inaccurate",
 				argv[optind]);
diff --git a/cmds-inspect.c b/cmds-inspect.c
index afd7fe48..efea0331 100644
--- a/cmds-inspect.c
+++ b/cmds-inspect.c
@@ -94,6 +94,7 @@ static int cmd_inspect_inode_resolve(int argc, char **argv)
 	int ret;
 	DIR *dirstream = NULL;
 
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "v");
 		if (c < 0)
@@ -148,6 +149,7 @@ static int cmd_inspect_logical_resolve(int argc, char **argv)
 	char *path_ptr;
 	DIR *dirstream = NULL;
 
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "Pvs:");
 		if (c < 0)
@@ -209,6 +211,7 @@ static int cmd_inspect_logical_resolve(int argc, char **argv)
 		error("path buffer too small: %d bytes", bytes_left);
 		goto out;
 	}
+	ret = 0;
 
 	for (i = 0; i < inodes->elem_cnt; i += 3) {
 		u64 inum = inodes->val[i];
@@ -243,7 +246,8 @@ static int cmd_inspect_logical_resolve(int argc, char **argv)
 					goto out;
 				}
 			}
-			__ino_to_path_fd(inum, path_fd, verbose, full_path);
+			ret = __ino_to_path_fd(inum, path_fd, verbose,
+						full_path);
 			if (path_fd != fd)
 				close_file_or_dir(path_fd, dirs);
 		} else {
@@ -326,7 +330,8 @@ static int cmd_inspect_rootid(int argc, char **argv)
 
 	ret = lookup_path_rootid(fd, &rootid);
 	if (ret) {
-		error("failed to lookup root id: %s", strerror(-ret));
+		errno = -ret;
+		error("failed to lookup root id: %m");
 		goto out;
 	}
 
@@ -561,7 +566,8 @@ static int print_min_dev_size(int fd, u64 devid)
 				ret = add_dev_extent(&holes, last_pos,
 					btrfs_search_header_offset(sh) - 1, 1);
 			if (ret) {
-				error("add device extent: %s", strerror(-ret));
+				errno = -ret;
+				error("add device extent: %m");
 				ret = 1;
 				goto out;
 			}
@@ -591,6 +597,7 @@ static int cmd_inspect_min_dev_size(int argc, char **argv)
 	DIR *dirstream = NULL;
 	u64 devid = 1;
 
+	optind = 0;
 	while (1) {
 		int c;
 		enum { GETOPT_VAL_DEVID = 256 };
diff --git a/cmds-property.c b/cmds-property.c
index 03bafa05..7b0a6ffa 100644
--- a/cmds-property.c
+++ b/cmds-property.c
@@ -91,7 +91,8 @@ static int check_is_root(const char *object)
 
 	ret = get_fsid(object, fsid, 0);
 	if (ret < 0) {
-		error("get_fsid for %s failed: %s", object, strerror(-ret));
+		errno = -ret;
+		error("get_fsid for %s failed: %m", object);
 		goto out;
 	}
 
@@ -103,7 +104,8 @@ static int check_is_root(const char *object)
 		ret = 1;
 		goto out;
 	} else if (ret < 0) {
-		error("get_fsid for %s failed: %s", tmp, strerror(-ret));
+		errno = -ret;
+		error("get_fsid for %s failed: %m", tmp);
 		goto out;
 	}
 
@@ -317,8 +319,8 @@ static void parse_args(int argc, char **argv,
 	if (!*types) {
 		ret = autodetect_object_types(*object, types);
 		if (ret < 0) {
-			error("failed to detect object type: %s",
-				strerror(-ret));
+			errno = -ret;
+			error("failed to detect object type: %m");
 			usage(usage_str);
 		}
 		if (!*types) {
diff --git a/cmds-qgroup.c b/cmds-qgroup.c
index 93206900..b36fd798 100644
--- a/cmds-qgroup.c
+++ b/cmds-qgroup.c
@@ -40,12 +40,13 @@ static int _cmd_qgroup_assign(int assign, int argc, char **argv,
 {
 	int ret = 0;
 	int fd;
-	int rescan = 0;
+	bool rescan = true;
 	char *path;
 	struct btrfs_ioctl_qgroup_assign_args args;
 	DIR *dirstream = NULL;
 
 	if (assign) {
+		optind = 0;
 		while (1) {
 			enum { GETOPT_VAL_RESCAN = 256, GETOPT_VAL_NO_RESCAN };
 			static const struct option long_options[] = {
@@ -61,10 +62,10 @@ static int _cmd_qgroup_assign(int assign, int argc, char **argv,
 				break;
 			switch (c) {
 			case GETOPT_VAL_RESCAN:
-				rescan = 1;
+				rescan = true;
 				break;
 			case GETOPT_VAL_NO_RESCAN:
-				rescan = 0;
+				rescan = false;
 				break;
 			default:
 				/* Usage printed by the caller */
@@ -122,6 +123,7 @@ static int _cmd_qgroup_assign(int assign, int argc, char **argv,
 				error("quota rescan failed: %m");
 		} else {
 			warning("quotas may be inconsistent, rescan needed");
+			ret = 0;
 		}
 	}
 	close_file_or_dir(fd, dirstream);
@@ -310,6 +312,7 @@ static int cmd_qgroup_show(int argc, char **argv)
 
 	unit_mode = get_unit_mode_from_arg(&argc, argv, 0);
 
+	optind = 0;
 	while (1) {
 		int c;
 		enum {
@@ -377,15 +380,14 @@ static int cmd_qgroup_show(int argc, char **argv)
 	if (sync) {
 		err = btrfs_util_sync_fd(fd);
 		if (err)
-			warning("sync ioctl failed on '%s': %s", path,
-				strerror(errno));
+			warning("sync ioctl failed on '%s': %m", path);
 	}
 
 	if (filter_flag) {
 		ret = lookup_path_rootid(fd, &qgroupid);
 		if (ret < 0) {
-			error("cannot resolve rootid for %s: %s",
-					path, strerror(-ret));
+			errno = -ret;
+			error("cannot resolve rootid for %s: %m", path);
 			close_file_or_dir(fd, dirstream);
 			goto out;
 		}
@@ -429,6 +431,7 @@ static int cmd_qgroup_limit(int argc, char **argv)
 	DIR *dirstream = NULL;
 	enum btrfs_util_error err;
 
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "ce");
 		if (c < 0)
diff --git a/cmds-quota.c b/cmds-quota.c
index 745889d1..c9ea9c0f 100644
--- a/cmds-quota.c
+++ b/cmds-quota.c
@@ -119,6 +119,7 @@ static int cmd_quota_rescan(int argc, char **argv)
 	DIR *dirstream = NULL;
 	int wait_for_completion = 0;
 
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "sw");
 		if (c < 0)
diff --git a/cmds-receive.c b/cmds-receive.c
index 68123a31..3888149a 100644
--- a/cmds-receive.c
+++ b/cmds-receive.c
@@ -103,8 +103,7 @@ static int finish_subvol(struct btrfs_receive *rctx)
 			   O_RDONLY | O_NOATIME);
 	if (subvol_fd < 0) {
 		ret = -errno;
-		error("cannot open %s: %s",
-				rctx->cur_subvol_path, strerror(-ret));
+		error("cannot open %s: %m", rctx->cur_subvol_path);
 		goto out;
 	}
 
@@ -121,8 +120,7 @@ static int finish_subvol(struct btrfs_receive *rctx)
 	ret = ioctl(subvol_fd, BTRFS_IOC_SET_RECEIVED_SUBVOL, &rs_args);
 	if (ret < 0) {
 		ret = -errno;
-		error("ioctl BTRFS_IOC_SET_RECEIVED_SUBVOL failed: %s",
-				strerror(-ret));
+		error("ioctl BTRFS_IOC_SET_RECEIVED_SUBVOL failed: %m");
 		goto out;
 	}
 	rctx->cur_subvol.rtransid = rs_args.rtransid;
@@ -130,8 +128,7 @@ static int finish_subvol(struct btrfs_receive *rctx)
 	ret = ioctl(subvol_fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
 	if (ret < 0) {
 		ret = -errno;
-		error("ioctl BTRFS_IOC_SUBVOL_GETFLAGS failed: %s",
-				strerror(-ret));
+		error("ioctl BTRFS_IOC_SUBVOL_GETFLAGS failed: %m");
 		goto out;
 	}
 
@@ -140,8 +137,7 @@ static int finish_subvol(struct btrfs_receive *rctx)
 	ret = ioctl(subvol_fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags);
 	if (ret < 0) {
 		ret = -errno;
-		error("failed to make subvolume read only: %s",
-				strerror(-ret));
+		error("failed to make subvolume read only: %m");
 		goto out;
 	}
 
@@ -215,7 +211,7 @@ static int process_subvol(const char *path, const u8 *uuid, u64 ctransid,
 	ret = ioctl(rctx->dest_dir_fd, BTRFS_IOC_SUBVOL_CREATE, &args_v1);
 	if (ret < 0) {
 		ret = -errno;
-		error("creating subvolume %s failed: %s", path, strerror(-ret));
+		error("creating subvolume %s failed: %m", path);
 		goto out;
 	}
 
@@ -360,8 +356,7 @@ static int process_snapshot(const char *path, const u8 *uuid, u64 ctransid,
 	if (args_v2.fd < 0) {
 		ret = -errno;
 		if (errno != ENOENT)
-			error("cannot open %s: %s",
-					parent_subvol->path, strerror(-ret));
+			error("cannot open %s: %m", parent_subvol->path);
 		else
 			fprintf(stderr,
 				"It seems that you have changed your default "
@@ -375,8 +370,8 @@ static int process_snapshot(const char *path, const u8 *uuid, u64 ctransid,
 	close(args_v2.fd);
 	if (ret < 0) {
 		ret = -errno;
-		error("creating snapshot %s -> %s failed: %s",
-				parent_subvol->path, path, strerror(-ret));
+		error("creating snapshot %s -> %s failed: %m",
+				parent_subvol->path, path);
 		goto out;
 	}
 
@@ -406,7 +401,7 @@ static int process_mkfile(const char *path, void *user)
 	ret = creat(full_path, 0600);
 	if (ret < 0) {
 		ret = -errno;
-		error("mkfile %s failed: %s", path, strerror(-ret));
+		error("mkfile %s failed: %m", path);
 		goto out;
 	}
 	close(ret);
@@ -434,7 +429,7 @@ static int process_mkdir(const char *path, void *user)
 	ret = mkdir(full_path, 0700);
 	if (ret < 0) {
 		ret = -errno;
-		error("mkdir %s failed: %s", path, strerror(-ret));
+		error("mkdir %s failed: %m", path);
 	}
 
 out:
@@ -460,7 +455,7 @@ static int process_mknod(const char *path, u64 mode, u64 dev, void *user)
 	ret = mknod(full_path, mode & S_IFMT, dev);
 	if (ret < 0) {
 		ret = -errno;
-		error("mknod %s failed: %s", path, strerror(-ret));
+		error("mknod %s failed: %m", path);
 	}
 
 out:
@@ -485,7 +480,7 @@ static int process_mkfifo(const char *path, void *user)
 	ret = mkfifo(full_path, 0600);
 	if (ret < 0) {
 		ret = -errno;
-		error("mkfifo %s failed: %s", path, strerror(-ret));
+		error("mkfifo %s failed: %m", path);
 	}
 
 out:
@@ -510,7 +505,7 @@ static int process_mksock(const char *path, void *user)
 	ret = mknod(full_path, 0600 | S_IFSOCK, 0);
 	if (ret < 0) {
 		ret = -errno;
-		error("mknod %s failed: %s", path, strerror(-ret));
+		error("mknod %s failed: %m", path);
 	}
 
 out:
@@ -535,8 +530,7 @@ static int process_symlink(const char *path, const char *lnk, void *user)
 	ret = symlink(lnk, full_path);
 	if (ret < 0) {
 		ret = -errno;
-		error("symlink %s -> %s failed: %s", path,
-				lnk, strerror(-ret));
+		error("symlink %s -> %s failed: %m", path, lnk);
 	}
 
 out:
@@ -568,8 +562,7 @@ static int process_rename(const char *from, const char *to, void *user)
 	ret = rename(full_from, full_to);
 	if (ret < 0) {
 		ret = -errno;
-		error("rename %s -> %s failed: %s", from,
-				to, strerror(-ret));
+		error("rename %s -> %s failed: %m", from, to);
 	}
 
 out:
@@ -601,7 +594,7 @@ static int process_link(const char *path, const char *lnk, void *user)
 	ret = link(full_link_path, full_path);
 	if (ret < 0) {
 		ret = -errno;
-		error("link %s -> %s failed: %s", path, lnk, strerror(-ret));
+		error("link %s -> %s failed: %m", path, lnk);
 	}
 
 out:
@@ -627,7 +620,7 @@ static int process_unlink(const char *path, void *user)
 	ret = unlink(full_path);
 	if (ret < 0) {
 		ret = -errno;
-		error("unlink %s failed. %s", path, strerror(-ret));
+		error("unlink %s failed: %m", path);
 	}
 
 out:
@@ -652,7 +645,7 @@ static int process_rmdir(const char *path, void *user)
 	ret = rmdir(full_path);
 	if (ret < 0) {
 		ret = -errno;
-		error("rmdir %s failed: %s", path, strerror(-ret));
+		error("rmdir %s failed: %m", path);
 	}
 
 out:
@@ -673,7 +666,7 @@ static int open_inode_for_write(struct btrfs_receive *rctx, const char *path)
 	rctx->write_fd = open(path, O_RDWR);
 	if (rctx->write_fd < 0) {
 		ret = -errno;
-		error("cannot open %s: %s", path, strerror(-ret));
+		error("cannot open %s: %m", path);
 		goto out;
 	}
 	strncpy_null(rctx->write_path, path);
@@ -716,8 +709,7 @@ static int process_write(const char *path, const void *data, u64 offset,
 				offset + pos);
 		if (w < 0) {
 			ret = -errno;
-			error("writing to %s failed: %s",
-					path, strerror(-ret));
+			error("writing to %s failed: %m", path);
 			goto out;
 		}
 		pos += w;
@@ -812,7 +804,7 @@ static int process_clone(const char *path, u64 offset, u64 len,
 	clone_fd = openat(rctx->mnt_fd, full_clone_path, O_RDONLY | O_NOATIME);
 	if (clone_fd < 0) {
 		ret = -errno;
-		error("cannot open %s: %s", full_clone_path, strerror(-ret));
+		error("cannot open %s: %m", full_clone_path);
 		goto out;
 	}
 
@@ -823,8 +815,7 @@ static int process_clone(const char *path, u64 offset, u64 len,
 	ret = ioctl(rctx->write_fd, BTRFS_IOC_CLONE_RANGE, &clone_args);
 	if (ret < 0) {
 		ret = -errno;
-		error("failed to clone extents to %s\n%s",
-				path, strerror(-ret));
+		error("failed to clone extents to %s: %m", path);
 		goto out;
 	}
 
@@ -878,8 +869,8 @@ static int process_set_xattr(const char *path, const char *name,
 	ret = lsetxattr(full_path, name, data, len, 0);
 	if (ret < 0) {
 		ret = -errno;
-		error("lsetxattr %s %s=%.*s failed: %s",
-				path, name, len, (char*)data, strerror(-ret));
+		error("lsetxattr %s %s=%.*s failed: %m",
+				path, name, len, (char*)data);
 		goto out;
 	}
 
@@ -907,8 +898,7 @@ static int process_remove_xattr(const char *path, const char *name, void *user)
 	ret = lremovexattr(full_path, name);
 	if (ret < 0) {
 		ret = -errno;
-		error("lremovexattr %s %s failed: %s",
-				path, name, strerror(-ret));
+		error("lremovexattr %s %s failed: %m", path, name);
 		goto out;
 	}
 
@@ -934,7 +924,7 @@ static int process_truncate(const char *path, u64 size, void *user)
 	ret = truncate(full_path, size);
 	if (ret < 0) {
 		ret = -errno;
-		error("truncate %s failed: %s", path, strerror(-ret));
+		error("truncate %s failed: %m", path);
 		goto out;
 	}
 
@@ -960,7 +950,7 @@ static int process_chmod(const char *path, u64 mode, void *user)
 	ret = chmod(full_path, mode);
 	if (ret < 0) {
 		ret = -errno;
-		error("chmod %s failed: %s", path, strerror(-ret));
+		error("chmod %s failed: %m", path);
 		goto out;
 	}
 
@@ -987,7 +977,7 @@ static int process_chown(const char *path, u64 uid, u64 gid, void *user)
 	ret = lchown(full_path, uid, gid);
 	if (ret < 0) {
 		ret = -errno;
-		error("chown %s failed: %s", path, strerror(-ret));
+		error("chown %s failed: %m", path);
 		goto out;
 	}
 
@@ -1002,8 +992,7 @@ static int process_chown(const char *path, u64 uid, u64 gid, void *user)
 		rctx->cached_capabilities_len = 0;
 		if (ret < 0) {
 			ret = -errno;
-			error("restoring capabilities %s: %s",
-					path, strerror(-ret));
+			error("restoring capabilities %s: %m", path);
 			goto out;
 		}
 	}
@@ -1035,8 +1024,7 @@ static int process_utimes(const char *path, struct timespec *at,
 	ret = utimensat(AT_FDCWD, full_path, tv, AT_SYMLINK_NOFOLLOW);
 	if (ret < 0) {
 		ret = -errno;
-		error("utimes %s failed: %s",
-				path, strerror(-ret));
+		error("utimes %s failed: %m", path);
 		goto out;
 	}
 
@@ -1096,14 +1084,14 @@ static int do_receive(struct btrfs_receive *rctx, const char *tomnt,
 	dest_dir_full_path = realpath(tomnt, NULL);
 	if (!dest_dir_full_path) {
 		ret = -errno;
-		error("realpath(%s) failed: %s", tomnt, strerror(-ret));
+		error("realpath(%s) failed: %m", tomnt);
 		goto out;
 	}
 	rctx->dest_dir_fd = open(dest_dir_full_path, O_RDONLY | O_NOATIME);
 	if (rctx->dest_dir_fd < 0) {
 		ret = -errno;
-		error("cannot open destination directory %s: %s",
-			dest_dir_full_path, strerror(-ret));
+		error("cannot open destination directory %s: %m",
+			dest_dir_full_path);
 		goto out;
 	}
 
@@ -1112,8 +1100,9 @@ static int do_receive(struct btrfs_receive *rctx, const char *tomnt,
 	} else {
 		ret = find_mount_root(dest_dir_full_path, &rctx->root_path);
 		if (ret < 0) {
-			error("failed to determine mount point for %s: %s",
-				dest_dir_full_path, strerror(-ret));
+			errno = -ret;
+			error("failed to determine mount point for %s: %m",
+				dest_dir_full_path);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -1127,7 +1116,7 @@ static int do_receive(struct btrfs_receive *rctx, const char *tomnt,
 	rctx->mnt_fd = open(rctx->root_path, O_RDONLY | O_NOATIME);
 	if (rctx->mnt_fd < 0) {
 		ret = -errno;
-		error("cannot open %s: %s", rctx->root_path, strerror(-ret));
+		error("cannot open %s: %m", rctx->root_path);
 		goto out;
 	}
 
@@ -1158,14 +1147,12 @@ static int do_receive(struct btrfs_receive *rctx, const char *tomnt,
 	if (rctx->dest_dir_chroot) {
 		if (chroot(dest_dir_full_path)) {
 			ret = -errno;
-			error("failed to chroot to %s: %s",
-				dest_dir_full_path, strerror(-ret));
+			error("failed to chroot to %s: %m", dest_dir_full_path);
 			goto out;
 		}
 		if (chdir("/")) {
 			ret = -errno;
-			error("failed to chdir to / after chroot: %s",
-				strerror(-ret));
+			error("failed to chdir to / after chroot: %m");
 			goto out;
 		}
 		fprintf(stderr, "Chroot to %s\n", dest_dir_full_path);
@@ -1267,6 +1254,7 @@ int cmd_receive(int argc, char **argv)
 	realmnt[0] = 0;
 	fromfile[0] = 0;
 
+	optind = 0;
 	while (1) {
 		int c;
 		enum { GETOPT_VAL_DUMP = 257 };
@@ -1344,9 +1332,10 @@ int cmd_receive(int argc, char **argv)
 		dump_args.full_subvol_path[1] = '\0';
 		ret = btrfs_read_and_process_send_stream(receive_fd,
 				&btrfs_print_send_ops, &dump_args, 0, 0);
-		if (ret < 0)
-			error("failed to dump the send stream: %s",
-			      strerror(-ret));
+		if (ret < 0) {
+			errno = -ret;
+			error("failed to dump the send stream: %m");
+		}
 	} else {
 		ret = do_receive(&rctx, tomnt, realmnt, receive_fd, max_errors);
 	}
diff --git a/cmds-replace.c b/cmds-replace.c
index 032a44fc..b30e6c78 100644
--- a/cmds-replace.c
+++ b/cmds-replace.c
@@ -134,6 +134,7 @@ static int cmd_replace_start(int argc, char **argv)
 	u64 srcdev_size;
 	u64 dstdev_size;
 
+	optind = 0;
 	while ((c = getopt(argc, argv, "Brf")) != -1) {
 		switch (c) {
 		case 'B':
@@ -207,7 +208,8 @@ static int cmd_replace_start(int argc, char **argv)
 
 		ret = get_fs_info(path, &fi_args, &di_args);
 		if (ret) {
-			error("failed to get device info: %s", strerror(-ret));
+			errno = -ret;
+			error("failed to get device info: %m");
 			free(di_args);
 			goto leave_with_error;
 		}
@@ -333,6 +335,7 @@ static int cmd_replace_status(int argc, char **argv)
 	int ret;
 	DIR *dirstream = NULL;
 
+	optind = 0;
 	while ((c = getopt(argc, argv, "1")) != -1) {
 		switch (c) {
 		case '1':
@@ -501,6 +504,7 @@ static int cmd_replace_cancel(int argc, char **argv)
 	char *path;
 	DIR *dirstream = NULL;
 
+	optind = 0;
 	while ((c = getopt(argc, argv, "")) != -1) {
 		switch (c) {
 		case '?':
diff --git a/cmds-rescue.c b/cmds-rescue.c
index c40088ad..01fbdef9 100644
--- a/cmds-rescue.c
+++ b/cmds-rescue.c
@@ -32,8 +32,8 @@ static const char * const rescue_cmd_group_usage[] = {
 	NULL
 };
 
-int btrfs_recover_chunk_tree(char *path, int verbose, int yes);
-int btrfs_recover_superblocks(char *path, int verbose, int yes);
+int btrfs_recover_chunk_tree(const char *path, int verbose, int yes);
+int btrfs_recover_superblocks(const char *path, int verbose, int yes);
 
 static const char * const cmd_rescue_chunk_recover_usage[] = {
 	"btrfs rescue chunk-recover [options] <device>",
@@ -52,6 +52,7 @@ static int cmd_rescue_chunk_recover(int argc, char *argv[])
 	int yes = 0;
 	int verbose = 0;
 
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "yvh");
 		if (c < 0)
@@ -76,7 +77,8 @@ static int cmd_rescue_chunk_recover(int argc, char *argv[])
 
 	ret = check_mounted(file);
 	if (ret < 0) {
-		error("could not check mount status: %s", strerror(-ret));
+		errno = -ret;
+		error("could not check mount status: %m");
 		return 1;
 	} else if (ret) {
 		error("the device is busy");
@@ -109,7 +111,7 @@ static const char * const cmd_rescue_super_recover_usage[] = {
  *   0 : All superblocks are valid, no need to recover
  *   1 : Usage or syntax error
  *   2 : Recover all bad superblocks successfully
- *   3 : Fail to Recover bad supeblocks
+ *   3 : Fail to Recover bad superblocks
  *   4 : Abort to recover bad superblocks
  */
 static int cmd_rescue_super_recover(int argc, char **argv)
@@ -119,6 +121,7 @@ static int cmd_rescue_super_recover(int argc, char **argv)
 	int yes = 0;
 	char *dname;
 
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "vy");
 		if (c < 0)
@@ -140,7 +143,8 @@ static int cmd_rescue_super_recover(int argc, char **argv)
 	dname = argv[optind];
 	ret = check_mounted(dname);
 	if (ret < 0) {
-		error("could not check mount status: %s", strerror(-ret));
+		errno = -ret;
+		error("could not check mount status: %m");
 		return 1;
 	} else if (ret) {
 		error("the device is busy");
@@ -173,7 +177,8 @@ static int cmd_rescue_zero_log(int argc, char **argv)
 	devname = argv[optind];
 	ret = check_mounted(devname);
 	if (ret < 0) {
-		error("could not check mount status: %s", strerror(-ret));
+		errno = -ret;
+		error("could not check mount status: %m");
 		goto out;
 	} else if (ret) {
 		error("%s is currently mounted", devname);
@@ -224,7 +229,8 @@ static int cmd_rescue_fix_device_size(int argc, char **argv)
 	devname = argv[optind];
 	ret = check_mounted(devname);
 	if (ret < 0) {
-		error("could not check mount status: %s", strerror(-ret));
+		errno = -ret;
+		error("could not check mount status: %m");
 		goto out;
 	} else if (ret) {
 		error("%s is currently mounted", devname);
diff --git a/cmds-restore.c b/cmds-restore.c
index 342f5cc7..7302e359 100644
--- a/cmds-restore.c
+++ b/cmds-restore.c
@@ -259,7 +259,7 @@ again:
 		}
 
 		if (path->reada)
-			reada_for_search(root, path, level, slot, 0);
+			reada_for_search(fs_info, path, level, slot, 0);
 
 		next = read_node_slot(fs_info, c, slot);
 		if (extent_buffer_uptodate(next))
@@ -276,7 +276,7 @@ again:
 		if (!level)
 			break;
 		if (path->reada)
-			reada_for_search(root, path, level, 0, 0);
+			reada_for_search(fs_info, path, level, 0, 0);
 		next = read_node_slot(fs_info, next, 0);
 		if (!extent_buffer_uptodate(next))
 			goto again;
@@ -302,7 +302,7 @@ static int copy_one_inline(struct btrfs_root *root, int fd,
 	fi = btrfs_item_ptr(leaf, path->slots[0],
 			    struct btrfs_file_extent_item);
 	ptr = btrfs_file_extent_inline_start(fi);
-	len = btrfs_file_extent_inline_len(leaf, path->slots[0], fi);
+	len = btrfs_file_extent_ram_bytes(leaf, fi);
 	inline_item_len = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(path->slots[0]));
 	read_extent_buffer(leaf, buf, ptr, inline_item_len);
 
@@ -324,7 +324,8 @@ static int copy_one_inline(struct btrfs_root *root, int fd,
 		return -ENOMEM;
 	}
 
-	ret = decompress(root, buf, outbuf, len, &ram_size, compress);
+	ret = decompress(root, buf, outbuf, inline_item_len, &ram_size,
+			 compress);
 	if (ret) {
 		free(outbuf);
 		return ret;
@@ -1440,6 +1441,7 @@ int cmd_restore(int argc, char **argv)
 	regex_t match_reg, *mreg = NULL;
 	char reg_err[256];
 
+	optind = 0;
 	while (1) {
 		int opt;
 		enum { GETOPT_VAL_PATH_REGEX = 256 };
@@ -1540,8 +1542,8 @@ int cmd_restore(int argc, char **argv)
 	}
 
 	if ((ret = check_mounted(argv[optind])) < 0) {
-		fprintf(stderr, "Could not check mount status: %s\n",
-			strerror(-ret));
+		errno = -ret;
+		fprintf(stderr, "Could not check mount status: %m\n");
 		return 1;
 	} else if (ret) {
 		fprintf(stderr, "%s is currently mounted.  Aborting.\n", argv[optind]);
@@ -1589,8 +1591,9 @@ int cmd_restore(int argc, char **argv)
 		key.offset = (u64)-1;
 		root = btrfs_read_fs_root(orig_root->fs_info, &key);
 		if (IS_ERR(root)) {
-			fprintf(stderr, "fail to read root %llu: %s\n",
-					root_objectid, strerror(-PTR_ERR(root)));
+			errno = -PTR_ERR(root);
+			fprintf(stderr, "fail to read root %llu: %m\n",
+					root_objectid);
 			root = orig_root;
 			ret = 1;
 			goto out;
diff --git a/cmds-scrub.c b/cmds-scrub.c
index 6b909f20..13cb18a3 100644
--- a/cmds-scrub.c
+++ b/cmds-scrub.c
@@ -1206,9 +1206,9 @@ static int scrub_start(int argc, char **argv, int resume)
 
 	ret = get_fs_info(path, &fi_args, &di_args);
 	if (ret) {
+		errno = -ret;
 		error_on(!do_quiet,
-			"getting dev info for scrub failed: %s",
-			 strerror(-ret));
+			"getting dev info for scrub failed: %m");
 		err = 1;
 		goto out;
 	}
@@ -1221,13 +1221,14 @@ static int scrub_start(int argc, char **argv, int resume)
 	uuid_unparse(fi_args.fsid, fsid);
 	fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
 	if (fdres < 0 && fdres != -ENOENT) {
-		warning_on(!do_quiet, "failed to open status file: %s",
-			strerror(-fdres));
+		errno = -fdres;
+		warning_on(!do_quiet, "failed to open status file: %m");
 	} else if (fdres >= 0) {
 		past_scrubs = scrub_read_file(fdres, !do_quiet);
-		if (IS_ERR(past_scrubs))
-			warning_on(!do_quiet, "failed to read status file: %s",
-				strerror(-PTR_ERR(past_scrubs)));
+		if (IS_ERR(past_scrubs)) {
+			errno = -PTR_ERR(past_scrubs);
+			warning_on(!do_quiet, "failed to read status file: %m");
+		}
 		close(fdres);
 	}
 
@@ -1273,8 +1274,8 @@ static int scrub_start(int argc, char **argv, int resume)
 		devid = di_args[i].devid;
 		ret = pthread_mutex_init(&sp[i].progress_mutex, NULL);
 		if (ret) {
-			error_on(!do_quiet, "pthread_mutex_init failed: %s",
-				strerror(ret));
+			errno = ret;
+			error_on(!do_quiet, "pthread_mutex_init failed: %m");
 			err = 1;
 			goto out;
 		}
@@ -1359,9 +1360,9 @@ static int scrub_start(int argc, char **argv, int resume)
 		ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
 					   fi_args.num_devices);
 		if (ret) {
+			errno = -ret;
 			warning_on(!do_quiet,
-   "failed to write the progress status file: %s. Status recording disabled",
-				strerror(-ret));
+   "failed to write the progress status file: %m. Status recording disabled");
 			do_record = 0;
 		}
 	}
@@ -1418,9 +1419,12 @@ static int scrub_start(int argc, char **argv, int resume)
 		ret = pthread_create(&t_devs[i], NULL,
 					scrub_one_dev, &sp[i]);
 		if (ret) {
-			if (do_print)
-			error("creating scrub_one_dev[%llu] thread failed: %s",
-				devid, strerror(ret));
+			if (do_print) {
+				errno = ret;
+				error(
+				"creating scrub_one_dev[%llu] thread failed: %m",
+					devid);
+			}
 			err = 1;
 			goto out;
 		}
@@ -1434,9 +1438,10 @@ static int scrub_start(int argc, char **argv, int resume)
 	spc.fi = &fi_args;
 	ret = pthread_create(&t_prog, NULL, scrub_progress_cycle, &spc);
 	if (ret) {
-		if (do_print)
-			error("creating progress thread failed: %s",
-				strerror(ret));
+		if (do_print) {
+			errno = ret;
+			error("creating progress thread failed: %m");
+		}
 		err = 1;
 		goto out;
 	}
@@ -1448,9 +1453,12 @@ static int scrub_start(int argc, char **argv, int resume)
 		devid = di_args[i].devid;
 		ret = pthread_join(t_devs[i], NULL);
 		if (ret) {
-			if (do_print)
-			  error("pthread_join failed for scrub_one_dev[%llu]: %s",
-				devid, strerror(ret));
+			if (do_print) {
+				errno = ret;
+				error(
+				"pthread_join failed for scrub_one_dev[%llu]: %m",
+					devid);
+			}
 			++err;
 			continue;
 		}
@@ -1465,11 +1473,13 @@ static int scrub_start(int argc, char **argv, int resume)
 				++err;
 				break;
 			default:
-				if (do_print)
-		error("scrubbing %s failed for device id %lld: ret=%d, errno=%d (%s)",
-					path, devid,
-					sp[i].ret, sp[i].ioctl_errno,
-					strerror(sp[i].ioctl_errno));
+				if (do_print) {
+					errno = sp[i].ioctl_errno;
+					error(
+		"scrubbing %s failed for device id %lld: ret=%d, errno=%d (%m)",
+						path, devid, sp[i].ret,
+						sp[i].ioctl_errno);
+				}
 				++err;
 				continue;
 			}
@@ -1511,21 +1521,23 @@ static int scrub_start(int argc, char **argv, int resume)
 
 	/* check for errors from the handling of the progress thread */
 	if (do_print && ret) {
-		error("progress thread handling failed: %s",
-			strerror(ret));
+		errno = ret;
+		error("progress thread handling failed: %m");
 	}
 
 	/* check for errors returned from the progress thread itself */
-	if (do_print && terr && terr != PTHREAD_CANCELED)
-		error("recording progress failed: %s",
-			strerror(-PTR_ERR(terr)));
+	if (do_print && terr && terr != PTHREAD_CANCELED) {
+		errno = -PTR_ERR(terr);
+		error("recording progress failed: %m");
+	}
 
 	if (do_record) {
 		ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
 					   fi_args.num_devices);
-		if (ret && do_print)
-			error("failed to record the result: %s",
-				strerror(-ret));
+		if (ret && do_print) {
+			errno = -ret;
+			error("failed to record the result: %m");
+		}
 	}
 
 	scrub_handle_sigint_child(-1);
@@ -1701,8 +1713,8 @@ static int cmd_scrub_status(int argc, char **argv)
 
 	ret = get_fs_info(path, &fi_args, &di_args);
 	if (ret) {
-		error("getting dev info for scrub failed: %s",
-			strerror(-ret));
+		errno = -ret;
+		error("getting dev info for scrub failed: %m");
 		err = 1;
 		goto out;
 	}
@@ -1729,8 +1741,8 @@ static int cmd_scrub_status(int argc, char **argv)
 		close(fdres);
 		fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
 		if (fdres < 0 && fdres != -ENOENT) {
-			warning("failed to open status file: %s",
-				strerror(-fdres));
+			errno = -fdres;
+			warning("failed to open status file: %m");
 			err = 1;
 			goto out;
 		}
@@ -1738,9 +1750,10 @@ static int cmd_scrub_status(int argc, char **argv)
 
 	if (fdres >= 0) {
 		past_scrubs = scrub_read_file(fdres, 1);
-		if (IS_ERR(past_scrubs))
-			warning("failed to read status: %s",
-				strerror(-PTR_ERR(past_scrubs)));
+		if (IS_ERR(past_scrubs)) {
+			errno = -PTR_ERR(past_scrubs);
+			warning("failed to read status: %m");
+		}
 	}
 	in_progress = is_scrub_running_in_kernel(fdmnt, di_args, fi_args.num_devices);
 
diff --git a/cmds-send.c b/cmds-send.c
index c5ecdaa1..b4fa8a3d 100644
--- a/cmds-send.c
+++ b/cmds-send.c
@@ -281,8 +281,7 @@ static void *read_sent_data(void *arg)
 				NULL, SEND_BUFFER_SIZE, SPLICE_F_MORE);
 		if (sbytes < 0) {
 			ret = -errno;
-			error("failed to read stream from kernel: %s",
-				strerror(-ret));
+			error("failed to read stream from kernel: %m");
 			goto out;
 		}
 		if (!sbytes) {
@@ -312,14 +311,14 @@ static int do_send(struct btrfs_send *send, u64 parent_root_id,
 	subvol_fd = openat(send->mnt_fd, subvol, O_RDONLY | O_NOATIME);
 	if (subvol_fd < 0) {
 		ret = -errno;
-		error("cannot open %s: %s", subvol, strerror(-ret));
+		error("cannot open %s: %m", subvol);
 		goto out;
 	}
 
 	ret = pipe(pipefd);
 	if (ret < 0) {
 		ret = -errno;
-		error("pipe failed: %s", strerror(-ret));
+		error("pipe failed: %m");
 		goto out;
 	}
 
@@ -331,7 +330,8 @@ static int do_send(struct btrfs_send *send, u64 parent_root_id,
 		ret = pthread_create(&t_read, NULL, read_sent_data, send);
 	if (ret) {
 		ret = -ret;
-		error("thread setup failed: %s", strerror(-ret));
+		errno = -ret;
+		error("thread setup failed: %m");
 		goto out;
 	}
 
@@ -346,7 +346,7 @@ static int do_send(struct btrfs_send *send, u64 parent_root_id,
 	ret = ioctl(subvol_fd, BTRFS_IOC_SEND, &io_send);
 	if (ret < 0) {
 		ret = -errno;
-		error("send ioctl failed with %d: %s", ret, strerror(-ret));
+		error("send ioctl failed with %d: %m", ret);
 		if (ret == -EINVAL && (!is_first_subvol || !is_last_subvol))
 			fprintf(stderr,
 				"Try upgrading your kernel or don't use -e.\n");
@@ -364,7 +364,8 @@ static int do_send(struct btrfs_send *send, u64 parent_root_id,
 	ret = pthread_join(t_read, &t_err);
 	if (ret) {
 		ret = -ret;
-		error("pthread_join failed: %s", strerror(-ret));
+		errno = -ret;
+		error("pthread_join failed: %m");
 		goto out;
 	}
 	if (t_err) {
@@ -395,8 +396,8 @@ static int init_root_path(struct btrfs_send *sctx, const char *subvol)
 
 	ret = find_mount_root(subvol, &sctx->root_path);
 	if (ret < 0) {
-		error("failed to determine mount point for %s: %s",
-			subvol, strerror(-ret));
+		errno = -ret;
+		error("failed to determine mount point for %s: %m", subvol);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -409,14 +410,14 @@ static int init_root_path(struct btrfs_send *sctx, const char *subvol)
 	sctx->mnt_fd = open(sctx->root_path, O_RDONLY | O_NOATIME);
 	if (sctx->mnt_fd < 0) {
 		ret = -errno;
-		error("cannot open '%s': %s", sctx->root_path, strerror(-ret));
+		error("cannot open '%s': %m", sctx->root_path);
 		goto out;
 	}
 
 	ret = subvol_uuid_search_init(sctx->mnt_fd, &sctx->sus);
 	if (ret < 0) {
-		error("failed to initialize subvol search: %s",
-			strerror(-ret));
+		errno = -ret;
+		error("failed to initialize subvol search: %m");
 		goto out;
 	}
 
@@ -434,15 +435,14 @@ static int is_subvol_ro(struct btrfs_send *sctx, const char *subvol)
 	fd = openat(sctx->mnt_fd, subvol, O_RDONLY | O_NOATIME);
 	if (fd < 0) {
 		ret = -errno;
-		error("cannot open %s: %s", subvol, strerror(-ret));
+		error("cannot open %s: %m", subvol);
 		goto out;
 	}
 
 	ret = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
 	if (ret < 0) {
 		ret = -errno;
-		error("failed to get flags for subvolume %s: %s",
-			subvol, strerror(-ret));
+		error("failed to get flags for subvolume %s: %m", subvol);
 		goto out;
 	}
 
@@ -508,6 +508,7 @@ int cmd_send(int argc, char **argv)
 	send.dump_fd = fileno(stdout);
 	outname[0] = 0;
 
+	optind = 0;
 	while (1) {
 		enum { GETOPT_VAL_SEND_NO_DATA = 256 };
 		static const struct option long_options[] = {
@@ -534,7 +535,7 @@ int cmd_send(int argc, char **argv)
 			subvol = realpath(optarg, NULL);
 			if (!subvol) {
 				ret = -errno;
-				error("realpath %s failed: %s\n", optarg, strerror(-ret));
+				error("realpath %s failed: %m\n", optarg);
 				goto out;
 			}
 
@@ -553,7 +554,8 @@ int cmd_send(int argc, char **argv)
 
 			ret = add_clone_source(&send, root_id);
 			if (ret < 0) {
-				error("cannot add clone source: %s", strerror(-ret));
+				errno = -ret;
+				error("cannot add clone source: %m");
 				goto out;
 			}
 			free(subvol);
@@ -577,7 +579,7 @@ int cmd_send(int argc, char **argv)
 			snapshot_parent = realpath(optarg, NULL);
 			if (!snapshot_parent) {
 				ret = -errno;
-				error("realpath %s failed: %s", optarg, strerror(-ret));
+				error("realpath %s failed: %m", optarg);
 				goto out;
 			}
 
@@ -628,7 +630,7 @@ int cmd_send(int argc, char **argv)
 		send.dump_fd = tmpfd;
 		if (send.dump_fd == -1) {
 			ret = -errno;
-			error("cannot create '%s': %s", outname, strerror(-ret));
+			error("cannot create '%s': %m", outname);
 			goto out;
 		}
 	}
@@ -663,7 +665,8 @@ int cmd_send(int argc, char **argv)
 
 		ret = add_clone_source(&send, parent_root_id);
 		if (ret < 0) {
-			error("cannot add clone source: %s", strerror(-ret));
+			errno = -ret;
+			error("cannot add clone source: %m");
 			goto out;
 		}
 	}
@@ -679,8 +682,8 @@ int cmd_send(int argc, char **argv)
 
 		ret = find_mount_root(subvol, &mount_root);
 		if (ret < 0) {
-			error("find_mount_root failed on %s: %s", subvol,
-				strerror(-ret));
+			errno = -ret;
+			error("find_mount_root failed on %s: %m", subvol);
 			goto out;
 		}
 		if (ret > 0) {
@@ -723,7 +726,7 @@ int cmd_send(int argc, char **argv)
 		subvol = realpath(subvol, NULL);
 		if (!subvol) {
 			ret = -errno;
-			error("realpath %s failed: %s", argv[i], strerror(-ret));
+			error("realpath %s failed: %m", argv[i]);
 			goto out;
 		}
 
@@ -758,7 +761,8 @@ int cmd_send(int argc, char **argv)
 			/* done with this subvol, so add it to the clone sources */
 			ret = add_clone_source(&send, root_id);
 			if (ret < 0) {
-				error("cannot add clone source: %s", strerror(-ret));
+				errno = -ret;
+				error("cannot add clone source: %m");
 				goto out;
 			}
 			free_send_info(&send);
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 45363a5a..a8395aac 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -102,6 +102,7 @@ static int cmd_subvol_create(int argc, char **argv)
 	struct btrfs_qgroup_inherit *inherit = NULL;
 	DIR	*dirstream = NULL;
 
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "c:i:");
 		if (c < 0)
@@ -135,7 +136,8 @@ static int cmd_subvol_create(int argc, char **argv)
 	retval = 1;	/* failure */
 	res = test_isdir(dst);
 	if (res < 0 && res != -ENOENT) {
-		error("cannot access %s: %s", dst, strerror(-res));
+		errno = -res;
+		error("cannot access %s: %m", dst);
 		goto out;
 	}
 	if (res >= 0) {
@@ -154,7 +156,7 @@ static int cmd_subvol_create(int argc, char **argv)
 	}
 
 	len = strlen(newname);
-	if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+	if (len > BTRFS_VOL_NAME_MAX) {
 		error("subvolume name too long: %s", newname);
 		goto out;
 	}
@@ -248,6 +250,7 @@ static int cmd_subvol_delete(int argc, char **argv)
 	enum { COMMIT_AFTER = 1, COMMIT_EACH = 2 };
 	enum btrfs_util_error err;
 
+	optind = 0;
 	while (1) {
 		int c;
 		static const struct option long_options[] = {
@@ -335,8 +338,8 @@ again:
 	} else if (commit_mode == COMMIT_AFTER) {
 		res = get_fsid(dname, fsid, 0);
 		if (res < 0) {
-			error("unable to get fsid for '%s': %s",
-				path, strerror(-res));
+			errno = -res;
+			error("unable to get fsid for '%s': %m", path);
 			error(
 			"delete succeeded but commit may not be done in the end");
 			ret = 1;
@@ -466,6 +469,7 @@ static int cmd_subvol_list(int argc, char **argv)
 	filter_set = btrfs_list_alloc_filter_set();
 	comparer_set = btrfs_list_alloc_comparer_set();
 
+	optind = 0;
 	while(1) {
 		int c;
 		static const struct option long_options[] = {
@@ -636,6 +640,7 @@ static int cmd_subvol_snapshot(int argc, char **argv)
 	DIR *dirstream1 = NULL, *dirstream2 = NULL;
 
 	memset(&args, 0, sizeof(args));
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "c:i:r");
 		if (c < 0)
@@ -686,7 +691,8 @@ static int cmd_subvol_snapshot(int argc, char **argv)
 
 	res = test_isdir(dst);
 	if (res < 0 && res != -ENOENT) {
-		error("cannot access %s: %s", dst, strerror(-res));
+		errno = -res;
+		error("cannot access %s: %m", dst);
 		goto out;
 	}
 	if (res == 0) {
@@ -711,7 +717,7 @@ static int cmd_subvol_snapshot(int argc, char **argv)
 	}
 
 	len = strlen(newname);
-	if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+	if (len > BTRFS_VOL_NAME_MAX) {
 		error("snapshot name too long '%s'", newname);
 		goto out;
 	}
@@ -933,6 +939,7 @@ static int cmd_subvol_show(int argc, char **argv)
 	char *subvol_path = NULL;
 	enum btrfs_util_error err;
 
+	optind = 0;
 	while (1) {
 		int c;
 		static const struct option long_options[] = {
@@ -1132,6 +1139,7 @@ static int cmd_subvol_sync(int argc, char **argv)
 	int sleep_interval = 1;
 	enum btrfs_util_error err;
 
+	optind = 0;
 	while (1) {
 		int c = getopt(argc, argv, "s:");
 
diff --git a/configure b/configure
index 89626107..381ad05a 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for btrfs-progs v4.17 .
+# Generated by GNU Autoconf 2.69 for btrfs-progs v4.19.1 .
 #
 # Report bugs to <linux-btrfs@vger.kernel.org>.
 #
@@ -580,8 +580,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='btrfs-progs'
 PACKAGE_TARNAME='btrfs-progs'
-PACKAGE_VERSION='v4.17 '
-PACKAGE_STRING='btrfs-progs v4.17 '
+PACKAGE_VERSION='v4.19.1 '
+PACKAGE_STRING='btrfs-progs v4.19.1 '
 PACKAGE_BUGREPORT='linux-btrfs@vger.kernel.org'
 PACKAGE_URL='http://btrfs.wiki.kernel.org'
 
@@ -666,12 +666,14 @@ COM_ERR_CFLAGS
 EXT2FS_LIBS
 EXT2FS_CFLAGS
 DISABLE_BTRFSCONVERT
+BUILD_STATIC_LIBRARIES
+BUILD_SHARED_LIBRARIES
+BUILD_PROGRAMS
 ASCIIDOC_TOOL
 ASCIIDOCTOR
 ASCIIDOC
 SED
 MV
-GZIP
 XMLTO
 DISABLE_DOCUMENTATION
 PKG_CONFIG_LIBDIR
@@ -747,6 +749,9 @@ enable_option_checking
 enable_largefile
 enable_backtrace
 enable_documentation
+enable_programs
+enable_shared
+enable_static
 enable_convert
 with_convert
 enable_zstd
@@ -1321,7 +1326,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures btrfs-progs v4.17  to adapt to many kinds of systems.
+\`configure' configures btrfs-progs v4.19.1  to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1386,7 +1391,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of btrfs-progs v4.17 :";;
+     short | recursive ) echo "Configuration of btrfs-progs v4.19.1 :";;
    esac
   cat <<\_ACEOF
 
@@ -1396,7 +1401,10 @@ Optional Features:
   --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
   --disable-largefile     omit support for large files
   --disable-backtrace     disable btrfs backtrace
-  --disable-documentation do not build domumentation
+  --disable-documentation do not build documentation
+  --disable-programs      do not build utility programs
+  --disable-shared        do not build shared libraries
+  --disable-static        do not build static libraries
   --disable-convert       do not build btrfs-convert
   --disable-zstd          build without zstd support
   --disable-python        do not build libbtrfsutil Python bindings
@@ -1514,7 +1522,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-btrfs-progs configure v4.17
+btrfs-progs configure v4.19.1
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1883,7 +1891,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by btrfs-progs $as_me v4.17 , which was
+It was created by btrfs-progs $as_me v4.19.1 , which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -5259,47 +5267,6 @@ fi
 		as_fn_error $? "cannot find xmlto, cannot build documentation" "$LINENO" 5
 	fi
 
-	# Extract the first word of "gzip", so it can be a program name with args.
-set dummy gzip; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_path_GZIP+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  case $GZIP in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_GZIP="$GZIP" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
-    ac_cv_path_GZIP="$as_dir/$ac_word$ac_exec_ext"
-    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-  test -z "$ac_cv_path_GZIP" && ac_cv_path_GZIP="gzip"
-  ;;
-esac
-fi
-GZIP=$ac_cv_path_GZIP
-if test -n "$GZIP"; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GZIP" >&5
-$as_echo "$GZIP" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
 	# Extract the first word of "mv", so it can be a program name with args.
 set dummy mv; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
@@ -5503,11 +5470,56 @@ fi
 fi
 
 
+# Check whether --enable-programs was given.
+if test "${enable_programs+set}" = set; then :
+  enableval=$enable_programs;
+else
+  enable_programs=yes
+
+fi
+
+if test "x$enable_programs" = xyes; then :
+  BUILD_PROGRAMS=1
+else
+  BUILD_PROGRAMS=0
+fi
+
+
+# Check whether --enable-shared was given.
+if test "${enable_shared+set}" = set; then :
+  enableval=$enable_shared;
+else
+  enable_shared=yes
+
+fi
+
+if test "x$enable_shared" = xyes; then :
+  BUILD_SHARED_LIBRARIES=1
+else
+  BUILD_SHARED_LIBRARIES=0
+fi
+
+
+# Check whether --enable-static was given.
+if test "${enable_static+set}" = set; then :
+  enableval=$enable_static;
+else
+  enable_static=yes
+
+fi
+
+if test "x$enable_static" = xyes; then :
+  BUILD_STATIC_LIBRARIES=1
+else
+  BUILD_STATIC_LIBRARIES=0
+fi
+
+
 # Check whether --enable-convert was given.
 if test "${enable_convert+set}" = set; then :
   enableval=$enable_convert;
 else
-  enable_convert=yes
+  enable_convert=$enable_programs
 
 fi
 
@@ -6399,7 +6411,7 @@ fi
 if test "${enable_python+set}" = set; then :
   enableval=$enable_python;
 else
-  enable_python=yes
+  enable_python=$enable_shared
 
 fi
 
@@ -7345,7 +7357,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by btrfs-progs $as_me v4.17 , which was
+This file was extended by btrfs-progs $as_me v4.19.1 , which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -7408,7 +7420,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-btrfs-progs config.status v4.17
+btrfs-progs config.status v4.19.1
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
@@ -8135,6 +8147,9 @@ fi
 	cflags:             ${CFLAGS}
 	ldflags:            ${LDFLAGS}
 
+	programs:           ${enable_programs}
+	shared libraries:   ${enable_shared}
+	static libraries:   ${enable_static}
 	documentation:      ${enable_documentation}
 	doc generator:      ${ASCIIDOC_TOOL}
 	backtrace support:  ${enable_backtrace}
@@ -8159,6 +8174,9 @@ $as_echo "
 	cflags:             ${CFLAGS}
 	ldflags:            ${LDFLAGS}
 
+	programs:           ${enable_programs}
+	shared libraries:   ${enable_shared}
+	static libraries:   ${enable_static}
 	documentation:      ${enable_documentation}
 	doc generator:      ${ASCIIDOC_TOOL}
 	backtrace support:  ${enable_backtrace}
diff --git a/configure.ac b/configure.ac
index 2567de12..cf792eb5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -85,7 +85,7 @@ fi
 
 
 AC_ARG_ENABLE([documentation],
-	      AS_HELP_STRING([--disable-documentation], [do not build domumentation]),
+	      AS_HELP_STRING([--disable-documentation], [do not build documentation]),
   [], [enable_documentation=yes]
 )
 AS_IF([test "x$enable_documentation" = xyes], [DISABLE_DOCUMENTATION=0], [DISABLE_DOCUMENTATION=1])
@@ -99,7 +99,6 @@ if test "x$enable_documentation" = xyes; then
 		AC_MSG_ERROR([cannot find xmlto, cannot build documentation])
 	fi
 
-	AC_PATH_PROG([GZIP], [gzip], [gzip])
 	AC_PATH_PROG([MV], [mv], [mv])
 	AC_PROG_SED
 	AC_PATH_PROG([ASCIIDOC], [asciidoc])
@@ -118,9 +117,30 @@ if test "x$enable_documentation" = xyes; then
 fi
 AC_SUBST([ASCIIDOC_TOOL])
 
+AC_ARG_ENABLE([programs],
+	      AS_HELP_STRING([--disable-programs], [do not build utility programs]),
+	      [], [enable_programs=yes]
+)
+AS_IF([test "x$enable_programs" = xyes], [BUILD_PROGRAMS=1], [BUILD_PROGRAMS=0])
+AC_SUBST([BUILD_PROGRAMS])
+
+AC_ARG_ENABLE([shared],
+	      AS_HELP_STRING([--disable-shared], [do not build shared libraries]),
+	      [], [enable_shared=yes]
+)
+AS_IF([test "x$enable_shared" = xyes], [BUILD_SHARED_LIBRARIES=1], [BUILD_SHARED_LIBRARIES=0])
+AC_SUBST([BUILD_SHARED_LIBRARIES])
+
+AC_ARG_ENABLE([static],
+	      AS_HELP_STRING([--disable-static], [do not build static libraries]),
+	      [], [enable_static=yes]
+)
+AS_IF([test "x$enable_static" = xyes], [BUILD_STATIC_LIBRARIES=1], [BUILD_STATIC_LIBRARIES=0])
+AC_SUBST([BUILD_STATIC_LIBRARIES])
+
 AC_ARG_ENABLE([convert],
 	      AS_HELP_STRING([--disable-convert], [do not build btrfs-convert]),
-  [], [enable_convert=yes]
+  [], [enable_convert=$enable_programs]
 )
 
 AS_IF([test "x$enable_convert" = xyes], [DISABLE_BTRFSCONVERT=0], [DISABLE_BTRFSCONVERT=1])
@@ -215,7 +235,7 @@ AC_SUBST(BTRFSRESTORE_ZSTD)
 
 AC_ARG_ENABLE([python],
 	AS_HELP_STRING([--disable-python], [do not build libbtrfsutil Python bindings]),
-	[], [enable_python=yes]
+	[], [enable_python=$enable_shared]
 )
 
 if test "x$enable_python" = xyes; then
@@ -277,6 +297,9 @@ AC_MSG_RESULT([
 	cflags:             ${CFLAGS}
 	ldflags:            ${LDFLAGS}
 
+	programs:           ${enable_programs}
+	shared libraries:   ${enable_shared}
+	static libraries:   ${enable_static}
 	documentation:      ${enable_documentation}
 	doc generator:      ${ASCIIDOC_TOOL}
 	backtrace support:  ${enable_backtrace}
diff --git a/convert/common.c b/convert/common.c
index 6ddf4a46..a9b24043 100644
--- a/convert/common.c
+++ b/convert/common.c
@@ -76,7 +76,7 @@ static inline int write_temp_super(int fd, struct btrfs_super_block *sb,
 }
 
 /*
- * Setup temporary superblock at cfg->super_bynter
+ * Setup temporary superblock at cfg->super_bytenr
  * Needed info are extracted from cfg, and root_bytenr, chunk_bytenr
  *
  * For now sys chunk array will be empty and dev_item is empty too.
@@ -98,7 +98,7 @@ static int setup_temp_super(int fd, struct btrfs_mkfs_config *cfg,
 
 	if (*cfg->fs_uuid) {
 		if (uuid_parse(cfg->fs_uuid, super->fsid) != 0) {
-			error("cound not parse UUID: %s", cfg->fs_uuid);
+			error("could not parse UUID: %s", cfg->fs_uuid);
 			ret = -EINVAL;
 			goto out;
 		}
diff --git a/convert/main.c b/convert/main.c
index 7077fcba..68f76f71 100644
--- a/convert/main.c
+++ b/convert/main.c
@@ -66,7 +66,7 @@
  *      c. Doesn't cover any data chunks in 1.1
  *
  * 2)   Create basic btrfs filesystem structure
- *      Initial metadata and sys chunks are inserted in the first availabe
+ *      Initial metadata and sys chunks are inserted in the first available
  *      space found in step 1.3
  *      Then insert all data chunks into the basic btrfs
  *
@@ -290,10 +290,16 @@ static int create_image_file_range(struct btrfs_trans_handle *trans,
 	if (disk_bytenr) {
 		/* Check if the range is in a data block group */
 		bg_cache = btrfs_lookup_block_group(root->fs_info, bytenr);
-		if (!bg_cache)
+		if (!bg_cache) {
+			error("missing data block for bytenr %llu", bytenr);
 			return -ENOENT;
-		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
+		}
+		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) {
+			error(
+	"data bytenr %llu is covered by non-data block group %llu flags 0x%llx",
+			      bytenr, bg_cache->key.objectid, bg_cache->flags);
 			return -EINVAL;
+		}
 
 		/* The extent should never cross block group boundary */
 		len = min_t(u64, len, bg_cache->key.objectid +
@@ -310,8 +316,15 @@ static int create_image_file_range(struct btrfs_trans_handle *trans,
 	if (ret < 0)
 		return ret;
 
-	if (datacsum)
+	if (datacsum) {
 		ret = csum_disk_extent(trans, root, bytenr, len);
+		if (ret < 0) {
+			errno = -ret;
+			error(
+		"failed to calculate csum for bytenr %llu len %llu: %m",
+			      bytenr, len);
+		}
+	}
 	*ret_len = len;
 	return ret;
 }
@@ -669,7 +682,7 @@ static int calculate_available_space(struct btrfs_convert_context *cctx)
 	cur_off = 0;
 	/*
 	 * Calculate free space
-	 * Always round up the start bytenr, to avoid metadata extent corss
+	 * Always round up the start bytenr, to avoid metadata extent cross
 	 * stripe boundary, as later mkfs_convert() won't have all the extent
 	 * allocation check
 	 */
@@ -711,7 +724,7 @@ out:
 
 /*
  * Read used space, and since we have the used space,
- * calcuate data_chunks and free for later mkfs
+ * calculate data_chunks and free for later mkfs
  */
 static int convert_read_used_space(struct btrfs_convert_context *cctx)
 {
@@ -759,18 +772,34 @@ static int create_image(struct btrfs_root *root,
 
 	ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID,
 				       &ino);
-	if (ret < 0)
+	if (ret < 0) {
+		errno = -ret;
+		error("failed to find free objectid for root %llu: %m",
+			root->root_key.objectid);
 		goto out;
+	}
 	ret = btrfs_new_inode(trans, root, ino, 0400 | S_IFREG);
-	if (ret < 0)
+	if (ret < 0) {
+		errno = -ret;
+		error("failed to create new inode for root %llu: %m",
+			root->root_key.objectid);
 		goto out;
+	}
 	ret = btrfs_change_inode_flags(trans, root, ino, flags);
-	if (ret < 0)
+	if (ret < 0) {
+		errno = -ret;
+		error("failed to change inode flag for ino %llu root %llu: %m",
+			ino, root->root_key.objectid);
 		goto out;
+	}
 	ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name,
 			     strlen(name), BTRFS_FT_REG_FILE, NULL, 1, 0);
-	if (ret < 0)
+	if (ret < 0) {
+		errno = -ret;
+		error("failed to link ino %llu to '/%s' in root %llu: %m",
+			ino, name, root->root_key.objectid);
 		goto out;
+	}
 
 	key.objectid = ino;
 	key.type = BTRFS_INODE_ITEM_KEY;
@@ -946,7 +975,7 @@ static int init_btrfs(struct btrfs_mkfs_config *cfg, struct btrfs_root *root,
 
 	/*
 	 * Don't alloc any metadata/system chunk, as we don't want
-	 * any meta/sys chunk allcated before all data chunks are inserted.
+	 * any meta/sys chunk allocated before all data chunks are inserted.
 	 * Or we screw up the chunk layout just like the old implement.
 	 */
 	fs_info->avoid_sys_chunk_alloc = 1;
@@ -1135,7 +1164,8 @@ static int do_convert(const char *devname, u32 convert_flags, u32 nodesize,
 
 	ret = make_convert_btrfs(fd, &mkfs_cfg, &cctx);
 	if (ret) {
-		error("unable to create initial ctree: %s", strerror(-ret));
+		errno = -ret;
+		error("unable to create initial ctree: %m");
 		goto fail;
 	}
 
@@ -1182,7 +1212,7 @@ static int do_convert(const char *devname, u32 convert_flags, u32 nodesize,
 	if (progress) {
 		ctx.info = task_init(print_copied_inodes, after_copied_inodes,
 				     &ctx);
-		task_start(ctx.info);
+		task_start(ctx.info, NULL, NULL);
 	}
 	ret = copy_inodes(&cctx, root, convert_flags, &ctx);
 	if (ret) {
@@ -1314,7 +1344,7 @@ static bool is_chunk_direct_mapped(struct btrfs_fs_info *fs_info, u64 start)
 	if (map->num_stripes != 1)
 		goto out;
 
-	/* Chunk's logical doesn't match with phisical, not 1:1 mapped */
+	/* Chunk's logical doesn't match with physical, not 1:1 mapped */
 	if (map->ce.start != map->stripes[0].physical)
 		goto out;
 	ret = true;
@@ -1326,7 +1356,7 @@ out:
  * Iterate all file extents of the convert image.
  *
  * All file extents except ones in btrfs_reserved_ranges must be mapped 1:1
- * on disk. (Means thier file_offset must match their on disk bytenr)
+ * on disk. (Means their file_offset must match their on disk bytenr)
  *
  * File extents in reserved ranges can be relocated to other place, and in
  * that case we will read them out for later use.
@@ -1353,8 +1383,8 @@ static int check_convert_image(struct btrfs_root *image_root, u64 ino,
 	 * So we only need to check if ret < 0
 	 */
 	if (ret < 0) {
-		error("failed to iterate file extents at offset 0: %s",
-			strerror(-ret));
+		errno = -ret;
+		error("failed to iterate file extents at offset 0: %m");
 		btrfs_release_path(&path);
 		return ret;
 	}
@@ -1558,8 +1588,8 @@ static int do_rollback(const char *devname)
 		ret = -ENOENT;
 		goto close_fs;
 	} else if (ret < 0) {
-		error("failed to find source fs image subvolume: %s",
-			strerror(-ret));
+		errno = -ret;
+		error("failed to find source fs image subvolume: %m");
 		goto close_fs;
 	}
 
@@ -1570,8 +1600,8 @@ static int do_rollback(const char *devname)
 	image_root = btrfs_read_fs_root(fs_info, &key);
 	if (IS_ERR(image_root)) {
 		ret = PTR_ERR(image_root);
-		error("failed to open convert image subvolume: %s",
-			strerror(-ret));
+		errno = -ret;
+		error("failed to open convert image subvolume: %m");
 		goto close_fs;
 	}
 
@@ -1586,8 +1616,8 @@ static int do_rollback(const char *devname)
 			ret = PTR_ERR(dir);
 		else
 			ret = -ENOENT;
-		error("failed to locate file %s: %s", image_name,
-			strerror(-ret));
+		errno = -ret;
+		error("failed to locate file %s: %m", image_name);
 		goto close_fs;
 	}
 	btrfs_dir_item_key_to_cpu(path.nodes[0], dir, &key);
@@ -1600,7 +1630,8 @@ static int do_rollback(const char *devname)
 
 	if (ret < 0) {
 		btrfs_release_path(&path);
-		error("unable to find inode %llu: %s", ino, strerror(-ret));
+		errno = -ret;
+		error("unable to find inode %llu: %m", ino);
 		goto close_fs;
 	}
 	inode_item = btrfs_item_ptr(path.nodes[0], path.slots[0],
@@ -1642,8 +1673,9 @@ close_fs:
 				ret = -errno;
 			else
 				ret = -EIO;
-			error("failed to recover range [%llu, %llu): %s",
-			      range->start, real_size, strerror(-ret));
+			errno = -ret;
+			error("failed to recover range [%llu, %llu): %m",
+			      range->start, real_size);
 			goto free_mem;
 		}
 		ret = 0;
@@ -1805,7 +1837,8 @@ int main(int argc, char *argv[])
 	file = argv[optind];
 	ret = check_mounted(file);
 	if (ret < 0) {
-		error("could not check mount status: %s", strerror(-ret));
+		errno = -ret;
+		error("could not check mount status: %m");
 		return 1;
 	} else if (ret) {
 		error("%s is mounted", file);
diff --git a/convert/source-ext2.c b/convert/source-ext2.c
index a2af1212..a136e565 100644
--- a/convert/source-ext2.c
+++ b/convert/source-ext2.c
@@ -162,8 +162,8 @@ static int ext2_read_used_space(struct btrfs_convert_context *cctx)
 		}
 		ret = __ext2_add_one_block(fs, block_bitmap, i, used_tree);
 		if (ret < 0) {
-			error("fail to build used space tree, %s",
-			      strerror(-ret));
+			errno = -ret;
+			error("fail to build used space tree, %m");
 			break;
 		}
 		blk_itr += EXT2_CLUSTERS_PER_GROUP(fs->super);
diff --git a/convert/source-fs.c b/convert/source-fs.c
index b6d08370..5660a22c 100644
--- a/convert/source-fs.c
+++ b/convert/source-fs.c
@@ -201,7 +201,7 @@ int read_disk_extent(struct btrfs_root *root, u64 bytenr,
 	ret = 0;
 fail:
 	if (ret > 0)
-		ret = -1;
+		ret = -EIO;
 	return ret;
 }
 
diff --git a/convert/source-fs.h b/convert/source-fs.h
index f5314aff..7cf587d9 100644
--- a/convert/source-fs.h
+++ b/convert/source-fs.h
@@ -26,7 +26,7 @@
 #define CONV_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
 
 /*
- * Reresents a simple contiguous range.
+ * Represents a simple contiguous range.
  *
  * For multiple or non-contiguous ranges, use extent_cache_tree from
  * extent-cache.c
diff --git a/convert/source-reiserfs.c b/convert/source-reiserfs.c
index e0b3b685..cc592f19 100644
--- a/convert/source-reiserfs.c
+++ b/convert/source-reiserfs.c
@@ -301,7 +301,7 @@ static int reiserfs_record_indirect_extent(reiserfs_filsys_t fs, u64 position,
 
 /*
  * Unlike btrfs inline extents, reiserfs can have multiple inline extents.
- * This handles concatanating multiple tails into one inline extent
+ * This handles concatenating multiple tails into one inline extent
  * for insertion.
  */
 static int reiserfs_record_direct_extent(reiserfs_filsys_t fs, __u64 position,
@@ -493,10 +493,10 @@ static int reiserfs_copy_dirent(reiserfs_filsys_t fs,
 	ret = reiserfs_copy_meta(fs, root, dirent_data->convert_flags,
 				 deh_dirid, deh_objectid, &type);
 	if (ret) {
+		errno = -ret;
 		error(
-	"an error occured while converting \"%.*s\", reiserfs key [%u %u]: %s",
-			(int)len, name, deh_dirid, deh_objectid,
-			strerror(-ret));
+	"an error occured while converting \"%.*s\", reiserfs key [%u %u]: %m",
+			(int)len, name, deh_dirid, deh_objectid);
 		return ret;
 	}
 	trans = btrfs_start_transaction(root, 1);
@@ -564,7 +564,7 @@ static int reiserfs_copy_meta(reiserfs_filsys_t fs, struct btrfs_root *root,
 	};
 
 	/* The root directory's dirid in reiserfs points to an object
-	 * that does't exist.  In btrfs it's self-referential.
+	 * that doesn't exist.  In btrfs it's self-referential.
 	 */
 	if (deh_dirid == REISERFS_ROOT_PARENT_OBJECTID)
 		parent = objectid;
diff --git a/ctree.c b/ctree.c
index 2c51580f..295cd5ed 100644
--- a/ctree.c
+++ b/ctree.c
@@ -27,7 +27,7 @@
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_path *path, int level);
 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
-		      *root, struct btrfs_key *ins_key,
+		      *root, const struct btrfs_key *ins_key,
 		      struct btrfs_path *path, int data_size, int extend);
 static int push_node_left(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, struct extent_buffer *dst,
@@ -76,6 +76,18 @@ void add_root_to_dirty_list(struct btrfs_root *root)
 	}
 }
 
+static void root_add_used(struct btrfs_root *root, u32 size)
+{
+        btrfs_set_root_used(&root->root_item,
+                            btrfs_root_used(&root->root_item) + size);
+}
+
+static void root_sub_used(struct btrfs_root *root, u32 size)
+{
+        btrfs_set_root_used(&root->root_item,
+                            btrfs_root_used(&root->root_item) - size);
+}
+
 int btrfs_copy_root(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      struct extent_buffer *buf,
@@ -322,7 +334,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 		WARN_ON(btrfs_header_generation(parent) != trans->transid);
 
 		btrfs_free_extent(trans, root, buf->start, buf->len,
-				  0, root->root_key.objectid, level, 1);
+				  0, root->root_key.objectid, level, 0);
 	}
 	if (!list_empty(&buf->recow)) {
 		list_del_init(&buf->recow);
@@ -377,7 +389,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
 {
 	if (k1->objectid > k2->objectid)
 		return 1;
@@ -397,7 +409,8 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
 /*
  * compare two keys in a memcmp fashion
  */
-static int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+static int btrfs_comp_keys(struct btrfs_disk_key *disk,
+		const struct btrfs_key *k2)
 {
 	struct btrfs_key k1;
 
@@ -590,7 +603,7 @@ static int noinline check_block(struct btrfs_root *root,
  * slot may point to max if the key is bigger than all of the keys
  */
 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
-			      int item_size, struct btrfs_key *key,
+			      int item_size, const struct btrfs_key *key,
 			      int max, int *slot)
 {
 	int low = 0;
@@ -624,7 +637,7 @@ static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
  * simple bin_search frontend that does the right thing for
  * leaves vs nodes
  */
-static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
 		      int level, int *slot)
 {
 	if (level == 0)
@@ -722,9 +735,11 @@ static int balance_level(struct btrfs_trans_handle *trans,
 		/* once for the path */
 		free_extent_buffer(mid);
 
+		root_sub_used(root, mid->len);
+
 		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
 					0, root->root_key.objectid,
-					level, 1);
+					level, 0);
 		/* once for the root ptr */
 		free_extent_buffer(mid);
 		return ret;
@@ -777,6 +792,8 @@ static int balance_level(struct btrfs_trans_handle *trans,
 			wret = btrfs_del_ptr(root, path, level + 1, pslot + 1);
 			if (wret)
 				ret = wret;
+
+			root_sub_used(root, blocksize);
 			wret = btrfs_free_extent(trans, root, bytenr,
 						 blocksize, 0,
 						 root->root_key.objectid,
@@ -823,6 +840,8 @@ static int balance_level(struct btrfs_trans_handle *trans,
 		wret = btrfs_del_ptr(root, path, level + 1, pslot);
 		if (wret)
 			ret = wret;
+
+		root_sub_used(root, blocksize);
 		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
 					 0, root->root_key.objectid,
 					 level, 0);
@@ -982,10 +1001,9 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
 /*
  * readahead one full node of leaves
  */
-void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
-			     int level, int slot, u64 objectid)
+void reada_for_search(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
+		      int level, int slot, u64 objectid)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_buffer *node;
 	struct btrfs_disk_key disk_key;
 	u32 nritems;
@@ -1111,9 +1129,9 @@ out:
  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
  * possible)
  */
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
-		      *root, struct btrfs_key *key, struct btrfs_path *p, int
-		      ins_len, int cow)
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+		struct btrfs_root *root, const struct btrfs_key *key,
+		struct btrfs_path *p, int ins_len, int cow)
 {
 	struct extent_buffer *b;
 	int slot;
@@ -1185,7 +1203,7 @@ again:
 				break;
 
 			if (should_reada)
-				reada_for_search(root, p, level, slot,
+				reada_for_search(fs_info, p, level, slot,
 						 key->objectid);
 
 			b = read_node_slot(fs_info, b, slot);
@@ -1208,6 +1226,80 @@ again:
 }
 
 /*
+ * Helper to use instead of search slot if no exact match is needed but
+ * instead the next or previous item should be returned.
+ * When find_higher is true, the next higher item is returned, the next lower
+ * otherwise.
+ * When return_any and find_higher are both true, and no higher item is found,
+ * return the next lower instead.
+ * When return_any is true and find_higher is false, and no lower item is found,
+ * return the next higher instead.
+ * It returns 0 if any item is found, 1 if none is found (tree empty), and
+ * < 0 on error
+ */
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+                               const struct btrfs_key *key,
+                               struct btrfs_path *p, int find_higher,
+                               int return_any)
+{
+        int ret;
+        struct extent_buffer *leaf;
+
+again:
+        ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
+        if (ret <= 0)
+                return ret;
+        /*
+	 * A return value of 1 means the path is at the position where the item
+	 * should be inserted. Normally this is the next bigger item, but in
+	 * case the previous item is the last in a leaf, path points to the
+	 * first free slot in the previous leaf, i.e. at an invalid item.
+         */
+        leaf = p->nodes[0];
+
+        if (find_higher) {
+                if (p->slots[0] >= btrfs_header_nritems(leaf)) {
+                        ret = btrfs_next_leaf(root, p);
+                        if (ret <= 0)
+                                return ret;
+                        if (!return_any)
+                                return 1;
+                        /*
+			 * No higher item found, return the next lower instead
+                         */
+                        return_any = 0;
+                        find_higher = 0;
+                        btrfs_release_path(p);
+                        goto again;
+                }
+        } else {
+                if (p->slots[0] == 0) {
+                        ret = btrfs_prev_leaf(root, p);
+                        if (ret < 0)
+                                return ret;
+                        if (!ret) {
+                                leaf = p->nodes[0];
+                                if (p->slots[0] == btrfs_header_nritems(leaf))
+                                        p->slots[0]--;
+                                return 0;
+                        }
+                        if (!return_any)
+                                return 1;
+                        /*
+			 * No lower item found, return the next higher instead
+                         */
+                        return_any = 0;
+                        find_higher = 1;
+                        btrfs_release_path(p);
+                        goto again;
+                } else {
+                        --p->slots[0];
+                }
+        }
+        return 0;
+}
+
+/*
  * adjust the pointers going up the tree, starting at level
  * making sure the right key of each node is points to 'key'.
  * This is used after shifting pointers to the left, so it stops
@@ -1454,6 +1546,8 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
 	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(c, root->root_key.objectid);
 
+	root_add_used(root, root->fs_info->nodesize);
+
 	write_extent_buffer(c, root->fs_info->fsid,
 			    btrfs_header_fsid(), BTRFS_FSID_SIZE);
 
@@ -1581,6 +1675,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
 			    btrfs_header_chunk_tree_uuid(split),
 			    BTRFS_UUID_SIZE);
 
+	root_add_used(root, root->fs_info->nodesize);
 
 	copy_extent_buffer(split, c,
 			   btrfs_node_key_ptr_offset(0),
@@ -2052,7 +2147,7 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
  */
 static noinline int split_leaf(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
-			       struct btrfs_key *ins_key,
+			       const struct btrfs_key *ins_key,
 			       struct btrfs_path *path, int data_size,
 			       int extend)
 {
@@ -2163,6 +2258,8 @@ again:
 			    btrfs_header_chunk_tree_uuid(right),
 			    BTRFS_UUID_SIZE);
 
+	root_add_used(root, root->fs_info->nodesize);
+
 	if (split == 0) {
 		if (mid <= slot) {
 			btrfs_set_header_nritems(right, 0);
@@ -2682,6 +2779,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
 	if (ret)
 		return ret;
 
+	root_sub_used(root, leaf->len);
+
 	ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
 				0, root->root_key.objectid, 0, 0);
 	return ret;
@@ -2851,18 +2950,22 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 }
 
 /*
- * walk up the tree as far as required to find the next leaf.
+ * Walk up the tree as far as necessary to find the next sibling tree block.
+ * More generic version of btrfs_next_leaf(), as it could find sibling nodes
+ * if @path->lowest_level is not 0.
+ *
  * returns 0 if it found something or 1 if there are no greater leaves.
  * returns < 0 on io errors.
  */
-int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
+				  struct btrfs_path *path)
 {
 	int slot;
-	int level = 1;
+	int level = path->lowest_level + 1;
 	struct extent_buffer *c;
 	struct extent_buffer *next = NULL;
-	struct btrfs_fs_info *fs_info = root->fs_info;
 
+	BUG_ON(path->lowest_level + 1 >= BTRFS_MAX_LEVEL);
 	while(level < BTRFS_MAX_LEVEL) {
 		if (!path->nodes[level])
 			return 1;
@@ -2877,7 +2980,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 		}
 
 		if (path->reada)
-			reada_for_search(root, path, level, slot, 0);
+			reada_for_search(fs_info, path, level, slot, 0);
 
 		next = read_node_slot(fs_info, c, slot);
 		if (!extent_buffer_uptodate(next))
@@ -2891,10 +2994,10 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
-		if (!level)
+		if (level == path->lowest_level)
 			break;
 		if (path->reada)
-			reada_for_search(root, path, level, 0, 0);
+			reada_for_search(fs_info, path, level, 0, 0);
 		next = read_node_slot(fs_info, next, 0);
 		if (!extent_buffer_uptodate(next))
 			return -EIO;
@@ -2940,7 +3043,7 @@ int btrfs_previous_item(struct btrfs_root *root,
 
 /*
  * search in extent tree to find a previous Metadata/Data extent item with
- * min objecitd.
+ * min objectid.
  *
  * returns 0 if something is found, 1 if nothing was found and < 0 on error
  */
diff --git a/ctree.h b/ctree.h
index de4b1b7e..f9c49d60 100644
--- a/ctree.h
+++ b/ctree.h
@@ -97,7 +97,7 @@ struct btrfs_free_space_ctl;
 /* for storing balance parameters in the root tree */
 #define BTRFS_BALANCE_OBJECTID -4ULL
 
-/* oprhan objectid for tracking unlinked/truncated files */
+/* orphan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
 /* does write ahead logging to speed up fsyncs */
@@ -119,7 +119,7 @@ struct btrfs_free_space_ctl;
 #define BTRFS_FREE_SPACE_OBJECTID -11ULL
 
 /*
- * The inode number assigned to the special inode for sotring
+ * The inode number assigned to the special inode for storing
  * free ino cache
  */
 #define BTRFS_FREE_INO_OBJECTID -12ULL
@@ -497,7 +497,9 @@ struct btrfs_super_block {
  * added here until read-write support for the free space tree is implemented in
  * btrfs-progs.
  */
-#define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SUPP			\
+	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
+	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
 
 #define BTRFS_FEATURE_INCOMPAT_SUPP			\
 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
@@ -1071,6 +1073,17 @@ struct btrfs_block_group_cache {
 	u64 flags;
 	int cached;
 	int ro;
+	/*
+         * If the free space extent count exceeds this number, convert the block
+         * group to bitmaps.
+         */
+        u32 bitmap_high_thresh;
+        /*
+         * If the free space extent count drops below this number, convert the
+         * block group back to extents.
+         */
+        u32 bitmap_low_thresh;
+
 };
 
 struct btrfs_device;
@@ -1098,7 +1111,6 @@ struct btrfs_fs_info {
 	struct extent_io_tree free_space_cache;
 	struct extent_io_tree block_group_cache;
 	struct extent_io_tree pinned_extents;
-	struct extent_io_tree pending_del;
 	struct extent_io_tree extent_ins;
 	struct extent_io_tree *excluded_extents;
 
@@ -1143,8 +1155,7 @@ struct btrfs_fs_info {
 
 	int transaction_aborted;
 
-	int (*free_extent_hook)(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
+	int (*free_extent_hook)(struct btrfs_fs_info *fs_info,
 				u64 bytenr, u64 num_bytes, u64 parent,
 				u64 root_objectid, u64 owner, u64 offset,
 				int refs_to_drop);
@@ -1973,7 +1984,7 @@ static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
 }
 
 static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
-					 struct btrfs_key *cpu)
+					 const struct btrfs_key *cpu)
 {
 	disk->offset = cpu_to_le64(cpu->offset);
 	disk->type = cpu->type;
@@ -2393,7 +2404,7 @@ static inline struct btrfs_disk_balance_args* btrfs_balance_item_sys(
 
 /*
  * btrfs_dev_stats_item helper, returns pointer to the raw array, do the
- * endiannes conversion, @dsi is offset to eb data
+ * endianness conversion, @dsi is offset to eb data
  */
 static inline __le64* btrfs_dev_stats_values(struct extent_buffer *eb,
 		struct btrfs_dev_stats_item *dsi)
@@ -2443,28 +2454,6 @@ static inline u32 btrfs_search_header_len(struct btrfs_ioctl_search_header *sh)
 	return get_unaligned_32(&sh->len);
 }
 
-/* this returns the number of file bytes represented by the inline item.
- * If an item is compressed, this is the uncompressed size
- */
-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
-					       int slot,
-					       struct btrfs_file_extent_item *fi)
-{
-	/*
-	 * return the space used on disk if this item isn't
-	 * compressed or encoded
-	 */
-	if (btrfs_file_extent_compression(eb, fi) == 0 &&
-	    btrfs_file_extent_encryption(eb, fi) == 0 &&
-	    btrfs_file_extent_other_encoding(eb, fi) == 0) {
-		return btrfs_file_extent_inline_item_len(eb,
-							 btrfs_item_nr(slot));
-	}
-
-	/* otherwise use the ram bytes field */
-	return btrfs_file_extent_ram_bytes(eb, fi);
-}
-
 #define btrfs_fs_incompat(fs_info, opt) \
 	__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
 
@@ -2504,7 +2493,6 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans);
 void btrfs_pin_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes);
 void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
 			u64 bytenr, u64 num_bytes);
-int btrfs_extent_post_op(struct btrfs_trans_handle *trans);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
 							 btrfs_fs_info *info,
 							 u64 bytenr);
@@ -2546,8 +2534,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
 			    u64 orig_parent, u64 parent,
 			    u64 root_objectid, u64 ref_generation,
 			    u64 owner_objectid);
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root);
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
 struct btrfs_block_group_cache *
@@ -2576,7 +2563,7 @@ u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
 
 /* ctree.c */
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
 int btrfs_del_ptr(struct btrfs_root *root, struct btrfs_path *path,
 		int level, int slot);
 enum btrfs_tree_block_status
@@ -2585,8 +2572,8 @@ btrfs_check_node(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
 enum btrfs_tree_block_status
 btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
 		 struct extent_buffer *buf);
-void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
-			     int level, int slot, u64 objectid);
+void reada_for_search(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
+		      int level, int slot, u64 objectid);
 struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info,
 				   struct extent_buffer *parent, int slot);
 int btrfs_previous_item(struct btrfs_root *root,
@@ -2619,9 +2606,13 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
 		     struct btrfs_path *path,
 		     struct btrfs_key *new_key,
 		     unsigned long split_offset);
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
-		      *root, struct btrfs_key *key, struct btrfs_path *p, int
-		      ins_len, int cow);
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+		struct btrfs_root *root, const struct btrfs_key *key,
+		struct btrfs_path *p, int ins_len, int cow);
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+                               const struct btrfs_key *key,
+                               struct btrfs_path *p, int find_higher,
+                               int return_any);
 int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
 		u64 iobjectid, u64 ioff, u8 key_type,
 		struct btrfs_key *found_key);
@@ -2656,7 +2647,22 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 	return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
 }
 
-int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
+				  struct btrfs_path *path);
+
+/*
+ * Walk up the tree as far as necessary to find the next leaf.
+ * Side effect: @path->lowest_level is reset to 0 before the walk.
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+static inline int btrfs_next_leaf(struct btrfs_root *root,
+				  struct btrfs_path *path)
+{
+	path->lowest_level = 0;
+	return btrfs_next_sibling_tree_block(root->fs_info, path);
+}
+
 static inline int btrfs_next_item(struct btrfs_root *root,
 				  struct btrfs_path *p)
 {
@@ -2752,8 +2758,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
 			u64 ino, u64 parent_ino, u64 *index);
 
 /* file-item.c */
-int btrfs_del_csums(struct btrfs_trans_handle *trans,
-		    struct btrfs_root *root, u64 bytenr, u64 len);
+int btrfs_del_csums(struct btrfs_trans_handle *trans, u64 bytenr, u64 len);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     u64 objectid, u64 pos, u64 offset,
@@ -2814,4 +2819,7 @@ int btrfs_punch_hole(struct btrfs_trans_handle *trans,
 int btrfs_read_file(struct btrfs_root *root, u64 ino, u64 start, int len,
 		    char *dest);
 
+/* extent-tree.c */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long nr);
+
 #endif
diff --git a/delayed-ref.c b/delayed-ref.c
new file mode 100644
index 00000000..9974dbbd
--- /dev/null
+++ b/delayed-ref.c
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ */
+
+#include "ctree.h"
+#include "btrfs-list.h"
+#include "delayed-ref.h"
+#include "transaction.h"
+
+/*
+ * delayed back reference update tracking.  For subvolume trees
+ * we queue up extent allocations and backref maintenance for
+ * delayed processing.   This avoids deep call chains where we
+ * add extents in the middle of btrfs_search_slot, and it allows
+ * us to buffer up frequently modified backrefs in an rb tree instead
+ * of hammering updates on the extent allocation tree.
+ */
+
+/*
+ * compare two delayed tree backrefs with same bytenr and type
+ */
+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
+			  struct btrfs_delayed_tree_ref *ref2)
+{
+	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
+		if (ref1->root < ref2->root)
+			return -1;
+		if (ref1->root > ref2->root)
+			return 1;
+	} else {
+		if (ref1->parent < ref2->parent)
+			return -1;
+		if (ref1->parent > ref2->parent)
+			return 1;
+	}
+	return 0;
+}
+
+static int comp_refs(struct btrfs_delayed_ref_node *ref1,
+		     struct btrfs_delayed_ref_node *ref2,
+		     bool check_seq)
+{
+	int ret = 0;
+
+	if (ref1->type < ref2->type)
+		return -1;
+	if (ref1->type > ref2->type)
+		return 1;
+	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
+		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
+				     btrfs_delayed_node_to_tree_ref(ref2));
+	else
+		BUG();
+
+	if (ret)
+		return ret;
+	if (check_seq) {
+		if (ref1->seq < ref2->seq)
+			return -1;
+		if (ref1->seq > ref2->seq)
+			return 1;
+	}
+	return 0;
+}
+
+/* insert a new ref to head ref rbtree */
+static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
+						   struct rb_node *node)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent_node = NULL;
+	struct btrfs_delayed_ref_head *entry;
+	struct btrfs_delayed_ref_head *ins;
+	u64 bytenr;
+
+	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
+	bytenr = ins->bytenr;
+	while (*p) {
+		parent_node = *p;
+		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
+				 href_node);
+
+		if (bytenr < entry->bytenr)
+			p = &(*p)->rb_left;
+		else if (bytenr > entry->bytenr)
+			p = &(*p)->rb_right;
+		else
+			return entry;
+	}
+
+	rb_link_node(node, parent_node, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
+		struct btrfs_delayed_ref_node *ins)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *node = &ins->ref_node;
+	struct rb_node *parent_node = NULL;
+	struct btrfs_delayed_ref_node *entry;
+
+	while (*p) {
+		int comp;
+
+		parent_node = *p;
+		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
+				 ref_node);
+		comp = comp_refs(ins, entry, true);
+		if (comp < 0)
+			p = &(*p)->rb_left;
+		else if (comp > 0)
+			p = &(*p)->rb_right;
+		else
+			return entry;
+	}
+
+	rb_link_node(node, parent_node, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+/*
+ * find a head entry based on bytenr. This returns the delayed ref
+ * head if it was able to find one, or NULL if nothing was in that spot.
+ * If return_bigger is given, the next bigger entry is returned if no exact
+ * match is found.
+ */
+static struct btrfs_delayed_ref_head *
+find_ref_head(struct rb_root *root, u64 bytenr,
+	      int return_bigger)
+{
+	struct rb_node *n;
+	struct btrfs_delayed_ref_head *entry;
+
+	n = root->rb_node;
+	entry = NULL;
+	while (n) {
+		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
+
+		if (bytenr < entry->bytenr)
+			n = n->rb_left;
+		else if (bytenr > entry->bytenr)
+			n = n->rb_right;
+		else
+			return entry;
+	}
+	if (entry && return_bigger) {
+		if (bytenr > entry->bytenr) {
+			n = rb_next(&entry->href_node);
+			if (!n)
+				n = rb_first(root);
+			entry = rb_entry(n, struct btrfs_delayed_ref_head,
+					 href_node);
+			return entry;
+		}
+		return entry;
+	}
+	return NULL;
+}
+
+static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
+				    struct btrfs_delayed_ref_root *delayed_refs,
+				    struct btrfs_delayed_ref_head *head,
+				    struct btrfs_delayed_ref_node *ref)
+{
+	rb_erase(&ref->ref_node, &head->ref_tree);
+	RB_CLEAR_NODE(&ref->ref_node);
+	if (!list_empty(&ref->add_list))
+		list_del(&ref->add_list);
+	ref->in_tree = 0;
+	btrfs_put_delayed_ref(ref);
+	if (trans->delayed_ref_updates)
+		trans->delayed_ref_updates--;
+}
+
+static bool merge_ref(struct btrfs_trans_handle *trans,
+		      struct btrfs_delayed_ref_root *delayed_refs,
+		      struct btrfs_delayed_ref_head *head,
+		      struct btrfs_delayed_ref_node *ref,
+		      u64 seq)
+{
+	struct btrfs_delayed_ref_node *next;
+	struct rb_node *node = rb_next(&ref->ref_node);
+	bool done = false;
+
+	while (!done && node) {
+		int mod;
+
+		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
+		node = rb_next(node);
+		if (seq && next->seq >= seq)
+			break;
+		if (comp_refs(ref, next, false))
+			break;
+
+		if (ref->action == next->action) {
+			mod = next->ref_mod;
+		} else {
+			if (ref->ref_mod < next->ref_mod) {
+				__swap(ref, next);
+				done = true;
+			}
+			mod = -next->ref_mod;
+		}
+
+		drop_delayed_ref(trans, delayed_refs, head, next);
+		ref->ref_mod += mod;
+		if (ref->ref_mod == 0) {
+			drop_delayed_ref(trans, delayed_refs, head, ref);
+			done = true;
+		} else {
+			/*
+			 * Can't have multiples of the same ref on a tree block.
+			 */
+			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+		}
+	}
+
+	return done;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head)
+{
+	struct btrfs_delayed_ref_node *ref;
+	struct rb_node *node;
+
+	if (RB_EMPTY_ROOT(&head->ref_tree))
+		return;
+
+	/* We don't have too many refs to merge for data. */
+	if (head->is_data)
+		return;
+
+again:
+	for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
+		if (merge_ref(trans, delayed_refs, head, ref, 0))
+			goto again;
+	}
+}
+
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans)
+{
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_head *head;
+	u64 start;
+	bool loop = false;
+
+	delayed_refs = &trans->delayed_refs;
+
+again:
+	start = delayed_refs->run_delayed_start;
+	head = find_ref_head(&delayed_refs->href_root, start, 1);
+	if (!head && !loop) {
+		delayed_refs->run_delayed_start = 0;
+		start = 0;
+		loop = true;
+		head = find_ref_head(&delayed_refs->href_root, start, 1);
+		if (!head)
+			return NULL;
+	} else if (!head && loop) {
+		return NULL;
+	}
+
+	while (head->processing) {
+		struct rb_node *node;
+
+		node = rb_next(&head->href_node);
+		if (!node) {
+			if (loop)
+				return NULL;
+			delayed_refs->run_delayed_start = 0;
+			start = 0;
+			loop = true;
+			goto again;
+		}
+		head = rb_entry(node, struct btrfs_delayed_ref_head,
+				href_node);
+	}
+
+	head->processing = 1;
+	WARN_ON(delayed_refs->num_heads_ready == 0);
+	delayed_refs->num_heads_ready--;
+	delayed_refs->run_delayed_start = head->bytenr +
+		head->num_bytes;
+	return head;
+}
+
+/*
+ * Helper to insert @ref into @href->ref_tree or merge it with an equal ref.
+ *
+ * Return 0 for insert.
+ * Return >0 for merge.
+ */
+static int insert_delayed_ref(struct btrfs_trans_handle *trans,
+			      struct btrfs_delayed_ref_root *root,
+			      struct btrfs_delayed_ref_head *href,
+			      struct btrfs_delayed_ref_node *ref)
+{
+	struct btrfs_delayed_ref_node *exist;
+	int mod;
+	int ret = 0;
+
+	exist = tree_insert(&href->ref_tree, ref);
+	if (!exist)
+		goto inserted;
+
+	/* Now we are sure we can merge */
+	ret = 1;
+	if (exist->action == ref->action) {
+		mod = ref->ref_mod;
+	} else {
+		/* Need to change action */
+		if (exist->ref_mod < ref->ref_mod) {
+			exist->action = ref->action;
+			mod = -exist->ref_mod;
+			exist->ref_mod = ref->ref_mod;
+			if (ref->action == BTRFS_ADD_DELAYED_REF)
+				list_add_tail(&exist->add_list,
+					      &href->ref_add_list);
+			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
+				ASSERT(!list_empty(&exist->add_list));
+				list_del(&exist->add_list);
+			} else {
+				ASSERT(0);
+			}
+		} else
+			mod = -ref->ref_mod;
+	}
+	exist->ref_mod += mod;
+
+	/* remove the existing ref if its ref_mod is zero */
+	if (exist->ref_mod == 0)
+		drop_delayed_ref(trans, root, href, exist);
+	return ret;
+inserted:
+	if (ref->action == BTRFS_ADD_DELAYED_REF)
+		list_add_tail(&ref->add_list, &href->ref_add_list);
+	trans->delayed_ref_updates++;
+	return ret;
+}
+
+/*
+ * helper function to update the accounting in the head ref
+ * existing and update must have the same bytenr
+ */
+static noinline void
+update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
+			 struct btrfs_delayed_ref_head *existing,
+			 struct btrfs_delayed_ref_head *update,
+			 int *old_ref_mod_ret)
+{
+	int old_ref_mod;
+
+	BUG_ON(existing->is_data != update->is_data);
+
+	if (update->must_insert_reserved) {
+		/* if the extent was freed and then
+		 * reallocated before the delayed ref
+		 * entries were processed, we can end up
+		 * with an existing head ref without
+		 * the must_insert_reserved flag set.
+		 * Set it again here
+		 */
+		existing->must_insert_reserved = update->must_insert_reserved;
+
+		/*
+		 * update the num_bytes so we make sure the accounting
+		 * is done correctly
+		 */
+		existing->num_bytes = update->num_bytes;
+
+	}
+
+	if (update->extent_op) {
+		if (!existing->extent_op) {
+			existing->extent_op = update->extent_op;
+		} else {
+			if (update->extent_op->update_key) {
+				memcpy(&existing->extent_op->key,
+				       &update->extent_op->key,
+				       sizeof(update->extent_op->key));
+				existing->extent_op->update_key = true;
+			}
+			if (update->extent_op->update_flags) {
+				existing->extent_op->flags_to_set |=
+					update->extent_op->flags_to_set;
+				existing->extent_op->update_flags = true;
+			}
+			btrfs_free_delayed_extent_op(update->extent_op);
+		}
+	}
+	/*
+	 * update the reference mod on the head to reflect this new operation,
+	 * only need the lock for this case cause we could be processing it
+	 * currently, for refs we just added we know we're a-ok.
+	 */
+	old_ref_mod = existing->total_ref_mod;
+	if (old_ref_mod_ret)
+		*old_ref_mod_ret = old_ref_mod;
+	existing->ref_mod += update->ref_mod;
+	existing->total_ref_mod += update->ref_mod;
+
+}
+
+static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
+				  void *qrecord,
+				  u64 bytenr, u64 num_bytes, u64 ref_root,
+				  u64 reserved, int action, bool is_data,
+				  bool is_system)
+{
+	int count_mod = 1;
+	int must_insert_reserved = 0;
+
+	/* If reserved is provided, it must be a data extent. */
+	BUG_ON(!is_data && reserved);
+
+	/*
+	 * The head node stores the sum of all the mods, so dropping a ref
+	 * should drop the sum in the head node by one.
+	 */
+	if (action == BTRFS_UPDATE_DELAYED_HEAD)
+		count_mod = 0;
+	else if (action == BTRFS_DROP_DELAYED_REF)
+		count_mod = -1;
+
+	/*
+	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
+	 * accounting when the extent is finally added, or if a later
+	 * modification deletes the delayed ref without ever inserting the
+	 * extent into the extent allocation tree.  ref->must_insert_reserved
+	 * is the flag used to record that accounting mods are required.
+	 *
+	 * Once we record must_insert_reserved, switch the action to
+	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
+	 */
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
+		must_insert_reserved = 1;
+	else
+		must_insert_reserved = 0;
+
+	head_ref->refs = 1;
+	head_ref->bytenr = bytenr;
+	head_ref->num_bytes = num_bytes;
+	head_ref->ref_mod = count_mod;
+	head_ref->must_insert_reserved = must_insert_reserved;
+	head_ref->is_data = is_data;
+	head_ref->is_system = is_system;
+	head_ref->ref_tree = RB_ROOT;
+	INIT_LIST_HEAD(&head_ref->ref_add_list);
+	RB_CLEAR_NODE(&head_ref->href_node);
+	head_ref->processing = 0;
+	head_ref->total_ref_mod = count_mod;
+}
+
+/*
+ * helper function to actually insert a head node into the rbtree.
+ * this does all the dirty work in terms of maintaining the correct
+ * overall modification count.
+ */
+static noinline struct btrfs_delayed_ref_head *
+add_delayed_ref_head(struct btrfs_trans_handle *trans,
+		     struct btrfs_delayed_ref_head *head_ref,
+		     void *qrecord,
+		     int action, int *qrecord_inserted_ret,
+		     int *old_ref_mod, int *new_ref_mod)
+{
+	struct btrfs_delayed_ref_head *existing;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	delayed_refs = &trans->delayed_refs;
+
+	existing = htree_insert(&delayed_refs->href_root, &head_ref->href_node);
+	if (existing) {
+		update_existing_head_ref(delayed_refs, existing, head_ref, old_ref_mod);
+		/*
+		 * we've updated the existing ref, free the newly
+		 * allocated ref
+		 */
+		kfree(head_ref);
+		head_ref = existing;
+	} else {
+		if (old_ref_mod)
+			*old_ref_mod = 0;
+		delayed_refs->num_heads++;
+		delayed_refs->num_heads_ready++;
+		trans->delayed_ref_updates++;
+	}
+	if (new_ref_mod)
+		*new_ref_mod = head_ref->total_ref_mod;
+
+	return head_ref;
+}
+
+/*
+ * init_delayed_ref_common - Initialize the structure which represents a
+ *			     modification to an extent.
+ *
+ * @fs_info:    Internal to the mounted filesystem mount structure.
+ *
+ * @ref:	The structure which is going to be initialized.
+ *
+ * @bytenr:	The logical address of the extent for which a modification is
+ *		going to be recorded.
+ *
+ * @num_bytes:  Size of the extent whose modification is being recorded.
+ *
+ * @ref_root:	The id of the root where this modification has originated, this
+ *		can be either one of the well-known metadata trees or the
+ *		subvolume id which references this extent.
+ *
+ * @action:	Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
+ *		BTRFS_ADD_DELAYED_EXTENT
+ *
+ * @ref_type:	Holds the type of the extent which is being recorded, can be
+ *		one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
+ *		when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
+ *		BTRFS_EXTENT_DATA_REF_KEY when recording data extent
+ */
+static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
+				    struct btrfs_delayed_ref_node *ref,
+				    u64 bytenr, u64 num_bytes, u64 ref_root,
+				    int action, u8 ref_type)
+{
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
+		action = BTRFS_ADD_DELAYED_REF;
+
+	ref->refs = 1;
+	ref->bytenr = bytenr;
+	ref->num_bytes = num_bytes;
+	ref->ref_mod = 1;
+	ref->action = action;
+	ref->is_head = 0;
+	ref->in_tree = 1;
+	ref->seq = 0;
+	ref->type = ref_type;
+	RB_CLEAR_NODE(&ref->ref_node);
+	INIT_LIST_HEAD(&ref->add_list);
+}
+
+/*
+ * add a delayed tree ref.  This does all of the accounting required
+ * to make sure the delayed ref is eventually processed before this
+ * transaction commits.
+ */
+int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+			       struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes, u64 parent,
+			       u64 ref_root, int level, int action,
+			       struct btrfs_delayed_extent_op *extent_op,
+			       int *old_ref_mod, int *new_ref_mod)
+{
+	struct btrfs_delayed_tree_ref *ref;
+	struct btrfs_delayed_ref_head *head_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+	int ret;
+	u8 ref_type;
+
+	BUG_ON(extent_op && extent_op->is_data);
+	ref = kmalloc(sizeof(*ref), GFP_NOFS);
+	if (!ref)
+		return -ENOMEM;
+
+	if (parent)
+		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
+	else
+		ref_type = BTRFS_TREE_BLOCK_REF_KEY;
+	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
+				ref_root, action, ref_type);
+	ref->root = ref_root;
+	ref->parent = parent;
+	ref->level = level;
+
+	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	if (!head_ref)
+		goto free_ref;
+
+	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes,
+			      ref_root, 0, action, false, is_system);
+	head_ref->extent_op = extent_op;
+
+	delayed_refs = &trans->delayed_refs;
+
+	head_ref = add_delayed_ref_head(trans, head_ref, NULL, action, NULL,
+			old_ref_mod, new_ref_mod);
+
+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
+
+	if (ret > 0)
+		kfree(ref);
+
+	return 0;
+
+free_ref:
+	kfree(ref);
+
+	return -ENOMEM;
+}
diff --git a/delayed-ref.h b/delayed-ref.h
new file mode 100644
index 00000000..30a68b2a
--- /dev/null
+++ b/delayed-ref.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ */
+
+#ifndef BTRFS_DELAYED_REF_H
+#define BTRFS_DELAYED_REF_H
+
+#include "kerncompat.h"
+
+/* these are the possible values of struct btrfs_delayed_ref_node->action */
+#define BTRFS_ADD_DELAYED_REF    1 /* add one backref to the tree */
+#define BTRFS_DROP_DELAYED_REF   2 /* delete one backref from the tree */
+#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
+#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
+
+struct btrfs_delayed_ref_node {
+	struct rb_node ref_node;
+	/*
+	 * If action is BTRFS_ADD_DELAYED_REF, also link this node to
+	 * ref_head->ref_add_list, then we do not need to iterate the
+	 * whole ref_head->ref_tree to find BTRFS_ADD_DELAYED_REF nodes.
+	 */
+	struct list_head add_list;
+
+	/* the starting bytenr of the extent */
+	u64 bytenr;
+
+	/* the size of the extent */
+	u64 num_bytes;
+
+	/* seq number to keep track of insertion order */
+	u64 seq;
+
+	/* ref count on this data structure */
+	u64 refs;
+
+	/*
+	 * how many refs is this entry adding or deleting.  For
+	 * head refs, this may be a negative number because it is keeping
+	 * track of the total mods done to the reference count.
+	 * For individual refs, this will always be a positive number
+	 *
+	 * It may be more than one, since it is possible for a single
+	 * parent to have more than one ref on an extent
+	 */
+	int ref_mod;
+
+	unsigned int action:8;
+	unsigned int type:8;
+	/* is this node still in the rbtree? */
+	unsigned int is_head:1;
+	unsigned int in_tree:1;
+};
+
+struct btrfs_delayed_extent_op {
+	struct btrfs_disk_key key;
+	u8 level;
+	bool update_key;
+	bool update_flags;
+	bool is_data;
+	u64 flags_to_set;
+};
+
+/*
+ * the head refs are used to hold a lock on a given extent, which allows us
+ * to make sure that only one process is running the delayed refs
+ * at a time for a single extent.  They also store the sum of all the
+ * reference count modifications we've queued up.
+ */
+struct btrfs_delayed_ref_head {
+	u64 bytenr;
+	u64 num_bytes;
+	u64 refs;
+
+	struct rb_root ref_tree;
+	/* All BTRFS_ADD_DELAYED_REF nodes are also linked on ref_add_list. */
+	struct list_head ref_add_list;
+
+	struct rb_node href_node;
+
+	struct btrfs_delayed_extent_op *extent_op;
+
+	/*
+	 * This is used to track the final ref_mod from all the refs associated
+	 * with this head ref, this is not adjusted as delayed refs are run,
+	 * this is meant to track if we need to do the csum accounting or not.
+	 */
+	int total_ref_mod;
+
+	/*
+	 * This is the current outstanding mod references for this bytenr.  This
+	 * is used with lookup_extent_info to get an accurate reference count
+	 * for a bytenr, so it is adjusted as delayed refs are run so that any
+	 * on disk reference count + ref_mod is accurate.
+	 */
+	int ref_mod;
+
+	/*
+	 * when a new extent is allocated, it is just reserved in memory
+	 * The actual extent isn't inserted into the extent allocation tree
+	 * until the delayed ref is processed.  must_insert_reserved is
+	 * used to flag a delayed ref so the accounting can be updated
+	 * when a full insert is done.
+	 *
+	 * It is possible the extent will be freed before it is ever
+	 * inserted into the extent allocation tree.  In this case
+	 * we need to update the in ram accounting to properly reflect
+	 * the free has happened.
+	 */
+	unsigned int must_insert_reserved:1;
+	unsigned int is_data:1;
+	unsigned int is_system:1;
+	unsigned int processing:1;
+};
+
+struct btrfs_delayed_tree_ref {
+	struct btrfs_delayed_ref_node node;
+	u64 root;
+	u64 parent;
+	int level;
+};
+
+struct btrfs_delayed_ref_root {
+	/* head ref rbtree */
+	struct rb_root href_root;
+
+	/* dirty extent records */
+	struct rb_root dirty_extent_root;
+
+	/* total number of head nodes in tree */
+	unsigned long num_heads;
+
+	/* total number of head nodes ready for processing */
+	unsigned long num_heads_ready;
+
+	/*
+	 * set when the tree is flushing before a transaction commit,
+	 * used by the throttling code to decide if new updates need
+	 * to be run right away
+	 */
+	int flushing;
+
+	u64 run_delayed_start;
+};
+
+
+static inline struct btrfs_delayed_extent_op *
+btrfs_alloc_delayed_extent_op(void)
+{
+	return kmalloc(sizeof(struct btrfs_delayed_extent_op), GFP_KERNEL);
+}
+
+static inline void
+btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
+{
+	if (op)
+		kfree(op);
+}
+
+/* Drop a reference on @ref; the node is freed when the count hits zero. */
+static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
+{
+	WARN_ON(ref->refs == 0);
+	if (--ref->refs == 0) {
+		/* A node being freed must already be unlinked from its tree. */
+		WARN_ON(ref->in_tree);
+		switch (ref->type) {
+		case BTRFS_TREE_BLOCK_REF_KEY:
+		case BTRFS_SHARED_BLOCK_REF_KEY:
+		case BTRFS_EXTENT_DATA_REF_KEY:
+		case BTRFS_SHARED_DATA_REF_KEY:
+			kfree(ref);
+			break;
+		default:
+			BUG();
+		}
+	}
+}
+
+static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
+{
+	if (--head->refs == 0)	/* free only when the last reference is dropped */
+		kfree(head);
+}
+
+int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+			       struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes, u64 parent,
+			       u64 ref_root, int level, int action,
+			       struct btrfs_delayed_extent_op *extent_op,
+			       int *old_ref_mod, int *new_ref_mod);
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head);
+
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans);
+
+/*
+ * helper functions to cast a node into its container
+ */
+static inline struct btrfs_delayed_tree_ref *
+btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
+{
+	return container_of(node, struct btrfs_delayed_tree_ref, node);
+}
+#endif
diff --git a/dir-test.c b/dir-test.c
deleted file mode 100644
index cfb77f2a..00000000
--- a/dir-test.c
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include "kerncompat.h"
-#include "radix-tree.h"
-#include "ctree.h"
-#include "disk-io.h"
-#include "print-tree.h"
-#include "hash.h"
-#include "transaction.h"
-
-int keep_running = 1;
-struct btrfs_super_block super;
-static u64 dir_oid = 0;
-static u64 file_oid = 33778;
-
-static int find_num(struct radix_tree_root *root, unsigned long *num_ret,
-		     int exists)
-{
-	unsigned long num = rand();
-	unsigned long res[2];
-	int ret;
-
-again:
-	ret = radix_tree_gang_lookup(root, (void **)res, num, 2);
-	if (exists) {
-		if (ret == 0)
-			return -1;
-		num = res[0];
-	} else if (ret != 0 && num == res[0]) {
-		num++;
-		if (ret > 1 && num == res[1]) {
-			num++;
-			goto again;
-		}
-	}
-	*num_ret = num;
-	return 0;
-}
-
-static void initial_inode_init(struct btrfs_root *root,
-			       struct btrfs_inode_item *inode_item)
-{
-	memset(inode_item, 0, sizeof(*inode_item));
-	btrfs_set_inode_generation(inode_item, root->fs_info->generation);
-	btrfs_set_inode_mode(inode_item, S_IFREG | 0700);
-}
-
-static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		   struct radix_tree_root *radix)
-{
-	int ret;
-	char buf[128];
-	unsigned long oid;
-	u64 objectid;
-	struct btrfs_path path;
-	struct btrfs_key inode_map;
-	struct btrfs_inode_item inode_item;
-
-	find_num(radix, &oid, 0);
-	sprintf(buf, "str-%lu", oid);
-
-	ret = btrfs_find_free_objectid(trans, root, dir_oid + 1, &objectid);
-	if (ret)
-		goto error;
-
-	inode_map.objectid = objectid;
-	inode_map.flags = 0;
-	inode_map.type = BTRFS_INODE_ITEM_KEY;
-	inode_map.offset = 0;
-
-	initial_inode_init(root, &inode_item);
-	ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
-	if (ret)
-		goto error;
-	ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid,
-				    &inode_map, BTRFS_FT_UNKNOWN);
-	if (ret)
-		goto error;
-
-	radix_tree_preload(GFP_KERNEL);
-	ret = radix_tree_insert(radix, oid, (void *)oid);
-	radix_tree_preload_end();
-	if (ret)
-		goto error;
-	return ret;
-error:
-	if (ret != -EEXIST)
-		goto fatal;
-
-	/*
-	 * if we got an EEXIST, it may be due to hash collision, double
-	 * check
-	 */
-	btrfs_init_path(&path);
-	ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf,
-				    strlen(buf), 0);
-	if (ret)
-		goto fatal_release;
-	if (!btrfs_match_dir_item_name(root, &path, buf, strlen(buf))) {
-		struct btrfs_dir_item *di;
-		char *found;
-		u32 found_len;
-		u64 myhash;
-		u64 foundhash;
-
-		di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
-				    struct btrfs_dir_item);
-		found = (char *)(di + 1);
-		found_len = btrfs_dir_name_len(di);
-		myhash = btrfs_name_hash(buf, strlen(buf));
-		foundhash = btrfs_name_hash(found, found_len);
-		if (myhash != foundhash)
-			goto fatal_release;
-		btrfs_release_path(&path);
-		return 0;
-	}
-fatal_release:
-	btrfs_release_path(&path);
-fatal:
-	printf("failed to insert %lu ret %d\n", oid, ret);
-	return ret;
-}
-
-static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root
-		      *root, struct radix_tree_root *radix)
-{
-	int ret;
-	char buf[128];
-	unsigned long oid;
-	struct btrfs_key key;
-
-	ret = find_num(radix, &oid, 1);
-	if (ret < 0)
-		return 0;
-	sprintf(buf, "str-%lu", oid);
-
-	key.objectid = file_oid;
-	key.flags = 0;
-	key.type = BTRFS_INODE_ITEM_KEY;
-	key.offset = 0;
-	ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid,
-				    &key, BTRFS_FT_UNKNOWN);
-	if (ret != -EEXIST) {
-		printf("insert on %s gave us %d\n", buf, ret);
-		return 1;
-	}
-	return 0;
-}
-
-static int del_dir_item(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root,
-			struct radix_tree_root *radix,
-			unsigned long radix_index,
-			struct btrfs_path *path)
-{
-	int ret;
-	unsigned long *ptr;
-	u64 file_objectid;
-	struct btrfs_dir_item *di;
-
-	/* find the inode number of the file */
-	di = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0],
-			    struct btrfs_dir_item);
-	file_objectid = btrfs_disk_key_objectid(&di->location);
-
-	/* delete the directory item */
-	ret = btrfs_del_item(trans, root, path);
-	if (ret)
-		goto out_release;
-	btrfs_release_path(path);
-
-	/* delete the inode */
-	btrfs_init_path(path);
-	ret = btrfs_lookup_inode(trans, root, path, file_objectid, -1);
-	if (ret)
-		goto out_release;
-	ret = btrfs_del_item(trans, root, path);
-	if (ret)
-		goto out_release;
-	btrfs_release_path(path);
-
-	if (root->fs_info->last_inode_alloc > file_objectid)
-		root->fs_info->last_inode_alloc = file_objectid;
-	ptr = radix_tree_delete(radix, radix_index);
-	if (!ptr) {
-		ret = -5555;
-		goto out;
-	}
-	return 0;
-out_release:
-	btrfs_release_path(path);
-out:
-	printf("failed to delete %lu %d\n", radix_index, ret);
-	return ret;
-}
-
-static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		   struct radix_tree_root *radix)
-{
-	int ret;
-	char buf[128];
-	unsigned long oid;
-	struct btrfs_path path;
-
-	ret = find_num(radix, &oid, 1);
-	if (ret < 0)
-		return 0;
-	sprintf(buf, "str-%lu", oid);
-	btrfs_init_path(&path);
-	ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf,
-				    strlen(buf), -1);
-	if (ret)
-		goto out_release;
-
-	ret = del_dir_item(trans, root, radix, oid, &path);
-	if (ret)
-		goto out_release;
-	return ret;
-out_release:
-	btrfs_release_path(&path);
-	printf("failed to delete %lu %d\n", oid, ret);
-	return ret;
-}
-
-static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root
-		       *root, struct radix_tree_root *radix)
-{
-	struct btrfs_path path;
-	char buf[128];
-	int ret;
-	unsigned long oid;
-	u64 objectid;
-	struct btrfs_dir_item *di;
-
-	ret = find_num(radix, &oid, 1);
-	if (ret < 0)
-		return 0;
-	sprintf(buf, "str-%lu", oid);
-	btrfs_init_path(&path);
-	ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf,
-				    strlen(buf), 0);
-	if (!ret) {
-		di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
-				    struct btrfs_dir_item);
-		objectid = btrfs_disk_key_objectid(&di->location);
-	}
-	btrfs_release_path(&path);
-	if (ret) {
-		printf("unable to find key %lu\n", oid);
-		return ret;
-	}
-	return 0;
-}
-
-static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root
-			 *root, struct radix_tree_root *radix)
-{
-	struct btrfs_path path;
-	char buf[128];
-	int ret;
-	unsigned long oid;
-
-	ret = find_num(radix, &oid, 0);
-	if (ret < 0)
-		return 0;
-	sprintf(buf, "str-%lu", oid);
-	btrfs_init_path(&path);
-	ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf,
-				    strlen(buf), 0);
-	btrfs_release_path(&path);
-	if (!ret) {
-		printf("able to find key that should not exist %lu\n", oid);
-		return ret;
-	}
-	return 0;
-}
-
-static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root
-		      *root, struct radix_tree_root *radix, int nr)
-{
-	struct btrfs_path path;
-	struct btrfs_key key;
-	unsigned long found = 0;
-	u32 found_len;
-	int ret;
-	int slot;
-	int count = 0;
-	char buf[128];
-	struct btrfs_dir_item *di;
-
-	key.offset = (u64)-1;
-	key.flags = 0;
-	key.type = BTRFS_DIR_ITEM_KEY;
-	key.objectid = dir_oid;
-	while(nr-- >= 0) {
-		btrfs_init_path(&path);
-		ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
-		if (ret < 0) {
-			btrfs_release_path(&path);
-			return ret;
-		}
-		if (ret != 0) {
-			if (path.slots[0] == 0) {
-				btrfs_release_path(&path);
-				break;
-			}
-			path.slots[0] -= 1;
-		}
-		slot = path.slots[0];
-		di = btrfs_item_ptr(&path.nodes[0]->leaf, slot,
-				    struct btrfs_dir_item);
-		found_len = btrfs_dir_name_len(di);
-		memcpy(buf, (char *)(di + 1), found_len);
-		BUG_ON(found_len > 128);
-		buf[found_len] = '\0';
-		found = atoi(buf + 4);
-		ret = del_dir_item(trans, root, radix, found, &path);
-		count++;
-		if (ret) {
-			fprintf(stderr,
-				"failed to remove %lu from tree\n",
-				found);
-			return ret;
-		}
-		if (!keep_running)
-			break;
-	}
-	return 0;
-	fprintf(stderr, "failed to delete from the radix %lu\n", found);
-	return ret;
-}
-
-static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		     struct radix_tree_root *radix, int count)
-{
-	int i;
-	int ret = 0;
-	for (i = 0; i < count; i++) {
-		ret = ins_one(trans, root, radix);
-		if (ret) {
-			fprintf(stderr, "fill failed\n");
-			goto out;
-		}
-		if (i % 1000 == 0) {
-			ret = btrfs_commit_transaction(trans, root, &super);
-			if (ret) {
-				fprintf(stderr, "fill commit failed\n");
-				return ret;
-			}
-		}
-		if (i && i % 10000 == 0) {
-			printf("bigfill %d\n", i);
-		}
-		if (!keep_running)
-			break;
-	}
-out:
-	return ret;
-}
-
-static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		   struct radix_tree_root *radix)
-{
-	int ret;
-	int nr = rand() % 5000;
-	static int run_nr = 0;
-
-	/* do the bulk op much less frequently */
-	if (run_nr++ % 100)
-		return 0;
-	ret = empty_tree(trans, root, radix, nr);
-	if (ret)
-		return ret;
-	ret = fill_tree(trans, root, radix, nr);
-	if (ret)
-		return ret;
-	return 0;
-}
-
-
-int (*ops[])(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct
-	     radix_tree_root *radix) =
-	{ ins_one, insert_dup, del_one, lookup_item,
-	  lookup_enoent, bulk_op };
-
-void sigstopper(int ignored)
-{
-	keep_running = 0;
-	fprintf(stderr, "caught exit signal, stopping\n");
-}
-
-int print_usage(void)
-{
-	printf("usage: tester [-ih] [-c count] [-f count]\n");
-	printf("\t -c count -- iteration count after filling\n");
-	printf("\t -f count -- run this many random inserts before starting\n");
-	printf("\t -i       -- only do initial fill\n");
-	printf("\t -h       -- this help text\n");
-	exit(1);
-}
-int main(int ac, char **av)
-{
-	RADIX_TREE(radix, GFP_KERNEL);
-	struct btrfs_root *root;
-	int i;
-	int ret;
-	int count;
-	int op;
-	int iterations = 20000;
-	int init_fill_count = 800000;
-	int err = 0;
-	int initial_only = 0;
-	struct btrfs_trans_handle *trans;
-	radix_tree_init();
-
-	root = open_ctree(av[ac-1], &super, 0);
-
-	if (!root) {
-		fprintf(stderr, "Open ctree failed\n");
-		return 1;
-	}
-
-	trans = btrfs_start_transaction(root, 1);
-	BUG_ON(IS_ERR(trans));
-
-	dir_oid = btrfs_super_root_dir(&super);
-
-	signal(SIGTERM, sigstopper);
-	signal(SIGINT, sigstopper);
-
-	for (i = 1 ; i < ac - 1; i++) {
-		if (strcmp(av[i], "-i") == 0) {
-			initial_only = 1;
-		} else if (strcmp(av[i], "-c") == 0) {
-			iterations = atoi(av[i+1]);
-			i++;
-		} else if (strcmp(av[i], "-f") == 0) {
-			init_fill_count = atoi(av[i+1]);
-			i++;
-		} else {
-			print_usage();
-		}
-	}
-	printf("initial fill\n");
-	ret = fill_tree(trans, root, &radix, init_fill_count);
-	printf("starting run\n");
-	if (ret) {
-		err = ret;
-		goto out;
-	}
-	if (initial_only == 1) {
-		goto out;
-	}
-	for (i = 0; i < iterations; i++) {
-		op = rand() % ARRAY_SIZE(ops);
-		count = rand() % 128;
-		if (i % 2000 == 0) {
-			printf("%d\n", i);
-			fflush(stdout);
-		}
-		if (i && i % 5000 == 0) {
-			printf("open & close, root level %d nritems %d\n",
-				btrfs_header_level(&root->node->node.header),
-				btrfs_header_nritems(&root->node->node.header));
-			close_ctree(root, &super);
-			root = open_ctree("dbfile", &super, 0);
-
-			if (!root) {
-				fprintf(stderr, "Open ctree failed\n");
-				return 1;
-			}
-		}
-		while(count--) {
-			ret = ops[op](trans, root, &radix);
-			if (ret) {
-				fprintf(stderr, "op %d failed %d:%d\n",
-					op, i, iterations);
-				btrfs_print_tree(root, root->node, 1);
-				fprintf(stderr, "op %d failed %d:%d\n",
-					op, i, iterations);
-				err = ret;
-				goto out;
-			}
-			if (ops[op] == bulk_op)
-				break;
-			if (keep_running == 0) {
-				err = 0;
-				goto out;
-			}
-		}
-	}
-out:
-	close_ctree(root, &super);
-	return !!err;
-}
-
diff --git a/disk-io.c b/disk-io.c
index 4a609a89..5fafa144 100644
--- a/disk-io.c
+++ b/disk-io.c
@@ -94,6 +94,10 @@ static void print_tree_block_error(struct btrfs_fs_info *fs_info,
 	char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
 	u8 buf[BTRFS_UUID_SIZE];
 
+	if (!err)
+		return;
+
+	fprintf(stderr, "bad tree block %llu, ", eb->start);
 	switch (err) {
 	case BTRFS_BAD_FSID:
 		read_extent_buffer(eb, buf, btrfs_header_fsid(),
@@ -664,6 +668,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
 	if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
 		return fs_info->quota_enabled ? fs_info->quota_root :
 				ERR_PTR(-ENOENT);
+	if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+		return fs_info->free_space_root ? fs_info->free_space_root :
+						ERR_PTR(-ENOENT);
 
 	BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID ||
 	       location->offset != (u64)-1);
@@ -726,7 +733,6 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr)
 	extent_io_tree_init(&fs_info->free_space_cache);
 	extent_io_tree_init(&fs_info->block_group_cache);
 	extent_io_tree_init(&fs_info->pinned_extents);
-	extent_io_tree_init(&fs_info->pending_del);
 	extent_io_tree_init(&fs_info->extent_ins);
 	fs_info->excluded_extents = NULL;
 
@@ -984,7 +990,6 @@ void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
 	extent_io_tree_cleanup(&fs_info->free_space_cache);
 	extent_io_tree_cleanup(&fs_info->block_group_cache);
 	extent_io_tree_cleanup(&fs_info->pinned_extents);
-	extent_io_tree_cleanup(&fs_info->pending_del);
 	extent_io_tree_cleanup(&fs_info->extent_ins);
 }
 
@@ -1125,6 +1130,9 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path,
 	if (flags & OPEN_CTREE_TEMPORARY_SUPER)
 		sbflags = SBREAD_TEMPORARY;
 
+	if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH)
+		sbflags |= SBREAD_IGNORE_FSID_MISMATCH;
+
 	ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, sbflags,
 			(flags & OPEN_CTREE_NO_DEVICES));
 	if (ret)
@@ -1376,9 +1384,14 @@ static int check_super(struct btrfs_super_block *sb, unsigned sbflags)
 
 		uuid_unparse(sb->fsid, fsid);
 		uuid_unparse(sb->dev_item.fsid, dev_fsid);
-		error("dev_item UUID does not match fsid: %s != %s",
-			dev_fsid, fsid);
-		goto error_out;
+		if (sbflags & SBREAD_IGNORE_FSID_MISMATCH) {
+			warning("ignored: dev_item fsid mismatch: %s != %s",
+					dev_fsid, fsid);
+		} else {
+			error("dev_item UUID does not match fsid: %s != %s",
+					dev_fsid, fsid);
+			goto error_out;
+		}
 	}
 
 	/*
@@ -1427,7 +1440,7 @@ error_out:
  * @sb_bytenr:  offset of the particular superblock copy we want
  * @sbflags:	flags controlling how the superblock is read
  *
- * This function is used by various btrfs comands to obtain a valid superblock.
+ * This function is used by various btrfs commands to obtain a valid superblock.
  *
  * It's mode of operation is controlled by the @sb_bytenr and @sbdflags
  * parameters. If SBREAD_RECOVER flag is set and @sb_bytenr is
diff --git a/disk-io.h b/disk-io.h
index fefdb0a7..05cbbce6 100644
--- a/disk-io.h
+++ b/disk-io.h
@@ -36,7 +36,7 @@ enum btrfs_open_ctree_flags {
 	OPEN_CTREE_PARTIAL		= (1U << 1),
 	/* If primary root pinters are invalid, try backup copies */
 	OPEN_CTREE_BACKUP_ROOT		= (1U << 2),
-	/* Allow reading all superblock sopies if the primary is damaged */
+	/* Allow reading all superblock copies if the primary is damaged */
 	OPEN_CTREE_RECOVER_SUPER	= (1U << 3),
 	/* Restoring filesystem image */
 	OPEN_CTREE_RESTORE		= (1U << 4),
@@ -100,6 +100,12 @@ enum btrfs_read_sb_flags {
 	 * SBREAD_RECOVER
 	 */
 	SBREAD_TEMPORARY = (1 << 1),
+
+	/*
+	 * Equivalent of OPEN_CTREE_IGNORE_FSID_MISMATCH: allow reading a
+	 * superblock whose sb::fsid and sb::dev_item.fsid do not match.
+	 */
+	SBREAD_IGNORE_FSID_MISMATCH = (1 << 2),
 };
 
 /*
diff --git a/extent-tree.c b/extent-tree.c
index 0643815b..8c9cdeff 100644
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -29,6 +29,7 @@
 #include "crc32c.h"
 #include "volumes.h"
 #include "free-space-cache.h"
+#include "free-space-tree.h"
 #include "utils.h"
 
 #define PENDING_EXTENT_INSERT 0
@@ -44,18 +45,10 @@ struct pending_extent_op {
 	int level;
 };
 
-static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
-				     struct btrfs_root *root,
-				     u64 root_objectid, u64 generation,
-				     u64 flags, struct btrfs_disk_key *key,
-				     int level, struct btrfs_key *ins);
 static int __free_extent(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
 			 u64 root_objectid, u64 owner_objectid,
 			 u64 owner_offset, int refs_to_drop);
-static int finish_current_insert(struct btrfs_trans_handle *trans);
-static int del_pending_extents(struct btrfs_trans_handle *trans);
 static struct btrfs_block_group_cache *
 btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
 		       *hint, u64 search_start, int data, int owner);
@@ -1420,19 +1413,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 		err = ret;
 out:
 	btrfs_free_path(path);
-	finish_current_insert(trans);
-	del_pending_extents(trans);
 	BUG_ON(err);
 	return err;
 }
 
-int btrfs_extent_post_op(struct btrfs_trans_handle *trans)
-{
-	finish_current_insert(trans);
-	del_pending_extents(trans);
-	return 0;
-}
-
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
 			     u64 offset, int metadata, u64 *refs, u64 *flags)
@@ -1604,8 +1588,6 @@ again:
 	btrfs_set_extent_flags(l, item, flags);
 out:
 	btrfs_free_path(path);
-	finish_current_insert(trans);
-	del_pending_extents(trans);
 	return ret;
 }
 
@@ -1703,7 +1685,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 				 struct btrfs_block_group_cache *cache)
 {
 	int ret;
-	int pending_ret;
 	struct btrfs_root *extent_root = trans->fs_info->extent_root;
 	unsigned long bi;
 	struct extent_buffer *leaf;
@@ -1719,18 +1700,13 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
 fail:
-	finish_current_insert(trans);
-	pending_ret = del_pending_extents(trans);
 	if (ret)
 		return ret;
-	if (pending_ret)
-		return pending_ret;
 	return 0;
 
 }
 
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
-				   struct btrfs_root *root)
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 {
 	struct extent_io_tree *block_group_cache;
 	struct btrfs_block_group_cache *cache;
@@ -1741,7 +1717,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 	u64 end;
 	u64 ptr;
 
-	block_group_cache = &root->fs_info->block_group_cache;
+	block_group_cache = &trans->fs_info->block_group_cache;
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -1912,12 +1888,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-static int update_block_group(struct btrfs_root *root,
-			      u64 bytenr, u64 num_bytes, int alloc,
-			      int mark_free)
+static int update_block_group(struct btrfs_fs_info *info, u64 bytenr,
+			      u64 num_bytes, int alloc, int mark_free)
 {
 	struct btrfs_block_group_cache *cache;
-	struct btrfs_fs_info *info = root->fs_info;
 	u64 total = num_bytes;
 	u64 old_val;
 	u64 byte_in_group;
@@ -1932,14 +1906,6 @@ static int update_block_group(struct btrfs_root *root,
 		old_val -= num_bytes;
 	btrfs_set_super_bytes_used(info->super_copy, old_val);
 
-	/* block accounting for root item */
-	old_val = btrfs_root_used(&root->root_item);
-	if (alloc)
-		old_val += num_bytes;
-	else
-		old_val -= num_bytes;
-	btrfs_set_root_used(&root->root_item, old_val);
-
 	while(total) {
 		cache = btrfs_lookup_block_group(info, bytenr);
 		if (!cache) {
@@ -2034,73 +2000,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-static int extent_root_pending_ops(struct btrfs_fs_info *info)
-{
-	u64 start;
-	u64 end;
-	int ret;
-
-	ret = find_first_extent_bit(&info->extent_ins, 0, &start,
-				    &end, EXTENT_LOCKED);
-	if (!ret) {
-		ret = find_first_extent_bit(&info->pending_del, 0, &start, &end,
-					    EXTENT_LOCKED);
-	}
-	return ret == 0;
-
-}
-static int finish_current_insert(struct btrfs_trans_handle *trans)
-{
-	u64 start;
-	u64 end;
-	u64 priv;
-	struct btrfs_fs_info *info = trans->fs_info;
-	struct btrfs_root *extent_root = info->extent_root;
-	struct pending_extent_op *extent_op;
-	struct btrfs_key key;
-	int ret;
-	int skinny_metadata =
-		btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);
-
-	while(1) {
-		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
-					    &end, EXTENT_LOCKED);
-		if (ret)
-			break;
-
-		ret = get_state_private(&info->extent_ins, start, &priv);
-		BUG_ON(ret);
-		extent_op = (struct pending_extent_op *)(unsigned long)priv;
-
-		if (extent_op->type == PENDING_EXTENT_INSERT) {
-			key.objectid = start;
-			if (skinny_metadata) {
-				key.offset = extent_op->level;
-				key.type = BTRFS_METADATA_ITEM_KEY;
-			} else {
-				key.offset = extent_op->num_bytes;
-				key.type = BTRFS_EXTENT_ITEM_KEY;
-			}
-			ret = alloc_reserved_tree_block(trans, extent_root,
-						extent_root->root_key.objectid,
-						trans->transid,
-						extent_op->flags,
-						&extent_op->key,
-						extent_op->level, &key);
-			BUG_ON(ret);
-		} else {
-			BUG_ON(1);
-		}
-
-		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED);
-		kfree(extent_op);
-	}
-	return 0;
-}
-
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root,
-			  u64 bytenr, u64 num_bytes, int is_data)
+static int pin_down_bytes(struct btrfs_trans_handle *trans, u64 bytenr,
+			  u64 num_bytes, int is_data)
 {
 	int err = 0;
 	struct extent_buffer *buf;
@@ -2108,7 +2009,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 	if (is_data)
 		goto pinit;
 
-	buf = btrfs_find_tree_block(root->fs_info, bytenr, num_bytes);
+	buf = btrfs_find_tree_block(trans->fs_info, bytenr, num_bytes);
 	if (!buf)
 		goto pinit;
 
@@ -2152,7 +2053,6 @@ void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
  * remove an extent from the root, returns 0 on success
  */
 static int __free_extent(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
 			 u64 root_objectid, u64 owner_objectid,
 			 u64 owner_offset, int refs_to_drop)
@@ -2160,7 +2060,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 
 	struct btrfs_key key;
 	struct btrfs_path *path;
-	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct btrfs_root *extent_root = trans->fs_info->extent_root;
 	struct extent_buffer *leaf;
 	struct btrfs_extent_item *ei;
 	struct btrfs_extent_inline_ref *iref;
@@ -2174,8 +2074,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 	int skinny_metadata =
 		btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);
 
-	if (root->fs_info->free_extent_hook) {
-		root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes,
+	if (trans->fs_info->free_extent_hook) {
+		trans->fs_info->free_extent_hook(trans->fs_info, bytenr, num_bytes,
 						parent, root_objectid, owner_objectid,
 						owner_offset, refs_to_drop);
 
@@ -2360,7 +2260,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 		}
 
 		if (pin) {
-			ret = pin_down_bytes(trans, root, bytenr, num_bytes,
+			ret = pin_down_bytes(trans, bytenr, num_bytes,
 					     is_data);
 			if (ret > 0)
 				mark_free = 1;
@@ -2373,79 +2273,23 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 		btrfs_release_path(path);
 
 		if (is_data) {
-			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
+			ret = btrfs_del_csums(trans, bytenr, num_bytes);
 			BUG_ON(ret);
 		}
 
-		update_block_group(root, bytenr, num_bytes, 0, mark_free);
+		ret = add_to_free_space_tree(trans, bytenr, num_bytes);
+		if (ret) {
+			goto fail;
+		}
+
+		update_block_group(trans->fs_info, bytenr, num_bytes, 0,
+				   mark_free);
 	}
 fail:
 	btrfs_free_path(path);
-	finish_current_insert(trans);
 	return ret;
 }
 
-/*
- * find all the blocks marked as pending in the radix tree and remove
- * them from the extent map
- */
-static int del_pending_extents(struct btrfs_trans_handle *trans)
-{
-	int ret;
-	int err = 0;
-	u64 start;
-	u64 end;
-	u64 priv;
-	struct extent_io_tree *pending_del;
-	struct extent_io_tree *extent_ins;
-	struct pending_extent_op *extent_op;
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_root *extent_root = fs_info->extent_root;
-
-	extent_ins = &extent_root->fs_info->extent_ins;
-	pending_del = &extent_root->fs_info->pending_del;
-
-	while(1) {
-		ret = find_first_extent_bit(pending_del, 0, &start, &end,
-					    EXTENT_LOCKED);
-		if (ret)
-			break;
-
-		ret = get_state_private(pending_del, start, &priv);
-		BUG_ON(ret);
-		extent_op = (struct pending_extent_op *)(unsigned long)priv;
-
-		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED);
-
-		if (!test_range_bit(extent_ins, start, end,
-				    EXTENT_LOCKED, 0)) {
-			ret = __free_extent(trans, extent_root,
-					    start, end + 1 - start, 0,
-					    extent_root->root_key.objectid,
-					    extent_op->level, 0, 1);
-			kfree(extent_op);
-		} else {
-			kfree(extent_op);
-			ret = get_state_private(extent_ins, start, &priv);
-			BUG_ON(ret);
-			extent_op = (struct pending_extent_op *)
-							(unsigned long)priv;
-
-			clear_extent_bits(extent_ins, start, end,
-					  EXTENT_LOCKED);
-
-			if (extent_op->type == PENDING_BACKREF_UPDATE)
-				BUG_ON(1);
-
-			kfree(extent_op);
-		}
-		if (ret)
-			err = ret;
-	}
-	return err;
-}
-
-
 int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  struct extent_buffer *buf,
@@ -2465,33 +2309,30 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      u64 bytenr, u64 num_bytes, u64 parent,
 		      u64 root_objectid, u64 owner, u64 offset)
 {
-	struct btrfs_root *extent_root = root->fs_info->extent_root;
-	int pending_ret;
 	int ret;
 
 	WARN_ON(num_bytes < root->fs_info->sectorsize);
-	if (root == extent_root) {
-		struct pending_extent_op *extent_op;
-
-		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
-		BUG_ON(!extent_op);
-
-		extent_op->type = PENDING_EXTENT_DELETE;
-		extent_op->bytenr = bytenr;
-		extent_op->num_bytes = num_bytes;
-		extent_op->level = (int)owner;
-
-		set_extent_bits(&root->fs_info->pending_del,
-				bytenr, bytenr + num_bytes - 1,
-				EXTENT_LOCKED);
-		set_state_private(&root->fs_info->pending_del,
-				  bytenr, (unsigned long)extent_op);
-		return 0;
+	/*
+	 * tree log blocks never actually go into the extent allocation
+	 * tree, just update pinning info and exit early.
+	 */
+	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
+		printf("PINNING EXTENTS IN LOG TREE\n");
+		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
+		btrfs_pin_extent(trans->fs_info, bytenr, num_bytes);
+		ret = 0;
+	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		BUG_ON(offset);
+		ret = btrfs_add_delayed_tree_ref(trans->fs_info, trans,
+						 bytenr, num_bytes, parent,
+						 root_objectid, (int)owner,
+						 BTRFS_DROP_DELAYED_REF,
+						 NULL, NULL, NULL);
+	} else {
+		ret = __free_extent(trans, bytenr, num_bytes, parent,
+				    root_objectid, owner, offset, 1);
 	}
-	ret = __free_extent(trans, root, bytenr, num_bytes, parent,
-			    root_objectid, owner, offset, 1);
-	pending_ret = del_pending_extents(trans);
-	return ret ? ret : pending_ret;
+	return ret;
 }
 
 static u64 stripe_align(struct btrfs_root *root, u64 val)
@@ -2690,57 +2531,88 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 }
 
 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
-				     struct btrfs_root *root,
-				     u64 root_objectid, u64 generation,
-				     u64 flags, struct btrfs_disk_key *key,
-				     int level, struct btrfs_key *ins)
+				      struct btrfs_delayed_ref_node *node,
+				      struct btrfs_delayed_extent_op *extent_op)
 {
-	int ret;
-	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	struct btrfs_delayed_tree_ref *ref = btrfs_delayed_node_to_tree_ref(node);
+	bool skinny_metadata = btrfs_fs_incompat(trans->fs_info, SKINNY_METADATA);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_extent_item *extent_item;
-	struct btrfs_tree_block_info *block_info;
 	struct btrfs_extent_inline_ref *iref;
-	struct btrfs_path *path;
 	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key ins;
 	u32 size = sizeof(*extent_item) + sizeof(*iref);
-	int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
+	u64 start, end;
+	int ret;
 
-	if (!skinny_metadata)
-		size += sizeof(*block_info);
+	ins.objectid = node->bytenr;
+	if (skinny_metadata) {
+		ins.offset = ref->level;
+		ins.type = BTRFS_METADATA_ITEM_KEY;
+	} else {
+		ins.offset = node->num_bytes;
+		ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+		size += sizeof(struct btrfs_tree_block_info);
+	}
+
+	if (ref->root == BTRFS_EXTENT_TREE_OBJECTID) {
+		ret = find_first_extent_bit(&trans->fs_info->extent_ins,
+					    node->bytenr, &start, &end,
+					    EXTENT_LOCKED);
+		ASSERT(!ret);
+		ASSERT(start == node->bytenr);
+		ASSERT(end == node->bytenr + node->num_bytes - 1);
+	}
 
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
 	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
-				      ins, size);
-	BUG_ON(ret);
+				      &ins, size);
+	if (ret)
+		return ret;
 
 	leaf = path->nodes[0];
 	extent_item = btrfs_item_ptr(leaf, path->slots[0],
 				     struct btrfs_extent_item);
 	btrfs_set_extent_refs(leaf, extent_item, 1);
-	btrfs_set_extent_generation(leaf, extent_item, generation);
+	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
 	btrfs_set_extent_flags(leaf, extent_item,
-			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
+			       extent_op->flags_to_set |
+			       BTRFS_EXTENT_FLAG_TREE_BLOCK);
 
 	if (skinny_metadata) {
 		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
 	} else {
+		struct btrfs_tree_block_info *block_info;
 		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
-		btrfs_set_tree_block_key(leaf, block_info, key);
-		btrfs_set_tree_block_level(leaf, block_info, level);
+		btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
+		btrfs_set_tree_block_level(leaf, block_info, ref->level);
 		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
 	}
 
 	btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
-	btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+	btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
 
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_free_path(path);
 
-	ret = update_block_group(root, ins->objectid, fs_info->nodesize,
-				 1, 0);
+	ret = remove_from_free_space_tree(trans, ins.objectid, fs_info->nodesize);
+	if (ret)
+		return ret;
+
+	ret = update_block_group(fs_info, ins.objectid, fs_info->nodesize, 1,
+				 0);
+
+	if (ref->root == BTRFS_EXTENT_TREE_OBJECTID) {
+		clear_extent_bits(&trans->fs_info->extent_ins, start, end,
+				  EXTENT_LOCKED);
+	}
+
 	return ret;
 }
 
@@ -2752,39 +2624,51 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
 			    u64 search_end, struct btrfs_key *ins)
 {
 	int ret;
+	u64 extent_size;
+	struct btrfs_delayed_extent_op *extent_op;
+	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+						 SKINNY_METADATA);
+
+	extent_op = btrfs_alloc_delayed_extent_op();
+	if (!extent_op)
+		return -ENOMEM;
+
 	ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
 				   hint_byte, search_end, ins, 0);
-	BUG_ON(ret);
+	if (ret < 0) {
+		btrfs_free_delayed_extent_op(extent_op); /* never queued */
+		return ret;
+	}
 
+	if (key)
+		memcpy(&extent_op->key, key, sizeof(extent_op->key));
+	else
+		memset(&extent_op->key, 0, sizeof(extent_op->key));
+	extent_op->flags_to_set = flags;
+	extent_op->update_key = !skinny_metadata;
+	extent_op->update_flags = true;
+	extent_op->is_data = false;
+	extent_op->level = level;
+
+	extent_size = ins->offset;
+	if (skinny_metadata) {
+		ins->offset = level;
+		ins->type = BTRFS_METADATA_ITEM_KEY;
+	}
+
+	/* Ensure this reserved extent is not found by the allocator */
 	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) {
-		struct pending_extent_op *extent_op;
-
-		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
-		BUG_ON(!extent_op);
-
-		extent_op->type = PENDING_EXTENT_INSERT;
-		extent_op->bytenr = ins->objectid;
-		extent_op->num_bytes = ins->offset;
-		extent_op->level = level;
-		extent_op->flags = flags;
-		memcpy(&extent_op->key, key, sizeof(*key));
-
-		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
-				ins->objectid + ins->offset - 1,
-				EXTENT_LOCKED);
-		set_state_private(&root->fs_info->extent_ins,
-				  ins->objectid, (unsigned long)extent_op);
-	} else {
-		if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
-			ins->offset = level;
-			ins->type = BTRFS_METADATA_ITEM_KEY;
-		}
-		ret = alloc_reserved_tree_block(trans, root, root_objectid,
-						generation, flags,
-						key, level, ins);
-		finish_current_insert(trans);
-		del_pending_extents(trans);
+		ret = set_extent_bits(&trans->fs_info->extent_ins,
+				      ins->objectid,
+				      ins->objectid + extent_size - 1,
+				      EXTENT_LOCKED);
+		BUG_ON(ret);
 	}
+
+	ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, ins->objectid,
+					 extent_size, 0, root_objectid,
+					 level, BTRFS_ADD_DELAYED_EXTENT,
+					 extent_op, NULL, NULL);
 	return ret;
 }
 
@@ -2975,7 +2859,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
 			path->slots[*level]++;
 			ret = btrfs_free_extent(trans, root, bytenr, blocksize,
 						parent->start, root_owner,
-						root_gen, *level - 1, 1);
+						root_gen, *level - 1, 0);
 			BUG_ON(ret);
 			continue;
 		}
@@ -3017,7 +2901,7 @@ out:
 	root_gen = btrfs_header_generation(parent);
 	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
 				path->nodes[*level]->len, parent->start,
-				root_owner, root_gen, *level, 1);
+				root_owner, root_gen, *level, 0);
 	free_extent_buffer(path->nodes[*level]);
 	path->nodes[*level] = NULL;
 	*level += 1;
@@ -3068,7 +2952,7 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
 						path->nodes[*level]->start,
 						path->nodes[*level]->len,
 						parent->start, root_owner,
-						root_gen, *level, 1);
+						root_gen, *level, 0);
 			BUG_ON(ret);
 			free_extent_buffer(path->nodes[*level]);
 			path->nodes[*level] = NULL;
@@ -3309,11 +3193,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 				sizeof(cache->item));
 	BUG_ON(ret);
 
-	ret = finish_current_insert(trans);
-	BUG_ON(ret);
-	ret = del_pending_extents(trans);
-	BUG_ON(ret);
-
 	return 0;
 }
 
@@ -3409,10 +3288,6 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
 					sizeof(cache->item));
 		BUG_ON(ret);
 
-		finish_current_insert(trans);
-		ret = del_pending_extents(trans);
-		BUG_ON(ret);
-
 		cur_start = cache->key.objectid + cache->key.offset;
 	}
 	return 0;
@@ -3422,7 +3297,7 @@ int btrfs_update_block_group(struct btrfs_root *root,
 			     u64 bytenr, u64 num_bytes, int alloc,
 			     int mark_free)
 {
-	return update_block_group(root, bytenr, num_bytes,
+	return update_block_group(root->fs_info, bytenr, num_bytes,
 				  alloc, mark_free);
 }
 
@@ -3794,14 +3669,9 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans)
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root = fs_info->extent_root;
 
-	while(extent_root_pending_ops(fs_info)) {
-		ret = finish_current_insert(trans);
-		if (ret)
-			return ret;
-		ret = del_pending_extents(trans);
-		if (ret)
-			return ret;
-	}
+	ret = btrfs_run_delayed_refs(trans, -1);
+	if (ret)
+		return ret;
 
 	while(1) {
 		cache = btrfs_lookup_first_block_group(fs_info, start);
@@ -3879,7 +3749,7 @@ static void __get_extent_size(struct btrfs_root *root, struct btrfs_path *path,
  * Return >0 for not found.
  * Return <0 for err
  */
-int btrfs_search_overlap_extent(struct btrfs_root *root,
+static int btrfs_search_overlap_extent(struct btrfs_root *root,
 				struct btrfs_path *path, u64 bytenr, u64 len)
 {
 	struct btrfs_key key;
@@ -4006,7 +3876,7 @@ static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans,
 		} else if (ret != -EEXIST) {
 			goto fail;
 		}
-		btrfs_extent_post_op(trans);
+		btrfs_run_delayed_refs(trans, -1);
 		extent_bytenr = disk_bytenr;
 		extent_num_bytes = num_bytes;
 		extent_offset = 0;
@@ -4197,3 +4067,231 @@ u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 
 	return total_added;
 }
+
+/* Detach the extent op attached to @head, if there is one, and free it. */
+static void cleanup_extent_op(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info,
+			     struct btrfs_delayed_ref_head *head)
+{
+	struct btrfs_delayed_extent_op *pending = head->extent_op;
+
+	head->extent_op = NULL;
+	if (pending)
+		btrfs_free_delayed_extent_op(pending);
+}
+
+static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
+				      struct btrfs_delayed_ref_head *head)
+{
+	delayed_refs->num_heads_ready += 1;	/* count the head as ready */
+	head->processing = 0;			/* clear the in-progress mark */
+}
+
+/* Retire a ref head once it is drained; returns 1 if it is not, else 0. */
+static int cleanup_ref_head(struct btrfs_trans_handle *trans,
+			    struct btrfs_fs_info *fs_info,
+			    struct btrfs_delayed_ref_head *head)
+{
+	struct btrfs_delayed_ref_root *refs_root = &trans->delayed_refs;
+	int still_busy;
+
+	cleanup_extent_op(trans, fs_info, head);
+
+	/*
+	 * A head that still carries refs or an extent op is left in place;
+	 * the caller has to look at it again.
+	 */
+	still_busy = !RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op;
+	if (still_busy)
+		return 1;
+
+	refs_root->num_heads--;
+	rb_erase(&head->href_node, &refs_root->href_root);
+	RB_CLEAR_NODE(&head->href_node);
+	if (head->must_insert_reserved)
+		btrfs_pin_extent(fs_info, head->bytenr, head->num_bytes);
+
+	btrfs_put_delayed_ref_head(head);
+	return 0;
+}
+
+static inline struct btrfs_delayed_ref_node *
+select_delayed_ref(struct btrfs_delayed_ref_head *head)
+{
+	struct btrfs_delayed_ref_node *ref;
+
+	if (RB_EMPTY_ROOT(&head->ref_tree))
+		return NULL;
+	/*
+	 * Refs queued on ref_add_list (BTRFS_ADD_DELAYED_REF) go first.
+	 * Otherwise a drop could take the ref count down to zero and delete
+	 * the extent item while additions are still pending, and those
+	 * additions would then fail to find the extent item.
+	 */
+	if (list_empty(&head->ref_add_list)) {
+		ref = rb_entry(rb_first(&head->ref_tree),
+			       struct btrfs_delayed_ref_node, ref_node);
+		ASSERT(list_empty(&ref->add_list));
+		return ref;
+	}
+	return list_first_entry(&head->ref_add_list,
+				struct btrfs_delayed_ref_node, add_list);
+}
+
+/* Apply one delayed tree-block ref: insert a reserved block or drop a ref. */
+static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *fs_info,
+				struct btrfs_delayed_ref_node *node,
+				struct btrfs_delayed_extent_op *extent_op,
+				int insert_reserved)
+{
+	int ret = 0;
+	struct btrfs_delayed_tree_ref *ref;
+	u64 parent = 0;
+	u64 ref_root = 0;
+
+	ref = btrfs_delayed_node_to_tree_ref(node);
+
+	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
+		parent = ref->parent;
+	ref_root = ref->root;
+
+	if (node->ref_mod != 1) {
+		printf("btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu\n",
+			node->bytenr, node->ref_mod, node->action, ref_root,
+			parent);
+		return -EIO;
+	}
+	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
+		BUG_ON(!extent_op || !extent_op->update_flags);
+		ret = alloc_reserved_tree_block(trans, node, extent_op);
+	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
+		/* Dropping a ref: hand the block over to __free_extent() */
+		ret = __free_extent(trans, node->bytenr, node->num_bytes,
+				    ref->parent, ref->root, ref->level, 0, 1);
+	} else {
+		BUG();
+	}
+
+	return ret;
+}
+
+/* Dispatch one delayed ref entry; only tree block ref types are handled */
+static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
+			       struct btrfs_fs_info *fs_info,
+			       struct btrfs_delayed_ref_node *node,
+			       struct btrfs_delayed_extent_op *extent_op,
+			       int insert_reserved)
+{
+	switch (node->type) {
+	case BTRFS_TREE_BLOCK_REF_KEY:
+	case BTRFS_SHARED_BLOCK_REF_KEY:
+		return run_delayed_tree_ref(trans, fs_info, node, extent_op,
+					    insert_reserved);
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long nr)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_node *ref;
+	struct btrfs_delayed_ref_head *locked_ref = NULL;
+	struct btrfs_delayed_extent_op *extent_op;
+	int ret;
+	int must_insert_reserved = 0;
+
+	delayed_refs = &trans->delayed_refs;
+	while (1) {	/* until no head is left; @nr is never read */
+		if (!locked_ref) {
+			locked_ref = btrfs_select_ref_head(trans);
+			if (!locked_ref)
+				break;
+		}
+		/*
+		 * We need to try and merge add/drops of the same ref since we
+		 * can run into issues with relocate dropping the implicit ref
+		 * and then it being added back again before the drop can
+		 * finish.  If we merged anything we need to re-loop so we can
+		 * get a good ref.
+		 * Or we can get node references of the same type that weren't
+		 * merged when created due to bumps in the tree mod seq, and
+		 * we need to merge them to prevent adding an inline extent
+		 * backref before dropping it (triggering a BUG_ON at
+		 * insert_inline_extent_backref()).
+		 */
+		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
+		ref = select_delayed_ref(locked_ref);
+		/*
+		 * We're done processing refs in this ref_head, clean everything
+		 * up and move on to the next ref_head.
+		 */
+		if (!ref) {
+			ret = cleanup_ref_head(trans, fs_info, locked_ref);
+			if (ret > 0 ) {
+				/* Head is not empty yet, loop. */
+				ret = 0;
+				continue;
+			} else if (ret) {
+				return ret;
+			}
+			locked_ref = NULL;
+			continue;
+		}
+
+		ref->in_tree = 0;
+		rb_erase(&ref->ref_node, &locked_ref->ref_tree);
+		RB_CLEAR_NODE(&ref->ref_node);
+		if (!list_empty(&ref->add_list))
+				list_del(&ref->add_list);
+		/*
+		 * When we play the delayed ref, also correct the ref_mod on
+		 * head
+		 */
+		switch (ref->action) {
+		case BTRFS_ADD_DELAYED_REF:
+		case BTRFS_ADD_DELAYED_EXTENT:
+			locked_ref->ref_mod -= ref->ref_mod;
+			break;
+		case BTRFS_DROP_DELAYED_REF:
+			locked_ref->ref_mod += ref->ref_mod;
+			break;
+		default:
+			WARN_ON(1);
+		}
+
+		/*
+		 * Record the must_insert_reserved flag and clear it on the
+		 * head before the ref is run.
+		 */
+		must_insert_reserved = locked_ref->must_insert_reserved;
+		locked_ref->must_insert_reserved = 0;
+
+		extent_op = locked_ref->extent_op;
+		locked_ref->extent_op = NULL;
+
+		ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
+					  must_insert_reserved);
+
+		btrfs_free_delayed_extent_op(extent_op);
+		/*
+		 * If we are re-initing extent tree in this transaction
+		 * failure in freeing old roots are expected (because we don't
+		 * have the old extent tree, hence backref resolution will
+		 * return -EIO).
+		 */
+		if (ret && (!trans->reinit_extent_tree ||
+		     ref->action != BTRFS_DROP_DELAYED_REF)) {
+			unselect_delayed_ref_head(delayed_refs, locked_ref);
+			btrfs_put_delayed_ref(ref);
+			return ret;
+		}
+
+		btrfs_put_delayed_ref(ref);
+	}
+
+	return 0;
+}
diff --git a/extent_io.c b/extent_io.c
index 19849269..c57f6282 100644
--- a/extent_io.c
+++ b/extent_io.c
@@ -205,6 +205,62 @@ static int clear_state_bit(struct extent_io_tree *tree,
 }
 
 /*
+ * extent_buffer_bitmap_set - set an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to set
+ */
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+                              unsigned long pos, unsigned long len)
+{
+	u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
+	const unsigned int size = pos + len;
+	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+
+	while (len >= bits_to_set) {	/* run reaches the end of this byte */
+		*p |= mask_to_set;
+		len -= bits_to_set;
+		bits_to_set = BITS_PER_BYTE;
+		mask_to_set = ~0;	/* later bytes use a full mask */
+		p++;
+	}
+	if (len) {	/* leftover bits that stop short of a byte boundary */
+		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+		*p |= mask_to_set;
+	}
+}
+
+/*
+ * extent_buffer_bitmap_clear - clear an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to clear
+ */
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+                                unsigned long pos, unsigned long len)
+{
+	u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
+	const unsigned int size = pos + len;
+	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+
+	while (len >= bits_to_clear) {	/* run reaches the end of this byte */
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_BYTE;
+		mask_to_clear = ~0;	/* later bytes use a full mask */
+		p++;
+	}
+	if (len) {	/* leftover bits that stop short of a byte boundary */
+		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+
+/*
  * clear some bits on a range in the tree.
  */
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
@@ -608,7 +664,11 @@ static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now)
 	eb->refs--;
 	BUG_ON(eb->refs < 0);
 	if (eb->refs == 0) {
-		BUG_ON(eb->flags & EXTENT_DIRTY);
+		if (eb->flags & EXTENT_DIRTY) {
+			warning(
+			"dirty eb leak (aborted trans): start %llu len %u",
+				eb->start, eb->len);
+		}
 		list_del_init(&eb->recow);
 		if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
 			free_extent_buffer_final(eb);
diff --git a/extent_io.h b/extent_io.h
index d407d93d..9587528b 100644
--- a/extent_io.h
+++ b/extent_io.h
@@ -175,4 +175,9 @@ int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
 			u64 bytes, int mirror);
 int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
 		       u64 bytes, int mirror);
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+                                unsigned long pos, unsigned long len);
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+                              unsigned long pos, unsigned long len);
+
 #endif
diff --git a/file-item.c b/file-item.c
index 7b0ff358..71d4e89f 100644
--- a/file-item.c
+++ b/file-item.c
@@ -394,8 +394,7 @@ static noinline int truncate_one_csum(struct btrfs_root *root,
  * deletes the csum items from the csum tree for a given
  * range of bytes.
  */
-int btrfs_del_csums(struct btrfs_trans_handle *trans,
-		    struct btrfs_root *root, u64 bytenr, u64 len)
+int btrfs_del_csums(struct btrfs_trans_handle *trans, u64 bytenr, u64 len)
 {
 	struct btrfs_path *path;
 	struct btrfs_key key;
@@ -403,11 +402,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 	u64 csum_end;
 	struct extent_buffer *leaf;
 	int ret;
-	u16 csum_size =
-		btrfs_super_csum_size(root->fs_info->super_copy);
-	int blocksize = root->fs_info->sectorsize;
+	u16 csum_size = btrfs_super_csum_size(trans->fs_info->super_copy);
+	int blocksize = trans->fs_info->sectorsize;
+	struct btrfs_root *csum_root = trans->fs_info->csum_root;
 
-	root = root->fs_info->csum_root;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -418,7 +416,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		key.offset = end_byte - 1;
 		key.type = BTRFS_EXTENT_CSUM_KEY;
 
-		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		ret = btrfs_search_slot(trans, csum_root, &key, path, -1, 1);
 		if (ret > 0) {
 			if (path->slots[0] == 0)
 				goto out;
@@ -445,7 +443,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 
 		/* delete the entire item, it is inside our range */
 		if (key.offset >= bytenr && csum_end <= end_byte) {
-			ret = btrfs_del_item(trans, root, path);
+			ret = btrfs_del_item(trans, csum_root, path);
 			BUG_ON(ret);
 		} else if (key.offset < bytenr && csum_end > end_byte) {
 			unsigned long offset;
@@ -485,12 +483,14 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 			 * btrfs_split_item returns -EAGAIN when the
 			 * item changed size or key
 			 */
-			ret = btrfs_split_item(trans, root, path, &key, offset);
+			ret = btrfs_split_item(trans, csum_root, path, &key,
+					       offset);
 			BUG_ON(ret && ret != -EAGAIN);
 
 			key.offset = end_byte - 1;
 		} else {
-			ret = truncate_one_csum(root, path, &key, bytenr, len);
+			ret = truncate_one_csum(csum_root, path, &key, bytenr,
+						len);
 			BUG_ON(ret);
 		}
 		btrfs_release_path(path);
diff --git a/file.c b/file.c
index f5e645c4..d9e78e4e 100644
--- a/file.c
+++ b/file.c
@@ -178,7 +178,7 @@ out:
  * 3) data read out is also aligned to sectorsize, not truncated to inode size
  *
  * Return < 0 for fatal error during read.
- * Otherwise return the number of succesfully read data in bytes.
+ * Otherwise return the number of successfully read data in bytes.
  */
 int btrfs_read_file(struct btrfs_root *root, u64 ino, u64 start, int len,
 		    char *dest)
@@ -255,8 +255,7 @@ int btrfs_read_file(struct btrfs_root *root, u64 ino, u64 start, int len,
 		/* Inline extent, one inode should only one inline extent */
 		if (btrfs_file_extent_type(leaf, fi) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			extent_len = btrfs_file_extent_inline_len(leaf, slot,
-								  fi);
+			extent_len = btrfs_file_extent_ram_bytes(leaf, fi);
 			if (extent_start + extent_len <= start)
 				goto next;
 			read_extent_buffer(leaf, dest,
@@ -302,7 +301,7 @@ next:
 
 	/*
 	 * Special trick for no_holes, since for no_holes we don't have good
-	 * method to account skipped and tailling holes, we used
+	 * method to account skipped and trailing holes, we used
 	 * min(inode size, len) as return value
 	 */
 	if (no_holes) {
diff --git a/free-space-cache.c b/free-space-cache.c
index 9b83a71c..2ef2d307 100644
--- a/free-space-cache.c
+++ b/free-space-cache.c
@@ -838,10 +838,8 @@ int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset,
 	try_merge_free_space(ctl, info);
 
 	ret = link_free_space(ctl, info);
-	if (ret) {
+	if (ret)
 		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
-		BUG_ON(ret == -EEXIST);
-	}
 
 	return ret;
 }
diff --git a/free-space-tree.c b/free-space-tree.c
index 139a031e..6ef57928 100644
--- a/free-space-tree.c
+++ b/free-space-tree.c
@@ -21,6 +21,37 @@
 #include "free-space-cache.h"
 #include "free-space-tree.h"
 #include "transaction.h"
+#include "bitops.h"
+#include "internal.h"
+
+static inline void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache,
+				    u64 sectorsize)
+{
+	u32 range_per_bitmap;
+	size_t bytes_per_bitmap;
+	u64 bitmap_count, bytes_for_bitmaps;
+
+	/*
+	 * High threshold: the extent count at which extent items would need
+	 * more disk space than bitmap items covering the whole block group.
+	 */
+	range_per_bitmap = sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+	bitmap_count = div_u64(cache->key.offset + range_per_bitmap - 1,
+			       range_per_bitmap);
+	bytes_per_bitmap = sizeof(struct btrfs_item) +
+			   BTRFS_FREE_SPACE_BITMAP_SIZE;
+	bytes_for_bitmaps = bitmap_count * bytes_per_bitmap;
+	cache->bitmap_high_thresh = div_u64(bytes_for_bitmaps,
+					    sizeof(struct btrfs_item));
+
+	/*
+	 * Low threshold: kept 100 below the high one, or 0 when the high
+	 * threshold is not above 100.  The gap avoids thrashing back and
+	 * forth between the two formats.
+	 */
+	cache->bitmap_low_thresh = cache->bitmap_high_thresh > 100 ?
+				   cache->bitmap_high_thresh - 100 : 0;
+}
 
 static struct btrfs_free_space_info *
 search_free_space_info(struct btrfs_trans_handle *trans,
@@ -47,8 +78,7 @@ search_free_space_info(struct btrfs_trans_handle *trans,
 }
 
 static int free_space_test_bit(struct btrfs_block_group_cache *block_group,
-			       struct btrfs_path *path, u64 offset,
-			       u64 sectorsize)
+			       struct btrfs_path *path, u64 offset)
 {
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
@@ -64,10 +94,1080 @@ static int free_space_test_bit(struct btrfs_block_group_cache *block_group,
 	ASSERT(offset >= found_start && offset < found_end);
 
 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
-	i = (offset - found_start) / sectorsize;
+	i = (offset - found_start) / leaf->fs_info->sectorsize;
 	return !!extent_buffer_test_bit(leaf, ptr, i);
 }
 
+/*
+ * Like btrfs_search_slot(), but leaves the path on the greatest key that is
+ * smaller than @key.  An exact match, or slot 0, trips ASSERT(0) and -EIO.
+ */
+static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_key *key, struct btrfs_path *p,
+				  int ins_len, int cow)
+{
+	int err;
+	int exact_match, at_leaf_start;
+
+	err = btrfs_search_slot(trans, root, key, p, ins_len, cow);
+	if (err < 0)
+		return err;
+
+	exact_match = (err == 0);
+	at_leaf_start = (p->slots[0] == 0);
+	if (exact_match || at_leaf_start) {
+		ASSERT(0);
+		return -EIO;
+	}
+
+	/* Step back from the insertion slot to the item before it */
+	p->slots[0]--;
+
+	return 0;
+}
+
+/* Insert a FREE_SPACE_INFO item (0 extents, no flags) for @block_group. */
+static int add_new_free_space_info(struct btrfs_trans_handle *trans,
+				   struct btrfs_block_group_cache *block_group,
+				   struct btrfs_path *path)
+{
+	struct btrfs_root *fst_root = trans->fs_info->free_space_root;
+	struct btrfs_free_space_info *item;
+	struct extent_buffer *eb;
+	struct btrfs_key info_key = {
+		.objectid = block_group->key.objectid,
+		.type = BTRFS_FREE_SPACE_INFO_KEY,
+		.offset = block_group->key.offset,
+	};
+	int ret;
+
+	ret = btrfs_insert_empty_item(trans, fst_root, path, &info_key,
+				      sizeof(*item));
+	if (ret == 0) {
+		eb = path->nodes[0];
+		item = btrfs_item_ptr(eb, path->slots[0],
+				      struct btrfs_free_space_info);
+		btrfs_set_free_space_extent_count(eb, item, 0);
+		btrfs_set_free_space_flags(eb, item, 0);
+		btrfs_mark_buffer_dirty(eb);
+	}
+
+	btrfs_release_path(path);
+	return ret;
+}
+
+static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
+{	/* one bit per sectorsize unit of @size, rounded up to whole bytes */
+	return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
+}
+
+static unsigned long *alloc_bitmap(u32 bitmap_size)
+{
+	u32 alloc_bytes = round_up(bitmap_size, sizeof(unsigned long));
+	unsigned int saved_flags;
+	unsigned long *bitmap;
+
+	/*
+	 * The buffer is rounded up to whole unsigned longs and zero-filled.
+	 *
+	 * kvmalloc() does not work with GFP_NOFS, so the allocation is
+	 * bracketed by memalloc_nofs_{save,restore}() instead; the free space
+	 * bitmap can be modified in the critical section of a commit.
+	 * TODO: push the save/restore pair to the callers that need it.
+	 */
+	saved_flags = memalloc_nofs_save();
+	bitmap = kvzalloc(alloc_bytes, GFP_KERNEL);
+	memalloc_nofs_restore(saved_flags);
+	return bitmap;
+}
+
+static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
+{
+	u8 *p = ((u8 *)map) + BIT_BYTE(start);
+	const unsigned int size = start + len;
+	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+	while (len - bits_to_set >= 0) {	/* run reaches the end of this byte */
+		*p |= mask_to_set;
+		len -= bits_to_set;
+		bits_to_set = BITS_PER_BYTE;
+		mask_to_set = ~0;	/* later bytes use a full mask */
+		p++;
+	}
+	if (len) {	/* leftover bits that stop short of a byte boundary */
+		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+		*p |= mask_to_set;
+	}
+}
+
+/* Convert a block group's free space items from extents to bitmaps. */
+static int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+				  struct btrfs_block_group_cache *block_group,
+				  struct btrfs_path *path)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_free_space_info *info;
+	struct btrfs_key key, found_key;
+	struct extent_buffer *leaf;
+	unsigned long *bitmap;
+	char *bitmap_cursor;
+	u64 start, end;
+	u64 bitmap_range, i;
+	u32 bitmap_size, flags, expected_extent_count;
+	u32 extent_count = 0;
+	int done = 0, nr, ret;
+
+	bitmap_size = free_space_bitmap_size(block_group->key.offset,
+					     fs_info->sectorsize);
+	bitmap = alloc_bitmap(bitmap_size);
+	if (!bitmap) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	start = block_group->key.objectid;
+	end = block_group->key.objectid + block_group->key.offset;
+
+	key.objectid = end - 1;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	while (!done) {
+		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+		if (ret)
+			goto out;
+
+		leaf = path->nodes[0];
+		nr = 0;	/* number of extent items to delete from this leaf */
+		path->slots[0]++;
+		while (path->slots[0] > 0) {
+			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+				ASSERT(found_key.objectid == block_group->key.objectid);
+				ASSERT(found_key.offset == block_group->key.offset);
+				done = 1;
+				break;
+			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
+				u64 first, last;
+
+				ASSERT(found_key.objectid >= start);
+				ASSERT(found_key.objectid < end);
+				ASSERT(found_key.objectid + found_key.offset <= end);
+
+				first = div_u64(found_key.objectid - start,
+						fs_info->sectorsize);
+				last = div_u64(found_key.objectid + found_key.offset - start,
+					       fs_info->sectorsize);
+				le_bitmap_set(bitmap, first, last - first);
+
+				extent_count++;
+				nr++;
+				path->slots[0]--;
+			} else {
+				ASSERT(0);
+			}
+		}
+
+		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+		if (ret)
+			goto out;
+		btrfs_release_path(path);
+	}
+
+	info = search_free_space_info(trans, fs_info, block_group, path, 1);
+	if (IS_ERR(info)) {
+		ret = PTR_ERR(info);
+		goto out;
+	}
+	leaf = path->nodes[0];
+	flags = btrfs_free_space_flags(leaf, info);
+	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
+	btrfs_set_free_space_flags(leaf, info, flags);
+	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(path);
+
+	if (extent_count != expected_extent_count) {
+		fprintf(stderr,
+			"incorrect extent count for %llu; counted %u, expected %u\n",
+			block_group->key.objectid, extent_count,
+			expected_extent_count);
+		ASSERT(0);
+		ret = -EIO;
+		goto out;
+	}
+
+	bitmap_cursor = (char *)bitmap;
+	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+	i = start;
+	while (i < end) {	/* one bitmap item per bitmap_range chunk */
+		unsigned long ptr;
+		u64 extent_size;
+		u32 data_size;
+
+		extent_size = min(end - i, bitmap_range);
+		data_size = free_space_bitmap_size(extent_size,
+						   fs_info->sectorsize);
+
+		key.objectid = i;
+		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
+		key.offset = extent_size;
+
+		ret = btrfs_insert_empty_item(trans, root, path, &key,
+					      data_size);
+		if (ret)
+			goto out;
+
+		leaf = path->nodes[0];
+		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		write_extent_buffer(leaf, bitmap_cursor, ptr,
+				    data_size);
+		btrfs_mark_buffer_dirty(leaf);
+		btrfs_release_path(path);
+
+		i += extent_size;
+		bitmap_cursor += data_size;
+	}
+
+	ret = 0;
+out:
+	kvfree(bitmap);
+	if (ret)
+		btrfs_abort_transaction(trans, ret);
+	return ret;
+}
+
+/* Convert a block group's free space items from bitmaps to extents. */
+static int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+				  struct btrfs_block_group_cache *block_group,
+				  struct btrfs_path *path)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_free_space_info *info;
+	struct btrfs_key key, found_key;
+	struct extent_buffer *leaf;
+	unsigned long *bitmap;
+	u64 start, end;
+	u32 bitmap_size, flags, expected_extent_count;
+	unsigned long nrbits, start_bit, end_bit;
+	u32 extent_count = 0;
+	int done = 0, nr, ret;
+
+	bitmap_size = free_space_bitmap_size(block_group->key.offset,
+					     fs_info->sectorsize);
+	bitmap = alloc_bitmap(bitmap_size);
+	if (!bitmap) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	start = block_group->key.objectid;
+	end = block_group->key.objectid + block_group->key.offset;
+
+	key.objectid = end - 1;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	while (!done) {
+		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+		if (ret)
+			goto out;
+
+		leaf = path->nodes[0];
+		nr = 0;	/* number of bitmap items to delete from this leaf */
+		path->slots[0]++;
+		while (path->slots[0] > 0) {
+			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+				ASSERT(found_key.objectid == block_group->key.objectid);
+				ASSERT(found_key.offset == block_group->key.offset);
+				done = 1;
+				break;
+			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+				unsigned long ptr;
+				char *bitmap_cursor;
+				u32 bitmap_pos, data_size;
+
+				ASSERT(found_key.objectid >= start);
+				ASSERT(found_key.objectid < end);
+				ASSERT(found_key.objectid + found_key.offset <= end);
+
+				bitmap_pos = div_u64(found_key.objectid - start,
+						     fs_info->sectorsize *
+						     BITS_PER_BYTE);
+				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
+				data_size = free_space_bitmap_size(found_key.offset,
+								   fs_info->sectorsize);
+
+				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
+				read_extent_buffer(leaf, bitmap_cursor, ptr,
+						   data_size);
+
+				nr++;
+				path->slots[0]--;
+			} else {
+				ASSERT(0);
+			}
+		}
+
+		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+		if (ret)
+			goto out;
+		btrfs_release_path(path);
+	}
+
+	info = search_free_space_info(trans, fs_info, block_group, path, 1);
+	if (IS_ERR(info)) {
+		ret = PTR_ERR(info);
+		goto out;
+	}
+	leaf = path->nodes[0];
+	flags = btrfs_free_space_flags(leaf, info);
+	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
+	btrfs_set_free_space_flags(leaf, info, flags);
+	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(path);
+
+	nrbits = div_u64(block_group->key.offset, fs_info->sectorsize);
+	start_bit = find_next_bit_le(bitmap, nrbits, 0);
+
+	while (start_bit < nrbits) {	/* one extent item per run of set bits */
+		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
+		ASSERT(start_bit < end_bit);
+
+		key.objectid = start + start_bit * fs_info->sectorsize;
+		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+		key.offset = (end_bit - start_bit) * fs_info->sectorsize;
+
+		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+		if (ret)
+			goto out;
+		btrfs_release_path(path);
+
+		extent_count++;
+
+		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
+	}
+
+	if (extent_count != expected_extent_count) {
+		fprintf(stderr,
+			"incorrect extent count for %llu; counted %u, expected %u\n",
+			block_group->key.objectid, extent_count,
+			expected_extent_count);
+		ASSERT(0);
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = 0;
+out:
+	kvfree(bitmap);
+	if (ret)
+		btrfs_abort_transaction(trans, ret);
+	return ret;
+}
+
+/*
+ * Adjust a block group's recorded free space extent count by @new_extents
+ * and switch between extent and bitmap format when a threshold is crossed.
+ */
+static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
+					  struct btrfs_block_group_cache *block_group,
+					  struct btrfs_path *path,
+					  int new_extents)
+{
+	struct btrfs_free_space_info *fsi;
+	struct extent_buffer *eb;
+	u32 count;
+	int using_bitmaps;
+
+	if (!new_extents)
+		return 0;
+
+	fsi = search_free_space_info(trans, trans->fs_info, block_group, path,
+				     1);
+	if (IS_ERR(fsi))
+		return PTR_ERR(fsi);
+
+	eb = path->nodes[0];
+	using_bitmaps = !!(btrfs_free_space_flags(eb, fsi) &
+			   BTRFS_FREE_SPACE_USING_BITMAPS);
+	count = btrfs_free_space_extent_count(eb, fsi) + new_extents;
+	btrfs_set_free_space_extent_count(eb, fsi, count);
+	btrfs_mark_buffer_dirty(eb);
+	btrfs_release_path(path);
+
+	if (using_bitmaps) {
+		if (count < block_group->bitmap_low_thresh)
+			return convert_free_space_to_extents(trans, block_group,
+							     path);
+	} else if (count > block_group->bitmap_high_thresh) {
+		return convert_free_space_to_bitmaps(trans, block_group, path);
+	}
+	return 0;
+}
+
+/* Set (bit != 0) or clear the bits of the range in the bitmap item at path. */
+static void free_space_set_bits(struct btrfs_block_group_cache *block_group,
+				struct btrfs_path *path, u64 *start, u64 *size,
+				int bit)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
+	struct btrfs_key key;
+	u64 end = *start + *size;
+	u64 found_start, found_end;
+	unsigned long ptr, first, last;
+
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+
+	found_start = key.objectid;
+	found_end = key.objectid + key.offset;
+	ASSERT(*start >= found_start && *start < found_end);
+	ASSERT(end > found_start);
+	/* Clamp to this bitmap item; any remainder is left to the caller. */
+	if (end > found_end)
+		end = found_end;
+	/* One bit per sector, counted from the start of the bitmap item. */
+	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+	first = (*start - found_start) / fs_info->sectorsize;
+	last = (end - found_start) / fs_info->sectorsize;
+	if (bit)
+		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
+	else
+		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
+	btrfs_mark_buffer_dirty(leaf);
+	/* Report back what is left: advance *start and shrink *size. */
+	*size -= end - *start;
+	*start = end;
+}
+
+/*
+ * We can't use btrfs_next_item() in modify_free_space_bitmap() because
+ * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
+ * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
+ * looking for.
+ */
+static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root, struct btrfs_path *p)
+{
+	struct btrfs_key key;
+	/* Fast path: the next item is in the same leaf. */
+	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
+		p->slots[0]++;
+		return 0;
+	}
+	/* Last item in this leaf: remember its key and drop the path. */
+	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
+	btrfs_release_path(p);
+
+	key.objectid += key.offset;	/* first byte after this bitmap */
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+	/* NOTE(review): presumably lands on the last item <= key - confirm. */
+	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
+}
+
+/*
+ * If remove is 1, then we are removing free space, thus clearing bits in the
+ * bitmap. If remove is 0, then we are adding free space, thus setting bits in
+ * the bitmap.
+ */
+static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
+				    struct btrfs_block_group_cache *block_group,
+				    struct btrfs_path *path,
+				    u64 start, u64 size, int remove)
+{
+	struct btrfs_root *root = trans->fs_info->free_space_root;
+	struct btrfs_key key;
+	u64 end = start + size;
+	u64 cur_start, cur_size;
+	int prev_bit, next_bit;	/* -1: neighbor is outside the block group */
+	int new_extents;
+	int ret;
+
+	/*
+	 * Read the bit for the block immediately before the extent of space if
+	 * that block is within the block group.
+	 */
+	if (start > block_group->key.objectid) {
+		u64 prev_block = start - trans->fs_info->sectorsize;
+
+		key.objectid = prev_block;
+		key.type = (u8)-1;
+		key.offset = (u64)-1;
+
+		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
+		if (ret)
+			goto out;
+
+		prev_bit = free_space_test_bit(block_group, path, prev_block);
+
+		/* The previous block may have been in the previous bitmap. */
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (start >= key.objectid + key.offset) {
+			ret = free_space_next_bitmap(trans, root, path);
+			if (ret)
+				goto out;
+		}
+	} else {
+		key.objectid = start;
+		key.type = (u8)-1;
+		key.offset = (u64)-1;
+
+		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
+		if (ret)
+			goto out;
+
+		prev_bit = -1;
+	}
+
+	/*
+	 * Iterate over all of the bitmaps overlapped by the extent of space,
+	 * clearing/setting bits as required.
+	 */
+	cur_start = start;
+	cur_size = size;
+	while (1) {
+		free_space_set_bits(block_group, path, &cur_start, &cur_size,
+					!remove);
+		if (cur_size == 0)
+			break;
+		ret = free_space_next_bitmap(trans, root, path);
+		if (ret)
+			goto out;
+	}
+
+	/*
+	 * Read the bit for the block immediately after the extent of space if
+	 * that block is within the block group.
+	 */
+	if (end < block_group->key.objectid + block_group->key.offset) {
+		/* The next block may be in the next bitmap. */
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (end >= key.objectid + key.offset) {
+			ret = free_space_next_bitmap(trans, root, path);
+			if (ret)
+				goto out;
+		}
+
+		next_bit = free_space_test_bit(block_group, path, end);
+	} else {
+		next_bit = -1;
+	}
+
+	if (remove) {
+		new_extents = -1;
+		if (prev_bit == 1) {
+			/* Leftover on the left. */
+			new_extents++;
+		}
+		if (next_bit == 1) {
+			/* Leftover on the right. */
+			new_extents++;
+		}
+	} else {
+		new_extents = 1;
+		if (prev_bit == 1) {
+			/* Merging with neighbor on the left. */
+			new_extents--;
+		}
+		if (next_bit == 1) {
+			/* Merging with neighbor on the right. */
+			new_extents--;
+		}
+	}
+	/* Drop the bitmap path before the info item is looked up and updated. */
+	btrfs_release_path(path);
+	ret = update_free_space_extent_count(trans, block_group, path,
+			new_extents);
+
+out:
+	return ret;
+}
+
+static int remove_free_space_extent(struct btrfs_trans_handle *trans,
+				    struct btrfs_block_group_cache *block_group,
+				    struct btrfs_path *path,
+				    u64 start, u64 size)
+{
+	struct btrfs_root *root = trans->fs_info->free_space_root;
+	struct btrfs_key key;
+	u64 found_start, found_end;
+	u64 end = start + size;
+	int new_extents = -1;	/* the found extent is always deleted */
+	int ret;
+
+	key.objectid = start;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+	/* Find the extent item that is expected to contain [start, end). */
+	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+	if (ret)
+		goto out;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
+
+	found_start = key.objectid;
+	found_end = key.objectid + key.offset;
+	ASSERT(start >= found_start && end <= found_end);
+
+	/*
+	 * Okay, now that we've found the free space extent which contains the
+	 * free space that we are removing, there are four cases:
+	 *
+	 * 1. We're using the whole extent: delete the key we found and
+	 * decrement the free space extent count.
+	 * 2. We are using part of the extent starting at the beginning: delete
+	 * the key we found and insert a new key representing the leftover at
+	 * the end. There is no net change in the number of extents.
+	 * 3. We are using part of the extent ending at the end: delete the key
+	 * we found and insert a new key representing the leftover at the
+	 * beginning. There is no net change in the number of extents.
+	 * 4. We are using part of the extent in the middle: delete the key we
+	 * found and insert two new keys representing the leftovers on each
+	 * side. Where we used to have one extent, we now have two, so increment
+	 * the extent count. We may need to convert the block group to bitmaps
+	 * as a result.
+	 */
+
+	/* Delete the existing key (cases 1-4). */
+	ret = btrfs_del_item(trans, root, path);
+	if (ret)
+		goto out;
+
+	/* Add a key for leftovers at the beginning (cases 3 and 4). */
+	if (start > found_start) {
+		key.objectid = found_start;
+		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+		key.offset = start - found_start;
+
+		btrfs_release_path(path);
+		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+		if (ret)
+			goto out;
+		new_extents++;
+	}
+
+	/* Add a key for leftovers at the end (cases 2 and 4). */
+	if (end < found_end) {
+		key.objectid = end;
+		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+		key.offset = found_end - end;
+
+		btrfs_release_path(path);
+		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+		if (ret)
+			goto out;
+		new_extents++;
+	}
+
+	btrfs_release_path(path);
+	ret = update_free_space_extent_count(trans, block_group, path,
+					     new_extents);
+
+out:
+	return ret;
+}
+
+static int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+				  struct btrfs_block_group_cache *block_group,
+				  struct btrfs_path *path, u64 start, u64 size)
+{
+	struct btrfs_free_space_info *info;
+	u32 flags;
+
+	info = search_free_space_info(NULL, trans->fs_info, block_group, path, 0);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+	flags = btrfs_free_space_flags(path->nodes[0], info);
+	btrfs_release_path(path);
+	/* The flags select which representation has to be updated. */
+	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
+		return modify_free_space_bitmap(trans, block_group, path,
+				start, size, 1);
+	} else {
+		return remove_free_space_extent(trans, block_group, path,
+				start, size);
+	}
+}
+
+int remove_from_free_space_tree(struct btrfs_trans_handle *trans, u64 start,
+		u64 size)
+{
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_path *path;
+	int ret;
+
+	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
+		return 0;	/* feature not enabled: nothing to maintain */
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	/* NOTE(review): assumes the range lies in the group found at start. */
+	block_group = btrfs_lookup_block_group(trans->fs_info, start);
+	if (!block_group) {
+		ASSERT(0);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = __remove_from_free_space_tree(trans, block_group, path, start,
+					    size);
+out:
+	btrfs_free_path(path);	/* path is NULL if the allocation failed */
+	if (ret)
+		btrfs_abort_transaction(trans, ret);
+	return ret;
+}
+
+static int add_free_space_extent(struct btrfs_trans_handle *trans,
+				 struct btrfs_block_group_cache *block_group,
+				 struct btrfs_path *path,
+				 u64 start, u64 size)
+{
+	struct btrfs_root *root = trans->fs_info->free_space_root;
+	struct btrfs_key key, new_key;
+	u64 found_start, found_end;
+	u64 end = start + size;
+	int new_extents = 1;	/* case 1; minus one per merged neighbor */
+	int ret;
+
+	/*
+	 * We are adding a new extent of free space, but we need to merge
+	 * extents. There are four cases here:
+	 *
+	 * 1. The new extent does not have any immediate neighbors to merge
+	 * with: add the new key and increment the free space extent count. We
+	 * may need to convert the block group to bitmaps as a result.
+	 * 2. The new extent has an immediate neighbor before it: remove the
+	 * previous key and insert a new key combining both of them. There is no
+	 * net change in the number of extents.
+	 * 3. The new extent has an immediate neighbor after it: remove the next
+	 * key and insert a new key combining both of them. There is no net
+	 * change in the number of extents.
+	 * 4. The new extent has immediate neighbors on both sides: remove both
+	 * of the keys and insert a new key combining all of them. Where we used
+	 * to have two extents, we now have one, so decrement the extent count.
+	 */
+
+	new_key.objectid = start;
+	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+	new_key.offset = size;
+
+	/* Search for a neighbor on the left. */
+	if (start == block_group->key.objectid)
+		goto right;
+	key.objectid = start - 1;	/* last byte before the new extent */
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+	if (ret)
+		goto out;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
+		btrfs_release_path(path);
+		goto right;
+	}
+
+	found_start = key.objectid;
+	found_end = key.objectid + key.offset;
+	ASSERT(found_start >= block_group->key.objectid &&
+	       found_end > block_group->key.objectid);
+	ASSERT(found_start < start && found_end <= start);
+
+	/*
+	 * Delete the neighbor on the left and absorb it into the new key (cases
+	 * 2 and 4).
+	 */
+	if (found_end == start) {
+		ret = btrfs_del_item(trans, root, path);
+		if (ret)
+			goto out;
+		new_key.objectid = found_start;
+		new_key.offset += key.offset;
+		new_extents--;
+	}
+	btrfs_release_path(path);
+right:
+	/* Search for a neighbor on the right. */
+	if (end == block_group->key.objectid + block_group->key.offset)
+		goto insert;
+	key.objectid = end;	/* first byte after the new extent */
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+	if (ret)
+		goto out;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
+		btrfs_release_path(path);
+		goto insert;
+	}
+
+	found_start = key.objectid;
+	found_end = key.objectid + key.offset;
+	ASSERT(found_start >= block_group->key.objectid &&
+			found_end > block_group->key.objectid);
+	ASSERT((found_start < start && found_end <= start) ||
+			(found_start >= end && found_end > end));
+
+	/*
+	 * Delete the neighbor on the right and absorb it into the new key
+	 * (cases 3 and 4).
+	 */
+	if (found_start == end) {
+		ret = btrfs_del_item(trans, root, path);
+		if (ret)
+			goto out;
+		new_key.offset += key.offset;
+		new_extents--;
+	}
+	btrfs_release_path(path);
+
+insert:
+	/* Insert the new key (cases 1-4). */
+	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
+	if (ret)
+		goto out;
+
+	btrfs_release_path(path);
+	ret = update_free_space_extent_count(trans, block_group, path,
+			new_extents);
+
+out:
+	return ret;
+}
+
+static int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
+			     struct btrfs_block_group_cache *block_group,
+			     struct btrfs_path *path, u64 start, u64 size)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_free_space_info *info;
+	u32 flags;
+
+	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+	flags = btrfs_free_space_flags(path->nodes[0], info);
+	btrfs_release_path(path);
+	/* The flags select which representation has to be updated. */
+	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
+		return modify_free_space_bitmap(trans, block_group, path,
+				start, size, 0);
+	} else {
+		return add_free_space_extent(trans, block_group, path, start,
+				size);
+	}
+}
+
+int add_to_free_space_tree(struct btrfs_trans_handle *trans, u64 start,
+		u64 size)
+{
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_path *path;
+	int ret;
+
+	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
+		return 0;	/* feature not enabled: nothing to maintain */
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	/* NOTE(review): assumes the range lies in the group found at start. */
+	block_group = btrfs_lookup_block_group(trans->fs_info, start);
+	if (!block_group) {
+		ASSERT(0);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
+out:
+	btrfs_free_path(path);	/* path is NULL if the allocation failed */
+	if (ret)
+		btrfs_abort_transaction(trans, ret);
+	return ret;
+}
+
+int populate_free_space_tree(struct btrfs_trans_handle *trans,
+			     struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_root *extent_root = trans->fs_info->extent_root;
+	struct btrfs_path *path, *path2;
+	struct btrfs_key key;
+	u64 start, end;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->reada = READA_FORWARD;
+
+	path2 = btrfs_alloc_path();
+	if (!path2) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+	/* path walks the extent tree, path2 serves free space tree updates. */
+	ret = add_new_free_space_info(trans, block_group, path2);
+	if (ret)
+		goto out;
+
+	/*
+	 * Iterate through all of the extent and metadata items in this block
+	 * group, adding the free space between them and the free space at the
+	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
+	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
+	 * contained in.
+	 */
+	key.objectid = block_group->key.objectid;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
+	if (ret < 0)
+		goto out;
+	ASSERT(ret == 0);
+	/* 'start' tracks the end of the last allocated extent seen so far. */
+	start = block_group->key.objectid;
+	end = block_group->key.objectid + block_group->key.offset;
+	while (1) {
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+				key.type == BTRFS_METADATA_ITEM_KEY) {
+			if (key.objectid >= end)
+				break;
+
+			if (start < key.objectid) {
+				ret = __add_to_free_space_tree(trans,
+						block_group, path2, start,
+						key.objectid - start);
+				if (ret)
+					goto out;
+			}
+			start = key.objectid;
+			if (key.type == BTRFS_METADATA_ITEM_KEY)
+				start += trans->fs_info->nodesize; /* not key.offset */
+			else
+				start += key.offset;
+		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+			if (key.objectid != block_group->key.objectid)
+				break;
+		}
+
+		ret = btrfs_next_item(extent_root, path);
+		if (ret < 0)
+			goto out;
+		if (ret)
+			break;
+	}
+	if (start < end) {	/* free space after the last extent */
+		ret = __add_to_free_space_tree(trans, block_group, path2,
+				start, end - start);
+		if (ret)
+			goto out;
+	}
+
+	ret = 0;
+out:
+	btrfs_free_path(path2);
+	btrfs_free_path(path);
+	return ret;
+}
+
+int remove_block_group_free_space(struct btrfs_trans_handle *trans,
+				  struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_root *root = trans->fs_info->free_space_root;
+	struct btrfs_path *path;
+	struct btrfs_key key, found_key;
+	struct extent_buffer *leaf;
+	u64 start, end;
+	int done = 0, nr;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	start = block_group->key.objectid;
+	end = block_group->key.objectid + block_group->key.offset;
+	/* Search from the highest possible key inside the block group. */
+	key.objectid = end - 1;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	while (!done) {
+		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+		if (ret)
+			goto out;
+		/* Walk backwards in this leaf, counting the items to delete. */
+		leaf = path->nodes[0];
+		nr = 0;
+		path->slots[0]++;	/* slots[0] - 1 is the item inspected */
+		while (path->slots[0] > 0) {
+			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+				ASSERT(found_key.objectid == block_group->key.objectid);
+				ASSERT(found_key.offset == block_group->key.offset);
+				done = 1;	/* info item reached: last batch */
+				nr++;
+				path->slots[0]--;
+				break;
+			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
+				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+				ASSERT(found_key.objectid >= start);
+				ASSERT(found_key.objectid < end);
+				ASSERT(found_key.objectid + found_key.offset <= end);
+				nr++;
+				path->slots[0]--;
+			} else {
+				ASSERT(0);
+			}
+		}
+		/* Delete the nr items starting at the lowest slot reached. */
+		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+		if (ret)
+			goto out;
+		btrfs_release_path(path);
+	}
+
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	if (ret)
+		btrfs_abort_transaction(trans, ret);
+	return ret;
+}
 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root)
 {
@@ -204,8 +1304,8 @@ static int load_free_space_bitmaps(struct btrfs_fs_info *fs_info,
 
 		offset = key.objectid;
 		while (offset < key.objectid + key.offset) {
-			bit = free_space_test_bit(block_group, path, offset,
-						  fs_info->sectorsize);
+			bit = free_space_test_bit(block_group, path, offset);
+
 			if (prev_bit == 0 && bit == 1) {
 				extent_start = offset;
 			} else if (prev_bit == 1 && bit == 0) {
@@ -276,7 +1376,7 @@ static int load_free_space_extents(struct btrfs_fs_info *fs_info,
 		if (key.objectid + key.offset > end) {
 			fprintf(stderr,
 	"free space extent ends at %llu, beyond end of block group %llu-%llu\n",
-				key.objectid, start, end);
+				key.objectid + key.offset, start, end);
 			(*errors)++;
 			break;
 		}
@@ -320,6 +1420,143 @@ out:
 	return ret;
 }
 
+static struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
+				     struct btrfs_fs_info *fs_info,
+				     u64 objectid)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	int ret = 0;
+
+	root = kzalloc(sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+
+	btrfs_setup_root(root, fs_info, objectid);
+	root->root_key.objectid = objectid;
+	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root->root_key.offset = 0;
+	/* Allocate the block that becomes the root leaf of the new tree. */
+	leaf = btrfs_alloc_free_block(trans, root, fs_info->nodesize, objectid,
+			NULL, 0, 0, 0);
+	if (IS_ERR(leaf)) {
+		ret = PTR_ERR(leaf);
+		leaf = NULL;
+		goto fail;
+	}
+	/* Initialize the leaf header. */
+	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
+	btrfs_set_header_bytenr(leaf, leaf->start);
+	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
+	btrfs_set_header_owner(leaf, objectid);
+	root->node = leaf;
+	write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
+	write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
+			    btrfs_header_chunk_tree_uuid(leaf),
+			    BTRFS_UUID_SIZE);
+	btrfs_mark_buffer_dirty(leaf);
+	/* commit_root points at the same leaf as node. */
+	extent_buffer_get(root->node);
+	root->commit_root = root->node;
+	root->track_dirty = 1;
+	/* Fill in the root item describing the new, empty level-0 tree. */
+	root->root_item.flags = 0;
+	root->root_item.byte_limit = 0;
+	btrfs_set_root_bytenr(&root->root_item, leaf->start);
+	btrfs_set_root_generation(&root->root_item, trans->transid);
+	btrfs_set_root_level(&root->root_item, 0);
+	btrfs_set_root_refs(&root->root_item, 1);
+	btrfs_set_root_used(&root->root_item, leaf->len);
+	btrfs_set_root_last_snapshot(&root->root_item, 0);
+	btrfs_set_root_dirid(&root->root_item, 0);
+	memset(root->root_item.uuid, 0, BTRFS_UUID_SIZE);
+	root->root_item.drop_level = 0;
+	/* Insert the root item into fs_info->tree_root. */
+	key.objectid = objectid;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
+	if (ret)
+		goto fail;
+
+	return root;
+
+fail:
+	if (leaf)
+		free_extent_buffer(leaf);
+	/* NOTE(review): the extent_buffer_get() above is not undone here. */
+	kfree(root);
+	return ERR_PTR(ret);
+}
+
+#define btrfs_set_fs_compat_ro(__fs_info, opt) \
+	__btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+/* Set 'flag' in the compat_ro flags of fs_info->super_copy if not yet set. */
+static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
+					    u64 flag)
+{
+	struct btrfs_super_block *disk_super = fs_info->super_copy;
+	u64 features = btrfs_super_compat_ro_flags(disk_super);
+
+	/*
+	 * The flags are read and tested exactly once; reading and testing
+	 * them a second time right after the first test cannot yield a
+	 * different result in this code.
+	 */
+	if (features & flag)
+		return;
+	btrfs_set_super_compat_ro_flags(disk_super, features | flag);
+}
+
+int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *free_space_root;
+	struct btrfs_block_group_cache *block_group;
+	u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
+	int ret;
+
+	trans = btrfs_start_transaction(tree_root, 0);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	free_space_root = btrfs_create_tree(trans, fs_info,
+					    BTRFS_FREE_SPACE_TREE_OBJECTID);
+	if (IS_ERR(free_space_root)) {
+		ret = PTR_ERR(free_space_root);
+		goto abort;
+	}
+	fs_info->free_space_root = free_space_root;
+	/* Populate free space for each block group found from 'start' on. */
+	do {
+		block_group = btrfs_lookup_first_block_group(fs_info, start);
+		if (!block_group)
+			break;
+		start = block_group->key.objectid + block_group->key.offset;
+		ret = populate_free_space_tree(trans, block_group);
+		if (ret)
+			goto abort;
+	} while (block_group);
+	/* Record in the superblock copy that the tree exists and is valid. */
+	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+
+	ret = btrfs_commit_transaction(trans, tree_root);
+	if (ret)
+		return ret;
+
+	return 0;
+
+abort:
+	btrfs_abort_transaction(trans, ret);
+	return ret;
+}
+
 int load_free_space_tree(struct btrfs_fs_info *fs_info,
 			 struct btrfs_block_group_cache *block_group)
 {
@@ -332,7 +1569,7 @@ int load_free_space_tree(struct btrfs_fs_info *fs_info,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->reada = 1;
+	path->reada = READA_BACK;
 
 	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
 	if (IS_ERR(info)) {
diff --git a/free-space-tree.h b/free-space-tree.h
index 4845f13e..1af12a81 100644
--- a/free-space-tree.h
+++ b/free-space-tree.h
@@ -19,8 +19,20 @@
 #ifndef __BTRFS_FREE_SPACE_TREE_H__
 #define __BTRFS_FREE_SPACE_TREE_H__
 
+#define BTRFS_FREE_SPACE_BITMAP_SIZE 256
+#define BTRFS_FREE_SPACE_BITMAP_BITS (BTRFS_FREE_SPACE_BITMAP_SIZE * BITS_PER_BYTE)
+
 int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
 int load_free_space_tree(struct btrfs_fs_info *fs_info,
 			 struct btrfs_block_group_cache *block_group);
+int populate_free_space_tree(struct btrfs_trans_handle *trans,
+			     struct btrfs_block_group_cache *block_group);
+int remove_block_group_free_space(struct btrfs_trans_handle *trans,
+				  struct btrfs_block_group_cache *block_group);
+int add_to_free_space_tree(struct btrfs_trans_handle *trans, u64 start,
+			   u64 size);
+int remove_from_free_space_tree(struct btrfs_trans_handle *trans, u64 start,
+				u64 size);
+int btrfs_create_free_space_tree(struct btrfs_fs_info *info);
 
 #endif
diff --git a/help.c b/help.c
index f1dd3946..b10b2b05 100644
--- a/help.c
+++ b/help.c
@@ -108,7 +108,7 @@ void clean_args_no_options(int argc, char *argv[], const char * const *usagestr)
 
 /*
  * Same as clean_args_no_options but pass through arguments that could look
- * like short options. Eg. reisze which takes a negative resize argument like
+ * like short options. Eg. resize which takes a negative resize argument like
  * '-123M' .
  *
  * This accepts only two forms:
diff --git a/image/main.c b/image/main.c
index 351c5a25..39d5265d 100644
--- a/image/main.c
+++ b/image/main.c
@@ -499,7 +499,8 @@ static int write_buffers(struct metadump_struct *md, u64 *next)
 	}
 
 	if (err) {
-		error("one of the threads failed: %s", strerror(-err));
+		errno = -err;
+		error("one of the threads failed: %m");
 		goto out;
 	}
 
@@ -690,10 +691,12 @@ static int flush_pending(struct metadump_struct *md, int done)
 	}
 	if (md->num_items >= ITEMS_PER_CLUSTER || done) {
 		ret = write_buffers(md, &start);
-		if (ret)
-			error("unable to write buffers: %s", strerror(-ret));
-		else
+		if (ret) {
+			errno = -ret;
+			error("unable to write buffers: %m");
+		} else {
 			meta_cluster_init(md, start);
+		}
 	}
 	pthread_mutex_unlock(&md->mutex);
 	return ret;
@@ -2093,6 +2096,10 @@ static int fixup_devices(struct btrfs_fs_info *fs_info,
 	u64 devid, cur_devid;
 	int ret;
 
+	if (btrfs_super_log_root(fs_info->super_copy)) {
+		warning(
+		"log tree detected, its generation will not match superblock");
+	}
 	trans = btrfs_start_transaction(fs_info->tree_root, 1);
 	if (IS_ERR(trans)) {
 		error("cannot starting transaction %ld", PTR_ERR(trans));
@@ -2373,10 +2380,12 @@ static int update_disk_super_on_device(struct btrfs_fs_info *info,
 
 	ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
 	if (ret != BTRFS_SUPER_INFO_SIZE) {
-		if (ret < 0)
-			error("cannot write superblock: %s", strerror(ret));
-		else
+		if (ret < 0) {
+			/* pwrite64() returned -1 and has already set errno */
+			error("cannot write superblock: %m");
+		} else {
 			error("cannot write superblock");
+		}
 		ret = -EIO;
 		goto out;
 	}
@@ -2487,7 +2496,7 @@ int main(int argc, char *argv[])
 	} else {
 		if (walk_trees || sanitize != SANITIZE_NONE || compress_level) {
 			error(
-			"useing -w, -s, -c options for restore makes no sense");
+			"using -w, -s, -c options for restore makes no sense");
 			usage_error++;
 		}
 		if (multi_devices && dev_cnt < 2) {
@@ -2531,8 +2540,8 @@ int main(int argc, char *argv[])
 	if (create) {
 		ret = check_mounted(source);
 		if (ret < 0) {
-			warning("unable to check mount status of: %s",
-					strerror(-ret));
+			errno = -ret;
+			warning("unable to check mount status of: %m");
 		} else if (ret) {
 			warning("%s already mounted, results may be inaccurate",
 					source);
diff --git a/image/metadump.h b/image/metadump.h
index b9275e95..8ace60f5 100644
--- a/image/metadump.h
+++ b/image/metadump.h
@@ -55,7 +55,7 @@ struct fs_chunk {
 	u64 logical;
 	u64 physical;
 	/*
-	 * physical_dup only store additonal physical for BTRFS_BLOCK_GROUP_DUP
+	 * physical_dup only store additional physical for BTRFS_BLOCK_GROUP_DUP
 	 * currently restore only support single and DUP
 	 * TODO: modify this structure and the function related to this
 	 * structure for support RAID*
diff --git a/ioctl.h b/ioctl.h
index 709e996f..4bf91ddb 100644
--- a/ioctl.h
+++ b/ioctl.h
@@ -320,6 +320,22 @@ struct btrfs_ioctl_ino_lookup_args {
 };
 BUILD_ASSERT(sizeof(struct btrfs_ioctl_ino_lookup_args) == 4096);
 
+#define BTRFS_INO_LOOKUP_USER_PATH_MAX	(4080 - BTRFS_VOL_NAME_MAX - 1)
+struct btrfs_ioctl_ino_lookup_user_args {
+	/* in, inode number containing the subvolume of 'treeid' */
+	__u64 dirid;
+	/* in */
+	__u64 treeid;
+	/* out, name of the subvolume of 'treeid' */
+	char name[BTRFS_VOL_NAME_MAX + 1];
+	/*
+	 * out, constructed path from the directory with which the ioctl is
+	 * called to dirid
+	 */
+	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
+};
+BUILD_ASSERT(sizeof(struct btrfs_ioctl_ino_lookup_user_args) == 4096);
+
 struct btrfs_ioctl_search_key {
 	/* which root are we searching.  0 is the tree of tree roots */
 	__u64 tree_id;
@@ -672,6 +688,83 @@ BUILD_ASSERT(sizeof(struct btrfs_ioctl_send_args_64) == 72);
 
 #define BTRFS_IOC_SEND_64_COMPAT_DEFINED 1
 
+/*
+ * Information about a fs tree root.
+ *
+ * All items are filled by the ioctl
+ */
+struct btrfs_ioctl_get_subvol_info_args {
+	/* Id of this subvolume */
+	__u64 treeid;
+
+	/* Name of this subvolume, used to get the real name at mount point */
+	char name[BTRFS_VOL_NAME_MAX + 1];
+
+	/*
+	 * Id of the subvolume which contains this subvolume.
+	 * Zero for top-level subvolume or a deleted subvolume.
+	 */
+	__u64 parent_id;
+
+	/*
+	 * Inode number of the directory which contains this subvolume.
+	 * Zero for top-level subvolume or a deleted subvolume
+	 */
+	__u64 dirid;
+
+	/* Latest transaction id of this subvolume */
+	__u64 generation;
+
+	/* Flags of this subvolume */
+	__u64 flags;
+
+	/* UUID of this subvolume */
+	__u8 uuid[BTRFS_UUID_SIZE];
+
+	/*
+	 * UUID of the subvolume of which this subvolume is a snapshot.
+	 * All zero for a non-snapshot subvolume.
+	 */
+	__u8 parent_uuid[BTRFS_UUID_SIZE];
+
+	/*
+	 * UUID of the subvolume from which this subvolume was received.
+	 * All zero for non-received subvolume.
+	 */
+	__u8 received_uuid[BTRFS_UUID_SIZE];
+
+	/* Transaction id indicating when change/create/send/receive happened */
+	__u64 ctransid;
+	__u64 otransid;
+	__u64 stransid;
+	__u64 rtransid;
+	/* Time corresponding to c/o/s/rtransid */
+	struct btrfs_ioctl_timespec ctime;
+	struct btrfs_ioctl_timespec otime;
+	struct btrfs_ioctl_timespec stime;
+	struct btrfs_ioctl_timespec rtime;
+
+	/* Must be zero */
+	__u64 reserved[8];
+};
+
+#define BTRFS_MAX_ROOTREF_BUFFER_NUM			255
+struct btrfs_ioctl_get_subvol_rootref_args {
+	/* in/out, minimum id of rootref's treeid to be searched */
+	__u64 min_treeid;
+
+	/* out */
+	struct {
+		__u64 treeid;
+		__u64 dirid;
+	} rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
+
+	/* out, number of found items */
+	__u8 num_items;
+	__u8 align[7];
+};
+BUILD_ASSERT(sizeof(struct btrfs_ioctl_get_subvol_rootref_args) == 4096);
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	notused,
@@ -828,6 +921,12 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
                                   struct btrfs_ioctl_feature_flags[3])
 #define BTRFS_IOC_RM_DEV_V2	_IOW(BTRFS_IOCTL_MAGIC, 58, \
 				   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \
+				struct btrfs_ioctl_get_subvol_info_args)
+#define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \
+				struct btrfs_ioctl_get_subvol_rootref_args)
+#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
+				struct btrfs_ioctl_ino_lookup_user_args)
 #ifdef __cplusplus
 }
 #endif
diff --git a/kerncompat.h b/kerncompat.h
index fa96715f..08133164 100644
--- a/kerncompat.h
+++ b/kerncompat.h
@@ -263,6 +263,16 @@ static inline int IS_ERR_OR_NULL(const void *ptr)
 	return !ptr || IS_ERR(ptr);
 }
 
+#define div_u64(x, y) ((x) / (y))
+
+/**
+ * __swap - swap values of @a and @b
+ * @a: first value
+ * @b: second value
+ */
+#define __swap(a, b) \
+        do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
 /*
  * This looks more complex than it should be. But we need to
  * get the type for the ~ right in round_down (it needs to be
@@ -289,6 +299,10 @@ static inline int IS_ERR_OR_NULL(const void *ptr)
 #define kfree(x) free(x)
 #define vmalloc(x) malloc(x)
 #define vfree(x) free(x)
+#define kvzalloc(x, y) kzalloc(x,y)
+#define kvfree(x) free(x)
+#define memalloc_nofs_save() (0)
+#define memalloc_nofs_restore(x)	((void)(x))
 
 #ifndef BTRFS_DISABLE_BACKTRACE
 static inline void assert_trace(const char *assertion, const char *filename,
diff --git a/kernel-lib/bitops.h b/kernel-lib/bitops.h
index 5b35f9fc..2c51a265 100644
--- a/kernel-lib/bitops.h
+++ b/kernel-lib/bitops.h
@@ -2,6 +2,8 @@
 #define _PERF_LINUX_BITOPS_H_
 
 #include <linux/kernel.h>
+#include <endian.h>
+#include "internal.h"
 
 #ifndef DIV_ROUND_UP
 #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
@@ -109,116 +111,146 @@ static __always_inline unsigned long __ffs(unsigned long word)
 
 #define ffz(x) __ffs(~(x))
 
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
+#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
+
 /*
- * Find the first set bit in a memory region.
+ * This is a common helper function for find_next_bit, find_next_zero_bit, and
+ * find_next_and_bit. The differences are:
+ *  - The "invert" argument, which is XORed with each fetched word before
+ *    searching it for one bits.
+ *  - The optional "addr2", which is anded with "addr1" if present.
  */
-static inline unsigned long
-find_first_bit(const unsigned long *addr, unsigned long size)
+static inline unsigned long _find_next_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long nbits,
+		unsigned long start, unsigned long invert)
 {
-	const unsigned long *p = addr;
-	unsigned long result = 0;
 	unsigned long tmp;
 
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
+	if (start >= nbits)
+		return nbits;
+
+	tmp = addr1[start / BITS_PER_LONG];
+	if (addr2)
+		tmp &= addr2[start / BITS_PER_LONG];
+	tmp ^= invert;
+
+	/* Handle 1st word. */
+	tmp &= BITMAP_FIRST_WORD_MASK(start);
+	start = round_down(start, BITS_PER_LONG);
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = addr1[start / BITS_PER_LONG];
+		if (addr2)
+			tmp &= addr2[start / BITS_PER_LONG];
+		tmp ^= invert;
 	}
-	if (!size)
-		return result;
 
-	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found:
-	return result + __ffs(tmp);
+	return min(start + __ffs(tmp), nbits);
 }
 
 /*
  * Find the next set bit in a memory region.
  */
-static inline unsigned long
-find_next_bit(const unsigned long *addr, unsigned long size,
-	      unsigned long offset)
+static inline unsigned long find_next_bit(const unsigned long *addr,
+					  unsigned long size,
+					  unsigned long offset)
 {
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
+	return _find_next_bit(addr, NULL, size, offset, 0UL);
+}
 
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
+static inline unsigned long find_next_zero_bit(const unsigned long *addr,
+					       unsigned long size,
+					       unsigned long offset)
+{
+	return _find_next_bit(addr, NULL, size, offset, ~0UL);
+}
 
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+
+static inline unsigned long ext2_swab(const unsigned long y)
+{
+#if BITS_PER_LONG == 64
+	return (unsigned long) bswap_64((u64) y);
+#elif BITS_PER_LONG == 32
+	return (unsigned long) bswap_32((u32) y);
+#else
+#error BITS_PER_LONG not defined
+#endif
 }
 
-/*
- * This implementation of find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h.
- */
-static inline unsigned long
-find_next_zero_bit(const unsigned long *addr, unsigned long size,
-		   unsigned long offset)
+static inline unsigned long _find_next_bit_le(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long nbits,
+		unsigned long start, unsigned long invert)
 {
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
 	unsigned long tmp;
 
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp |= ~0UL >> (BITS_PER_LONG - offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
+	if (start >= nbits)
+		return nbits;
+
+	tmp = addr1[start / BITS_PER_LONG];
+	if (addr2)
+		tmp &= addr2[start / BITS_PER_LONG];
+	tmp ^= invert;
+
+	/* Handle 1st word. */
+	tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
+	start = round_down(start, BITS_PER_LONG);
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = addr1[start / BITS_PER_LONG];
+		if (addr2)
+			tmp &= addr2[start / BITS_PER_LONG];
+		tmp ^= invert;
 	}
-	if (!size)
-		return result;
-	tmp = *p;
 
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + ffz(tmp);
+	return min(start + __ffs(ext2_swab(tmp)), nbits);
+}
+
+static inline unsigned long find_next_zero_bit_le(const void *addr, unsigned long size,
+		unsigned long offset)
+{
+	return _find_next_bit_le(addr, NULL, size, offset, ~0UL);
 }
+
+
+static inline unsigned long find_next_bit_le(const void *addr, unsigned long size,
+		unsigned long offset)
+{
+	return _find_next_bit_le(addr, NULL, size, offset, 0UL);
+}
+
+#else
+
+static inline unsigned long find_next_zero_bit_le(const void *addr,
+                unsigned long size, unsigned long offset)
+{
+        return find_next_zero_bit(addr, size, offset);
+}
+
+static inline unsigned long find_next_bit_le(const void *addr,
+                unsigned long size, unsigned long offset)
+{
+        return find_next_bit(addr, size, offset);
+}
+
+static inline unsigned long find_first_zero_bit_le(const void *addr,
+                unsigned long size)
+{
+        return find_first_zero_bit(addr, size);
+}
+
+#endif
+
 #endif
diff --git a/kernel-lib/raid56.c b/kernel-lib/raid56.c
index e3a9339e..cd5ee818 100644
--- a/kernel-lib/raid56.c
+++ b/kernel-lib/raid56.c
@@ -330,7 +330,7 @@ int raid56_recov(int nr_devs, size_t stripe_len, u64 profile, int dest1,
 			return 0;
 		}
 
-		/* Regerneate data from P */
+		/* Regenerate data from P */
 		return raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
 	}
 
diff --git a/kernel-lib/rbtree.h b/kernel-lib/rbtree.h
index 47b662a3..6d689075 100644
--- a/kernel-lib/rbtree.h
+++ b/kernel-lib/rbtree.h
@@ -19,7 +19,7 @@
   linux/include/linux/rbtree.h
 
   To use rbtrees you'll have to implement your own insert and search cores.
-  This will avoid us to use callbacks and to drop drammatically performances.
+  This will avoid us to use callbacks and to drop dramatically performances.
   I know it's not the cleaner way,  but in C (not in C++) to get
   performances and genericity...
 
diff --git a/libbtrfsutil/README.md b/libbtrfsutil/README.md
index 0c8eba44..8abb426d 100644
--- a/libbtrfsutil/README.md
+++ b/libbtrfsutil/README.md
@@ -6,6 +6,425 @@ the LGPL. libbtrfsutil provides interfaces for a subset of the operations
 offered by the `btrfs` command line utility. It also includes official Python
 bindings (Python 3 only).
 
+API Overview
+------------
+
+This section provides an overview of the interfaces available in libbtrfsutil
+as well as example usages. Detailed documentation for the C API can be found in
+[`btrfsutil.h`](btrfsutil.h). Detailed documentation for the Python bindings is
+available with `pydoc3 btrfsutil` or in the interpreter:
+
+```
+>>> import btrfsutil
+>>> help(btrfsutil)
+```
+
+Many functions in the C API have a variant taking a path and a variant taking a
+file descriptor. The latter has the same name as the former with an `_fd`
+suffix. The Python bindings for these functions can take a path, a file object,
+or a file descriptor.
+
+Error handling is omitted from most of these examples for brevity. Please
+handle errors in production code.
+
+### Error Handling
+
+In the C API, all functions that can return an error return an `enum
+btrfs_util_error` and set `errno`. `BTRFS_UTIL_OK` (zero) is returned on
+success. `btrfs_util_strerror()` converts an error code to a string
+description suitable for human-friendly error reporting.
+
+```c
+enum btrfs_util_error err;
+
+err = btrfs_util_sync("/");
+if (err)
+	fprintf(stderr, "%s: %m\n", btrfs_util_strerror(err));
+```
+
+In the Python bindings, functions may raise a `BtrfsUtilError`, which is a
+subclass of `OSError` with an added `btrfsutilerror` error code member. Error
+codes are available as `ERROR_*` constants.
+
+```python
+try:
+    btrfsutil.sync('/')
+except btrfsutil.BtrfsUtilError as e:
+    print(e, file=sys.stderr)
+```
+
+### Filesystem Operations
+
+There are several operations which act on the entire filesystem.
+
+#### Sync
+
+Btrfs can commit all caches for a specific filesystem to disk.
+
+`btrfs_util_sync()` forces a sync on the filesystem containing the given file
+and waits for it to complete.
+
+`btrfs_util_wait_sync()` waits for a previously started transaction to
+complete. The transaction is specified by ID, which may be zero to indicate
+the current transaction.
+
+`btrfs_util_start_sync()` asynchronously starts a sync and returns a
+transaction ID which can then be passed to `btrfs_util_wait_sync()`.
+
+```c
+uint64_t transid;
+btrfs_util_sync("/");
+btrfs_util_start_sync("/", &transid);
+btrfs_util_wait_sync("/", transid);
+btrfs_util_wait_sync("/", 0);
+```
+
+```python
+btrfsutil.sync('/')
+transid = btrfsutil.start_sync('/')
+btrfsutil.wait_sync('/', transid)
+btrfsutil.wait_sync('/')  # equivalent to wait_sync('/', 0)
+```
+
+All of these functions have `_fd` variants.
+
+The equivalent `btrfs-progs` command is `btrfs filesystem sync`.
+
+### Subvolume Operations
+
+Functions which take a file and a subvolume ID can be used in two ways. If zero
+is given as the subvolume ID, then the given file is used as the subvolume.
+Otherwise, the given file can be any file in the filesystem, and the subvolume
+with the given ID is used.
+
+#### Subvolume Information
+
+`btrfs_util_is_subvolume()` returns whether a given file is a subvolume.
+
+`btrfs_util_subvolume_id()` returns the ID of the subvolume containing the
+given file.
+
+```c
+enum btrfs_util_error err;
+err = btrfs_util_is_subvolume("/subvol");
+if (!err)
+	printf("Subvolume\n");
+else if (err == BTRFS_UTIL_ERROR_NOT_BTRFS || err == BTRFS_UTIL_ERROR_NOT_SUBVOLUME)
+	printf("Not subvolume\n");
+uint64_t id;
+btrfs_util_subvolume_id("/subvol", &id);
+```
+
+```python
+if btrfsutil.is_subvolume('/subvol'):
+    print('Subvolume')
+else:
+    print('Not subvolume')
+id_ = btrfsutil.subvolume_id('/subvol')
+```
+
+`btrfs_util_subvolume_path()` returns the path of the subvolume with the given
+ID relative to the filesystem root. This requires `CAP_SYS_ADMIN`. The path
+must be freed with `free()`.
+
+```c
+char *path;
+btrfs_util_subvolume_path("/", 256, &path);
+free(path);
+btrfs_util_subvolume_path("/subvol", 0, &path);
+free(path);
+```
+
+```python
+path = btrfsutil.subvolume_path('/', 256)
+path = btrfsutil.subvolume_path('/subvol')  # equivalent to subvolume_path('/subvol', 0)
+```
+
+`btrfs_util_subvolume_info()` returns information (including ID, parent ID,
+UUID) about a subvolume. In the C API, this is returned as a `struct
+btrfs_util_subvolume_info`. The Python bindings use a `SubvolumeInfo` object.
+
+This requires `CAP_SYS_ADMIN` unless the given subvolume ID is zero and the
+kernel supports the `BTRFS_IOC_GET_SUBVOL_INFO` ioctl (added in 4.18).
+
+The equivalent `btrfs-progs` command is `btrfs subvolume show`.
+
+```c
+struct btrfs_util_subvolume_info info;
+btrfs_util_subvolume_info("/", 256, &info);
+btrfs_util_subvolume_info("/subvol", 0, &info);
+```
+
+```python
+info = btrfsutil.subvolume_info('/', 256)
+info = btrfsutil.subvolume_info('/subvol')  # equivalent to subvolume_info('/subvol', 0)
+```
+
+All of these functions have `_fd` variants.
+
+#### Enumeration
+
+An iterator interface is provided for enumerating subvolumes on a filesystem.
+In the C API, a `struct btrfs_util_subvolume_iterator` is initialized by
+`btrfs_util_create_subvolume_iterator()`, which takes a top subvolume to
+enumerate under and flags. Currently, the only flag is to specify post-order
+traversal instead of the default pre-order. This function has an `_fd` variant.
+
+`btrfs_util_destroy_subvolume_iterator()` must be called to free a previously
+created `struct btrfs_util_subvolume_iterator`.
+
+`btrfs_util_subvolume_iterator_fd()` returns the file descriptor opened by
+`btrfs_util_create_subvolume_iterator()` which can be used for other functions.
+
+`btrfs_util_subvolume_iterator_next()` returns the path (relative to the top
+subvolume that the iterator was created with) and ID of the next subvolume.
+`btrfs_util_subvolume_iterator_next_info()` returns a `struct
+btrfs_util_subvolume_info` instead of the ID. It is slightly more efficient than
+doing separate `btrfs_util_subvolume_iterator_next()` and
+`btrfs_util_subvolume_info()` calls if the subvolume information is needed. The
+path returned by these functions must be freed with `free()`. When there are no
+more subvolumes, they return `BTRFS_UTIL_ERROR_STOP_ITERATION`.
+
+```c
+struct btrfs_util_subvolume_iterator *iter;
+enum btrfs_util_error err;
+char *path;
+uint64_t id;
+struct btrfs_util_subvolume_info info;
+
+btrfs_util_create_subvolume_iterator("/", 256, 0, &iter);
+/*
+ * This is just an example use-case for btrfs_util_subvolume_iterator_fd(). It
+ * is not necessary.
+ */
+btrfs_util_sync_fd(btrfs_util_subvolume_iterator_fd(iter));
+while (!(err = btrfs_util_subvolume_iterator_next(iter, &path, &id))) {
+	printf("%" PRIu64 " %s\n", id, path);
+	free(path);
+}
+btrfs_util_destroy_subvolume_iterator(iter);
+
+btrfs_util_create_subvolume_iterator("/subvol", 0,
+				     BTRFS_UTIL_SUBVOLUME_ITERATOR_POST_ORDER,
+				     &iter);
+while (!(err = btrfs_util_subvolume_iterator_next_info(iter, &path, &info))) {
+	printf("%" PRIu64 " %" PRIu64 " %s\n", info.id, info.parent_id, path);
+	free(path);
+}
+btrfs_util_destroy_subvolume_iterator(iter);
+```
+
+The Python bindings provide this interface as an iterable `SubvolumeIterator`
+class. It should be used as a context manager to ensure that the underlying
+file descriptor is closed. Alternatively, it has a `close()` method for closing
+explicitly. It also has a `fileno()` method to get the underlying file
+descriptor.
+
+```python
+with btrfsutil.SubvolumeIterator('/', 256) as it:
+    # This is just an example use-case for fileno(). It is not necessary.
+    btrfsutil.sync(it.fileno())
+    for path, id_ in it:
+        print(id_, path)
+
+it = btrfsutil.SubvolumeIterator('/subvol', info=True, post_order=True)
+try:
+    for path, info in it:
+        print(info.id, info.parent_id, path)
+finally:
+    it.close()
+```
+
+This interface requires `CAP_SYS_ADMIN` unless the given top subvolume ID is
+zero and the kernel supports the `BTRFS_IOC_GET_SUBVOL_ROOTREF` and
+`BTRFS_IOC_INO_LOOKUP_USER` ioctls (added in 4.18). In the unprivileged case,
+subvolumes which cannot be accessed are skipped.
+
+The equivalent `btrfs-progs` command is `btrfs subvolume list`.
+
+#### Creation
+
+`btrfs_util_create_subvolume()` creates a new subvolume at the given path. The
+subvolume can be created asynchronously and inherit from quota groups
+(qgroups).
+
+Qgroups to inherit are specified with a `struct btrfs_util_qgroup_inherit`,
+which is created by `btrfs_util_create_qgroup_inherit()` and freed by
+`btrfs_util_destroy_qgroup_inherit()`. Qgroups are added with
+`btrfs_util_qgroup_inherit_add_group()`. The list of added groups can be
+retrieved with `btrfs_util_qgroup_inherit_get_groups()`; note that the returned
+array does not need to be freed and is no longer valid when the `struct
+btrfs_util_qgroup_inherit` is freed.
+
+The Python bindings provide a `QgroupInherit` class. It has an `add_group()`
+method and a `groups` member, which is a list of ints.
+
+```c
+btrfs_util_create_subvolume("/subvol2", 0, NULL, NULL);
+
+uint64_t async_transid;
+btrfs_util_create_subvolume("/subvol2", 0, &async_transid, NULL);
+btrfs_util_wait_sync("/", async_transid);
+
+struct btrfs_util_qgroup_inherit *qgroups;
+btrfs_util_create_qgroup_inherit(0, &qgroups);
+btrfs_util_qgroup_inherit_add_group(&qgroups, 256);
+btrfs_util_create_subvolume("/subvol2", 0, NULL, qgroups);
+btrfs_util_destroy_qgroup_inherit(qgroups);
+```
+
+```python
+btrfsutil.create_subvolume('/subvol2')
+
+async_transid = btrfsutil.create_subvolume('/subvol2', async_=True)
+btrfsutil.wait_sync('/', async_transid)
+
+qgroups = btrfsutil.QgroupInherit()
+qgroups.add_group(256)
+btrfsutil.create_subvolume('/subvol2', qgroup_inherit=qgroups)
+```
+
+The C API has an `_fd` variant which takes a name and a file descriptor
+referring to the parent directory.
+
+The equivalent `btrfs-progs` command is `btrfs subvolume create`.
+
+#### Snapshotting
+
+Snapshots are created with `btrfs_util_create_snapshot()`, which takes a source
+path, a destination path, and flags. It can also be asynchronous and inherit
+from quota groups; see [subvolume creation](#Creation).
+
+Snapshot creation can be recursive, in which case subvolumes underneath the
+subvolume being snapshotted will also be snapshotted onto the same location in
+the new snapshot (note that this is implemented in userspace non-atomically and
+has the same capability requirements as a [subvolume iterator](#Enumeration)).
+The newly created snapshot can also be read-only, but not if doing a recursive
+snapshot.
+
+```c
+btrfs_util_create_snapshot("/subvol", "/snapshot", 0, NULL, NULL);
+btrfs_util_create_snapshot("/nested_subvol", "/nested_snapshot",
+			   BTRFS_UTIL_CREATE_SNAPSHOT_RECURSIVE, NULL, NULL);
+btrfs_util_create_snapshot("/subvol", "/rosnapshot",
+			   BTRFS_UTIL_CREATE_SNAPSHOT_READ_ONLY, NULL, NULL);
+```
+
+```python
+btrfsutil.create_snapshot('/subvol', '/snapshot')
+btrfsutil.create_snapshot('/nested_subvol', '/nested_snapshot', recursive=True)
+btrfsutil.create_snapshot('/subvol', '/rosnapshot', read_only=True)
+```
+
+The C API has two `_fd` variants. `btrfs_util_create_snapshot_fd()` takes the
+source subvolume as a file descriptor. `btrfs_util_create_snapshot_fd2()` takes
+the source subvolume as a file descriptor and the destination as a name and
+parent file descriptor.
+
+The equivalent `btrfs-progs` command is `btrfs subvolume snapshot`.
+
+#### Deletion
+
+`btrfs_util_delete_subvolume()` takes a subvolume to delete and flags. This
+requires `CAP_SYS_ADMIN` if the filesystem was not mounted with
+`user_subvol_rm_allowed`. Deletion may be recursive, in which case all
+subvolumes beneath the given subvolume are deleted before the given subvolume
+is deleted. This is implemented in user-space non-atomically and has the same
+capability requirements as a [subvolume iterator](#Enumeration).
+
+```c
+btrfs_util_delete_subvolume("/subvol", 0);
+btrfs_util_delete_subvolume("/nested_subvol",
+			    BTRFS_UTIL_DELETE_SUBVOLUME_RECURSIVE);
+```
+
+```python
+btrfsutil.delete_subvolume('/subvol')
+btrfsutil.delete_subvolume('/nested_subvol', recursive=True)
+```
+
+The C API has an `_fd` variant which takes a name and a file descriptor
+referring to the parent directory.
+
+The equivalent `btrfs-progs` command is `btrfs subvolume delete`.
+
+#### Deleted Subvolumes
+
+Btrfs lazily cleans up deleted subvolumes. `btrfs_util_deleted_subvolumes()`
+returns an array of subvolume IDs which have been deleted but not yet cleaned
+up. The returned array should be freed with `free()`.
+```c
+uint64_t *ids;
+size_t n; /* Number of returned IDs. */
+btrfs_util_deleted_subvolumes("/", &ids, &n);
+free(ids);
+```
+
+The Python binding returns a list of ints.
+
+```python
+ids = btrfsutil.deleted_subvolumes('/')
+```
+
+This function also has an `_fd` variant. It requires `CAP_SYS_ADMIN`.
+
+The closest `btrfs-progs` command is `btrfs subvolume sync`, which waits for
+deleted subvolumes to be cleaned up.
+
+#### Read-Only Flag
+
+Subvolumes can be set to read-only. `btrfs_util_get_subvolume_read_only()`
+returns whether a subvolume is read-only.
+`btrfs_util_set_subvolume_read_only()` sets the read-only flag to the desired
+value.
+
+```c
+bool read_only;
+btrfs_util_get_subvolume_read_only("/subvol", &read_only);
+btrfs_util_set_subvolume_read_only("/subvol", true);
+btrfs_util_set_subvolume_read_only("/subvol", false);
+```
+
+```python
+read_only = btrfsutil.get_subvolume_read_only('/subvol')
+btrfsutil.set_subvolume_read_only('/subvol', True)
+btrfsutil.set_subvolume_read_only('/subvol', False)
+```
+
+Both of these functions have `_fd` variants.
+
+The equivalent `btrfs-progs` commands are `btrfs property get` and `btrfs
+property set` with the `ro` property.
+
+#### Default Subvolume
+
+The default subvolume of a filesystem is the subvolume which is mounted when no
+`subvol` or `subvolid` mount option is passed.
+
+`btrfs_util_get_default_subvolume()` gets the ID of the default subvolume for
+the filesystem containing the given file.
+
+`btrfs_util_set_default_subvolume()` sets the default subvolume.
+
+```c
+uint64_t id;
+btrfs_util_get_default_subvolume("/", &id);
+btrfs_util_set_default_subvolume("/", 256);
+btrfs_util_set_default_subvolume("/subvol", 0);
+```
+
+```python
+id = btrfsutil.get_default_subvolume('/')
+btrfsutil.set_default_subvolume('/', 256)
+btrfsutil.set_default_subvolume('/subvol')  # equivalent to set_default_subvolume('/subvol', 0)
+```
+
+Both of these functions have an `_fd` variant. They both require
+`CAP_SYS_ADMIN`.
+
+The equivalent `btrfs-progs` commands are `btrfs subvolume get-default` and
+`btrfs subvolume set-default`.
+
 Development
 -----------
 
@@ -24,7 +443,8 @@ release of btrfs-progs).
 
 A few guidelines:
 
-* All interfaces must be documented in `btrfsutil.h` using the kernel-doc style
+* All interfaces must be documented in this README and in `btrfsutil.h` using
+  the kernel-doc style
 * Error codes should be specific about what _exactly_ failed
 * Functions should have a path and an fd variant whenever possible
 * Spell out terms in function names, etc. rather than abbreviating whenever
@@ -32,7 +452,7 @@ A few guidelines:
 * Don't require the Btrfs UAPI headers for any interfaces (e.g., instead of
   directly exposing a type from `linux/btrfs_tree.h`, abstract it away in a
   type specific to `libbtrfsutil`)
-* Preserve API and ABI compatability at all times (i.e., we don't want to bump
+* Preserve API and ABI compatibility at all times (i.e., we don't want to bump
   the library major version if we don't have to)
 * Include Python bindings for all interfaces
 * Write tests for all interfaces
diff --git a/libbtrfsutil/btrfs.h b/libbtrfsutil/btrfs.h
index c293f6bf..944d5013 100644
--- a/libbtrfsutil/btrfs.h
+++ b/libbtrfsutil/btrfs.h
@@ -134,7 +134,7 @@ struct btrfs_scrub_progress {
 	__u64 tree_bytes_scrubbed;	/* # of tree bytes scrubbed */
 	__u64 read_errors;		/* # of read errors encountered (EIO) */
 	__u64 csum_errors;		/* # of failed csum checks */
-	__u64 verify_errors;		/* # of occurences, where the metadata
+	__u64 verify_errors;		/* # of occurrences, where the metadata
 					 * of a tree block did not match the
 					 * expected values, like generation or
 					 * logical */
@@ -154,7 +154,7 @@ struct btrfs_scrub_progress {
 	__u64 last_physical;		/* last physical address scrubbed. In
 					 * case a scrub was aborted, this can
 					 * be used to restart the scrub */
-	__u64 unverified_errors;	/* # of occurences where a read for a
+	__u64 unverified_errors;	/* # of occurrences where a read for a
 					 * full (64k) bio failed, but the re-
 					 * check succeeded for each 4k piece.
 					 * Intermittent error. */
@@ -421,6 +421,21 @@ struct btrfs_ioctl_ino_lookup_args {
 	char name[BTRFS_INO_LOOKUP_PATH_MAX];
 };
 
+#define BTRFS_INO_LOOKUP_USER_PATH_MAX	(4080 - BTRFS_VOL_NAME_MAX - 1)
+struct btrfs_ioctl_ino_lookup_user_args {
+	/* in, inode number containing the subvolume of 'treeid' */
+	__u64 dirid;
+	/* in */
+	__u64 treeid;
+	/* out, name of the subvolume of 'treeid' */
+	char name[BTRFS_VOL_NAME_MAX + 1];
+	/*
+	 * out, constructed path from the directory with which the ioctl is
+	 * called to dirid
+	 */
+	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
+};
+
 /* Search criteria for the btrfs SEARCH ioctl family. */
 struct btrfs_ioctl_search_key {
 	/*
@@ -720,6 +735,82 @@ struct btrfs_ioctl_send_args {
 	__u64 reserved[4];		/* in */
 };
 
+/*
+ * Information about a fs tree root.
+ *
+ * All items are filled by the ioctl
+ */
+struct btrfs_ioctl_get_subvol_info_args {
+	/* Id of this subvolume */
+	__u64 treeid;
+
+	/* Name of this subvolume, used to get the real name at mount point */
+	char name[BTRFS_VOL_NAME_MAX + 1];
+
+	/*
+	 * Id of the subvolume which contains this subvolume.
+	 * Zero for top-level subvolume or a deleted subvolume.
+	 */
+	__u64 parent_id;
+
+	/*
+	 * Inode number of the directory which contains this subvolume.
+	 * Zero for top-level subvolume or a deleted subvolume
+	 */
+	__u64 dirid;
+
+	/* Latest transaction id of this subvolume */
+	__u64 generation;
+
+	/* Flags of this subvolume */
+	__u64 flags;
+
+	/* UUID of this subvolume */
+	__u8 uuid[BTRFS_UUID_SIZE];
+
+	/*
+	 * UUID of the subvolume of which this subvolume is a snapshot.
+	 * All zero for a non-snapshot subvolume.
+	 */
+	__u8 parent_uuid[BTRFS_UUID_SIZE];
+
+	/*
+	 * UUID of the subvolume from which this subvolume was received.
+	 * All zero for non-received subvolume.
+	 */
+	__u8 received_uuid[BTRFS_UUID_SIZE];
+
+	/* Transaction id indicating when change/create/send/receive happened */
+	__u64 ctransid;
+	__u64 otransid;
+	__u64 stransid;
+	__u64 rtransid;
+	/* Time corresponding to c/o/s/rtransid */
+	struct btrfs_ioctl_timespec ctime;
+	struct btrfs_ioctl_timespec otime;
+	struct btrfs_ioctl_timespec stime;
+	struct btrfs_ioctl_timespec rtime;
+
+	/* Must be zero */
+	__u64 reserved[8];
+};
+
+#define BTRFS_MAX_ROOTREF_BUFFER_NUM			255
+struct btrfs_ioctl_get_subvol_rootref_args {
+	/* in/out, minimum id of rootref's treeid to be searched */
+	__u64 min_treeid;
+
+	/* out */
+	struct {
+		__u64 treeid;
+		__u64 dirid;
+	} rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
+
+	/* out, number of found items */
+	__u8 num_items;
+	__u8 align[7];
+};
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -836,5 +927,11 @@ enum btrfs_err_code {
 				   struct btrfs_ioctl_feature_flags[3])
 #define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
 				   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \
+				struct btrfs_ioctl_get_subvol_info_args)
+#define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \
+				struct btrfs_ioctl_get_subvol_rootref_args)
+#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
+				struct btrfs_ioctl_ino_lookup_user_args)
 
 #endif /* _LINUX_BTRFS_H */
diff --git a/libbtrfsutil/btrfs_tree.h b/libbtrfsutil/btrfs_tree.h
index f2ac0267..2af7205c 100644
--- a/libbtrfsutil/btrfs_tree.h
+++ b/libbtrfsutil/btrfs_tree.h
@@ -54,7 +54,7 @@
 /* for storing balance parameters in the root tree */
 #define BTRFS_BALANCE_OBJECTID -4ULL
 
-/* orhpan objectid for tracking unlinked/truncated files */
+/* orphan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
 /* does write ahead logging to speed up fsyncs */
@@ -270,7 +270,7 @@
 #define BTRFS_PERSISTENT_ITEM_KEY	249
 
 /*
- * Persistantly stores the device replace state in the device tree.
+ * Persistently stores the device replace state in the device tree.
  * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
  */
 #define BTRFS_DEV_REPLACE_KEY	250
diff --git a/libbtrfsutil/btrfsutil.h b/libbtrfsutil/btrfsutil.h
index 6d655f49..ad4f043e 100644
--- a/libbtrfsutil/btrfsutil.h
+++ b/libbtrfsutil/btrfsutil.h
@@ -26,7 +26,7 @@
 #include <sys/time.h>
 
 #define BTRFS_UTIL_VERSION_MAJOR 1
-#define BTRFS_UTIL_VERSION_MINOR 0
+#define BTRFS_UTIL_VERSION_MINOR 1
 #define BTRFS_UTIL_VERSION_PATCH 0
 
 #ifdef __cplusplus
@@ -63,6 +63,10 @@ enum btrfs_util_error {
 	BTRFS_UTIL_ERROR_SYNC_FAILED,
 	BTRFS_UTIL_ERROR_START_SYNC_FAILED,
 	BTRFS_UTIL_ERROR_WAIT_SYNC_FAILED,
+	BTRFS_UTIL_ERROR_GET_SUBVOL_INFO_FAILED,
+	BTRFS_UTIL_ERROR_GET_SUBVOL_ROOTREF_FAILED,
+	BTRFS_UTIL_ERROR_INO_LOOKUP_USER_FAILED,
+	BTRFS_UTIL_ERROR_FS_INFO_FAILED,
 };
 
 /**
@@ -266,7 +270,8 @@ struct btrfs_util_subvolume_info {
  * to check whether the subvolume exists; %BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND
  * will be returned if it does not.
  *
- * This requires appropriate privilege (CAP_SYS_ADMIN).
+ * This requires appropriate privilege (CAP_SYS_ADMIN) unless @id is zero and
+ * the kernel supports BTRFS_IOC_GET_SUBVOL_INFO (kernel >= 4.18).
  *
  * Return: %BTRFS_UTIL_OK on success, non-zero error code on failure.
  */
@@ -505,6 +510,12 @@ struct btrfs_util_subvolume_iterator;
  * @flags: Bitmask of BTRFS_UTIL_SUBVOLUME_ITERATOR_* flags.
  * @ret: Returned iterator.
  *
+ * Subvolume iterators require appropriate privilege (CAP_SYS_ADMIN) unless @top
+ * is zero and the kernel supports BTRFS_IOC_GET_SUBVOL_ROOTREF and
+ * BTRFS_IOC_INO_LOOKUP_USER (kernel >= 4.18). In this case, subvolumes which
+ * cannot be accessed (e.g., due to permissions or other mounts) will be
+ * skipped.
+ *
  * The returned iterator must be freed with
  * btrfs_util_destroy_subvolume_iterator().
  *
@@ -553,7 +564,8 @@ int btrfs_util_subvolume_iterator_fd(const struct btrfs_util_subvolume_iterator
  * Must be freed with free().
  * @id_ret: Returned subvolume ID. May be %NULL.
  *
- * This requires appropriate privilege (CAP_SYS_ADMIN).
+ * This requires appropriate privilege (CAP_SYS_ADMIN) for kernels < 4.18. See
+ * btrfs_util_create_subvolume_iterator().
  *
  * Return: %BTRFS_UTIL_OK on success, %BTRFS_UTIL_ERROR_STOP_ITERATION if there
  * are no more subvolumes, non-zero error code on failure.
@@ -572,7 +584,8 @@ enum btrfs_util_error btrfs_util_subvolume_iterator_next(struct btrfs_util_subvo
  * This convenience function basically combines
  * btrfs_util_subvolume_iterator_next() and btrfs_util_subvolume_info().
  *
- * This requires appropriate privilege (CAP_SYS_ADMIN).
+ * This requires appropriate privilege (CAP_SYS_ADMIN) for kernels < 4.18. See
+ * btrfs_util_create_subvolume_iterator().
  *
  * Return: See btrfs_util_subvolume_iterator_next().
  */
diff --git a/libbtrfsutil/errors.c b/libbtrfsutil/errors.c
index 634edc65..d39b38d0 100644
--- a/libbtrfsutil/errors.c
+++ b/libbtrfsutil/errors.c
@@ -45,6 +45,14 @@ static const char * const error_messages[] = {
 	[BTRFS_UTIL_ERROR_SYNC_FAILED] = "Could not sync filesystem",
 	[BTRFS_UTIL_ERROR_START_SYNC_FAILED] = "Could not start filesystem sync",
 	[BTRFS_UTIL_ERROR_WAIT_SYNC_FAILED] = "Could not wait for filesystem sync",
+	[BTRFS_UTIL_ERROR_GET_SUBVOL_INFO_FAILED] =
+		"Could not get subvolume information with BTRFS_IOC_GET_SUBVOL_INFO",
+	[BTRFS_UTIL_ERROR_GET_SUBVOL_ROOTREF_FAILED] =
+		"Could not get rootref information with BTRFS_IOC_GET_SUBVOL_ROOTREF",
+	[BTRFS_UTIL_ERROR_INO_LOOKUP_USER_FAILED] =
+		"Could not resolve subvolume path with BTRFS_IOC_INO_LOOKUP_USER",
+	[BTRFS_UTIL_ERROR_FS_INFO_FAILED] =
+		"Could not get filesystem information",
 };
 
 PUBLIC const char *btrfs_util_strerror(enum btrfs_util_error err)
diff --git a/libbtrfsutil/python/module.c b/libbtrfsutil/python/module.c
index 2dbdc7be..f8260c84 100644
--- a/libbtrfsutil/python/module.c
+++ b/libbtrfsutil/python/module.c
@@ -233,23 +233,28 @@ static PyMethodDef btrfsutil_methods[] = {
 	 "this ID instead of the given path"},
 	{"create_subvolume", (PyCFunction)create_subvolume,
 	 METH_VARARGS | METH_KEYWORDS,
-	 "create_subvolume(path, async=False)\n\n"
+	 "create_subvolume(path, async_=False, qgroup_inherit=None)\n\n"
 	 "Create a new subvolume.\n\n"
 	 "Arguments:\n"
 	 "path -- string, bytes, or path-like object\n"
-	 "async -- create the subvolume without waiting for it to commit to\n"
-	 "disk and return the transaction ID"},
+	 "async_ -- create the subvolume without waiting for it to commit to\n"
+	 "disk and return the transaction ID\n"
+	 "qgroup_inherit -- optional QgroupInherit object of qgroups to\n"
+	 "inherit from"},
 	{"create_snapshot", (PyCFunction)create_snapshot,
 	 METH_VARARGS | METH_KEYWORDS,
-	 "create_snapshot(source, path, recursive=False, read_only=False, async=False)\n\n"
+	 "create_snapshot(source, path, recursive=False, read_only=False,\n"
+	 "                async_=False, qgroup_inherit=None)\n\n"
 	 "Create a new snapshot.\n\n"
 	 "Arguments:\n"
 	 "source -- string, bytes, path-like object, or open file descriptor\n"
 	 "path -- string, bytes, or path-like object\n"
 	 "recursive -- also snapshot child subvolumes\n"
 	 "read_only -- create a read-only snapshot\n"
-	 "async -- create the subvolume without waiting for it to commit to\n"
-	 "disk and return the transaction ID"},
+	 "async_ -- create the subvolume without waiting for it to commit to\n"
+	 "disk and return the transaction ID\n"
+	 "qgroup_inherit -- optional QgroupInherit object of qgroups to\n"
+	 "inherit from"},
 	{"delete_subvolume", (PyCFunction)delete_subvolume,
 	 METH_VARARGS | METH_KEYWORDS,
 	 "delete_subvolume(path, recursive=False)\n\n"
diff --git a/libbtrfsutil/python/subvolume.c b/libbtrfsutil/python/subvolume.c
index 069e606b..0f893b91 100644
--- a/libbtrfsutil/python/subvolume.c
+++ b/libbtrfsutil/python/subvolume.c
@@ -322,7 +322,7 @@ PyObject *set_default_subvolume(PyObject *self, PyObject *args, PyObject *kwds)
 
 PyObject *create_subvolume(PyObject *self, PyObject *args, PyObject *kwds)
 {
-	static char *keywords[] = {"path", "async", "qgroup_inherit", NULL};
+	static char *keywords[] = {"path", "async_", "qgroup_inherit", NULL};
 	struct path_arg path = {.allow_fd = false};
 	enum btrfs_util_error err;
 	int async = 0;
@@ -352,7 +352,7 @@ PyObject *create_subvolume(PyObject *self, PyObject *args, PyObject *kwds)
 PyObject *create_snapshot(PyObject *self, PyObject *args, PyObject *kwds)
 {
 	static char *keywords[] = {
-		"source", "path", "recursive", "read_only", "async",
+		"source", "path", "recursive", "read_only", "async_",
 		"qgroup_inherit", NULL,
 	};
 	struct path_arg src = {.allow_fd = true}, dst = {.allow_fd = false};
@@ -525,7 +525,7 @@ static int SubvolumeIterator_init(SubvolumeIterator *self, PyObject *args,
 	static char *keywords[] = {"path", "top", "info", "post_order", NULL};
 	struct path_arg path = {.allow_fd = true};
 	enum btrfs_util_error err;
-	unsigned long long top = 5;
+	unsigned long long top = 0;
 	int info = 0;
 	int post_order = 0;
 	int flags = 0;
diff --git a/libbtrfsutil/python/tests/__init__.py b/libbtrfsutil/python/tests/__init__.py
index 35550e0a..9fd6f6de 100644
--- a/libbtrfsutil/python/tests/__init__.py
+++ b/libbtrfsutil/python/tests/__init__.py
@@ -15,26 +15,60 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with libbtrfsutil.  If not, see <http://www.gnu.org/licenses/>.
 
+import contextlib
 import os
 from pathlib import PurePath
+import pwd
 import subprocess
 import tempfile
 import unittest
 
 
 HAVE_PATH_LIKE = hasattr(PurePath, '__fspath__')
+try:
+    NOBODY_UID = pwd.getpwnam('nobody').pw_uid
+    skipUnlessHaveNobody = lambda func: func
+except KeyError:
+    NOBODY_UID = None
+    skipUnlessHaveNobody = unittest.skip('must have nobody user')
+
+
+@contextlib.contextmanager
+def drop_privs():
+    try:
+        os.seteuid(NOBODY_UID)
+        yield
+    finally:
+        os.seteuid(0)
+
+
+@contextlib.contextmanager
+def regain_privs():
+    uid = os.geteuid()
+    if uid:
+        try:
+            os.seteuid(0)
+            yield
+        finally:
+            os.seteuid(uid)
+    else:
+        yield
 
 
 @unittest.skipIf(os.geteuid() != 0, 'must be run as root')
 class BtrfsTestCase(unittest.TestCase):
-    def setUp(self):
-        self.mountpoint = tempfile.mkdtemp()
+    def __init__(self, *args, **kwds):
+        super().__init__(*args, **kwds)
+        self._mountpoints = []
+
+    def mount_btrfs(self):
+        mountpoint = tempfile.mkdtemp()
         try:
             with tempfile.NamedTemporaryFile(delete=False) as f:
                 os.truncate(f.fileno(), 1024 * 1024 * 1024)
-                self.image = f.name
+                image = f.name
         except Exception as e:
-            os.rmdir(self.mountpoint)
+            os.rmdir(mountpoint)
             raise e
 
         if os.path.exists('../../mkfs.btrfs'):
@@ -42,19 +76,24 @@ class BtrfsTestCase(unittest.TestCase):
         else:
             mkfs = 'mkfs.btrfs'
         try:
-            subprocess.check_call([mkfs, '-q', self.image])
-            subprocess.check_call(['mount', '-o', 'loop', '--', self.image, self.mountpoint])
+            subprocess.check_call([mkfs, '-q', image])
+            subprocess.check_call(['mount', '-o', 'loop', '--', image, mountpoint])
         except Exception as e:
-            os.remove(self.image)
-            os.rmdir(self.mountpoint)
+            os.rmdir(mountpoint)
+            os.remove(image)
             raise e
 
+        self._mountpoints.append((mountpoint, image))
+        return mountpoint, image
+
+    def setUp(self):
+        self.mountpoint, self.image = self.mount_btrfs()
+
     def tearDown(self):
-        try:
-            subprocess.check_call(['umount', self.mountpoint])
-        finally:
-            os.remove(self.image)
-            os.rmdir(self.mountpoint)
+        for mountpoint, image in self._mountpoints:
+            subprocess.call(['umount', '-R', mountpoint])
+            os.rmdir(mountpoint)
+            os.remove(image)
 
     @staticmethod
     def path_or_fd(path, open_flags=os.O_RDONLY):
@@ -67,4 +106,3 @@ class BtrfsTestCase(unittest.TestCase):
             yield fd
         finally:
             os.close(fd)
-
diff --git a/libbtrfsutil/python/tests/test_subvolume.py b/libbtrfsutil/python/tests/test_subvolume.py
index 93396cba..99ec97bc 100644
--- a/libbtrfsutil/python/tests/test_subvolume.py
+++ b/libbtrfsutil/python/tests/test_subvolume.py
@@ -20,10 +20,18 @@ import errno
 import os
 import os.path
 from pathlib import PurePath
+import subprocess
 import traceback
 
 import btrfsutil
-from tests import BtrfsTestCase, HAVE_PATH_LIKE
+from tests import (
+    BtrfsTestCase,
+    drop_privs,
+    HAVE_PATH_LIKE,
+    NOBODY_UID,
+    regain_privs,
+    skipUnlessHaveNobody,
+)
 
 
 class TestSubvolume(BtrfsTestCase):
@@ -87,7 +95,7 @@ class TestSubvolume(BtrfsTestCase):
         finally:
             os.chdir(pwd)
 
-    def test_subvolume_info(self):
+    def _test_subvolume_info(self, subvol, snapshot):
         for arg in self.path_or_fd(self.mountpoint):
             with self.subTest(type=type(arg)):
                 info = btrfsutil.subvolume_info(arg)
@@ -100,7 +108,7 @@ class TestSubvolume(BtrfsTestCase):
                 self.assertEqual(info.parent_uuid, bytes(16))
                 self.assertEqual(info.received_uuid, bytes(16))
                 self.assertNotEqual(info.generation, 0)
-                self.assertEqual(info.ctransid, 0)
+                self.assertGreaterEqual(info.ctransid, 0)
                 self.assertEqual(info.otransid, 0)
                 self.assertEqual(info.stransid, 0)
                 self.assertEqual(info.rtransid, 0)
@@ -109,9 +117,6 @@ class TestSubvolume(BtrfsTestCase):
                 self.assertEqual(info.stime, 0)
                 self.assertEqual(info.rtime, 0)
 
-        subvol = os.path.join(self.mountpoint, 'subvol')
-        btrfsutil.create_subvolume(subvol)
-
         info = btrfsutil.subvolume_info(subvol)
         self.assertEqual(info.id, 256)
         self.assertEqual(info.parent_id, 5)
@@ -132,19 +137,43 @@ class TestSubvolume(BtrfsTestCase):
         self.assertEqual(info.rtime, 0)
 
         subvol_uuid = info.uuid
-        snapshot = os.path.join(self.mountpoint, 'snapshot')
-        btrfsutil.create_snapshot(subvol, snapshot)
 
         info = btrfsutil.subvolume_info(snapshot)
         self.assertEqual(info.parent_uuid, subvol_uuid)
 
         # TODO: test received_uuid, stransid, rtransid, stime, and rtime
 
+    def test_subvolume_info(self):
+        subvol = os.path.join(self.mountpoint, 'subvol')
+        btrfsutil.create_subvolume(subvol)
+        snapshot = os.path.join(self.mountpoint, 'snapshot')
+        btrfsutil.create_snapshot(subvol, snapshot)
+
+        self._test_subvolume_info(subvol, snapshot)
+
         for arg in self.path_or_fd(self.mountpoint):
             with self.subTest(type=type(arg)):
                 with self.assertRaises(btrfsutil.BtrfsUtilError) as e:
                     # BTRFS_EXTENT_TREE_OBJECTID
                     btrfsutil.subvolume_info(arg, 2)
+                self.assertEqual(e.exception.btrfsutilerror,
+                                 btrfsutil.ERROR_SUBVOLUME_NOT_FOUND)
+
+    @skipUnlessHaveNobody
+    def test_subvolume_info_unprivileged(self):
+        subvol = os.path.join(self.mountpoint, 'subvol')
+        btrfsutil.create_subvolume(subvol)
+        snapshot = os.path.join(self.mountpoint, 'snapshot')
+        btrfsutil.create_snapshot(subvol, snapshot)
+
+        with drop_privs():
+            try:
+                self._test_subvolume_info(subvol, snapshot)
+            except OSError as e:
+                if e.errno == errno.ENOTTY:
+                    self.skipTest('BTRFS_IOC_GET_SUBVOL_INFO is not available')
+                else:
+                    raise
 
     def test_read_only(self):
         for arg in self.path_or_fd(self.mountpoint):
@@ -202,7 +231,7 @@ class TestSubvolume(BtrfsTestCase):
         btrfsutil.create_subvolume(subvol + '6//')
         self.assertTrue(btrfsutil.is_subvolume(subvol + '6'))
 
-        transid = btrfsutil.create_subvolume(subvol + '7', async=True)
+        transid = btrfsutil.create_subvolume(subvol + '7', async_=True)
         self.assertTrue(btrfsutil.is_subvolume(subvol + '7'))
         self.assertGreater(transid, 0)
 
@@ -265,7 +294,7 @@ class TestSubvolume(BtrfsTestCase):
         btrfsutil.create_snapshot(subvol, snapshot + '2', recursive=True)
         self.assertTrue(os.path.exists(os.path.join(snapshot + '2', 'nested/more_nested/nested_dir')))
 
-        transid = btrfsutil.create_snapshot(subvol, snapshot + '3', recursive=True, async=True)
+        transid = btrfsutil.create_snapshot(subvol, snapshot + '3', recursive=True, async_=True)
         self.assertTrue(os.path.exists(os.path.join(snapshot + '3', 'nested/more_nested/nested_dir')))
         self.assertGreater(transid, 0)
 
@@ -328,58 +357,136 @@ class TestSubvolume(BtrfsTestCase):
             with self.subTest(type=type(arg)):
                 self.assertEqual(btrfsutil.deleted_subvolumes(arg), [256])
 
-    def test_subvolume_iterator(self):
-        pwd = os.getcwd()
-        try:
-            os.chdir(self.mountpoint)
-            btrfsutil.create_subvolume('foo')
+    def _test_subvolume_iterator(self):
+        btrfsutil.create_subvolume('foo')
 
-            path, subvol = next(btrfsutil.SubvolumeIterator('.', info=True))
+        with btrfsutil.SubvolumeIterator('.', info=True) as it:
+            path, subvol = next(it)
             self.assertEqual(path, 'foo')
             self.assertIsInstance(subvol, btrfsutil.SubvolumeInfo)
             self.assertEqual(subvol.id, 256)
             self.assertEqual(subvol.parent_id, 5)
-
-            btrfsutil.create_subvolume('foo/bar')
-            btrfsutil.create_subvolume('foo/bar/baz')
-
-            subvols = [
-                ('foo', 256),
-                ('foo/bar', 257),
-                ('foo/bar/baz', 258),
-            ]
-
-            for arg in self.path_or_fd('.'):
-                with self.subTest(type=type(arg)):
-                    self.assertEqual(list(btrfsutil.SubvolumeIterator(arg)), subvols)
-            self.assertEqual(list(btrfsutil.SubvolumeIterator('.', top=0)), subvols)
-
-            self.assertEqual(list(btrfsutil.SubvolumeIterator('.', post_order=True)),
+            self.assertRaises(StopIteration, next, it)
+
+        btrfsutil.create_subvolume('foo/bar')
+        btrfsutil.create_subvolume('foo/bar/baz')
+
+        subvols = [
+            ('foo', 256),
+            ('foo/bar', 257),
+            ('foo/bar/baz', 258),
+        ]
+
+        for arg in self.path_or_fd('.'):
+            with self.subTest(type=type(arg)), btrfsutil.SubvolumeIterator(arg) as it:
+                self.assertEqual(list(it), subvols)
+        with btrfsutil.SubvolumeIterator('.', top=0) as it:
+            self.assertEqual(list(it), subvols)
+        if os.geteuid() == 0:
+            with btrfsutil.SubvolumeIterator('foo', top=5) as it:
+                self.assertEqual(list(it), subvols)
+
+        with btrfsutil.SubvolumeIterator('.', post_order=True) as it:
+            self.assertEqual(list(it),
                              [('foo/bar/baz', 258),
                               ('foo/bar', 257),
                               ('foo', 256)])
 
-            subvols = [
-                ('bar', 257),
-                ('bar/baz', 258),
-            ]
-
-            self.assertEqual(list(btrfsutil.SubvolumeIterator('.', top=256)), subvols)
-            self.assertEqual(list(btrfsutil.SubvolumeIterator('foo', top=0)), subvols)
-
-            os.rename('foo/bar/baz', 'baz')
-            self.assertEqual(sorted(btrfsutil.SubvolumeIterator('.')),
-                             [('baz', 258),
-                              ('foo', 256),
-                              ('foo/bar', 257)])
-
-            with btrfsutil.SubvolumeIterator('.') as it:
-                self.assertGreaterEqual(it.fileno(), 0)
-                it.close()
-                with self.assertRaises(ValueError):
-                    next(iter(it))
-                with self.assertRaises(ValueError):
-                    it.fileno()
-                it.close()
+        subvols = [
+            ('bar', 257),
+            ('bar/baz', 258),
+        ]
+
+        if os.geteuid() == 0:
+            with btrfsutil.SubvolumeIterator('.', top=256) as it:
+                self.assertEqual(list(it), subvols)
+        with btrfsutil.SubvolumeIterator('foo') as it:
+            self.assertEqual(list(it), subvols)
+        with btrfsutil.SubvolumeIterator('foo', top=0) as it:
+            self.assertEqual(list(it), subvols)
+
+        os.rename('foo/bar/baz', 'baz')
+        os.mkdir('dir')
+        btrfsutil.create_subvolume('dir/qux')
+        os.mkdir('dir/qux/dir2')
+        btrfsutil.create_subvolume('dir/qux/dir2/quux')
+
+        subvols = [
+            ('baz', 258),
+            ('dir/qux', 259),
+            ('dir/qux/dir2/quux', 260),
+            ('foo', 256),
+            ('foo/bar', 257),
+        ]
+
+        # Test various corner cases of the unprivileged implementation
+        # where we can't access the subvolume.
+        if os.geteuid() != 0:
+            with regain_privs():
+                # We don't have permission to traverse the path.
+                os.mkdir('directory_perms', 0o700)
+                btrfsutil.create_subvolume('directory_perms/subvol')
+
+                # We don't have permission to resolve the subvolume path.
+                os.mkdir('subvol_perms', 0o755)
+                btrfsutil.create_subvolume('subvol_perms/subvol')
+                os.chmod('subvol_perms/subvol', 0o700)
+
+                # The path doesn't exist.
+                os.mkdir('enoent', 0o755)
+                btrfsutil.create_subvolume('enoent/subvol')
+                subprocess.check_call(['mount', '-t', 'tmpfs', 'tmpfs', 'enoent'])
+
+                # The path exists but it's not a subvolume.
+                os.mkdir('notsubvol', 0o755)
+                btrfsutil.create_subvolume('notsubvol/subvol')
+                subprocess.check_call(['mount', '-t', 'tmpfs', 'tmpfs', 'notsubvol'])
+                os.mkdir('notsubvol/subvol')
+
+                # The path exists and is a subvolume, but on a different
+                # filesystem.
+                os.mkdir('wrongfs', 0o755)
+                btrfsutil.create_subvolume('wrongfs/subvol')
+                other_mountpoint, _ = self.mount_btrfs()
+                subprocess.check_call(['mount', '--bind', '--',
+                                       other_mountpoint, 'wrongfs/subvol'])
+
+                # The path exists and is a subvolume on the same
+                # filesystem, but not the right one.
+                os.mkdir('wrongsubvol', 0o755)
+                btrfsutil.create_subvolume('wrongsubvol/subvol')
+                subprocess.check_call(['mount', '--bind', 'baz', 'wrongsubvol/subvol'])
+
+
+        with btrfsutil.SubvolumeIterator('.') as it:
+            self.assertEqual(sorted(it), subvols)
+        with btrfsutil.SubvolumeIterator('.', post_order=True) as it:
+            self.assertEqual(sorted(it), subvols)
+
+        with btrfsutil.SubvolumeIterator('.') as it:
+            self.assertGreaterEqual(it.fileno(), 0)
+            it.close()
+            with self.assertRaises(ValueError):
+                next(iter(it))
+            with self.assertRaises(ValueError):
+                it.fileno()
+            it.close()
+
+    def test_subvolume_iterator(self):
+        pwd = os.getcwd()
+        try:
+            os.chdir(self.mountpoint)
+            self._test_subvolume_iterator()
+        finally:
+            os.chdir(pwd)
+
+    @skipUnlessHaveNobody
+    def test_subvolume_iterator_unprivileged(self):
+        os.chown(self.mountpoint, NOBODY_UID, -1)
+        pwd = os.getcwd()
+        try:
+            os.chdir(self.mountpoint)
+            with drop_privs():
+                self._test_subvolume_iterator()
         finally:
             os.chdir(pwd)
diff --git a/libbtrfsutil/stubs.c b/libbtrfsutil/stubs.c
index 9b9e037f..c530e408 100644
--- a/libbtrfsutil/stubs.c
+++ b/libbtrfsutil/stubs.c
@@ -19,6 +19,7 @@
 
 #include <stdlib.h>
 #include <errno.h>
+#include "stubs.h"
 
 void *reallocarray(void *ptr, size_t nmemb, size_t size)
 {
diff --git a/libbtrfsutil/subvolume.c b/libbtrfsutil/subvolume.c
index 867b3e10..60ab9f9d 100644
--- a/libbtrfsutil/subvolume.c
+++ b/libbtrfsutil/subvolume.c
@@ -31,6 +31,11 @@
 
 #include "btrfsutil_internal.h"
 
+static bool is_root(void)
+{
+	return geteuid() == 0;
+}
+
 /*
  * This intentionally duplicates btrfs_util_is_subvolume_fd() instead of opening
  * a file descriptor and calling it, because fstat() and fstatfs() don't accept
@@ -295,8 +300,8 @@ PUBLIC enum btrfs_util_error btrfs_util_subvolume_info(const char *path,
 	return err;
 }
 
-PUBLIC enum btrfs_util_error btrfs_util_subvolume_info_fd(int fd, uint64_t id,
-							  struct btrfs_util_subvolume_info *subvol)
+static enum btrfs_util_error get_subvolume_info_privileged(int fd, uint64_t id,
+							   struct btrfs_util_subvolume_info *subvol)
 {
 	struct btrfs_ioctl_search_args search = {
 		.key = {
@@ -310,27 +315,10 @@ PUBLIC enum btrfs_util_error btrfs_util_subvolume_info_fd(int fd, uint64_t id,
 			.nr_items = 0,
 		},
 	};
-	enum btrfs_util_error err;
 	size_t items_pos = 0, buf_off = 0;
 	bool need_root_item = true, need_root_backref = true;
 	int ret;
 
-	if (id == 0) {
-		err = btrfs_util_is_subvolume_fd(fd);
-		if (err)
-			return err;
-
-		err = btrfs_util_subvolume_id_fd(fd, &id);
-		if (err)
-			return err;
-	}
-
-	if ((id < BTRFS_FIRST_FREE_OBJECTID && id != BTRFS_FS_TREE_OBJECTID) ||
-	    id > BTRFS_LAST_FREE_OBJECTID) {
-		errno = ENOENT;
-		return BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND;
-	}
-
 	search.key.min_objectid = search.key.max_objectid = id;
 
 	if (subvol) {
@@ -400,6 +388,83 @@ PUBLIC enum btrfs_util_error btrfs_util_subvolume_info_fd(int fd, uint64_t id,
 	return BTRFS_UTIL_OK;
 }
 
+/*
+ * Fill @subvol from BTRFS_IOC_GET_SUBVOL_INFO issued on @fd.
+ *
+ * @subvol may be NULL if the caller only wants to know whether the lookup
+ * succeeds; the ioctl is still issued but nothing is copied out.
+ */
+static enum btrfs_util_error get_subvolume_info_unprivileged(int fd,
+							     struct btrfs_util_subvolume_info *subvol)
+{
+	struct btrfs_ioctl_get_subvol_info_args info;
+	int ret;
+
+	ret = ioctl(fd, BTRFS_IOC_GET_SUBVOL_INFO, &info);
+	if (ret == -1)
+		return BTRFS_UTIL_ERROR_GET_SUBVOL_INFO_FAILED;
+
+	/* Match the privileged path, which also tolerates a NULL @subvol. */
+	if (!subvol)
+		return BTRFS_UTIL_OK;
+
+	subvol->id = info.treeid;
+	subvol->parent_id = info.parent_id;
+	subvol->dir_id = info.dirid;
+	subvol->flags = info.flags;
+	subvol->generation = info.generation;
+
+	memcpy(subvol->uuid, info.uuid, sizeof(subvol->uuid));
+	memcpy(subvol->parent_uuid, info.parent_uuid,
+	       sizeof(subvol->parent_uuid));
+	memcpy(subvol->received_uuid, info.received_uuid,
+	       sizeof(subvol->received_uuid));
+
+	subvol->ctransid = info.ctransid;
+	subvol->otransid = info.otransid;
+	subvol->stransid = info.stransid;
+	subvol->rtransid = info.rtransid;
+
+	subvol->ctime.tv_sec = info.ctime.sec;
+	subvol->ctime.tv_nsec = info.ctime.nsec;
+	subvol->otime.tv_sec = info.otime.sec;
+	subvol->otime.tv_nsec = info.otime.nsec;
+	subvol->stime.tv_sec = info.stime.sec;
+	subvol->stime.tv_nsec = info.stime.nsec;
+	subvol->rtime.tv_sec = info.rtime.sec;
+	subvol->rtime.tv_nsec = info.rtime.nsec;
+
+	return BTRFS_UTIL_OK;
+}
+
+PUBLIC enum btrfs_util_error btrfs_util_subvolume_info_fd(int fd, uint64_t id,
+							  struct btrfs_util_subvolume_info *subvol)
+{
+	enum btrfs_util_error err;
+
+	if (id == 0) {
+		err = btrfs_util_is_subvolume_fd(fd);
+		if (err)
+			return err;
+
+		/* BTRFS_IOC_GET_SUBVOL_INFO works without CAP_SYS_ADMIN. */
+		if (!is_root())
+			return get_subvolume_info_unprivileged(fd, subvol);
+
+		err = btrfs_util_subvolume_id_fd(fd, &id);
+		if (err)
+			return err;
+	}
+
+	if ((id < BTRFS_FIRST_FREE_OBJECTID && id != BTRFS_FS_TREE_OBJECTID) ||
+	    id > BTRFS_LAST_FREE_OBJECTID) {
+		errno = ENOENT;
+		return BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND;
+	}
+
+	return get_subvolume_info_privileged(fd, id, subvol);
+}
+
 PUBLIC enum btrfs_util_error btrfs_util_get_subvolume_read_only_fd(int fd,
 								   bool *read_only_ret)
 {
@@ -695,13 +749,28 @@ PUBLIC enum btrfs_util_error btrfs_util_create_subvolume_fd(int parent_fd,
 #define BTRFS_UTIL_SUBVOLUME_ITERATOR_CLOSE_FD (1 << 30)
 
 struct search_stack_entry {
-	struct btrfs_ioctl_search_args search;
-	size_t items_pos, buf_off;
+	union {
+		/* Used for subvolume_iterator_next_tree_search(). */
+		struct {
+			struct btrfs_ioctl_search_args search;
+			size_t buf_off;
+		};
+		/* Used for subvolume_iterator_next_unprivileged(). */
+		struct {
+			uint64_t id;
+			struct btrfs_ioctl_get_subvol_rootref_args rootref_args;
+		};
+	};
+	/* Used for both. */
+	size_t items_pos;
 	size_t path_len;
 };
 
 struct btrfs_util_subvolume_iterator {
+	bool use_tree_search;
 	int fd;
+	/* cur_fd is only used for subvolume_iterator_next_unprivileged(). */
+	int cur_fd;
 	int flags;
 
 	struct search_stack_entry *search_stack;
@@ -712,6 +781,58 @@ struct btrfs_util_subvolume_iterator {
 	size_t cur_path_capacity;
 };
 
+static struct search_stack_entry *top_search_stack_entry(struct btrfs_util_subvolume_iterator *iter)
+{
+	return &iter->search_stack[iter->search_stack_len - 1];
+}
+
+/*
+ * Check that a path that we opened is the subvolume which we expect. It may not
+ * be if there is another filesystem mounted over a parent directory or the
+ * subvolume itself.
+ */
+static enum btrfs_util_error check_expected_subvolume(int fd, int parent_fd,
+						      uint64_t tree_id)
+{
+	struct btrfs_ioctl_fs_info_args parent_fs_info, fs_info;
+	enum btrfs_util_error err;
+	uint64_t id;
+	int ret;
+
+	/* Make sure it's a subvolume. */
+	err = btrfs_util_is_subvolume_fd(fd);
+	if (err == BTRFS_UTIL_ERROR_NOT_BTRFS ||
+	    err == BTRFS_UTIL_ERROR_NOT_SUBVOLUME) {
+		errno = ENOENT;
+		return BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND;
+	} else if (err) {
+		return err;
+	}
+
+	/* Make sure it's on the same filesystem. */
+	ret = ioctl(parent_fd, BTRFS_IOC_FS_INFO, &parent_fs_info);
+	if (ret == -1)
+		return BTRFS_UTIL_ERROR_FS_INFO_FAILED;
+	ret = ioctl(fd, BTRFS_IOC_FS_INFO, &fs_info);
+	if (ret == -1)
+		return BTRFS_UTIL_ERROR_FS_INFO_FAILED;
+	if (memcmp(parent_fs_info.fsid, fs_info.fsid, sizeof(fs_info.fsid)) != 0) {
+		errno = ENOENT;
+		return BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND;
+	}
+
+	/* Make sure it's the subvolume that we expected. */
+	err = btrfs_util_subvolume_id_fd(fd, &id);
+	if (err)
+		return err;
+	if (id != tree_id) {
+		errno = ENOENT;
+		return BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND;
+	}
+
+	return BTRFS_UTIL_OK;
+}
+
 static enum btrfs_util_error append_to_search_stack(struct btrfs_util_subvolume_iterator *iter,
 						    uint64_t tree_id,
 						    size_t path_len)
@@ -732,24 +853,94 @@ static enum btrfs_util_error append_to_search_stack(struct btrfs_util_subvolume_
 		iter->search_stack = new_search_stack;
 	}
 
-	entry = &iter->search_stack[iter->search_stack_len++];
+	entry = &iter->search_stack[iter->search_stack_len];
 
-	memset(&entry->search, 0, sizeof(entry->search));
-	entry->search.key.tree_id = BTRFS_ROOT_TREE_OBJECTID;
-	entry->search.key.min_objectid = tree_id;
-	entry->search.key.max_objectid = tree_id;
-	entry->search.key.min_type = BTRFS_ROOT_REF_KEY;
-	entry->search.key.max_type = BTRFS_ROOT_REF_KEY;
-	entry->search.key.min_offset = 0;
-	entry->search.key.max_offset = UINT64_MAX;
-	entry->search.key.min_transid = 0;
-	entry->search.key.max_transid = UINT64_MAX;
-	entry->search.key.nr_items = 0;
+	memset(entry, 0, sizeof(*entry));
+	entry->path_len = path_len;
+	if (iter->use_tree_search) {
+		entry->search.key.tree_id = BTRFS_ROOT_TREE_OBJECTID;
+		entry->search.key.min_objectid = tree_id;
+		entry->search.key.max_objectid = tree_id;
+		entry->search.key.min_type = BTRFS_ROOT_REF_KEY;
+		entry->search.key.max_type = BTRFS_ROOT_REF_KEY;
+		entry->search.key.min_offset = 0;
+		entry->search.key.max_offset = UINT64_MAX;
+		entry->search.key.min_transid = 0;
+		entry->search.key.max_transid = UINT64_MAX;
+		entry->search.key.nr_items = 0;
+	} else {
+		entry->id = tree_id;
 
-	entry->items_pos = 0;
-	entry->buf_off = 0;
+		if (iter->search_stack_len) {
+			struct search_stack_entry *top;
+			enum btrfs_util_error err;
+			char *path;
+			int fd;
 
-	entry->path_len = path_len;
+			top = top_search_stack_entry(iter);
+			path = &iter->cur_path[top->path_len];
+			if (*path == '/')
+				path++;
+			fd = openat(iter->cur_fd, path, O_RDONLY);
+			if (fd == -1)
+				return BTRFS_UTIL_ERROR_OPEN_FAILED;
+
+			err = check_expected_subvolume(fd, iter->cur_fd,
+						       tree_id);
+			if (err) {
+				SAVE_ERRNO_AND_CLOSE(fd);
+				return err;
+			}
+
+			/*
+			 * cur_fd still aliases the caller's fd until the first
+			 * descent; that one is not ours to close.
+			 */
+			if (iter->cur_fd != iter->fd)
+				close(iter->cur_fd);
+			iter->cur_fd = fd;
+		}
+	}
+
+	iter->search_stack_len++;
+
+	return BTRFS_UTIL_OK;
+}
+
+/*
+ * Pop the top entry off of the search stack. For the unprivileged iterator,
+ * also walk cur_fd back up to the parent subvolume by opening ".." once per
+ * path component of the popped entry.
+ */
+static enum btrfs_util_error pop_search_stack(struct btrfs_util_subvolume_iterator *iter)
+{
+	struct search_stack_entry *top, *parent;
+	int fd, parent_fd;
+	size_t i;
+
+	if (iter->use_tree_search || iter->search_stack_len == 1) {
+		iter->search_stack_len--;
+		return BTRFS_UTIL_OK;
+	}
+
+	top = top_search_stack_entry(iter);
+	iter->search_stack_len--;
+	parent = top_search_stack_entry(iter);
+
+	fd = iter->cur_fd;
+	for (i = parent->path_len; i < top->path_len; i++) {
+		if (i == 0 || iter->cur_path[i] == '/') {
+			parent_fd = openat(fd, "..", O_RDONLY);
+			if (fd != iter->cur_fd)
+				SAVE_ERRNO_AND_CLOSE(fd);
+			if (parent_fd == -1)
+				return BTRFS_UTIL_ERROR_OPEN_FAILED;
+			fd = parent_fd;
+		}
+	}
+	if (iter->cur_fd != iter->fd)
+		close(iter->cur_fd);
+	iter->cur_fd = fd;
 
 	return BTRFS_UTIL_OK;
 }
@@ -782,12 +963,14 @@ PUBLIC enum btrfs_util_error btrfs_util_create_subvolume_iterator_fd(int fd,
 {
 	struct btrfs_util_subvolume_iterator *iter;
 	enum btrfs_util_error err;
+	bool use_tree_search;
 
 	if (flags & ~BTRFS_UTIL_SUBVOLUME_ITERATOR_MASK) {
 		errno = EINVAL;
 		return BTRFS_UTIL_ERROR_INVALID_ARGUMENT;
 	}
 
+	use_tree_search = top != 0 || is_root();
 	if (top == 0) {
 		err = btrfs_util_is_subvolume_fd(fd);
 		if (err)
@@ -803,7 +986,9 @@ PUBLIC enum btrfs_util_error btrfs_util_create_subvolume_iterator_fd(int fd,
 		return BTRFS_UTIL_ERROR_NO_MEMORY;
 
 	iter->fd = fd;
+	iter->cur_fd = fd;
 	iter->flags = flags;
+	iter->use_tree_search = use_tree_search;
 
 	iter->search_stack_len = 0;
 	iter->search_stack_capacity = 4;
@@ -1112,6 +1297,8 @@ PUBLIC void btrfs_util_destroy_subvolume_iterator(struct btrfs_util_subvolume_it
 	if (iter) {
 		free(iter->cur_path);
 		free(iter->search_stack);
+		if (iter->cur_fd != iter->fd)
+			SAVE_ERRNO_AND_CLOSE(iter->cur_fd);
 		if (iter->flags & BTRFS_UTIL_SUBVOLUME_ITERATOR_CLOSE_FD)
 			SAVE_ERRNO_AND_CLOSE(iter->fd);
 		free(iter);
@@ -1123,32 +1310,14 @@ PUBLIC int btrfs_util_subvolume_iterator_fd(const struct btrfs_util_subvolume_it
 	return iter->fd;
 }
 
-static struct search_stack_entry *top_search_stack_entry(struct btrfs_util_subvolume_iterator *iter)
-{
-	return &iter->search_stack[iter->search_stack_len - 1];
-}
-
 static enum btrfs_util_error build_subvol_path(struct btrfs_util_subvolume_iterator *iter,
-					       const struct btrfs_ioctl_search_header *header,
-					       const struct btrfs_root_ref *ref,
-					       const char *name,
+					       const char *name, size_t name_len,
+					       const char *dir, size_t dir_len,
 					       size_t *path_len_ret)
 {
-	struct btrfs_ioctl_ino_lookup_args lookup = {
-		.treeid = header->objectid,
-		.objectid = le64_to_cpu(ref->dirid),
-	};
 	struct search_stack_entry *top = top_search_stack_entry(iter);
-	size_t dir_len, name_len, path_len;
+	size_t path_len;
 	char *p;
-	int ret;
-
-	ret = ioctl(iter->fd, BTRFS_IOC_INO_LOOKUP, &lookup);
-	if (ret == -1)
-		return BTRFS_UTIL_ERROR_INO_LOOKUP_FAILED;
-
-	dir_len = strlen(lookup.name);
-	name_len = le16_to_cpu(ref->name_len);
 
 	path_len = top->path_len;
 	/*
@@ -1166,33 +1335,75 @@ static enum btrfs_util_error build_subvol_path(struct btrfs_util_subvolume_itera
 		path_len++;
 	path_len += name_len;
 
-	if (path_len > iter->cur_path_capacity) {
-		char *tmp = realloc(iter->cur_path, path_len);
+	/* We need one extra character for the NUL terminator. */
+	if (path_len + 1 > iter->cur_path_capacity) {
+		char *tmp = realloc(iter->cur_path, path_len + 1);
 
 		if (!tmp)
 			return BTRFS_UTIL_ERROR_NO_MEMORY;
 		iter->cur_path = tmp;
-		iter->cur_path_capacity = path_len;
+		iter->cur_path_capacity = path_len + 1;
 	}
 
 	p = iter->cur_path + top->path_len;
 	if (top->path_len && dir_len)
 		*p++ = '/';
-	memcpy(p, lookup.name, dir_len);
+	memcpy(p, dir, dir_len);
 	p += dir_len;
 	if (top->path_len && !dir_len && name_len)
 		*p++ = '/';
 	memcpy(p, name, name_len);
 	p += name_len;
+	*p = '\0';
 
 	*path_len_ret = path_len;
 
 	return BTRFS_UTIL_OK;
 }
 
-PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next(struct btrfs_util_subvolume_iterator *iter,
-								char **path_ret,
-								uint64_t *id_ret)
+static enum btrfs_util_error build_subvol_path_privileged(struct btrfs_util_subvolume_iterator *iter,
+							  const struct btrfs_ioctl_search_header *header,
+							  const struct btrfs_root_ref *ref,
+							  const char *name,
+							  size_t *path_len_ret)
+{
+	struct btrfs_ioctl_ino_lookup_args lookup = {
+		.treeid = header->objectid,
+		.objectid = le64_to_cpu(ref->dirid),
+	};
+	int ret;
+
+	ret = ioctl(iter->fd, BTRFS_IOC_INO_LOOKUP, &lookup);
+	if (ret == -1)
+		return BTRFS_UTIL_ERROR_INO_LOOKUP_FAILED;
+
+	return build_subvol_path(iter, name, le16_to_cpu(ref->name_len),
+				 lookup.name, strlen(lookup.name),
+				 path_len_ret);
+}
+
+static enum btrfs_util_error build_subvol_path_unprivileged(struct btrfs_util_subvolume_iterator *iter,
+							    uint64_t treeid,
+							    uint64_t dirid,
+							    size_t *path_len_ret)
+{
+	struct btrfs_ioctl_ino_lookup_user_args args = {
+		.treeid = treeid,
+		.dirid = dirid,
+	};
+	int ret;
+
+	ret = ioctl(iter->cur_fd, BTRFS_IOC_INO_LOOKUP_USER, &args);
+	if (ret == -1)
+		return BTRFS_UTIL_ERROR_INO_LOOKUP_USER_FAILED;
+
+	return build_subvol_path(iter, args.name, strlen(args.name),
+				 args.path, strlen(args.path), path_len_ret);
+}
+
+static enum btrfs_util_error subvolume_iterator_next_tree_search(struct btrfs_util_subvolume_iterator *iter,
+								 char **path_ret,
+								 uint64_t *id_ret)
 {
 	struct search_stack_entry *top;
 	const struct btrfs_ioctl_search_header *header;
@@ -1219,7 +1430,10 @@ PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next(struct btrfs_uti
 				top->buf_off = 0;
 
 				if (top->search.key.nr_items == 0) {
-					iter->search_stack_len--;
+					/*
+					 * This never fails for use_tree_search.
+					 */
+					pop_search_stack(iter);
 					if ((iter->flags & BTRFS_UTIL_SUBVOLUME_ITERATOR_POST_ORDER) &&
 					    iter->search_stack_len)
 						goto out;
@@ -1239,7 +1453,8 @@ PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next(struct btrfs_uti
 
 		ref = (struct btrfs_root_ref *)(header + 1);
 		name = (const char *)(ref + 1);
-		err = build_subvol_path(iter, header, ref, name, &path_len);
+		err = build_subvol_path_privileged(iter, header, ref, name,
+						   &path_len);
 		if (err)
 			return err;
 
@@ -1266,6 +1481,100 @@ out:
 	return BTRFS_UTIL_OK;
 }
 
+static enum btrfs_util_error subvolume_iterator_next_unprivileged(struct btrfs_util_subvolume_iterator *iter,
+								  char **path_ret,
+								  uint64_t *id_ret)
+{
+	struct search_stack_entry *top;
+	uint64_t treeid, dirid;
+	enum btrfs_util_error err;
+	size_t path_len;
+	int ret;
+
+	for (;;) {
+		for (;;) {
+			if (iter->search_stack_len == 0)
+				return BTRFS_UTIL_ERROR_STOP_ITERATION;
+
+			top = top_search_stack_entry(iter);
+			if (top->items_pos < top->rootref_args.num_items) {
+				break;
+			} else {
+				ret = ioctl(iter->cur_fd,
+					    BTRFS_IOC_GET_SUBVOL_ROOTREF,
+					    &top->rootref_args);
+				if (ret == -1 && errno != EOVERFLOW)
+					return BTRFS_UTIL_ERROR_GET_SUBVOL_ROOTREF_FAILED;
+				top->items_pos = 0;
+
+				if (top->rootref_args.num_items == 0) {
+					err = pop_search_stack(iter);
+					if (err)
+						return err;
+					if ((iter->flags & BTRFS_UTIL_SUBVOLUME_ITERATOR_POST_ORDER) &&
+					    iter->search_stack_len)
+						goto out;
+				}
+			}
+		}
+
+		treeid = top->rootref_args.rootref[top->items_pos].treeid;
+		dirid = top->rootref_args.rootref[top->items_pos].dirid;
+		top->items_pos++;
+		err = build_subvol_path_unprivileged(iter, treeid, dirid,
+						     &path_len);
+		if (err) {
+			/* Skip the subvolume if we can't access it. */
+			if (errno == EACCES)
+				continue;
+			return err;
+		}
+
+		err = append_to_search_stack(iter, treeid, path_len);
+		if (err) {
+			/*
+			 * Skip the subvolume if it does not exist (which can
+			 * happen if there is another filesystem mounted over a
+			 * parent directory) or we don't have permission to
+			 * access it.
+			 */
+			if (errno == ENOENT || errno == EACCES)
+				continue;
+			return err;
+		}
+
+		if (!(iter->flags & BTRFS_UTIL_SUBVOLUME_ITERATOR_POST_ORDER)) {
+			top = top_search_stack_entry(iter);
+			goto out;
+		}
+	}
+
+out:
+	if (path_ret) {
+		*path_ret = malloc(top->path_len + 1);
+		if (!*path_ret)
+			return BTRFS_UTIL_ERROR_NO_MEMORY;
+		memcpy(*path_ret, iter->cur_path, top->path_len);
+		(*path_ret)[top->path_len] = '\0';
+	}
+	if (id_ret)
+		*id_ret = top->id;
+	return BTRFS_UTIL_OK;
+}
+
+PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next(struct btrfs_util_subvolume_iterator *iter,
+								char **path_ret,
+								uint64_t *id_ret)
+{
+	if (iter->use_tree_search) {
+		return subvolume_iterator_next_tree_search(iter, path_ret,
+							   id_ret);
+	} else {
+		return subvolume_iterator_next_unprivileged(iter, path_ret,
+							    id_ret);
+	}
+}
+
 PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next_info(struct btrfs_util_subvolume_iterator *iter,
 								     char **path_ret,
 								     struct btrfs_util_subvolume_info *subvol)
@@ -1277,7 +1586,10 @@ PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next_info(struct btrf
 	if (err)
 		return err;
 
-	return btrfs_util_subvolume_info_fd(iter->fd, id, subvol);
+	if (iter->use_tree_search)
+		return btrfs_util_subvolume_info_fd(iter->fd, id, subvol);
+	else
+		return btrfs_util_subvolume_info_fd(iter->cur_fd, 0, subvol);
 }
 
 PUBLIC enum btrfs_util_error btrfs_util_deleted_subvolumes(const char *path,
diff --git a/messages.h b/messages.h
index 2b97aa08..ec7d9338 100644
--- a/messages.h
+++ b/messages.h
@@ -61,9 +61,9 @@
 		PRINT_TRACE_ON_ERROR;					\
 		PRINT_VERBOSE_ERROR;					\
 		if (lib_str && strcmp(errno_str, lib_str) != 0)		\
-			__btrfs_error("%s: %s", lib_str, errno_str);	\
+			__btrfs_error("%s: %m", lib_str);		\
 		else							\
-			__btrfs_error("%s", errno_str);			\
+			__btrfs_error("%m");				\
 		DO_ABORT_ON_ERROR;					\
 	} while (0)
 
diff --git a/mkfs/common.c b/mkfs/common.c
index 0ace262b..f7e3badc 100644
--- a/mkfs/common.c
+++ b/mkfs/common.c
@@ -714,7 +714,8 @@ int test_dev_for_mkfs(const char *file, int force_overwrite)
 
 	ret = is_swap_device(file);
 	if (ret < 0) {
-		error("checking status of %s: %s", file, strerror(-ret));
+		errno = -ret;
+		error("checking status of %s: %m", file);
 		return 1;
 	}
 	if (ret == 1) {
@@ -760,8 +761,8 @@ int test_status_for_mkfs(const char *file, bool force_overwrite)
 	}
 	ret = check_mounted(file);
 	if (ret < 0) {
-		error("cannot check mount status of %s: %s", file,
-				strerror(-ret));
+		errno = -ret;
+		error("cannot check mount status of %s: %m", file);
 		return 1;
 	}
 	if (ret == 1) {
diff --git a/mkfs/main.c b/mkfs/main.c
index b76462a7..b6748f7f 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -1094,7 +1094,8 @@ int main(int argc, char **argv)
 
 	ret = make_btrfs(fd, &mkfs_cfg);
 	if (ret) {
-		error("error during mkfs: %s", strerror(-ret));
+		errno = -ret;
+		error("error during mkfs: %m");
 		goto error;
 	}
 
diff --git a/mkfs/rootdir.c b/mkfs/rootdir.c
index c430bac6..356467be 100644
--- a/mkfs/rootdir.c
+++ b/mkfs/rootdir.c
@@ -237,8 +237,7 @@ static int add_xattr_item(struct btrfs_trans_handle *trans,
 	if (ret < 0) {
 		if (errno == ENOTSUP)
 			return 0;
-		error("getting a list of xattr failed for %s: %s", file_name,
-				strerror(errno));
+		error("getting a list of xattr failed for %s: %m", file_name);
 		return ret;
 	}
 	if (ret == 0)
@@ -253,8 +252,8 @@ static int add_xattr_item(struct btrfs_trans_handle *trans,
 		if (ret < 0) {
 			if (errno == ENOTSUP)
 				return 0;
-			error("getting a xattr value failed for %s attr %s: %s",
-				file_name, cur_name, strerror(errno));
+			error("getting a xattr value failed for %s attr %s: %m",
+				file_name, cur_name);
 			return ret;
 		}
 
@@ -262,8 +261,9 @@ static int add_xattr_item(struct btrfs_trans_handle *trans,
 					      cur_name_len, cur_value,
 					      ret, objectid);
 		if (ret) {
-			error("inserting a xattr item failed for %s: %s",
-					file_name, strerror(-ret));
+			errno = -ret;
+			error("inserting a xattr item failed for %s: %m",
+					file_name);
 		}
 
 		cur_name = strtok(next_location, &delimiter);
@@ -281,7 +281,7 @@ static int add_symbolic_link(struct btrfs_trans_handle *trans,
 
 	ret = readlink(path_name, buf, sizeof(buf));
 	if (ret <= 0) {
-		error("readlink failed for %s: %s", path_name, strerror(errno));
+		error("readlink failed for %s: %m", path_name);
 		goto fail;
 	}
 	if (ret >= sizeof(buf)) {
@@ -320,7 +320,7 @@ static int add_file_items(struct btrfs_trans_handle *trans,
 
 	fd = open(path_name, O_RDONLY);
 	if (fd == -1) {
-		error("cannot open %s: %s", path_name, strerror(errno));
+		error("cannot open %s: %m", path_name);
 		return ret;
 	}
 
@@ -339,10 +339,9 @@ static int add_file_items(struct btrfs_trans_handle *trans,
 
 		ret_read = pread64(fd, buffer, st->st_size, bytes_read);
 		if (ret_read == -1) {
-			error("cannot read %s at offset %llu length %llu: %s",
+			error("cannot read %s at offset %llu length %llu: %m",
 				path_name, (unsigned long long)bytes_read,
-				(unsigned long long)st->st_size,
-				strerror(errno));
+				(unsigned long long)st->st_size);
 			free(buffer);
 			goto end;
 		}
@@ -388,11 +387,10 @@ again:
 		ret_read = pread64(fd, eb->data, sectorsize, file_pos +
 				   bytes_read);
 		if (ret_read == -1) {
-			error("cannot read %s at offset %llu length %llu: %s",
+			error("cannot read %s at offset %llu length %llu: %m",
 				path_name,
 				(unsigned long long)file_pos + bytes_read,
-				(unsigned long long)sectorsize,
-				strerror(errno));
+				(unsigned long long)sectorsize);
 			goto end;
 		}
 
@@ -467,7 +465,7 @@ static int traverse_directory(struct btrfs_trans_handle *trans,
 	dir_entry->dir_name = dir_name;
 	dir_entry->path = realpath(dir_name, NULL);
 	if (!dir_entry->path) {
-		error("realpath failed for %s: %s", dir_name, strerror(errno));
+		error("realpath failed for %s: %m", dir_name);
 		ret = -1;
 		goto fail_no_dir;
 	}
@@ -506,8 +504,8 @@ static int traverse_directory(struct btrfs_trans_handle *trans,
 		parent_inum = parent_dir_entry->inum;
 		parent_dir_name = parent_dir_entry->dir_name;
 		if (chdir(parent_dir_entry->path)) {
-			error("chdir failed for %s: %s",
-				parent_dir_name, strerror(errno));
+			error("chdir failed for %s: %m",
+				parent_dir_name);
 			ret = -1;
 			goto fail_no_files;
 		}
@@ -515,8 +513,8 @@ static int traverse_directory(struct btrfs_trans_handle *trans,
 		count = scandir(parent_dir_entry->path, &files,
 				directory_select, NULL);
 		if (count == -1) {
-			error("scandir failed for %s: %s",
-				parent_dir_name, strerror(errno));
+			error("scandir failed for %s: %m",
+				parent_dir_name);
 			ret = -1;
 			goto fail;
 		}
@@ -525,8 +523,8 @@ static int traverse_directory(struct btrfs_trans_handle *trans,
 			cur_file = files[i];
 
 			if (lstat(cur_file->d_name, &st) == -1) {
-				error("lstat failed for %s: %s",
-					cur_file->d_name, strerror(errno));
+				error("lstat failed for %s: %m",
+					cur_file->d_name);
 				ret = -1;
 				goto fail;
 			}
@@ -648,7 +646,7 @@ int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
 
 	ret = lstat(source_dir, &root_st);
 	if (ret) {
-		error("unable to lstat %s: %s", source_dir, strerror(errno));
+		error("unable to lstat %s: %m", source_dir);
 		ret = -errno;
 		goto out;
 	}
@@ -726,8 +724,8 @@ u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
 	u64 meta_threshold = SZ_8M;
 	u64 data_threshold = SZ_8M;
 
-	float data_multipler = 1;
-	float meta_multipler = 1;
+	float data_multiplier = 1;
+	float meta_multiplier = 1;
 
 	fs_block_size = sectorsize;
 	ftw_data_size = 0;
@@ -739,8 +737,7 @@ u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
 	 */
 	ret = nftw(dir_name, ftw_add_entry_size, 10, FTW_PHYS);
 	if (ret < 0) {
-		error("ftw subdir walk of %s failed: %s", dir_name,
-			strerror(errno));
+		error("ftw subdir walk of %s failed: %m", dir_name);
 		exit(1);
 	}
 
@@ -766,11 +763,11 @@ u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
 	/* Minimal chunk size from btrfs_alloc_chunk(). */
 	if (meta_profile & BTRFS_BLOCK_GROUP_DUP) {
 		meta_threshold = SZ_32M;
-		meta_multipler = 2;
+		meta_multiplier = 2;
 	}
 	if (data_profile & BTRFS_BLOCK_GROUP_DUP) {
 		data_threshold = SZ_64M;
-		data_multipler = 2;
+		data_multiplier = 2;
 	}
 
 	/*
@@ -780,10 +777,10 @@ u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
 	 */
 	if (meta_size > meta_threshold)
 		meta_chunk_size = (round_up(meta_size, meta_threshold) -
-				   meta_threshold) * meta_multipler;
+				   meta_threshold) * meta_multiplier;
 	if (ftw_data_size > data_threshold)
 		data_chunk_size = (round_up(ftw_data_size, data_threshold) -
-				   data_threshold) * data_multipler;
+				   data_threshold) * data_multiplier;
 
 	total_size = data_chunk_size + meta_chunk_size + min_dev_size;
 	return total_size;
@@ -862,8 +859,8 @@ static int set_device_size(struct btrfs_fs_info *fs_info,
 	trans = btrfs_start_transaction(chunk_root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		error("failed to start transaction: %d (%s)", ret,
-			strerror(-ret));
+		errno = -ret;
+		error("failed to start transaction: %d (%m)", ret);
 		return ret;
 	}
 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
@@ -891,9 +888,10 @@ static int set_device_size(struct btrfs_fs_info *fs_info,
 	 * super->dev_item
 	 */
 	ret = btrfs_commit_transaction(trans, chunk_root);
-	if (ret < 0)
-		error("failed to commit current transaction: %d (%s)",
-			ret, strerror(-ret));
+	if (ret < 0) {
+		errno = -ret;
+		error("failed to commit current transaction: %d (%m)", ret);
+	}
 	btrfs_release_path(&path);
 	return ret;
 
@@ -928,8 +926,8 @@ int btrfs_mkfs_shrink_fs(struct btrfs_fs_info *fs_info, u64 *new_size_ret,
 
 	ret = get_device_extent_end(fs_info, 1, &new_size);
 	if (ret < 0) {
-		error("failed to get minimal device size: %d (%s)",
-			ret, strerror(-ret));
+		errno = -ret;
+		error("failed to get minimal device size: %d (%m)", ret);
 		return ret;
 	}
 
@@ -946,16 +944,15 @@ int btrfs_mkfs_shrink_fs(struct btrfs_fs_info *fs_info, u64 *new_size_ret,
 	if (shrink_file_size) {
 		ret = fstat64(device->fd, &file_stat);
 		if (ret < 0) {
-			error("failed to stat devid %llu: %s", device->devid,
-				strerror(errno));
+			error("failed to stat devid %llu: %m", device->devid);
 			return ret;
 		}
 		if (!S_ISREG(file_stat.st_mode))
 			return ret;
 		ret = ftruncate64(device->fd, new_size);
 		if (ret < 0) {
-			error("failed to truncate device file of devid %llu: %s",
-				device->devid, strerror(errno));
+			error("failed to truncate device file of devid %llu: %m",
+				device->devid);
 			return ret;
 		}
 	}
diff --git a/print-tree.c b/print-tree.c
index dfa7bb6b..ab774637 100644
--- a/print-tree.c
+++ b/print-tree.c
@@ -357,9 +357,9 @@ static void print_file_extent_item(struct extent_buffer *eb,
 			extent_type, file_extent_type_to_str(extent_type));
 
 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-		printf("\t\tinline extent data size %u ram_bytes %u compression %hhu (%s)\n",
+		printf("\t\tinline extent data size %u ram_bytes %llu compression %hhu (%s)\n",
 				btrfs_file_extent_inline_item_len(eb, item),
-				btrfs_file_extent_inline_len(eb, slot, fi),
+				btrfs_file_extent_ram_bytes(eb, fi),
 				btrfs_file_extent_compression(eb, fi),
 				compress_str);
 		return;
@@ -887,7 +887,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
 })
 
 /*
- * Caller should ensure sizeof(*ret) >= 102: all charactors plus '|' of
+ * Caller should ensure sizeof(*ret) >= 102: all characters plus '|' of
  * BTRFS_INODE_* flags
  */
 static void inode_flags_to_str(u64 flags, char *ret)
@@ -1381,7 +1381,111 @@ void btrfs_print_leaf(struct extent_buffer *eb)
 	}
 }
 
-void btrfs_print_tree(struct extent_buffer *eb, int follow)
+/* Helper function to reach the leftmost tree block at @path->lowest_level */
+static int search_leftmost_tree_block(struct btrfs_fs_info *fs_info,
+				      struct btrfs_path *path, int root_level)
+{
+	int i;
+
+	/*
+	 * Release all nodes except path->nodes[root_level] and clear the
+	 * slots, so a failed read below leaves no stale pointers in @path.
+	 */
+	for (i = 0; i < root_level; i++) {
+		path->slots[i] = 0;
+		if (path->nodes[i])
+			free_extent_buffer(path->nodes[i]);
+		path->nodes[i] = NULL;
+	}
+
+	/* Reach the leftmost tree block by always reading out slot 0 */
+	for (i = root_level; i > path->lowest_level; i--) {
+		struct extent_buffer *eb;
+
+		path->slots[i] = 0;
+		eb = read_node_slot(fs_info, path->nodes[i], 0);
+		if (!extent_buffer_uptodate(eb))
+			return -EIO;
+		path->nodes[i - 1] = eb;
+	}
+
+	return 0;
+}
+
+static void bfs_print_children(struct extent_buffer *root_eb)
+{
+	struct btrfs_fs_info *fs_info = root_eb->fs_info;
+	struct btrfs_path path;
+	int root_level = btrfs_header_level(root_eb);
+	int cur_level;
+	int ret;
+
+	if (root_level < 1)
+		return;
+
+	btrfs_init_path(&path);
+	/* For path */
+	extent_buffer_get(root_eb);
+	path.nodes[root_level] = root_eb;
+
+	for (cur_level = root_level - 1; cur_level >= 0; cur_level--) {
+		path.lowest_level = cur_level;
+
+		/* Use the leftmost tree block as a starting point */
+		ret = search_leftmost_tree_block(fs_info, &path, root_level);
+		if (ret < 0)
+			goto out;
+
+		/* Print all sibling tree blocks */
+		while (1) {
+			btrfs_print_tree(path.nodes[cur_level], 0,
+					 BTRFS_PRINT_TREE_BFS);
+			ret = btrfs_next_sibling_tree_block(fs_info, &path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0) {
+				ret = 0;
+				break;
+			}
+		}
+	}
+out:
+	btrfs_release_path(&path);
+	return;
+}
+
+static void dfs_print_children(struct extent_buffer *root_eb)
+{
+	struct btrfs_fs_info *fs_info = root_eb->fs_info;
+	struct extent_buffer *next;
+	int nr = btrfs_header_nritems(root_eb);
+	int root_eb_level = btrfs_header_level(root_eb);
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		next = read_tree_block(fs_info, btrfs_node_blockptr(root_eb, i),
+				btrfs_node_ptr_generation(root_eb, i));
+		if (!extent_buffer_uptodate(next)) {
+			fprintf(stderr, "failed to read %llu in tree %llu\n",
+				btrfs_node_blockptr(root_eb, i),
+				btrfs_header_owner(root_eb));
+			continue;
+		}
+		if (btrfs_header_level(next) != root_eb_level - 1) {
+			warning(
+"eb corrupted: parent bytenr %llu slot %d level %d child bytenr %llu level has %d expect %d, skipping the slot",
+				btrfs_header_bytenr(root_eb), i, root_eb_level,
+				btrfs_header_bytenr(next),
+				btrfs_header_level(next), root_eb_level - 1);
+			free_extent_buffer(next);
+			continue;
+		}
+		btrfs_print_tree(next, 1, BTRFS_PRINT_TREE_DFS);
+		free_extent_buffer(next);
+	}
+}
+
+void btrfs_print_tree(struct extent_buffer *eb, bool follow, int traverse)
 {
 	u32 i;
 	u32 nr;
@@ -1389,10 +1493,12 @@ void btrfs_print_tree(struct extent_buffer *eb, int follow)
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct btrfs_disk_key disk_key;
 	struct btrfs_key key;
-	struct extent_buffer *next;
 
 	if (!eb)
 		return;
+	if (traverse != BTRFS_PRINT_TREE_DFS && traverse != BTRFS_PRINT_TREE_BFS)
+		traverse = BTRFS_PRINT_TREE_DEFAULT;
+
 	nr = btrfs_header_nritems(eb);
 	if (btrfs_is_leaf(eb)) {
 		btrfs_print_leaf(eb);
@@ -1420,9 +1526,8 @@ void btrfs_print_tree(struct extent_buffer *eb, int follow)
 		btrfs_disk_key_to_cpu(&key, &disk_key);
 		printf("\t");
 		btrfs_print_key(&disk_key);
-		printf(" block %llu (%llu) gen %llu\n",
+		printf(" block %llu gen %llu\n",
 		       (unsigned long long)blocknr,
-		       (unsigned long long)blocknr / eb->len,
 		       (unsigned long long)btrfs_node_ptr_generation(eb, i));
 		fflush(stdout);
 	}
@@ -1432,30 +1537,9 @@ void btrfs_print_tree(struct extent_buffer *eb, int follow)
 	if (follow && !fs_info)
 		return;
 
-	for (i = 0; i < nr; i++) {
-		next = read_tree_block(fs_info,
-				btrfs_node_blockptr(eb, i),
-				btrfs_node_ptr_generation(eb, i));
-		if (!extent_buffer_uptodate(next)) {
-			fprintf(stderr, "failed to read %llu in tree %llu\n",
-				(unsigned long long)btrfs_node_blockptr(eb, i),
-				(unsigned long long)btrfs_header_owner(eb));
-			continue;
-		}
-		if (btrfs_header_level(next) != btrfs_header_level(eb) - 1) {
-			warning(
-"eb corrupted: parent bytenr %llu slot %d level %d child bytenr %llu level has %d expect %d, skipping the slot",
-				btrfs_header_bytenr(eb), i,
-				btrfs_header_level(eb),
-				btrfs_header_bytenr(next),
-				btrfs_header_level(next),
-				btrfs_header_level(eb) - 1);
-			free_extent_buffer(next);
-			continue;
-		}
-		btrfs_print_tree(next, 1);
-		free_extent_buffer(next);
-	}
-
+	if (traverse == BTRFS_PRINT_TREE_DFS)
+		dfs_print_children(eb);
+	else
+		bfs_print_children(eb);
 	return;
 }
diff --git a/print-tree.h b/print-tree.h
index 62667d7f..d4721b60 100644
--- a/print-tree.h
+++ b/print-tree.h
@@ -20,7 +20,20 @@
 #define __PRINT_TREE_H__
 
 void btrfs_print_leaf(struct extent_buffer *l);
-void btrfs_print_tree(struct extent_buffer *t, int follow);
+
+/*
+ * Print a tree block (applies to both node and leaf).
+ *
+ * @eb:		Tree block
+ * @follow:	Set true to print all its children.
+ * @traverse:	The traversal order, either BTRFS_PRINT_TREE_DFS or
+ *		BTRFS_PRINT_TREE_BFS. Falls back to DFS for an unknown order.
+ */
+#define BTRFS_PRINT_TREE_DFS		0
+#define BTRFS_PRINT_TREE_BFS		1
+#define BTRFS_PRINT_TREE_DEFAULT	BTRFS_PRINT_TREE_DFS
+void btrfs_print_tree(struct extent_buffer *eb, bool follow, int traverse);
+
 void btrfs_print_key(struct btrfs_disk_key *disk_key);
 void print_chunk_item(struct extent_buffer *eb, struct btrfs_chunk *chunk);
 void print_extent_item(struct extent_buffer *eb, int slot, int metadata);
diff --git a/props.c b/props.c
index e4edba06..efa11180 100644
--- a/props.c
+++ b/props.c
@@ -110,7 +110,7 @@ static int prop_compression(enum prop_object_type type,
 	fd = open_file_or_dir3(object, &dirstream, open_flags);
 	if (fd == -1) {
 		ret = -errno;
-		error("failed to open %s: %s", object, strerror(-ret));
+		error("failed to open %s: %m", object);
 		goto out;
 	}
 
@@ -133,8 +133,8 @@ static int prop_compression(enum prop_object_type type,
 	if (sret < 0) {
 		ret = -errno;
 		if (ret != -ENOATTR)
-			error("failed to %s compression for %s: %s",
-			      value ? "set" : "get", object, strerror(-ret));
+			error("failed to %s compression for %s: %m",
+			      value ? "set" : "get", object);
 		else
 			ret = 0;
 		goto out;
@@ -150,8 +150,7 @@ static int prop_compression(enum prop_object_type type,
 		sret = fgetxattr(fd, xattr_name, buf, len);
 		if (sret < 0) {
 			ret = -errno;
-			error("failed to get compression for %s: %s",
-			      object, strerror(-ret));
+			error("failed to get compression for %s: %m", object);
 			goto out;
 		}
 		fprintf(stdout, "compression=%.*s\n", (int)len, buf);
diff --git a/qgroup-verify.c b/qgroup-verify.c
index e2332be2..3f2ce14a 100644
--- a/qgroup-verify.c
+++ b/qgroup-verify.c
@@ -34,6 +34,13 @@
 
 #include "qgroup-verify.h"
 
+static u64 *qgroup_item_count;
+
+void qgroup_set_item_count_ptr(u64 *item_count_ptr)
+{
+	qgroup_item_count = item_count_ptr;
+}
+
 /*#define QGROUP_VERIFY_DEBUG*/
 static unsigned long tot_extents_scanned = 0;
 
@@ -77,6 +84,7 @@ static struct counts_tree {
 	unsigned int		num_groups;
 	unsigned int		rescan_running:1;
 	unsigned int		qgroup_inconsist:1;
+	u64			scan_progress;
 } counts = { .root = RB_ROOT };
 
 static LIST_HEAD(bad_qgroups);
@@ -735,6 +743,7 @@ static int travel_tree(struct btrfs_fs_info *info, struct btrfs_root *root,
 	 */
 	nr = btrfs_header_nritems(eb);
 	for (i = 0; i < nr; i++) {
+		(*qgroup_item_count)++;
 		new_bytenr = btrfs_node_blockptr(eb, i);
 		new_num_bytes = info->nodesize;
 
@@ -914,6 +923,7 @@ static void read_qgroup_status(struct extent_buffer *eb, int slot,
 	counts->qgroup_inconsist = !!(flags &
 			BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT);
 	counts->rescan_running = !!(flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN);
+	counts->scan_progress = btrfs_qgroup_status_rescan(eb, status_item);
 }
 
 static int load_quota_info(struct btrfs_fs_info *info)
@@ -1311,6 +1321,7 @@ int report_qgroups(int all)
 	struct rb_node *node;
 	struct qgroup_count *c;
 	bool found_err = false;
+	bool skip_err = false;
 
 	if (!repair && counts.rescan_running) {
 		if (all) {
@@ -1322,6 +1333,15 @@ int report_qgroups(int all)
 			return 0;
 		}
 	}
+	/*
+	 * Inconsistent, no rescan running, progress still 0: never initialized.
+	 */
+	if (counts.qgroup_inconsist && !counts.rescan_running &&
+	    counts.scan_progress == 0) {
+		printf(
+"Rescan hasn't been initialized, a difference in qgroup accounting is expected\n");
+		skip_err = true;
+	}
 	if (counts.qgroup_inconsist && !counts.rescan_running)
 		fprintf(stderr, "Qgroup are marked as inconsistent.\n");
 	node = rb_first(&counts.root);
@@ -1335,7 +1355,7 @@ int report_qgroups(int all)
 
 		node = rb_next(node);
 	}
-	if (found_err)
+	if (found_err && !skip_err)
 		return -EUCLEAN;
 	return 0;
 }
diff --git a/qgroup-verify.h b/qgroup-verify.h
index 14d36bbf..20e93708 100644
--- a/qgroup-verify.h
+++ b/qgroup-verify.h
@@ -30,4 +30,6 @@ int print_extent_state(struct btrfs_fs_info *info, u64 subvol);
 
 void free_qgroup_counts(void);
 
+void qgroup_set_item_count_ptr(u64 *item_count_ptr);
+
 #endif
diff --git a/qgroup.c b/qgroup.c
index 267cd7f1..a035579e 100644
--- a/qgroup.c
+++ b/qgroup.c
@@ -267,7 +267,7 @@ static void print_single_qgroup_table(struct btrfs_qgroup *qgroup)
 			continue;
 		print_qgroup_column(qgroup, i);
 
-		if (i != BTRFS_QGROUP_CHILD)
+		if (i != BTRFS_QGROUP_ALL - 1)
 			printf(" ");
 	}
 	printf("\n");
@@ -610,8 +610,9 @@ static struct btrfs_qgroup *get_or_add_qgroup(
 
 	ret = qgroup_tree_insert(qgroup_lookup, bq);
 	if (ret) {
-		error("failed to insert %llu into tree: %s",
-		       (unsigned long long)bq->qgroupid, strerror(-ret));
+		errno = -ret;
+		error("failed to insert %llu into tree: %m",
+		       (unsigned long long)bq->qgroupid);
 		free(bq);
 		return ERR_PTR(ret);
 	}
@@ -1072,8 +1073,7 @@ static int __qgroups_search(int fd, struct qgroup_lookup *qgroup_lookup)
 				error("can't list qgroups: quotas not enabled");
 				ret = -ENOTTY;
 			} else {
-				error("can't list qgroups: %s",
-				       strerror(errno));
+				error("can't list qgroups: %m");
 				ret = -errno;
 			}
 
diff --git a/quick-test.c b/quick-test.c
index 5da47c32..1712adc4 100644
--- a/quick-test.c
+++ b/quick-test.c
@@ -94,7 +94,7 @@ int main(int ac, char **av) {
 			fprintf(stderr, "search %d:%d\n", num, i);
 		ret = btrfs_search_slot(NULL, root, &ins, &path, 0, 0);
 		if (ret) {
-			btrfs_print_tree(root, root->node, 1);
+			btrfs_print_tree(root->node, 1, BTRFS_PRINT_TREE_DEFAULT);
 			printf("unable to find %d\n", num);
 			exit(1);
 		}
@@ -171,7 +171,7 @@ int main(int ac, char **av) {
 			fprintf(stderr, "search %d:%d\n", num, i);
 		ret = btrfs_search_slot(NULL, root, &ins, &path, 0, 0);
 		if (ret) {
-			btrfs_print_tree(root, root->node, 1);
+			btrfs_print_tree(root->node, 1, BTRFS_PRINT_TREE_DEFAULT);
 			printf("unable to find %d\n", num);
 			exit(1);
 		}
@@ -220,7 +220,7 @@ int main(int ac, char **av) {
 	btrfs_commit_transaction(trans, root);
 	printf("tree size is now %d\n", tree_size);
 	printf("root %p commit root %p\n", root->node, root->commit_root);
-	btrfs_print_tree(root, root->node, 1);
+	btrfs_print_tree(root->node, 1, BTRFS_PRINT_TREE_DEFAULT);
 	close_ctree(root);
 	return 0;
 }
diff --git a/send-dump.c b/send-dump.c
index 1591e0cc..11236444 100644
--- a/send-dump.c
+++ b/send-dump.c
@@ -47,7 +47,7 @@
 })
 
 /*
- * Print path and escape chaacters (in a C way) that could break the line.
+ * Print path and escape characters (in a C way) that could break the line.
  * Returns the length of the escaped characters. Unprintable characters are
  * escaped as octals.
  */
diff --git a/send-stream.c b/send-stream.c
index 78f2571a..cb942b86 100644
--- a/send-stream.c
+++ b/send-stream.c
@@ -35,7 +35,7 @@ struct btrfs_send_stream {
 
 	/*
 	 * end of last successful read, equivalent to start of current
-	 * malformated part of block
+	 * malformed part of block
 	 */
 	size_t stream_pos;
 
@@ -61,8 +61,7 @@ static int read_buf(struct btrfs_send_stream *sctx, char *buf, size_t len)
 		rbytes = read(sctx->fd, buf + pos, len - pos);
 		if (rbytes < 0) {
 			ret = -errno;
-			error("read from stream failed: %s",
-					strerror(-ret));
+			error("read from stream failed: %m");
 			goto out;
 		}
 		if (rbytes == 0) {
diff --git a/send-utils.c b/send-utils.c
index 3ecbdea6..31ac9a7a 100644
--- a/send-utils.c
+++ b/send-utils.c
@@ -40,8 +40,7 @@ static int btrfs_get_root_id_by_sub_path(int mnt_fd, const char *sub_path,
 	subvol_fd = openat(mnt_fd, sub_path, O_RDONLY);
 	if (subvol_fd < 0) {
 		ret = -errno;
-		fprintf(stderr, "ERROR: open %s failed. %s\n", sub_path,
-			strerror(-ret));
+		fprintf(stderr, "ERROR: open %s failed: %m\n", sub_path);
 		return ret;
 	}
 
diff --git a/super-recover.c b/super-recover.c
index 880fd771..86b3df98 100644
--- a/super-recover.c
+++ b/super-recover.c
@@ -292,9 +292,6 @@ int btrfs_recover_superblocks(const char *dname,
 no_recover:
 	recover_err_str(ret);
 	free_recover_superblock(&recover);
-	/* check if we have freed fs_devices in close_ctree() */
-	if (!root)
-		btrfs_close_devices(recover.fs_devices);
 	return ret;
 }
 
diff --git a/task-utils.c b/task-utils.c
index 12b00027..e4dcd36b 100644
--- a/task-utils.c
+++ b/task-utils.c
@@ -19,6 +19,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <time.h>
 
 #include "task-utils.h"
 
@@ -37,7 +38,7 @@ struct task_info *task_init(void *(*threadfn)(void *), int (*postfn)(void *),
 	return info;
 }
 
-int task_start(struct task_info *info)
+int task_start(struct task_info *info, time_t *start_time, u64 *item_count)
 {
 	int ret;
 
@@ -47,6 +48,11 @@ int task_start(struct task_info *info)
 	if (!info->threadfn)
 		return -1;
 
+	if (start_time)
+		*start_time = time(NULL);
+	if (item_count)
+		*item_count = 0;
+
 	ret = pthread_create(&info->id, NULL, info->threadfn,
 			     info->private_data);
 
@@ -61,7 +67,7 @@ void task_stop(struct task_info *info)
 	if (!info)
 		return;
 
-	if (info->id > 0) {
+	if (info->id) {
 		pthread_cancel(info->id);
 		pthread_join(info->id, NULL);
 		info->id = 0;
@@ -102,7 +108,7 @@ int task_period_start(struct task_info *info, unsigned int period_ms)
 	info->periodic.wakeups_missed = 0;
 
 	sec = period_ms / 1000;
-	ns = (period_ms - (sec * 1000)) * 1000;
+	ns = (period_ms - (sec * 1000)) * 1000 * 1000;
 	itval.it_interval.tv_sec = sec;
 	itval.it_interval.tv_nsec = ns;
 	itval.it_value.tv_sec = sec;
diff --git a/task-utils.h b/task-utils.h
index 91d5a646..bbb0f1fd 100644
--- a/task-utils.h
+++ b/task-utils.h
@@ -17,6 +17,7 @@
 #ifndef __TASK_UTILS_H__
 #define __TASK_UTILS_H__
 
+#include "kerncompat.h"
 #include <pthread.h>
 
 struct periodic_info {
@@ -35,7 +36,7 @@ struct task_info {
 /* task life cycle */
 struct task_info *task_init(void *(*threadfn)(void *), int (*postfn)(void *),
 			    void *thread_private);
-int task_start(struct task_info *info);
+int task_start(struct task_info *info, time_t *start_time, u64 *item_count);
 void task_stop(struct task_info *info);
 void task_deinit(struct task_info *info);
 
diff --git a/tests/README.md b/tests/README.md
index d14d727c..94c5533d 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -170,7 +170,7 @@ how to do mkfs, mount, unmount, check, loop device management etc.
 and join by dashes `-`. This will become the directory name, eg. `012-subvolume-sync-must-wait`.
 
 3. Write a short description of the bug and how it's tested to the comment at the
-begining of `test.sh`. You don't need to add the file to git yet. Don't forget
+beginning of `test.sh`. You don't need to add the file to git yet. Don't forget
 to make the file executable, otherwise it's not going to be executed by the
 infrastructure.
 
@@ -194,7 +194,7 @@ $ TEST=012\* ./misc-tests.sh           # from tests/
 
 Most tests should be able to create the test images from scratch, using regular
 commands and file operation. The commands also document the testcase and use
-the teste code and kernel of the environment.
+the test code and kernel of the environment.
 
 In other cases, a pre-created image may be the right way if the above does not
 work (eg. comparing output, requesting an exact layout or some intermediate
@@ -244,7 +244,7 @@ There are some utilities that are not distributed but are necessary for the
 tests. They are in the top level directory of the testsuite and their path
 cannot be set.
 
-The tests assume write acesss to their directories.
+The tests assume write access to their directories.
 
 
 # Coding style, best practices
diff --git a/tests/cli-tests/007-check-force/test.sh b/tests/cli-tests/007-check-force/test.sh
index 597f2d60..deee96c8 100755
--- a/tests/cli-tests/007-check-force/test.sh
+++ b/tests/cli-tests/007-check-force/test.sh
@@ -10,7 +10,7 @@ check_prereq btrfs
 setup_root_helper
 
 # we need to use a real block device, because the check opens the device in
-# exclusive mode, that unfortunatelly behaves differently for direct file
+# exclusive mode, that unfortunately behaves differently for direct file
 # access and for the real /dev/loop0 device
 setup_loopdevs 1
 prepare_loopdevs
diff --git a/tests/common b/tests/common
index 7e4e09df..1575ae38 100644
--- a/tests/common
+++ b/tests/common
@@ -171,7 +171,7 @@ run_check_stdout()
 }
 
 # same as run_check but does not fail the test if it's handled gracefully by
-# the tool, unexpected failure like segfault or abor will exit forcibly
+# the tool, unexpected failure like segfault or abort will exit forcibly
 # output is logged
 run_mayfail()
 {
diff --git a/tests/fsck-tests/020-extent-ref-cases/keyed_data_ref_with_reloc_leaf.img b/tests/fsck-tests/020-extent-ref-cases/keyed_data_ref_with_reloc_leaf.img
new file mode 100644
index 00000000..80345cf9
--- /dev/null
+++ b/tests/fsck-tests/020-extent-ref-cases/keyed_data_ref_with_reloc_leaf.img
diff --git a/tests/fsck-tests/025-file-extents/test.sh b/tests/fsck-tests/025-file-extents/test.sh
index 95707596..db066ee1 100755
--- a/tests/fsck-tests/025-file-extents/test.sh
+++ b/tests/fsck-tests/025-file-extents/test.sh
@@ -15,7 +15,7 @@ prepare_test_dev 128M
 
 # Do some write into a large prealloc range
 # Lowmem mode can report missing csum due to wrong csum range
-test_paritical_write_into_prealloc()
+test_partial_write_into_prealloc()
 {
 	run_check $SUDO_HELPER "$TOP/mkfs.btrfs" -f "$TEST_DEV"
 	run_check_mount_test_dev
@@ -55,6 +55,6 @@ test_hole_extent_with_no_holes_flag()
 	run_check "$TOP/btrfs" check "$TEST_DEV"
 }
 
-test_paritical_write_into_prealloc
+test_partial_write_into_prealloc
 test_compressed_inline_extent
 test_hole_extent_with_no_holes_flag
diff --git a/tests/fsck-tests/031-metadatadump-check-data-csum/test.sh b/tests/fsck-tests/031-metadatadump-check-data-csum/test.sh
index e9b2d5c6..7f3872e6 100755
--- a/tests/fsck-tests/031-metadatadump-check-data-csum/test.sh
+++ b/tests/fsck-tests/031-metadatadump-check-data-csum/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# To check if "btrfs check" can detect metadata dump (restored by btrfs-iamge)
+# To check if "btrfs check" can detect metadata dump (restored by btrfs-image)
 # and ignore --check-data-csum option
 
 source "$TEST_TOP/common"
@@ -21,7 +21,7 @@ chmod a+w restored_image
 run_check $SUDO_HELPER "$TOP/btrfs-image" "$TEST_DEV" "restored_image"
 
 # use prepare_test_dev() to wipe all existing data on $TEST_DEV
-# so there is no way that restored image could have mathcing data csum
+# so there is no way that restored image could have matching data csum
 prepare_test_dev
 
 run_check $SUDO_HELPER "$TOP/btrfs-image" -r "restored_image" "$TEST_DEV"
diff --git a/tests/fsck-tests/035-inline-bad-ram-bytes/offset_by_one.img b/tests/fsck-tests/035-inline-bad-ram-bytes/offset_by_one.img
new file mode 100644
index 00000000..2f58208e
--- /dev/null
+++ b/tests/fsck-tests/035-inline-bad-ram-bytes/offset_by_one.img
diff --git a/tests/fsck-tests/035-inline-bad-ram-bytes/test.sh b/tests/fsck-tests/035-inline-bad-ram-bytes/test.sh
new file mode 100755
index 00000000..6f6e2a5e
--- /dev/null
+++ b/tests/fsck-tests/035-inline-bad-ram-bytes/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Around 2014, the btrfs kernel code had a regression that created inline
+# extents with ram_bytes off by one.
+# This old regression could be caught by tree-check code.
+# This test case will check if btrfs check could detect and repair it.
+
+source "$TEST_TOP/common"
+
+check_prereq btrfs
+
+check_all_images
diff --git a/tests/fsck-tests/036-rescan-not-kicked-in/no_rescan_kicked_in.img b/tests/fsck-tests/036-rescan-not-kicked-in/no_rescan_kicked_in.img
new file mode 100644
index 00000000..b855a72f
--- /dev/null
+++ b/tests/fsck-tests/036-rescan-not-kicked-in/no_rescan_kicked_in.img
diff --git a/tests/fsck-tests/036-rescan-not-kicked-in/test.sh b/tests/fsck-tests/036-rescan-not-kicked-in/test.sh
new file mode 100755
index 00000000..eafd599d
--- /dev/null
+++ b/tests/fsck-tests/036-rescan-not-kicked-in/test.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# In certain power loss cases, the btrfs quota tree can be initialized without
+# the rescan being kicked in. This can also be reproduced by fstests/btrfs/166,
+# but with low probability.
+#
+# This test case verifies a special case when 'btrfs check' does not report
+# qgroup accounting difference as an error, thus no false alert for btrfs/166.
+
+source "$TEST_TOP/common"
+
+check_prereq btrfs
+
+check_image() {
+	run_check "$TOP/btrfs" check "$1"
+}
+
+check_all_images
diff --git a/tests/fsck-tests/037-freespacetree-repair/test.sh b/tests/fsck-tests/037-freespacetree-repair/test.sh
new file mode 100755
index 00000000..261d7ccb
--- /dev/null
+++ b/tests/fsck-tests/037-freespacetree-repair/test.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# Corrupt a filesystem that is using freespace tree and then ensure that
+# btrfs check is able to repair it. This tests correct detection/repair of
+# both a FREE_SPACE_EXTENT based FST and a FREE_SPACE_BITMAP based FST.
+
+source "$TEST_TOP/common"
+
+setup_root_helper
+prepare_test_dev 256M
+
+check_prereq btrfs
+check_prereq mkfs.btrfs
+check_global_prereq grep
+check_global_prereq tail
+check_global_prereq head
+check_global_prereq cut
+
+# wrapper for btrfs-corrupt-block
+# $1: Type of item we want to corrupt - extent or bitmap
+corrupt_fst_item()
+{
+	local type
+	local objectid
+	local offset
+	type="$1"
+
+	if [[ $type == "bitmap" ]]; then
+		type=200
+		objectid=$("$TOP/btrfs" inspect-internal dump-tree -t 10 "$TEST_DEV" | \
+			grep -o "[[:digit:]]* FREE_SPACE_BITMAP [[:digit:]]*" | \
+			cut -d' ' -f1 | tail -2 | head -1)
+		offset=$("$TOP/btrfs" inspect-internal dump-tree -t 10 "$TEST_DEV" | \
+			grep -o "[[:digit:]]* FREE_SPACE_BITMAP [[:digit:]]*" | \
+			cut -d' ' -f3 | tail -2 | head -1)
+		echo "Corrupting $objectid,FREE_SPACE_BITMAP,$offset" >> "$RESULTS"
+	elif [[ $type == "extent" ]]; then
+		type=199
+		objectid=$("$TOP/btrfs" inspect-internal dump-tree -t 10 "$TEST_DEV" | \
+			grep -o "[[:digit:]]* FREE_SPACE_EXTENT [[:digit:]]*" | \
+			cut -d' ' -f1 | tail -2 | head -1)
+		offset=$("$TOP/btrfs" inspect-internal dump-tree -t 10 "$TEST_DEV" | \
+			grep -o "[[:digit:]]* FREE_SPACE_EXTENT [[:digit:]]*" | \
+			cut -d' ' -f3 | tail -2 | head -1)
+		echo "Corrupting $objectid,FREE_SPACE_EXTENT,$offset" >> "$RESULTS"
+	else
+		_fail "Unknown item type for corruption"
+	fi
+
+	run_check "$TOP/btrfs-corrupt-block" -r 10 -K "$objectid,$type,$offset" \
+		-f offset "$TEST_DEV"
+}
+
+if ! [ -f "/sys/fs/btrfs/features/free_space_tree" ]; then
+	_not_run "kernel does not support free-space-tree feature"
+	exit
+fi
+
+run_check "$TOP/mkfs.btrfs" -n 4k -f "$TEST_DEV"
+run_check_mount_test_dev -oclear_cache,space_cache=v2
+
+# create files which will populate the FST
+for i in {1..3000}; do
+	run_check $SUDO_HELPER fallocate -l 4k "$TEST_MNT/file.$i"
+done
+
+run_check_umount_test_dev
+
+# now corrupt one of the bitmap items
+corrupt_fst_item "bitmap"
+check_image "$TEST_DEV"
+
+# delete the small files (unquoted glob so that it expands) and allocate one
+# large file so that we now have at least one free_space_extent object
+run_check_mount_test_dev
+run_check $SUDO_HELPER rm -rf "$TEST_MNT"/file.*
+run_check $SUDO_HELPER fallocate -l 50m "$TEST_MNT/file"
+run_check_umount_test_dev
+
+# now corrupt an extent
+corrupt_fst_item "extent"
+check_image "$TEST_DEV"
diff --git a/tests/fuzz-tests/images/bko-199833-reloc-recovery-crash.raw.xz b/tests/fuzz-tests/images/bko-199833-reloc-recovery-crash.raw.xz
new file mode 100644
index 00000000..7d291041
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-199833-reloc-recovery-crash.raw.xz
diff --git a/tests/fuzz-tests/images/bko-199833-reloc-recovery-crash.txt b/tests/fuzz-tests/images/bko-199833-reloc-recovery-crash.txt
new file mode 100644
index 00000000..a54992ee
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-199833-reloc-recovery-crash.txt
@@ -0,0 +1,113 @@
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=199833
+Wen Xu 2018-05-26 02:27:26 UTC
+
+The (compressed) crafted image which causes crash
+
+- Overview
+Invalid pointer dereference in __del_reloc_root() when mounting a crafted btrfs image
+
+- Reproduce 
+# mkdir mnt
+# mount -t btrfs 82.img mnt
+(Reproduced on Linux 4.17-rc5)
+
+- Comment
+https://elixir.bootlin.com/linux/v4.17-rc5/source/fs/btrfs/relocation.c#L1324
+
+static void __del_reloc_root(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct rb_node *rb_node;
+	struct mapping_node *node = NULL;
+	struct reloc_control *rc = fs_info->reloc_ctl;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+
+rc can be NULL, which means that reloc_ctl may be not initialized
+
+- Kernel message
+[  208.623313] BUG: unable to handle kernel NULL pointer dereference at 0000000000000570
+[  208.624890] PGD 80000001e9495067 P4D 80000001e9495067 PUD 1f0d81067 PMD 0
+[  208.626285] Oops: 0002 [#1] SMP KASAN PTI
+[  208.632054] BTRFS info (device loop0): delayed_refs has NO entry
+[  208.636502] CPU: 1 PID: 1330 Comm: mount Tainted: G    B   W         4.17.0-rc5+ #6
+[  208.639306] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  208.641177] RIP: 0010:_raw_spin_lock+0x1e/0x40
+[  208.642200] RSP: 0018:ffff8801df437338 EFLAGS: 00010246
+[  208.643240] RAX: 0000000000000000 RBX: 0000000000000570 RCX: 0000000000000000
+[  208.644643] RDX: 0000000000000001 RSI: 0000000000000297 RDI: 0000000000000297
+[  208.646058] RBP: ffff8801df437340 R08: ffffed003ee23ebb R09: ffffed003ee23ebb
+[  208.647464] R10: 0000000000000001 R11: ffffed003ee23eba R12: ffff8801f2e8c400
+[  208.648870] R13: 0000000000000000 R14: ffff8801e3a28000 R15: 0000000000000568
+[  208.650286] FS:  00007fd41a0a7840(0000) GS:ffff8801f7100000(0000) knlGS:0000000000000000
+[  208.651872] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  208.653006] CR2: 0000000000000570 CR3: 00000001e16e6000 CR4: 00000000000006e0
+[  208.654449] Call Trace:
+[  208.654961]  __del_reloc_root+0x5a/0x190
+[  208.655755]  free_reloc_roots+0x40/0xb0
+[  208.656531]  btrfs_recover_relocation+0x2fa/0x750
+[  208.657487]  ? btrfs_cleanup_fs_roots+0x351/0x3b0
+[  208.658428]  ? btrfs_relocate_block_group+0x370/0x370
+[  208.659433]  ? qgroup_reserve+0x650/0x650
+[  208.660237]  ? migrate_swap_stop+0x2e0/0x2e0
+[  208.661090]  ? btrfs_check_rw_degradable+0xb0/0x240
+[  208.662077]  open_ctree+0x37c4/0x3ce9
+[  208.662822]  ? close_ctree+0x4a0/0x4a0
+[  208.663580]  ? bdi_register_va+0x44/0x50
+[  208.664371]  ? super_setup_bdi_name+0x11b/0x1a0
+[  208.665302]  ? kill_block_super+0x80/0x80
+[  208.666111]  ? snprintf+0x96/0xd0
+[  208.666787]  btrfs_mount_root+0xae6/0xc60
+[  208.667596]  ? btrfs_mount_root+0xae6/0xc60
+[  208.668449]  ? pcpu_block_update_hint_alloc+0x1f5/0x2a0
+[  208.669505]  ? btrfs_decode_error+0x40/0x40
+[  208.670345]  ? find_next_bit+0x57/0x90
+[  208.671101]  ? cpumask_next+0x1a/0x20
+[  208.671837]  ? pcpu_alloc+0x449/0x8c0
+[  208.672577]  ? pcpu_free_area+0x410/0x410
+[  208.673393]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  208.674267]  ? memcpy+0x45/0x50
+[  208.674905]  mount_fs+0x60/0x1a0
+[  208.675562]  ? btrfs_decode_error+0x40/0x40
+[  208.676399]  ? mount_fs+0x60/0x1a0
+[  208.677088]  ? alloc_vfsmnt+0x309/0x360
+[  208.677880]  vfs_kern_mount+0x6b/0x1a0
+[  208.678634]  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  208.679671]  btrfs_mount+0x209/0xb71
+[  208.680390]  ? pcpu_block_update_hint_alloc+0x1f5/0x2a0
+[  208.681442]  ? btrfs_remount+0x8e0/0x8e0
+[  208.682247]  ? find_next_zero_bit+0x2c/0xa0
+[  208.683119]  ? find_next_bit+0x57/0x90
+[  208.683876]  ? cpumask_next+0x1a/0x20
+[  208.684619]  ? pcpu_alloc+0x449/0x8c0
+[  208.685371]  ? pcpu_free_area+0x410/0x410
+[  208.686177]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  208.687046]  ? memcpy+0x45/0x50
+[  208.687685]  mount_fs+0x60/0x1a0
+[  208.688337]  ? btrfs_remount+0x8e0/0x8e0
+[  208.689121]  ? mount_fs+0x60/0x1a0
+[  208.689828]  ? alloc_vfsmnt+0x309/0x360
+[  208.690599]  vfs_kern_mount+0x6b/0x1a0
+[  208.691352]  do_mount+0x34a/0x18a0
+[  208.692039]  ? lockref_put_or_lock+0xcf/0x160
+[  208.692909]  ? copy_mount_string+0x20/0x20
+[  208.693742]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  208.694615]  ? kasan_check_write+0x14/0x20
+[  208.695437]  ? _copy_from_user+0x6a/0x90
+[  208.696226]  ? memdup_user+0x42/0x60
+[  208.696948]  ksys_mount+0x83/0xd0
+[  208.697631]  __x64_sys_mount+0x67/0x80
+[  208.698385]  do_syscall_64+0x78/0x170
+[  208.699122]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  208.700124] RIP: 0033:0x7fd419987b9a
+[  208.700842] RSP: 002b:00007fff30668b88 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
+[  208.702345] RAX: ffffffffffffffda RBX: 0000000001829030 RCX: 00007fd419987b9a
+[  208.703742] RDX: 0000000001829210 RSI: 000000000182af30 RDI: 0000000001831ec0
+[  208.705134] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000014
+[  208.706533] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001831ec0
+[  208.707931] R13: 0000000001829210 R14: 0000000000000000 R15: 0000000000000003
+[  208.713050] RIP: _raw_spin_lock+0x1e/0x40 RSP: ffff8801df437338
+[  208.714238] CR2: 0000000000000570
+[  208.714985] ---[ end trace be56bf4112c4e5e3 ]---
+
+Found by Wen Xu and Po-Ning Tseng from SSLab, Gatech.
diff --git a/tests/fuzz-tests/images/bko-199839.raw.txt b/tests/fuzz-tests/images/bko-199839.raw.txt
new file mode 100644
index 00000000..3e4b273d
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-199839.raw.txt
@@ -0,0 +1,198 @@
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=199839
+Wen Xu 2018-05-26 04:18:45 UTC
+
+Created attachment 276197 [details]
+The (compressed) crafted image which causes crash
+
+- Overview
+use-after-free in try_merge_free_space() when mounting a crafted btrfs image
+
+- Reproduce (4.17 KASAN build)
+# mkdir mnt
+# mount -t btrfs 8.img mnt
+
+- Kernel Message
+[  449.751861] BTRFS: device fsid 12b338de-a2e9-40fa-a4b0-90e53b7c5773 devid 1 transid 8 /dev/loop0
+[  449.757216] BTRFS info (device loop0): disk space caching is enabled
+[  449.757221] BTRFS info (device loop0): has skinny extents
+[  449.785096] BTRFS error (device loop0): bad tree block start 0 29396992
+[  449.788629] BTRFS info (device loop0): read error corrected: ino 0 off 29396992 (dev /dev/loop0 sector 73800)
+[  449.792965] BTRFS error (device loop0): bad fsid on block 29409280
+[  449.795193] BTRFS info (device loop0): read error corrected: ino 0 off 29409280 (dev /dev/loop0 sector 73824)
+[  449.795401] BTRFS info (device loop0): creating UUID tree
+[  449.883426] ==================================================================
+[  449.886228] BUG: KASAN: use-after-free in try_merge_free_space+0xc0/0x2e0
+[  449.888344] Read of size 8 at addr ffff8801ed10f030 by task mount/1291
+
+[  449.889947] CPU: 1 PID: 1291 Comm: mount Not tainted 4.17.0-rc5+ #6
+[  449.889951] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  449.889953] Call Trace:
+[  449.889976]  dump_stack+0x7b/0xb5
+[  449.890274]  print_address_description+0x70/0x290
+[  449.890286]  kasan_report+0x291/0x390
+[  449.890296]  ? try_merge_free_space+0xc0/0x2e0
+[  449.890303]  __asan_load8+0x54/0x90
+[  449.890310]  try_merge_free_space+0xc0/0x2e0
+[  449.890318]  __btrfs_add_free_space+0x96/0x5e0
+[  449.890324]  ? kasan_check_write+0x14/0x20
+[  449.890331]  ? btrfs_get_block_group+0x1e/0x30
+[  449.890337]  ? block_group_cache_tree_search+0xef/0x150
+[  449.890343]  unpin_extent_range+0x376/0x670
+[  449.890350]  ? __exclude_logged_extent+0x160/0x160
+[  449.890358]  btrfs_finish_extent_commit+0x15b/0x490
+[  449.890371]  ? __find_get_block+0x106/0x400
+[  449.890378]  ? btrfs_prepare_extent_commit+0x1a0/0x1a0
+[  449.890384]  ? write_all_supers+0x714/0x1420
+[  449.890394]  btrfs_commit_transaction+0xaf4/0xfa0
+[  449.890402]  ? btrfs_apply_pending_changes+0xa0/0xa0
+[  449.890407]  ? start_transaction+0x153/0x640
+[  449.890414]  btrfs_create_uuid_tree+0x6a/0x170
+[  449.890419]  open_ctree+0x3b26/0x3ce9
+[  449.890429]  ? close_ctree+0x4a0/0x4a0
+[  449.890441]  ? bdi_register_va+0x44/0x50
+[  449.890451]  ? super_setup_bdi_name+0x11b/0x1a0
+[  449.890457]  ? kill_block_super+0x80/0x80
+[  449.890468]  ? snprintf+0x96/0xd0
+[  449.890479]  btrfs_mount_root+0xae6/0xc60
+[  449.890485]  ? btrfs_mount_root+0xae6/0xc60
+[  449.890491]  ? pcpu_block_update_hint_alloc+0x1f5/0x2a0
+[  449.890498]  ? btrfs_decode_error+0x40/0x40
+[  449.890510]  ? find_next_bit+0x57/0x90
+[  449.890517]  ? cpumask_next+0x1a/0x20
+[  449.890522]  ? pcpu_alloc+0x449/0x8c0
+[  449.890528]  ? pcpu_free_area+0x410/0x410
+[  449.890534]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  449.890540]  ? memcpy+0x45/0x50
+[  449.890547]  mount_fs+0x60/0x1a0
+[  449.890553]  ? btrfs_decode_error+0x40/0x40
+[  449.890558]  ? mount_fs+0x60/0x1a0
+[  449.890565]  ? alloc_vfsmnt+0x309/0x360
+[  449.890570]  vfs_kern_mount+0x6b/0x1a0
+[  449.890576]  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  449.890583]  btrfs_mount+0x209/0xb71
+[  449.890589]  ? pcpu_block_update_hint_alloc+0x1f5/0x2a0
+[  449.890595]  ? btrfs_remount+0x8e0/0x8e0
+[  449.890601]  ? find_next_zero_bit+0x2c/0xa0
+[  449.890608]  ? find_next_bit+0x57/0x90
+[  449.890613]  ? cpumask_next+0x1a/0x20
+[  449.890617]  ? pcpu_alloc+0x449/0x8c0
+[  449.890624]  ? pcpu_free_area+0x410/0x410
+[  449.890629]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  449.890634]  ? memcpy+0x45/0x50
+[  449.890641]  mount_fs+0x60/0x1a0
+[  449.890646]  ? btrfs_remount+0x8e0/0x8e0
+[  449.890652]  ? mount_fs+0x60/0x1a0
+[  449.890656]  ? alloc_vfsmnt+0x309/0x360
+[  449.890662]  vfs_kern_mount+0x6b/0x1a0
+[  449.890668]  do_mount+0x34a/0x18a0
+[  449.890673]  ? lockref_put_or_lock+0xcf/0x160
+[  449.890680]  ? copy_mount_string+0x20/0x20
+[  449.890685]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  449.890691]  ? kasan_check_write+0x14/0x20
+[  449.890696]  ? _copy_from_user+0x6a/0x90
+[  449.890702]  ? memdup_user+0x42/0x60
+[  449.890708]  ksys_mount+0x83/0xd0
+[  449.890714]  __x64_sys_mount+0x67/0x80
+[  449.890723]  do_syscall_64+0x78/0x170
+[  449.890729]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  449.890734] RIP: 0033:0x7fc36964fb9a
+[  449.890737] RSP: 002b:00007ffd268892f8 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5
+[  449.890744] RAX: ffffffffffffffda RBX: 0000000000e7f030 RCX: 00007fc36964fb9a
+[  449.890747] RDX: 0000000000e7f210 RSI: 0000000000e80f30 RDI: 0000000000e87ec0
+[  449.890750] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000014
+[  449.890753] R10: 00000000c0ed0000 R11: 0000000000000202 R12: 0000000000e87ec0
+[  449.890756] R13: 0000000000e7f210 R14: 0000000000000000 R15: 0000000000000003
+
+[  449.891109] Allocated by task 1291:
+[  449.891832]  save_stack+0x46/0xd0
+[  449.891838]  kasan_kmalloc+0xad/0xe0
+[  449.891843]  kasan_slab_alloc+0x11/0x20
+[  449.891848]  kmem_cache_alloc+0xd1/0x1e0
+[  449.891854]  __btrfs_add_free_space+0x43/0x5e0
+[  449.891859]  add_new_free_space+0x22b/0x240
+[  449.891864]  btrfs_read_block_groups+0xae3/0xc60
+[  449.891868]  open_ctree+0x2cfc/0x3ce9
+[  449.891873]  btrfs_mount_root+0xae6/0xc60
+[  449.891878]  mount_fs+0x60/0x1a0
+[  449.891883]  vfs_kern_mount+0x6b/0x1a0
+[  449.891888]  btrfs_mount+0x209/0xb71
+[  449.891893]  mount_fs+0x60/0x1a0
+[  449.891897]  vfs_kern_mount+0x6b/0x1a0
+[  449.891902]  do_mount+0x34a/0x18a0
+[  449.891906]  ksys_mount+0x83/0xd0
+[  449.891911]  __x64_sys_mount+0x67/0x80
+[  449.891916]  do_syscall_64+0x78/0x170
+[  449.891921]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+[  449.892235] Freed by task 1291:
+[  449.892866]  save_stack+0x46/0xd0
+[  449.892872]  __kasan_slab_free+0x13c/0x1a0
+[  449.892877]  kasan_slab_free+0xe/0x10
+[  449.892882]  kmem_cache_free+0x89/0x1e0
+[  449.892888]  try_merge_free_space+0x274/0x2e0
+[  449.892894]  __btrfs_add_free_space+0x96/0x5e0
+[  449.892898]  unpin_extent_range+0x376/0x670
+[  449.892904]  btrfs_finish_extent_commit+0x15b/0x490
+[  449.892909]  btrfs_commit_transaction+0xaf4/0xfa0
+[  449.892913]  btrfs_create_uuid_tree+0x6a/0x170
+[  449.892917]  open_ctree+0x3b26/0x3ce9
+[  449.892922]  btrfs_mount_root+0xae6/0xc60
+[  449.892927]  mount_fs+0x60/0x1a0
+[  449.892932]  vfs_kern_mount+0x6b/0x1a0
+[  449.892937]  btrfs_mount+0x209/0xb71
+[  449.892942]  mount_fs+0x60/0x1a0
+[  449.892946]  vfs_kern_mount+0x6b/0x1a0
+[  449.892951]  do_mount+0x34a/0x18a0
+[  449.892955]  ksys_mount+0x83/0xd0
+[  449.892960]  __x64_sys_mount+0x67/0x80
+[  449.892965]  do_syscall_64+0x78/0x170
+[  449.892970]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+[  449.893286] The buggy address belongs to the object at ffff8801ed10f000
+                which belongs to the cache btrfs_free_space of size 72
+[  449.895793] The buggy address is located 48 bytes inside of
+                72-byte region [ffff8801ed10f000, ffff8801ed10f048)
+[  449.898035] The buggy address belongs to the page:
+[  449.898979] page:ffffea0007b443c0 count:1 mapcount:0 mapping:0000000000000000 index:0x0
+[  449.900562] flags: 0x2ffff0000000100(slab)
+[  449.901379] raw: 02ffff0000000100 0000000000000000 0000000000000000 0000000180270027
+[  449.902881] raw: dead000000000100 dead000000000200 ffff8801e0a676c0 0000000000000000
+[  449.904396] page dumped because: kasan: bad access detected
+
+[  449.905800] Memory state around the buggy address:
+[  449.906748]  ffff8801ed10ef00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[  449.908165]  ffff8801ed10ef80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[  449.909577] >ffff8801ed10f000: fb fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc
+[  449.910969]                                      ^
+[  449.911933]  ffff8801ed10f080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  449.913328]  ffff8801ed10f100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  449.914720] ==================================================================
+[  449.916119] Disabling lock debugging due to kernel taint
+
+No kernel crash on plain kernel.
+
+- Reason
+https://elixir.bootlin.com/linux/v4.17-rc5/source/fs/btrfs/free-space-cache.c#L2161
+
+	if (left_info && !left_info->bitmap &&
+	    left_info->offset + left_info->bytes == offset) {
+		if (update_stat)
+			unlink_free_space(ctl, left_info);
+		else
+			__unlink_free_space(ctl, left_info);
+		info->offset = left_info->offset;
+		info->bytes += left_info->bytes;
+		kmem_cache_free(btrfs_free_space_cachep, left_info);
+		merged = true;
+	}
+
+	return merged;
+
+Regarding KASAN report, left_info is already freed but referenced (->bitmap). It is in fact freed just several lines after, namely kmem_cache_free(btrfs_free_space_cachep, left_info);
+
+Found by Wen Xu and Po-Ning Tseng from SSLab, Gatech.
+
+===== Extra info for btrfs-progs =====
+This image could cause btrfs-progs to BUG_ON() when opening the image.
+Fixed by "btrfs-progs: Don't BUG_ON() if we failed to load one device or one
+chunk".
diff --git a/tests/fuzz-tests/images/bko-199839.raw.xz b/tests/fuzz-tests/images/bko-199839.raw.xz
new file mode 100644
index 00000000..c06d9540
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-199839.raw.xz
diff --git a/tests/fuzz-tests/images/bko-200403.raw.txt b/tests/fuzz-tests/images/bko-200403.raw.txt
new file mode 100644
index 00000000..aae8ea48
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-200403.raw.txt
@@ -0,0 +1,93 @@
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403
+Wen Xu 2018-07-04 17:21:58 UTC
+
+Created attachment 277167 [details]
+The (compressed) crafted image which causes crash
+
+- Reproduce
+# mkdir mnt
+# mount -t btrfs 0.img mnt
+# gcc -o poc poc.c
+# ./poc ./mnt
+# umount mnt
+
+- Kernel message
+[  230.611533] BTRFS: device fsid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 devid 1 transid 8 /dev/loop0
+[  230.632922] BTRFS info (device loop0): disk space caching is enabled
+[  230.632935] BTRFS info (device loop0): has skinny extents
+[  230.647496] BTRFS info (device loop0): creating UUID tree
+[  237.692643] ------------[ cut here ]------------
+[  237.692654] kernel BUG at fs/btrfs/volumes.c:1625!
+[  237.693822] invalid opcode: 0000 [#1] SMP KASAN PTI
+[  237.694867] CPU: 1 PID: 1387 Comm: umount Not tainted 4.18.0-rc1+ #8
+[  237.696177] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  237.698177] RIP: 0010:btrfs_remove_chunk+0x37a/0xd60
+[  237.699209] Code: e0 48 39 85 28 ff ff ff 77 20 0f b6 85 27 ff ff ff 4d 89 6f 80 4c 89 f7 4d 89 67 89 41 88 47 88 e8 0b 01 f7 ff e9 f5 fe ff ff <0f> 0b 0f 85 5c 08 00 00 4d 8d 66 40 4c 89 f7 e8 42 f9 b6 ff 4c 89
+[  237.703034] RSP: 0018:ffff8801f0b0fad8 EFLAGS: 00010206
+[  237.704122] RAX: 0000000008000000 RBX: ffff8801ef4d7c38 RCX: 0000000000000000
+[  237.705572] RDX: ffffed003e161f30 RSI: 0000000000000e70 RDI: ffff8801f2a6ae70
+[  237.707035] RBP: ffff8801f0b0fc38 R08: ffff8801f0b0f9e0 R09: ffff8801f0b0fa20
+[  237.708485] R10: 0000000000000003 R11: ffffed003e161f7c R12: 0000000007400000
+[  237.709929] R13: 0000000000000001 R14: ffff8801f2bf0a50 R15: ffff8801f0b0fc10
+[  237.711391] FS:  00007f691b770840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
+[  237.713034] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  237.714206] CR2: 0000000000cb0348 CR3: 00000001f26f8000 CR4: 00000000000006e0
+[  237.719741] Call Trace:
+[  237.720274]  ? btrfs_grow_device+0x240/0x240
+[  237.721193]  ? kasan_check_read+0x11/0x20
+[  237.722080]  ? mutex_lock+0x99/0xf0
+[  237.722854]  btrfs_delete_unused_bgs+0x4b6/0x5c0
+[  237.723836]  close_ctree+0x40a/0x460
+[  237.724586]  ? transaction_kthread+0x250/0x250
+[  237.725523]  ? dispose_list+0xa0/0xa0
+[  237.726303]  btrfs_put_super+0x25/0x30
+[  237.727110]  generic_shutdown_super+0xb9/0x1c0
+[  237.728032]  kill_anon_super+0x24/0x40
+[  237.728814]  btrfs_kill_super+0x31/0x220
+[  237.729630]  deactivate_locked_super+0x6f/0xa0
+[  237.730548]  deactivate_super+0x5e/0x80
+[  237.731352]  cleanup_mnt+0x61/0xa0
+[  237.732060]  __cleanup_mnt+0x12/0x20
+[  237.732835]  task_work_run+0xc8/0xf0
+[  237.733605]  exit_to_usermode_loop+0x125/0x130
+[  237.734530]  do_syscall_64+0x138/0x170
+[  237.735331]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  237.736676] RIP: 0033:0x7f691b050487
+[  237.737457] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48
+[  237.741327] RSP: 002b:00007ffdf3a06d98 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
+[  237.742889] RAX: 0000000000000000 RBX: 0000000000ca7030 RCX: 00007f691b050487
+[  237.744351] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000cae1e0
+[  237.745814] RBP: 0000000000cae1e0 R08: 0000000000000000 R09: 0000000000000015
+[  237.747289] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f691b55983c
+[  237.748750] R13: 0000000000000000 R14: 0000000000ca7210 R15: 00007ffdf3a07020
+[  237.750224] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too qxl drm_kms_helper crct10dif_pclmul syscopyarea sysfillrect sysimgblt fb_sys_fops ttm crc32_pclmul aesni_intel drm aes_x86_64 crypto_simd cryptd glue_helper 8139cp mii pata_acpi floppy
+[  237.760666] ---[ end trace 2e85051acb5f6dc1 ]---
+[  237.761718] RIP: 0010:btrfs_remove_chunk+0x37a/0xd60
+[  237.762827] Code: e0 48 39 85 28 ff ff ff 77 20 0f b6 85 27 ff ff ff 4d 89 6f 80 4c 89 f7 4d 89 67 89 41 88 47 88 e8 0b 01 f7 ff e9 f5 fe ff ff <0f> 0b 0f 85 5c 08 00 00 4d 8d 66 40 4c 89 f7 e8 42 f9 b6 ff 4c 89
+[  237.766977] RSP: 0018:ffff8801f0b0fad8 EFLAGS: 00010206
+[  237.768157] RAX: 0000000008000000 RBX: ffff8801ef4d7c38 RCX: 0000000000000000
+[  237.769672] RDX: ffffed003e161f30 RSI: 0000000000000e70 RDI: ffff8801f2a6ae70
+[  237.771147] RBP: ffff8801f0b0fc38 R08: ffff8801f0b0f9e0 R09: ffff8801f0b0fa20
+[  237.772650] R10: 0000000000000003 R11: ffffed003e161f7c R12: 0000000007400000
+[  237.774119] R13: 0000000000000001 R14: ffff8801f2bf0a50 R15: ffff8801f0b0fc10
+[  237.775598] FS:  00007f691b770840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
+[  237.777297] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  237.778496] CR2: 0000000000cb0348 CR3: 00000001f26f8000 CR4: 00000000000006e0
+
+
+===== Extra info for btrfs-progs ======
+It has one corrupted root item, (41 ROOT_ITEM 0) referring tree block
+29364224, which is also UUID tree root.
+It would cause original mode to hit BUG_ON().
+Checking filesystem on /home/adam/btrfs/crafted_images/runtime/0.img
+UUID: 3381d111-94a3-4ac7-8f39-611bbbdab7e6
+checking extents
+check/main.c:3677: check_owner_ref: BUG_ON `rec->is_root` triggered, value 1
+btrfs(+0x572c2)[0x562d65da72c2]
+btrfs(+0x6098d)[0x562d65db098d]
+btrfs(+0x60bb6)[0x562d65db0bb6]
+btrfs(+0x6179b)[0x562d65db179b]
+btrfs(cmd_check+0x1199)[0x562d65db5589]
+btrfs(main+0x88)[0x562d65d62768]
+/usr/lib/libc.so.6(__libc_start_main+0xeb)[0x7f4fcbb1b06b]
+btrfs(_start+0x2a)[0x562d65d6288a]
diff --git a/tests/fuzz-tests/images/bko-200403.raw.xz b/tests/fuzz-tests/images/bko-200403.raw.xz
new file mode 100644
index 00000000..56959457
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-200403.raw.xz
diff --git a/tests/fuzz-tests/images/bko-200409.raw.txt b/tests/fuzz-tests/images/bko-200409.raw.txt
new file mode 100644
index 00000000..7df79243
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-200409.raw.txt
@@ -0,0 +1,125 @@
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200409
+Wen Xu 2018-07-04 17:47:09 UTC
+
+Created attachment 277173 [details]
+The (compressed) crafted image which causes crash
+
+- Reproduce
+# mkdir mnt
+# mount -t btrfs 5.img mnt
+
+- Kernel message
+[  333.770743] BTRFS: device fsid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 devid 1 transid 8 /dev/loop0
+[  333.779221] BTRFS info (device loop0): disk space caching is enabled
+[  333.779234] BTRFS info (device loop0): has skinny extents
+[  333.798081] ------------[ cut here ]------------
+[  333.798090] kernel BUG at fs/btrfs/volumes.c:6564!
+[  333.799293] invalid opcode: 0000 [#1] SMP KASAN PTI
+[  333.800355] CPU: 0 PID: 1353 Comm: mount Not tainted 4.18.0-rc1+ #8
+[  333.801652] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  333.803658] RIP: 0010:read_one_chunk+0x77c/0x880
+[  333.804630] Code: e8 a9 82 fd ff 48 8b 95 70 ff ff ff 48 8b bd 60 ff ff ff b9 01 00 00 00 4c 89 f6 e8 2e 14 ff ff b8 fe ff ff ff e9 cb fe ff ff <0f> 0b 48 8b bd 38 ff ff ff e8 76 82 fd ff e9 35 ff ff ff 48 8b 95
+[  333.808462] RSP: 0018:ffff8801eedf7230 EFLAGS: 00010282
+[  333.809542] RAX: ffff8801f2df2100 RBX: 00000000ffffffef RCX: ffffffffa5839143
+[  333.810991] RDX: 1ffff1003e5be444 RSI: e300000001c00000 RDI: ffff8801f2df2220
+[  333.812451] RBP: ffff8801eedf7310 R08: ffffed003e5be445 R09: ffffed003e5be445
+[  333.813905] R10: 0000000000000001 R11: ffffed003e5be444 R12: ffff8801e6788158
+[  333.815357] R13: 0000000000000001 R14: 0000000000000001 R15: ffff8801f2df2220
+[  333.846990] FS:  00007f2013519840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
+[  333.848645] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  333.849816] CR2: 00007f88a3c6b760 CR3: 00000001e655e000 CR4: 00000000000006f0
+[  333.851304] Call Trace:
+[  333.851864]  ? add_missing_dev+0xc0/0xc0
+[  333.852715]  ? read_extent_buffer+0xe9/0x130
+[  333.853604]  btrfs_read_chunk_tree+0x957/0xd20
+[  333.854551]  ? free_root_pointers+0xb0/0xb0
+[  333.855435]  ? btrfs_check_rw_degradable+0x240/0x240
+[  333.856491]  ? btree_read_extent_buffer_pages+0x1e0/0x3b0
+[  333.857617]  ? run_one_async_done+0xb0/0xb0
+[  333.858498]  ? cache_state.part.32+0x10/0x40
+[  333.859430]  ? unlock_page+0x16/0x40
+[  333.860202]  ? alloc_extent_buffer+0x4a1/0x4e0
+[  333.861149]  ? memcpy+0x45/0x50
+[  333.861818]  ? read_extent_buffer+0xe9/0x130
+[  333.862711]  open_ctree+0x246c/0x35c6
+[  333.863488]  ? close_ctree+0x460/0x460
+[  333.864302]  ? bdi_register_va+0x44/0x50
+[  333.865142]  ? super_setup_bdi_name+0x11b/0x1a0
+[  333.866089]  ? kill_block_super+0x80/0x80
+[  333.866970]  ? snprintf+0x96/0xd0
+[  333.867704]  btrfs_mount_root+0xae6/0xc60
+[  333.868550]  ? btrfs_mount_root+0xae6/0xc60
+[  333.869419]  ? pcpu_block_update_hint_alloc+0x1d2/0x2a0
+[  333.870492]  ? btrfs_decode_error+0x40/0x40
+[  333.871389]  ? find_next_bit+0x57/0x90
+[  333.872206]  ? cpumask_next+0x1a/0x20
+[  333.872986]  ? pcpu_alloc+0x449/0x8c0
+[  333.873761]  ? pcpu_free_area+0x410/0x410
+[  333.874614]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  333.875531]  ? memcpy+0x45/0x50
+[  333.876209]  mount_fs+0x60/0x1a0
+[  333.876892]  ? btrfs_decode_error+0x40/0x40
+[  333.877763]  ? mount_fs+0x60/0x1a0
+[  333.878492]  ? alloc_vfsmnt+0x309/0x360
+[  333.879303]  vfs_kern_mount+0x6b/0x1a0
+[  333.880121]  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  333.881209]  btrfs_mount+0x209/0xb71
+[  333.881962]  ? pcpu_block_update_hint_alloc+0x1d2/0x2a0
+[  333.883044]  ? btrfs_remount+0x8e0/0x8e0
+[  333.883878]  ? find_next_zero_bit+0x2c/0xa0
+[  333.884753]  ? find_next_bit+0x57/0x90
+[  333.885538]  ? cpumask_next+0x1a/0x20
+[  333.886307]  ? pcpu_alloc+0x449/0x8c0
+[  333.887078]  ? pcpu_free_area+0x410/0x410
+[  333.887930]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  333.888836]  ? memcpy+0x45/0x50
+[  333.889500]  mount_fs+0x60/0x1a0
+[  333.890182]  ? btrfs_remount+0x8e0/0x8e0
+[  333.891001]  ? mount_fs+0x60/0x1a0
+[  333.891728]  ? alloc_vfsmnt+0x309/0x360
+[  333.892533]  vfs_kern_mount+0x6b/0x1a0
+[  333.893323]  do_mount+0x34a/0x18c0
+[  333.894042]  ? copy_mount_string+0x20/0x20
+[  333.894898]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  333.895832]  ? kasan_check_write+0x14/0x20
+[  333.896704]  ? _copy_from_user+0x6a/0x90
+[  333.897542]  ? memdup_user+0x42/0x60
+[  333.898300]  ksys_mount+0x83/0xd0
+[  333.899003]  __x64_sys_mount+0x67/0x80
+[  333.899831]  do_syscall_64+0x78/0x170
+[  333.900610]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  333.901682] RIP: 0033:0x7f2012df9b9a
+[  333.902430] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
+[  333.906311] RSP: 002b:00007ffd77e261b8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
+[  333.907874] RAX: ffffffffffffffda RBX: 00000000019e7030 RCX: 00007f2012df9b9a
+[  333.909341] RDX: 00000000019e7210 RSI: 00000000019e8f30 RDI: 00000000019efec0
+[  333.910804] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000014
+[  333.912281] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000019efec0
+[  333.913747] R13: 00000000019e7210 R14: 0000000000000000 R15: 0000000000000003
+[  333.915224] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too qxl drm_kms_helper crct10dif_pclmul syscopyarea sysfillrect sysimgblt fb_sys_fops ttm crc32_pclmul aesni_intel drm aes_x86_64 crypto_simd cryptd glue_helper 8139cp mii pata_acpi floppy
+[  333.932460] ---[ end trace 2e85051acb5f6dc1 ]---
+[  333.933448] RIP: 0010:read_one_chunk+0x77c/0x880
+[  333.934397] Code: e8 a9 82 fd ff 48 8b 95 70 ff ff ff 48 8b bd 60 ff ff ff b9 01 00 00 00 4c 89 f6 e8 2e 14 ff ff b8 fe ff ff ff e9 cb fe ff ff <0f> 0b 48 8b bd 38 ff ff ff e8 76 82 fd ff e9 35 ff ff ff 48 8b 95
+[  333.938283] RSP: 0018:ffff8801eedf7230 EFLAGS: 00010282
+[  333.939361] RAX: ffff8801f2df2100 RBX: 00000000ffffffef RCX: ffffffffa5839143
+[  333.940846] RDX: 1ffff1003e5be444 RSI: e300000001c00000 RDI: ffff8801f2df2220
+[  333.942318] RBP: ffff8801eedf7310 R08: ffffed003e5be445 R09: ffffed003e5be445
+[  333.943878] R10: 0000000000000001 R11: ffffed003e5be444 R12: ffff8801e6788158
+[  333.945371] R13: 0000000000000001 R14: 0000000000000001 R15: ffff8801f2df2220
+[  333.946839] FS:  00007f2013519840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
+[  333.948526] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  333.949711] CR2: 00007f88a3c6b760 CR3: 00000001e655e000 CR4: 00000000000006f0
+
+- Location
+https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/btrfs/volumes.c#L6564
+	write_lock(&map_tree->map_tree.lock);
+	ret = add_extent_mapping(&map_tree->map_tree, em, 0);
+	write_unlock(&map_tree->map_tree.lock);
+	BUG_ON(ret); /* Tree corruption */ <---
+	free_extent_map(em);
+
+Found by Wen Xu and Po-Ning Tseng from SSLab at Gatech.
+
+====== Extra info for btrfs-progs ======
+Btrfs-progs has the exact same BUG_ON() in read_one_chunk().
+Fixed by "btrfs-progs: Exit gracefully when overlap chunks are detected".
diff --git a/tests/fuzz-tests/images/bko-200409.raw.xz b/tests/fuzz-tests/images/bko-200409.raw.xz
new file mode 100644
index 00000000..8ec29cfd
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-200409.raw.xz
diff --git a/tests/misc-tests/006-image-on-missing-device/test.sh b/tests/misc-tests/006-image-on-missing-device/test.sh
index 2b222340..8c81e6fb 100755
--- a/tests/misc-tests/006-image-on-missing-device/test.sh
+++ b/tests/misc-tests/006-image-on-missing-device/test.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # test btrfs-image with a missing device (uses loop devices)
 #
-# - btrfs-image must not loop indefinetelly
+# - btrfs-image must not loop indefinitely
 # - btrfs-image will expectedly fail to produce the dump
 
 source "$TEST_TOP/common"
diff --git a/tests/misc-tests/011-delete-missing-device/test.sh b/tests/misc-tests/011-delete-missing-device/test.sh
index 4c976421..a6b2e29c 100755
--- a/tests/misc-tests/011-delete-missing-device/test.sh
+++ b/tests/misc-tests/011-delete-missing-device/test.sh
@@ -44,6 +44,22 @@ test_delete_missing()
 	run_check_umount_test_dev
 }
 
+test_missing_error()
+{
+	local out
+	# 'device remove missing' on a freshly made one-device fs must fail
+	run_check $SUDO_HELPER "$TOP/mkfs.btrfs" -f "$TEST_DEV"
+	run_check_mount_test_dev
+	out=$(run_mustfail_stdout "device remove succeeded" \
+		$SUDO_HELPER "$TOP/btrfs" device remove missing "$TEST_MNT")
+	# ... and the captured output must contain the dedicated message
+	if ! echo "$out" | grep -q "no missing devices found to remove"; then
+		_fail "IOCTL returned unexpected error value"
+	fi
+
+	run_check_umount_test_dev
+}
+
 setup_loopdevs 4
 prepare_loopdevs
 dev1=${loopdevs[1]}
@@ -53,5 +69,6 @@ TEST_DEV=$dev1
 test_do_mkfs -m raid1 -d raid1
 test_wipefs
 test_delete_missing
+test_missing_error
 
 cleanup_loopdevs
diff --git a/tests/misc-tests/029-send-p-different-mountpoints/test.sh b/tests/misc-tests/029-send-p-different-mountpoints/test.sh
index 0b42b772..a59a585e 100755
--- a/tests/misc-tests/029-send-p-different-mountpoints/test.sh
+++ b/tests/misc-tests/029-send-p-different-mountpoints/test.sh
@@ -10,11 +10,11 @@ check_prereq mkfs.btrfs
 setup_root_helper
 prepare_test_dev
 
-# we need two mount points, cannot nest the subvoolume under TEST_MNT
+# we need two mount points, cannot nest the subvolume under TEST_MNT
 SUBVOL_MNT="$TEST_MNT/subvol"
 TOPLEVEL_MNT="$TEST_MNT/toplevel"
 TEST_MNT="$TOPLEVEL_MNT"
-mkdir -p "$TOPLEVEL_MNT" "$SUBVOL_MNT"
+run_check $SUDO_HELPER mkdir -p "$TOPLEVEL_MNT" "$SUBVOL_MNT"
 
 run_check $SUDO_HELPER "$TOP/mkfs.btrfs" -f "$TEST_DEV"
 run_check_mount_test_dev
@@ -47,5 +47,5 @@ run_mustfail_stdout "send -p on 2 mount points" \
 run_check_umount_test_dev "$SUBVOL_MNT"
 run_check_umount_test_dev "$TOPLEVEL_MNT"
 
-rmdir "$SUBVOL_MNT"
-rmdir "$TOPLEVEL_MNT"
+run_check $SUDO_HELPER rmdir "$SUBVOL_MNT"
+run_check $SUDO_HELPER rmdir "$TOPLEVEL_MNT"
diff --git a/tests/misc-tests/033-filename-length-limit/test.sh b/tests/misc-tests/033-filename-length-limit/test.sh
new file mode 100755
index 00000000..673138e2
--- /dev/null
+++ b/tests/misc-tests/033-filename-length-limit/test.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+#
+# test file name length limits (255 bytes) for subvolumes and snapshots
+
+source "$TEST_TOP/common"
+
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev
+
+run_check $SUDO_HELPER "$TOP/mkfs.btrfs" -f "$TEST_DEV"
+run_check_mount_test_dev
+run_check $SUDO_HELPER chmod a+rw "$TEST_MNT"
+
+cd "$TEST_MNT"
+# 25 * 10 + 5 = 255 characters, the longest name expected to be accepted
+longname=\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+\
+01234
+
+# subvolume name length limit test
+
+# short name test
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create subvol
+# 255, must succeed
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create "$longname"
+# 256, must fail
+run_mustfail "subvolume with name 256 bytes long succeeded" \
+	$SUDO_HELPER "$TOP/btrfs" subvolume create "$longname"5
+# 255*2, must fail
+run_mustfail "subvolume with name 2 * 255 bytes long succeeded" \
+	$SUDO_HELPER "$TOP/btrfs" subvolume create "$longname$longname"
+
+# snapshot name length limit test
+
+run_check $SUDO_HELPER mkdir snaps
+
+# short name test
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume snapshot subvol snaps/snap
+# 255, must succeed
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume snapshot subvol snaps/"$longname"
+# 256, must fail
+run_mustfail "snapshot with name 256 bytes long succeeded" \
+	$SUDO_HELPER "$TOP/btrfs" subvolume snapshot subvol snaps/"$longname"5
+# 255*2, must fail
+run_mustfail "snapshot with name 2 * 255 bytes long succeeded" \
+	$SUDO_HELPER "$TOP/btrfs" subvolume snapshot subvol snaps/"$longname$longname"
+
+cd ..
+
+run_check_umount_test_dev
diff --git a/transaction.c b/transaction.c
index 9619265e..e756db33 100644
--- a/transaction.c
+++ b/transaction.c
@@ -32,7 +32,7 @@ struct btrfs_trans_handle* btrfs_start_transaction(struct btrfs_root *root,
 	if (!h)
 		return ERR_PTR(-ENOMEM);
 	if (root->commit_root) {
-		error("commit_root aleady set when starting transaction");
+		error("commit_root already set when starting transaction");
 		kfree(h);
 		return ERR_PTR(-EINVAL);
 	}
@@ -46,6 +46,7 @@ struct btrfs_trans_handle* btrfs_start_transaction(struct btrfs_root *root,
 	fs_info->generation++;
 	h->transid = fs_info->generation;
 	h->blocks_reserved = num_blocks;
+	h->reinit_extent_tree = false;
 	root->last_trans = h->transid;
 	root->commit_root = root->node;
 	extent_buffer_get(root->node);
@@ -60,7 +61,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 	u64 old_root_bytenr;
 	struct btrfs_root *tree_root = root->fs_info->tree_root;
 
-	btrfs_write_dirty_block_groups(trans, root);
 	while(1) {
 		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
 		if (old_root_bytenr == root->node->start)
@@ -73,8 +73,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 		ret = btrfs_update_root(trans, tree_root,
 					&root->root_key,
 					&root->root_item);
-		BUG_ON(ret);
-		btrfs_write_dirty_block_groups(trans, root);
+		if (ret < 0)
+			return ret;
+		btrfs_write_dirty_block_groups(trans);
 	}
 	return 0;
 }
@@ -97,13 +98,26 @@ int commit_tree_roots(struct btrfs_trans_handle *trans,
 	if (ret)
 		return ret;
 
+	/*
+	 * If the above CoW is the first one to dirty the current tree_root,
+	 * delayed refs for it won't be run until after this function has
+	 * finished executing, meaning we won't process the extent tree root,
+	 * which will have been added to ->dirty_cowonly_roots.  So run
+	 * delayed refs here as well.
+	 */
+	ret = btrfs_run_delayed_refs(trans, -1);
+	if (ret)
+		return ret;
+
 	while(!list_empty(&fs_info->dirty_cowonly_roots)) {
 		next = fs_info->dirty_cowonly_roots.next;
 		list_del_init(next);
 		root = list_entry(next, struct btrfs_root, dirty_list);
-		update_cowonly_root(trans, root);
+		ret = update_cowonly_root(trans, root);
 		free_extent_buffer(root->commit_root);
 		root->commit_root = NULL;
+		if (ret < 0)
+			return ret;
 	}
 
 	return 0;
@@ -146,6 +160,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	if (trans->fs_info->transaction_aborted)
 		return -EROFS;
+	/*
+	 * Flush all accumulated delayed refs so that root-tree updates are
+	 * consistent
+	 */
+	ret = btrfs_run_delayed_refs(trans, -1);
+	BUG_ON(ret);
 
 	if (root->commit_root == root->node)
 		goto commit_tree;
@@ -163,11 +183,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	ret = btrfs_update_root(trans, root->fs_info->tree_root,
 				&root->root_key, &root->root_item);
 	BUG_ON(ret);
+
 commit_tree:
 	ret = commit_tree_roots(trans, fs_info);
 	BUG_ON(ret);
-	ret = __commit_transaction(trans, root);
+	/*
+	 * Ensure that all committed roots are properly accounted in the
+	 * extent tree
+	 */
+	ret = btrfs_run_delayed_refs(trans, -1);
 	BUG_ON(ret);
+	btrfs_write_dirty_block_groups(trans);
+	ret = __commit_transaction(trans, root);
+	if (ret < 0)
+		goto out;
 	write_ctree_super(trans);
 	btrfs_finish_extent_commit(trans, fs_info->extent_root,
 			           &fs_info->pinned_extents);
@@ -176,7 +205,8 @@ commit_tree:
 	root->commit_root = NULL;
 	fs_info->running_transaction = NULL;
 	fs_info->last_trans_committed = transid;
-	return 0;
+out:
+	return ret;
 }
 
 void btrfs_abort_transaction(struct btrfs_trans_handle *trans, int error)
diff --git a/transaction.h b/transaction.h
index 470ee3de..34060252 100644
--- a/transaction.h
+++ b/transaction.h
@@ -21,15 +21,20 @@
 
 #include "kerncompat.h"
 #include "ctree.h"
+#include "delayed-ref.h"
 
 struct btrfs_trans_handle {
 	struct btrfs_fs_info *fs_info;
 	u64 transid;
 	u64 alloc_exclude_start;
 	u64 alloc_exclude_nr;
+	bool reinit_extent_tree;
+	u64 delayed_ref_updates;
 	unsigned long blocks_reserved;
 	unsigned long blocks_used;
 	struct btrfs_block_group_cache *block_group;
+	struct btrfs_delayed_ref_root delayed_refs;
+
 };
 
 struct btrfs_trans_handle* btrfs_start_transaction(struct btrfs_root *root,
diff --git a/travis/build-dep-zstd b/travis/build-dep-zstd
index eae0be63..8996f4a2 100755
--- a/travis/build-dep-zstd
+++ b/travis/build-dep-zstd
@@ -1,7 +1,7 @@
 #!/bin/sh
 # download, build and install the zstd library
 
-version=1.3.3
+version=1.3.7
 dir=tmp-cached-zstd
 stamp="$dir/.last-build-zstd"
 here=`pwd`
diff --git a/utils-lib.c b/utils-lib.c
index 044f93fc..2ac421be 100644
--- a/utils-lib.c
+++ b/utils-lib.c
@@ -5,6 +5,8 @@
 #include <sys/ioctl.h>
 #include <ioctl.h>
 
+#include "utils.h"
+
 #if BTRFS_FLAT_INCLUDES
 #include "ctree.h"
 #else
diff --git a/utils.c b/utils.c
index 21de09d3..3a4bc92a 100644
--- a/utils.c
+++ b/utils.c
@@ -60,7 +60,7 @@
 
 static int btrfs_scan_done = 0;
 
-static int rand_seed_initlized = 0;
+static int rand_seed_initialized = 0;
 static unsigned short rand_seed[3];
 
 struct btrfs_config bconf;
@@ -357,7 +357,8 @@ int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
 				       ZERO_DEV_BYTES, block_count);
 
 	if (ret < 0) {
-		error("failed to zero device '%s': %s", file, strerror(-ret));
+		errno = -ret;
+		error("failed to zero device '%s': %m", file);
 		return 1;
 	}
 
@@ -502,6 +503,8 @@ int check_arg_type(const char *input)
 			return BTRFS_ARG_REG;
 
 		return BTRFS_ARG_UNKNOWN;
+	} else {
+		return -errno;
 	}
 
 	if (strlen(input) == (BTRFS_UUID_UNPARSED_SIZE - 1) &&
@@ -528,7 +531,8 @@ int get_btrfs_mount(const char *dev, char *mp, size_t mp_size)
 			error("not a block device: %s", dev);
 			ret = -EINVAL;
 		} else {
-			error("cannot check %s: %s", dev, strerror(-ret));
+			errno = -ret;
+			error("cannot check %s: %m", dev);
 		}
 		goto out;
 	}
@@ -540,7 +544,7 @@ int get_btrfs_mount(const char *dev, char *mp, size_t mp_size)
 		goto out;
 	}
 
-	ret = check_mounted_where(fd, dev, mp, mp_size, NULL);
+	ret = check_mounted_where(fd, dev, mp, mp_size, NULL, SBREAD_DEFAULT);
 	if (!ret) {
 		ret = -EINVAL;
 	} else { /* mounted, all good */
@@ -831,7 +835,7 @@ static int blk_file_in_dev_list(struct btrfs_fs_devices* fs_devices,
 /*
  * Resolve a pathname to a device mapper node to /dev/mapper/<name>
  * Returns NULL on invalid input or malloc failure; Other failures
- * will be handled by the caller using the input pathame.
+ * will be handled by the caller using the input pathname.
  */
 char *canonicalize_dm_name(const char *ptname)
 {
@@ -862,7 +866,7 @@ char *canonicalize_dm_name(const char *ptname)
  * Resolve a pathname to a canonical device node, e.g. /dev/sda1 or
  * to a device mapper pathname.
  * Returns NULL on invalid input or malloc failure; Other failures
- * will be handled by the caller using the input pathame.
+ * will be handled by the caller using the input pathname.
  */
 char *canonicalize_path(const char *path)
 {
@@ -901,14 +905,14 @@ int check_mounted(const char* file)
 		return -errno;
 	}
 
-	ret =  check_mounted_where(fd, file, NULL, 0, NULL);
+	ret =  check_mounted_where(fd, file, NULL, 0, NULL, SBREAD_DEFAULT);
 	close(fd);
 
 	return ret;
 }
 
 int check_mounted_where(int fd, const char *file, char *where, int size,
-			struct btrfs_fs_devices **fs_dev_ret)
+			struct btrfs_fs_devices **fs_dev_ret, unsigned sbflags)
 {
 	int ret;
 	u64 total_devs = 1;
@@ -919,7 +923,7 @@ int check_mounted_where(int fd, const char *file, char *where, int size,
 
 	/* scan the initial device */
 	ret = btrfs_scan_one_device(fd, file, &fs_devices_mnt,
-		    &total_devs, BTRFS_SUPER_INFO_OFFSET, SBREAD_DEFAULT);
+		    &total_devs, BTRFS_SUPER_INFO_OFFSET, sbflags);
 	is_btrfs = (ret >= 0);
 
 	/* scan other devices */
@@ -1490,8 +1494,10 @@ path:
 	if (fd < 0)
 		goto err;
 	ret = lookup_path_rootid(fd, &id);
-	if (ret)
-		error("failed to lookup root id: %s", strerror(-ret));
+	if (ret) {
+		errno = -ret;
+		error("failed to lookup root id: %m");
+	}
 	close(fd);
 	if (ret < 0)
 		goto err;
@@ -1675,7 +1681,7 @@ int get_fs_info(const char *path, struct btrfs_ioctl_fs_info_args *fi_args,
 			goto out;
 		}
 		ret = check_mounted_where(fd, path, mp, sizeof(mp),
-					  &fs_devices_mnt);
+					  &fs_devices_mnt, SBREAD_DEFAULT);
 		if (!ret) {
 			ret = -EINVAL;
 			goto out;
@@ -1780,8 +1786,7 @@ int get_fsid(const char *path, u8 *fsid, int silent)
 	if (fd < 0) {
 		ret = -errno;
 		if (!silent)
-			error("failed to open %s: %s", path,
-				strerror(-ret));
+			error("failed to open %s: %m", path);
 		goto out;
 	}
 
@@ -1998,7 +2003,8 @@ int btrfs_scan_devices(void)
 				&num_devices, BTRFS_SUPER_INFO_OFFSET,
 				SBREAD_DEFAULT);
 		if (ret) {
-			error("cannot scan %s: %s", path, strerror(-ret));
+			errno = -ret;
+			error("cannot scan %s: %m", path);
 			close (fd);
 			continue;
 		}
@@ -2481,7 +2487,7 @@ void init_rand_seed(u64 seed)
 		rand_seed[i] = (unsigned short)(seed ^ (unsigned short)(-1));
 		seed >>= 16;
 	}
-	rand_seed_initlized = 1;
+	rand_seed_initialized = 1;
 }
 
 static void __init_seed(void)
@@ -2490,7 +2496,7 @@ static void __init_seed(void)
 	int ret;
 	int fd;
 
-	if(rand_seed_initlized)
+	if(rand_seed_initialized)
 		return;
 	/* Use urandom as primary seed source. */
 	fd = open("/dev/urandom", O_RDONLY);
@@ -2508,14 +2514,14 @@ fallback:
 		rand_seed[1] = getppid() ^ (tv.tv_usec & 0xFFFF);
 		rand_seed[2] = (tv.tv_sec ^ tv.tv_usec) >> 16;
 	}
-	rand_seed_initlized = 1;
+	rand_seed_initialized = 1;
 }
 
 u32 rand_u32(void)
 {
 	__init_seed();
 	/*
-	 * Don't use nrand48, its range is [0,2^31) The highest bit will alwasy
+	 * Don't use nrand48, its range is [0,2^31) The highest bit will always
 	 * be 0.  Use jrand48 to include the highest bit.
 	 */
 	return (u32)jrand48(rand_seed);
diff --git a/utils.h b/utils.h
index 403de481..7c5eb798 100644
--- a/utils.h
+++ b/utils.h
@@ -29,6 +29,7 @@
 #include "sizes.h"
 #include "messages.h"
 #include "ioctl.h"
+#include "fsfeatures.h"
 
 #define BTRFS_SCAN_MOUNTED	(1ULL << 0)
 #define BTRFS_SCAN_LBLKID	(1ULL << 1)
@@ -92,7 +93,7 @@ char *canonicalize_dm_name(const char *ptname);
 char *canonicalize_path(const char *path);
 int check_mounted(const char *devicename);
 int check_mounted_where(int fd, const char *file, char *where, int size,
-			struct btrfs_fs_devices **fs_devices_mnt);
+		struct btrfs_fs_devices **fs_devices_mnt, unsigned sbflags);
 int btrfs_device_already_in_root(struct btrfs_root *root, int fd,
 				 int super_offset);
 
diff --git a/volumes.c b/volumes.c
index 9379d2f6..30090ce5 100644
--- a/volumes.c
+++ b/volumes.c
@@ -254,7 +254,7 @@ again:
 		device = list_entry(fs_devices->devices.next,
 				    struct btrfs_device, dev_list);
 		if (device->fd != -1) {
-			if (fsync(device->fd) == -1) {
+			if (device->writeable && fsync(device->fd) == -1) {
 				warning("fsync on device %llu failed: %m",
 					device->devid);
 				ret = -errno;
@@ -561,7 +561,8 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
 	key.type = BTRFS_DEV_EXTENT_KEY;
 	ret = btrfs_insert_empty_item(trans, root, path, &key,
 				      sizeof(*extent));
-	BUG_ON(ret);
+	if (ret < 0)
+		goto err;
 
 	leaf = path->nodes[0];
 	extent = btrfs_item_ptr(leaf, path->slots[0],
@@ -1924,9 +1925,13 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 
 	}
 	ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
-	BUG_ON(ret);
+	if (ret < 0) {
+		errno = -ret;
+		error("failed to add chunk map start=%llu len=%llu: %d (%m)",
+		      map->ce.start, map->ce.size, ret);
+	}
 
-	return 0;
+	return ret;
 }
 
 static int fill_device_from_item(struct extent_buffer *leaf,
@@ -2158,13 +2163,15 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 			dev_item = btrfs_item_ptr(leaf, slot,
 						  struct btrfs_dev_item);
 			ret = read_one_dev(fs_info, leaf, dev_item);
-			BUG_ON(ret);
+			if (ret < 0)
+				goto error;
 		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
 			struct btrfs_chunk *chunk;
 			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 			ret = read_one_chunk(fs_info, &found_key, leaf, chunk,
 					     slot);
-			BUG_ON(ret);
+			if (ret < 0)
+				goto error;
 		}
 		path->slots[0]++;
 	}
@@ -2442,8 +2449,8 @@ int btrfs_fix_device_size(struct btrfs_fs_info *fs_info,
 	trans = btrfs_start_transaction(chunk_root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		error("error starting transaction: %d (%s)",
-		      ret, strerror(-ret));
+		errno = -ret;
+		error("error starting transaction: %d (%m)", ret);
 		return ret;
 	}
 
@@ -2455,8 +2462,8 @@ int btrfs_fix_device_size(struct btrfs_fs_info *fs_info,
 		goto err;
 	}
 	if (ret < 0) {
-		error("failed to search chunk root: %d (%s)",
-			ret, strerror(-ret));
+		errno = -ret;
+		error("failed to search chunk root: %d (%m)", ret);
 		goto err;
 	}
 	di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dev_item);
@@ -2464,8 +2471,8 @@ int btrfs_fix_device_size(struct btrfs_fs_info *fs_info,
 	btrfs_mark_buffer_dirty(path.nodes[0]);
 	ret = btrfs_commit_transaction(trans, chunk_root);
 	if (ret < 0) {
-		error("failed to commit current transaction: %d (%s)",
-			ret, strerror(-ret));
+		errno = -ret;
+		error("failed to commit current transaction: %d (%m)", ret);
 		btrfs_release_path(&path);
 		return ret;
 	}
@@ -2518,14 +2525,14 @@ int btrfs_fix_super_size(struct btrfs_fs_info *fs_info)
 	trans = btrfs_start_transaction(fs_info->tree_root, 1);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		error("error starting transaction:  %d (%s)",
-		      ret, strerror(-ret));
+		errno = -ret;
+		error("error starting transaction: %d (%m)", ret);
 		return ret;
 	}
 	ret = btrfs_commit_transaction(trans, fs_info->tree_root);
 	if (ret < 0) {
-		error("failed to commit current transaction: %d (%s)",
-			ret, strerror(-ret));
+		errno = -ret;
+		error("failed to commit current transaction: %d (%m)", ret);
 		return ret;
 	}
 	printf("Fixed super total bytes, old size: %llu new size: %llu\n",