summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nicholas D Steeves <nsteeves@gmail.com> 2016-04-23 00:41:30 +0100
committer: Nicholas D Steeves <nsteeves@gmail.com> 2016-04-23 00:41:30 +0100
commit: cec572daccafa1e912cbed363df6f84687778c6f (patch)
tree: 7d99ab9f73d25c1ed8eaf6393f6374edf5316b03
btrfs-progs (4.4.1-1.1) unstable; urgency=medium
* Non-maintainer upload.
* New upstream release.
* Rename package to btrfs-progs (Closes: #780081)
* Update standards version to 3.9.7 (no changes needed).
* debian/control: Add "Breaks" per Gianfranco Costamagna's suggestion
* Change lintian override to reflect package rename
* Switch from using postinst and postrm to using triggers per Christian Seiler's recommendation.

# imported from the archive
-rw-r--r--.gitignore75
-rw-r--r--Android.mk111
-rw-r--r--COPYING341
-rw-r--r--Documentation/Makefile.in133
-rw-r--r--Documentation/asciidoc.conf48
-rw-r--r--Documentation/btrfs-balance.asciidoc217
-rw-r--r--Documentation/btrfs-check.asciidoc60
-rw-r--r--Documentation/btrfs-convert.asciidoc98
-rw-r--r--Documentation/btrfs-debug-tree.asciidoc38
-rw-r--r--Documentation/btrfs-device.asciidoc145
-rw-r--r--Documentation/btrfs-filesystem.asciidoc338
-rw-r--r--Documentation/btrfs-find-root.asciidoc35
-rw-r--r--Documentation/btrfs-image.asciidoc66
-rw-r--r--Documentation/btrfs-inspect-internal.asciidoc84
-rw-r--r--Documentation/btrfs-man5.asciidoc406
-rw-r--r--Documentation/btrfs-map-logical.asciidoc37
-rw-r--r--Documentation/btrfs-property.asciidoc73
-rw-r--r--Documentation/btrfs-qgroup.asciidoc145
-rw-r--r--Documentation/btrfs-quota.asciidoc56
-rw-r--r--Documentation/btrfs-receive.asciidoc67
-rw-r--r--Documentation/btrfs-replace.asciidoc76
-rw-r--r--Documentation/btrfs-rescue.asciidoc90
-rw-r--r--Documentation/btrfs-restore.asciidoc91
-rw-r--r--Documentation/btrfs-scrub.asciidoc94
-rw-r--r--Documentation/btrfs-select-super.asciidoc29
-rw-r--r--Documentation/btrfs-send.asciidoc63
-rw-r--r--Documentation/btrfs-show-super.asciidoc54
-rw-r--r--Documentation/btrfs-subvolume.asciidoc182
-rw-r--r--Documentation/btrfs.asciidoc127
-rw-r--r--Documentation/btrfstune.asciidoc75
-rw-r--r--Documentation/fsck.btrfs.asciidoc51
-rw-r--r--Documentation/manpage-base.xsl35
-rw-r--r--Documentation/manpage-bold-literal.xsl17
-rw-r--r--Documentation/manpage-normal.xsl13
-rw-r--r--Documentation/mkfs.btrfs.asciidoc341
-rw-r--r--INSTALL79
-rw-r--r--Makefile.extrawarn90
-rw-r--r--Makefile.in399
-rw-r--r--README.md41
-rw-r--r--androidcompat.h28
-rwxr-xr-xautogen.sh99
-rw-r--r--backref.c1651
-rw-r--r--backref.h73
-rwxr-xr-xbcp159
-rw-r--r--bitops.h224
-rw-r--r--btrfs-calc-size.c506
-rw-r--r--btrfs-completion152
-rw-r--r--btrfs-convert.c3206
-rw-r--r--btrfs-corrupt-block.c1310
-rw-r--r--btrfs-crc.c90
-rw-r--r--btrfs-debug-tree.c462
-rwxr-xr-xbtrfs-debugfs296
-rw-r--r--btrfs-find-root.c222
-rw-r--r--btrfs-fragments.c452
-rw-r--r--btrfs-image.c2880
-rw-r--r--btrfs-list.c1914
-rw-r--r--btrfs-list.h178
-rw-r--r--btrfs-map-logical.c364
-rw-r--r--btrfs-select-super.c108
-rw-r--r--btrfs-show-super.c542
-rw-r--r--btrfs-zero-log.c76
-rw-r--r--btrfs.c248
-rw-r--r--btrfsck.h203
-rw-r--r--btrfstune.c554
-rw-r--r--chunk-recover.c2407
-rw-r--r--cmds-balance.c857
-rw-r--r--cmds-check.c9844
-rw-r--r--cmds-device.c543
-rw-r--r--cmds-fi-usage.c1036
-rw-r--r--cmds-fi-usage.h57
-rw-r--r--cmds-filesystem.c1295
-rw-r--r--cmds-inspect.c644
-rw-r--r--cmds-property.c478
-rw-r--r--cmds-qgroup.c492
-rw-r--r--cmds-quota.c189
-rw-r--r--cmds-receive.c1306
-rw-r--r--cmds-replace.c561
-rw-r--r--cmds-rescue.c219
-rw-r--r--cmds-restore.c1612
-rw-r--r--cmds-scrub.c1803
-rw-r--r--cmds-send.c741
-rw-r--r--cmds-subvolume.c1347
-rw-r--r--commands.h134
-rw-r--r--config.h.in129
-rwxr-xr-xconfig/config.guess1568
-rwxr-xr-xconfig/config.sub1788
-rwxr-xr-xconfig/install-sh527
-rwxr-xr-xconfigure7246
-rw-r--r--configure.ac174
-rw-r--r--crc32c.c222
-rw-r--r--crc32c.h33
-rw-r--r--ctree.c2940
-rw-r--r--ctree.h2570
-rw-r--r--debian/btrfs-progs-udeb.install2
-rw-r--r--debian/btrfs-progs.install3
-rw-r--r--debian/btrfs-progs.lintian-overrides4
-rw-r--r--debian/btrfs-progs.triggers1
-rw-r--r--debian/changelog672
-rw-r--r--debian/compat1
-rw-r--r--debian/control65
-rw-r--r--debian/copyright41
-rw-r--r--debian/local/btrfs-lvm.udev6
-rw-r--r--debian/local/btrfs.hook29
-rw-r--r--debian/local/btrfs.local-premount23
-rw-r--r--debian/local/btrfs.udev7
-rwxr-xr-xdebian/rules32
-rw-r--r--debian/source/format1
-rw-r--r--debian/source/options2
-rw-r--r--debian/watch2
-rw-r--r--dir-item.c351
-rw-r--r--dir-test.c517
-rw-r--r--disk-io.c1750
-rw-r--r--disk-io.h166
-rw-r--r--extent-cache.c341
-rw-r--r--extent-cache.h121
-rw-r--r--extent-tree.c4122
-rw-r--r--extent_io.c892
-rw-r--r--extent_io.h159
-rw-r--r--file-item.c486
-rw-r--r--file.c162
-rw-r--r--find-root.c150
-rw-r--r--find-root.h87
-rw-r--r--free-space-cache.c878
-rw-r--r--free-space-cache.h62
-rw-r--r--free-space-tree.c273
-rw-r--r--free-space-tree.h25
-rwxr-xr-xfsck.btrfs39
-rw-r--r--hash.h28
-rw-r--r--hasher.c42
-rw-r--r--help.c271
-rw-r--r--inode-item.c460
-rw-r--r--inode-map.c100
-rw-r--r--inode.c536
-rw-r--r--interval_tree_generic.h193
-rw-r--r--ioctl-test.c37
-rw-r--r--ioctl.h716
-rw-r--r--kerncompat.h374
-rw-r--r--library-test.c69
-rw-r--r--list.h486
-rw-r--r--list_sort.c144
-rw-r--r--list_sort.h14
-rw-r--r--mkfs.c1844
-rw-r--r--print-tree.c1137
-rw-r--r--print-tree.h27
-rw-r--r--props.c198
-rw-r--r--props.h43
-rw-r--r--qgroup-verify.c1179
-rw-r--r--qgroup-verify.h30
-rw-r--r--qgroup.c1390
-rw-r--r--qgroup.h101
-rw-r--r--quick-test.c221
-rw-r--r--radix-tree.c849
-rw-r--r--radix-tree.h97
-rw-r--r--raid6.c101
-rw-r--r--random-test.c433
-rw-r--r--rbtree-utils.c82
-rw-r--r--rbtree-utils.h53
-rw-r--r--rbtree.c548
-rw-r--r--rbtree.h118
-rw-r--r--rbtree_augmented.h249
-rw-r--r--repair.c50
-rw-r--r--repair.h34
-rw-r--r--root-tree.c192
-rw-r--r--send-stream.c499
-rw-r--r--send-stream.h80
-rw-r--r--send-test.c447
-rw-r--r--send-utils.c771
-rw-r--r--send-utils.h108
-rw-r--r--send.h147
-rwxr-xr-xshow-blocks325
-rw-r--r--string-table.c151
-rw-r--r--string-table.h34
-rw-r--r--super-recover.c336
-rw-r--r--task-utils.c143
-rw-r--r--task-utils.h47
-rw-r--r--tests/README.md136
-rwxr-xr-xtests/clean-tests.sh24
-rw-r--r--tests/common240
-rw-r--r--tests/convert-tests.sh86
-rwxr-xr-xtests/fsck-tests.sh68
-rw-r--r--tests/fsck-tests/001-bad-file-extent-bytenr/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/002-bad-transid/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/003-shift-offsets/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/004-no-dir-index/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/005-bad-item-offset/default_case.imgbin0 -> 398336 bytes
-rw-r--r--tests/fsck-tests/006-bad-root-items/default_case.tar.xzbin0 -> 24980 bytes
-rw-r--r--tests/fsck-tests/006-bad-root-items/skinny_case.tar.xzbin0 -> 26520 bytes
-rwxr-xr-xtests/fsck-tests/006-bad-root-items/test.sh17
-rw-r--r--tests/fsck-tests/007-bad-offset-snapshots/default_case.imgbin0 -> 249856 bytes
-rw-r--r--tests/fsck-tests/008-bad-dir-index-name/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/009-no-dir-item-or-index/default_case.imgbin0 -> 4953088 bytes
-rw-r--r--tests/fsck-tests/010-no-rootdir-inode-item/default_case.imgbin0 -> 398336 bytes
-rw-r--r--tests/fsck-tests/011-no-inode-item/default_case.imgbin0 -> 398336 bytes
-rw-r--r--tests/fsck-tests/012-leaf-corruption/no_data_extent.tar.xzbin0 -> 130260 bytes
-rwxr-xr-xtests/fsck-tests/012-leaf-corruption/test.sh117
-rwxr-xr-xtests/fsck-tests/013-extent-tree-rebuild/test.sh44
-rw-r--r--tests/fsck-tests/014-no-extent-info/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.txt254
-rw-r--r--tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.xzbin0 -> 6748 bytes
-rw-r--r--tests/fsck-tests/016-wrong-inode-nbytes/default_case.img.xzbin0 -> 1996 bytes
-rw-r--r--tests/fsck-tests/017-missing-all-file-extent/default_case.img.xzbin0 -> 1104 bytes
-rw-r--r--tests/fsck-tests/018-leaf-crossing-stripes/default_case.raw.xzbin0 -> 105064 bytes
-rwxr-xr-xtests/fsck-tests/018-leaf-crossing-stripes/test.sh12
-rw-r--r--tests/fsck-tests/019-non-skinny-false-alert/default_case.img.xzbin0 -> 15236 bytes
-rwxr-xr-xtests/fsck-tests/019-non-skinny-false-alert/test.sh23
-rwxr-xr-xtests/fuzz-tests.sh41
-rwxr-xr-xtests/fuzz-tests/001-simple-unmounted/test.sh20
-rw-r--r--tests/fuzz-tests/images/bad-superblock-1.raw.xzbin0 -> 228 bytes
-rw-r--r--tests/fuzz-tests/images/bad-superblock-2.raw.xzbin0 -> 228 bytes
-rw-r--r--tests/fuzz-tests/images/bad-superblock-3.raw.xzbin0 -> 228 bytes
-rw-r--r--tests/fuzz-tests/images/bad-superblock.txt17
-rw-r--r--tests/fuzz-tests/images/bko-104131-fsck-oob-read.raw.xzbin0 -> 192 bytes
-rw-r--r--tests/fuzz-tests/images/bko-104131-fsck-oob-read.txt31
-rw-r--r--tests/fuzz-tests/images/bko-104141-fsck-exception.raw.xzbin0 -> 196 bytes
-rw-r--r--tests/fuzz-tests/images/bko-104141-fsck-exception.txt9
-rw-r--r--tests/fuzz-tests/images/bko-97191-btrfs-image.raw.txt137
-rw-r--r--tests/fuzz-tests/images/bko-97191-btrfs-image.raw.xzbin0 -> 7076 bytes
-rw-r--r--tests/fuzz-tests/images/bko-97271-btrfs-image.raw.txt54
-rw-r--r--tests/fuzz-tests/images/bko-97271-btrfs-image.raw.xzbin0 -> 6580 bytes
-rw-r--r--tests/fuzz-tests/images/sys-array-num-stripes-0.raw.txt30
-rw-r--r--tests/fuzz-tests/images/sys-array-num-stripes-0.raw.xzbin0 -> 8364 bytes
-rwxr-xr-xtests/misc-tests.sh47
-rwxr-xr-xtests/misc-tests/001-btrfstune-features/test.sh46
-rwxr-xr-xtests/misc-tests/002-uuid-rewrite/test.sh66
-rwxr-xr-xtests/misc-tests/003-zero-log/test.sh52
-rwxr-xr-xtests/misc-tests/004-shrink-fs/test.sh72
-rwxr-xr-xtests/misc-tests/005-convert-progress-thread-crash/test.sh15
-rwxr-xr-xtests/misc-tests/006-image-on-missing-device/test.sh78
-rwxr-xr-xtests/misc-tests/007-subvolume-sync/test.sh32
-rwxr-xr-xtests/misc-tests/008-leaf-crossing-stripes/test.sh25
-rwxr-xr-xtests/misc-tests/009-subvolume-sync-must-wait/test.sh52
-rwxr-xr-xtests/misc-tests/010-convert-delete-ext2-subvol/test.sh26
-rwxr-xr-xtests/misc-tests/011-delete-missing-device/test.sh83
-rw-r--r--tests/misc-tests/012-find-root-no-result/first_meta_chunk.btrfs-imagebin0 -> 4096 bytes
-rwxr-xr-xtests/misc-tests/012-find-root-no-result/test.sh24
-rwxr-xr-xtests/mkfs-tests.sh44
-rwxr-xr-xtests/mkfs-tests/001-basic-profiles/test.sh91
-rwxr-xr-xtests/mkfs-tests/002-no-force-mixed-on-small-volume/test.sh13
-rwxr-xr-xtests/mkfs-tests/003-mixed-with-wrong-nodesize/test.sh12
-rwxr-xr-xtests/mkfs-tests/004-rootdir-keeps-size/test.sh31
-rwxr-xr-xtests/mkfs-tests/005-long-device-name-for-ssd/test.sh40
-rw-r--r--tests/mkfs-tests/006-partitioned-loopdev/partition-1g-1gbin0 -> 512 bytes
-rwxr-xr-xtests/mkfs-tests/006-partitioned-loopdev/test.sh27
-rwxr-xr-xtests/mkfs-tests/007-mix-nodesize-sectorsize/test.sh36
-rwxr-xr-xtests/mkfs-tests/008-secorsize-nodesize-combination/test.sh50
-rw-r--r--transaction.h64
-rw-r--r--ulist.c253
-rw-r--r--ulist.h81
-rw-r--r--utils-lib.c40
-rw-r--r--utils.c3150
-rw-r--r--utils.h338
-rw-r--r--uuid-tree.c105
-rw-r--r--version.h.in14
-rwxr-xr-xversion.sh37
-rw-r--r--volumes.c2133
-rw-r--r--volumes.h229
256 files changed, 99051 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..a27cb0d9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,75 @@
+*.o
+*.static.o
+*.o.d
+tags
+.cc-defines.h
+version.h
+version
+man/*.gz
+Documentation/*.gz
+Documentation/*.html
+btrfs
+btrfs.static
+btrfs-debug-tree
+btrfs-map-logical
+btrfs-fragments
+btrfsck
+calc-size
+ioctl-test
+dir-test
+send-test
+quick-test
+find-root
+mkfs.btrfs
+mkfs.btrfs.static
+repair
+restore
+btrfs-convert
+btrfs-find-root
+btrfs-find-root.static
+btrfs-image
+btrfs-show-super
+btrfs-zero-log
+btrfs-corrupt-block
+btrfs-select-super
+btrfs-calc-size
+btrfstune
+libbtrfs.a
+libbtrfs.so
+libbtrfs.so.0
+libbtrfs.so.0.1
+library-test
+library-test-static
+
+/tests/*-tests-results.txt
+/tests/test.img
+
+aclocal.m4
+autom4te.cache
+compile
+config.cache
+config.guess
+config.h
+config.h.in
+config.h.in~
+config.log
+config.rpath
+config.status
+config.sub
+config/ltmain.sh
+config/py-compile
+config/test-driver
+configure
+cscope.out
+depcomp
+libtool
+m4/*.m4
+Makefile
+Documentation/Makefile
+missing
+mkinstalldirs
+stamp-h
+stamp-h.in
+stamp-h1
+config/*
+
diff --git a/Android.mk b/Android.mk
new file mode 100644
index 00000000..fe3209b6
--- /dev/null
+++ b/Android.mk
@@ -0,0 +1,111 @@
+LOCAL_PATH:= $(call my-dir)
+
+#include $(call all-subdir-makefiles)
+
+CFLAGS := -g -O1 -Wall -D_FORTIFY_SOURCE=2 -include config.h \
+ -DBTRFS_FLAT_INCLUDES -D_XOPEN_SOURCE=700 -fno-strict-aliasing -fPIC
+
+LDFLAGS := -static -rdynamic
+
+LIBS := -luuid -lblkid -lz -llzo2 -L. -lpthread
+LIBBTRFS_LIBS := $(LIBS)
+
+STATIC_CFLAGS := $(CFLAGS) -ffunction-sections -fdata-sections
+STATIC_LDFLAGS := -static -Wl,--gc-sections
+STATIC_LIBS := -luuid -lblkid -luuid -lz -llzo2 -L. -pthread
+
+btrfs_shared_libraries := libext2_uuid \
+ libext2_blkid
+
+objects := ctree.c disk-io.c radix-tree.c extent-tree.c print-tree.c \
+ root-tree.c dir-item.c file-item.c inode-item.c inode-map.c \
+ extent-cache.c extent_io.c volumes.c utils.c repair.c \
+ qgroup.c raid6.c free-space-cache.c list_sort.c props.c \
+ ulist.c qgroup-verify.c backref.c string-table.c task-utils.c \
+ inode.c file.c find-root.c
+cmds_objects := cmds-subvolume.c cmds-filesystem.c cmds-device.c cmds-scrub.c \
+ cmds-inspect.c cmds-balance.c cmds-send.c cmds-receive.c \
+ cmds-quota.c cmds-qgroup.c cmds-replace.c cmds-check.c \
+ cmds-restore.c cmds-rescue.c chunk-recover.c super-recover.c \
+ cmds-property.c cmds-fi-usage.c
+libbtrfs_objects := send-stream.c send-utils.c rbtree.c btrfs-list.c crc32c.c \
+ uuid-tree.c utils-lib.c rbtree-utils.c
+libbtrfs_headers := send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \
+ crc32c.h list.h kerncompat.h radix-tree.h extent-cache.h \
+ extent_io.h ioctl.h ctree.h btrfsck.h version.h
+TESTS := fsck-tests.sh convert-tests.sh
+blkid_objects := partition/ superblocks/ topology/
+
+
+# external/e2fsprogs/lib is needed for uuid/uuid.h
+common_C_INCLUDES := $(LOCAL_PATH) external/e2fsprogs/lib/ external/lzo/include/ external/zlib/
+
+#----------------------------------------------------------
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := $(libbtrfs_objects)
+LOCAL_CFLAGS := $(STATIC_CFLAGS)
+LOCAL_MODULE := libbtrfs
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+include $(BUILD_STATIC_LIBRARY)
+
+#----------------------------------------------------------
+include $(CLEAR_VARS)
+LOCAL_MODULE := btrfs
+#LOCAL_FORCE_STATIC_EXECUTABLE := true
+LOCAL_SRC_FILES := \
+ $(objects) \
+ $(cmds_objects) \
+ btrfs.c \
+ help.c \
+
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_CFLAGS := $(STATIC_CFLAGS)
+#LOCAL_LDLIBS := $(LIBBTRFS_LIBS)
+#LOCAL_LDFLAGS := $(STATIC_LDFLAGS)
+LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries)
+LOCAL_STATIC_LIBRARIES := libbtrfs liblzo-static libz
+LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils
+
+LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES)
+#LOCAL_MODULE_TAGS := optional
+include $(BUILD_EXECUTABLE)
+
+#----------------------------------------------------------
+include $(CLEAR_VARS)
+LOCAL_MODULE := mkfs.btrfs
+LOCAL_SRC_FILES := \
+ $(objects) \
+ mkfs.c
+
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_CFLAGS := $(STATIC_CFLAGS)
+#LOCAL_LDLIBS := $(LIBBTRFS_LIBS)
+#LOCAL_LDFLAGS := $(STATIC_LDFLAGS)
+LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries)
+LOCAL_STATIC_LIBRARIES := libbtrfs liblzo-static
+LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils
+
+LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES)
+#LOCAL_MODULE_TAGS := optional
+include $(BUILD_EXECUTABLE)
+
+#---------------------------------------------------------------
+include $(CLEAR_VARS)
+LOCAL_MODULE := btrfstune
+LOCAL_SRC_FILES := \
+ $(objects) \
+ btrfstune.c
+
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_CFLAGS := $(STATIC_CFLAGS)
+LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries)
+#LOCAL_LDLIBS := $(LIBBTRFS_LIBS)
+#LOCAL_LDFLAGS := $(STATIC_LDFLAGS)
+LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries)
+LOCAL_STATIC_LIBRARIES := libbtrfs liblzo-static
+LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils
+
+LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_MODULE_TAGS := optional
+include $(BUILD_EXECUTABLE)
+#--------------------------------------------------------------
diff --git a/COPYING b/COPYING
new file mode 100644
index 00000000..10828e06
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,341 @@
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/Documentation/Makefile.in b/Documentation/Makefile.in
new file mode 100644
index 00000000..f046abd5
--- /dev/null
+++ b/Documentation/Makefile.in
@@ -0,0 +1,133 @@
+# Guard against environment variables
+MAN8_TXT =
+
+# Top level commands
+MAN8_TXT += btrfs.asciidoc
+MAN8_TXT += btrfs-convert.asciidoc
+MAN8_TXT += btrfs-debug-tree.asciidoc
+MAN8_TXT += btrfs-find-root.asciidoc
+MAN8_TXT += btrfs-image.asciidoc
+MAN8_TXT += btrfs-map-logical.asciidoc
+MAN8_TXT += btrfs-show-super.asciidoc
+MAN8_TXT += btrfs-select-super.asciidoc
+MAN8_TXT += btrfstune.asciidoc
+MAN8_TXT += fsck.btrfs.asciidoc
+MAN8_TXT += mkfs.btrfs.asciidoc
+
+# Sub commands for btrfs
+MAN8_TXT += btrfs-subvolume.asciidoc
+MAN8_TXT += btrfs-filesystem.asciidoc
+MAN8_TXT += btrfs-balance.asciidoc
+MAN8_TXT += btrfs-device.asciidoc
+MAN8_TXT += btrfs-scrub.asciidoc
+MAN8_TXT += btrfs-check.asciidoc
+MAN8_TXT += btrfs-rescue.asciidoc
+MAN8_TXT += btrfs-inspect-internal.asciidoc
+MAN8_TXT += btrfs-send.asciidoc
+MAN8_TXT += btrfs-receive.asciidoc
+MAN8_TXT += btrfs-quota.asciidoc
+MAN8_TXT += btrfs-qgroup.asciidoc
+MAN8_TXT += btrfs-replace.asciidoc
+MAN8_TXT += btrfs-restore.asciidoc
+MAN8_TXT += btrfs-property.asciidoc
+
+# Category 5 manual page
+MAN5_TXT += btrfs-man5.asciidoc
+
+MAN_TXT = $(MAN8_TXT) $(MAN5_TXT)
+MAN_XML = $(patsubst %.asciidoc,%.xml,$(MAN_TXT))
+MAN_HTML = $(patsubst %.asciidoc,%.html,$(MAN_TXT))
+
+DOC_MAN5 = $(patsubst %.asciidoc,%.5,$(MAN5_TXT))
+GZ_MAN5 = $(patsubst %.asciidoc,%.5.gz,$(MAN5_TXT))
+
+DOC_MAN8 = $(patsubst %.asciidoc,%.8,$(MAN8_TXT))
+GZ_MAN8 = $(patsubst %.asciidoc,%.8.gz,$(MAN8_TXT))
+
+mandir ?= $(prefix)/share/man
+man8dir = $(mandir)/man8
+man5dir = $(mandir)/man5
+
+ASCIIDOC = @ASCIIDOC@
+ASCIIDOC_EXTRA =
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO = @XMLTO@
+XMLTO_EXTRA =
+XMLTO_EXTRA = -m manpage-bold-literal.xsl
+GZIPCMD = @GZIP@
+INSTALL = @INSTALL@
+RM = @RM@
+RMDIR = @RMDIR@
+LN_S = @LN_S@
+MV = @MV@
+SED = @SED@
+BTRFS_VERSION = $(shell $(SED) -n 's/.*PACKAGE_VERSION "\(.*\)"/\1/p'\
+ ../config.h)
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+ QUIET_RM = @
+ QUIET_ASCIIDOC = @echo " [ASCII] $@";
+ QUIET_XMLTO = @echo " [XMLTO] $@";
+ QUIET_GZIP = @echo " [GZ] $@";
+ QUIET_STDERR = 2> /dev/null
+ QUIET_SUBDIR0 = +@subdir=
+ QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
+ $(MAKE) $(PRINT_DIR) -C $$subdir
+ export V
+endif
+endif
+
+all: man
+man: man5 man8
+man5: $(GZ_MAN5)
+man8: $(GZ_MAN8)
+html: $(MAN_HTML)
+
+install: install-man
+
+install-man: man
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
+ $(INSTALL) -m 644 $(GZ_MAN5) $(DESTDIR)$(man5dir)
+ # the source file name of btrfs.5 clashes with section 8 page, but we
+ # want to keep the code generic
+ $(MV) $(DESTDIR)$(man5dir)/btrfs-man5.5.gz $(DESTDIR)$(man5dir)/btrfs.5.gz
+ $(INSTALL) -m 644 $(GZ_MAN8) $(DESTDIR)$(man8dir)
+ $(LN_S) -f btrfs-check.8.gz $(DESTDIR)$(man8dir)/btrfsck.8.gz
+ $(LN_S) -f btrfs-rescue.8.gz $(DESTDIR)$(man8dir)/btrfs-zero-log.8.gz
+
+uninstall:
+ cd $(DESTDIR)$(man8dir); rm -f btrfs-check.8.gz $(GZ_MAN8)
+ $(RMDIR) -p --ignore-fail-on-non-empty $(DESTDIR)$(man8dir)
+
+clean:
+ $(QUIET_RM)$(RM) -f *.xml *.xml+ *.5 *.5.gz *.8 *.8.gz *.html
+
+%.5.gz : %.5
+ $(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@
+
+%.8.gz : %.8
+ $(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@
+
+%.5 : %.xml
+ $(QUIET_XMLTO)$(RM) -f $@ && \
+ $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+%.8 : %.xml
+ $(QUIET_XMLTO)$(RM) -f $@ && \
+ $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+%.xml : %.asciidoc asciidoc.conf
+ $(QUIET_ASCIIDOC)$(RM) -f $@+ $@ && \
+ $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+ $(ASCIIDOC_EXTRA) -abtrfs_version=$(BTRFS_VERSION) \
+ -o $@+ $< && \
+ $(MV) $@+ $@
+
+%.html : %.asciidoc asciidoc.conf
+ $(QUIET_ASCIIDOC)$(RM) -f $@+ $@ && \
+ $(ASCIIDOC) -b html -d article -f asciidoc.conf \
+ $(ASCIIDOC_EXTRA) -abtrfs_version=$(BTRFS_VERSION) \
+ -o $@+ $< && \
+ $(MV) $@+ $@
diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf
new file mode 100644
index 00000000..1ea74591
--- /dev/null
+++ b/Documentation/asciidoc.conf
@@ -0,0 +1,48 @@
+## linkbtrfs: macro
+#
+# Usage: linkbtrfs:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show Btrfslink as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linkbtrfs):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[tags]
+bracket-emphasis={1?[{1}]}<emphasis>&lt;|&gt;</emphasis>
+
+[quotes]
+&lt;|&gt;=#bracket-emphasis
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+backslash=&#92;
+tilde=&#126;
+apostrophe=&#39;
+backtick=&#96;
+litdd=&#45;&#45;
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">Btrfs</refmiscinfo>
+<refmiscinfo class="version">{btrfs_version}</refmiscinfo>
+<refmiscinfo class="manual">Btrfs Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+ <refname>{manname}</refname>
+ <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
diff --git a/Documentation/btrfs-balance.asciidoc b/Documentation/btrfs-balance.asciidoc
new file mode 100644
index 00000000..c8407419
--- /dev/null
+++ b/Documentation/btrfs-balance.asciidoc
@@ -0,0 +1,217 @@
+btrfs-balance(8)
+================
+
+NAME
+----
+btrfs-balance - balance block groups on a btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs balance* <subcommand> <args>
+
+DESCRIPTION
+-----------
+The primary purpose of the balance feature is to spread block groups across
+all devices so they match constraints defined by the respective profiles. See
+`mkfs.btrfs`(8) section 'PROFILES' for more details.
+The scope of the balancing process can be further tuned by use of filters that
+can select the block groups to process. Balance works only on a mounted
+filesystem.
+
+The balance operation is cancellable by the user. The on-disk state of the
+filesystem is always consistent so an unexpected interruption (eg. system crash,
+reboot) does not corrupt the filesystem. The progress of the balance operation
+is temporarily stored and will be resumed upon mount, unless the mount option
+'skip_balance' is specified.
+
+WARNING: running balance without filters will take a lot of time as it basically
+rewrites the entire filesystem and needs to update all block pointers.
+
+The filters can be used to perform following actions:
+
+- convert block group profiles (filter 'convert')
+- make block group usage more compact (filter 'usage')
+- perform actions only on a given device (filters 'devid', 'drange')
+
+The filters can be applied to a combination of block group types (data,
+metadata, system). Note that changing 'system' needs the force option.
+
+NOTE: the balance operation needs enough work space, ie. space that is
+completely unused in the filesystem, otherwise this may lead to ENOSPC reports.
+See the section 'ENOSPC' for more details.
+
+COMPATIBILITY
+-------------
+
+NOTE: The balance subcommand also exists under the *btrfs filesystem*
+namespace. This still works for backward compatibility but is deprecated and
+should not be used anymore.
+
+NOTE: A short syntax *btrfs balance <path>* works due to backward compatibility
+but is deprecated and should not be used anymore. Use *btrfs balance start*
+command instead.
+
+SUBCOMMAND
+----------
+*cancel* <path>::
+cancel running or paused balance
+
+*pause* <path>::
+pause running balance operation, this will store the state of the balance
+progress and used filters to the filesystem
+
+*resume* <path>::
+resume interrupted balance
+
+*start* [options] <path>::
+start the balance operation according to the specified filters, no filters
+will rewrite the entire filesystem. The process runs in the foreground.
++
+`Options`
++
+-d[<filters>]::::
+act on data block groups, see `FILTERS` section for details about 'filters'
+-m[<filters>]::::
+act on metadata chunks, see `FILTERS` section for details about 'filters'
+-s[<filters>]::::
+act on system chunks (requires '-f'), see `FILTERS` section for details about 'filters'.
+-v::::
+be verbose and print balance filter arguments
+-f::::
+force reducing of metadata integrity, eg. when going from 'raid1' to 'single'
+
+*status* [-v] <path>::
+Show status of running or paused balance.
++
+If '-v' option is given, output will be verbose.
+
+FILTERS
+-------
+From kernel 3.3 onwards, btrfs balance can limit its action to a subset of the
+full filesystem, and can be used to change the replication configuration (e.g.
+moving data from single to RAID1). This functionality is accessed through the
+'-d', '-m' or '-s' options to btrfs balance start, which filter on data,
+metadata and system blocks respectively.
+
+A filter has the following structure: 'type'[='params'][,'type'=...]
+
+The available types are:
+
+*profiles=<profiles>*::
+Balances only block groups with the given profiles. Parameters
+are a list of profile names separated by "'|'" (pipe).
+
+*usage=<percent>*::
+*usage=<range>*::
+Balances only block groups with usage under the given percentage. The
+value of 0 is allowed and will clean up completely unused block groups, this
+should not require any new work space allocated. You may want to use 'usage=0'
+in case balance is returning ENOSPC and your filesystem is not too full.
++
+The argument may be a single value or a range. The single value 'N' means 'at
+most N percent used', equivalent to '..N' range syntax. Kernels prior to 4.4
+accept only the single value format.
+The minimum range boundary is inclusive, maximum is exclusive.
+
+*devid=<id>*::
+Balances only block groups which have at least one chunk on the given
+device. To list devices with ids use *btrfs fi show*.
+
+*drange=<range>*::
+Balance only block groups which overlap with the given byte range on any
+device. Use in conjunction with 'devid' to filter on a specific device. The
+parameter is a range specified as 'start..end'.
+
+*vrange=<range>*::
+Balance only block groups which overlap with the given byte range in the
+filesystem's internal virtual address space. This is the address space that
+most reports from btrfs in the kernel log use. The parameter is a range
+specified as 'start..end'.
+
+*convert=<profile>*::
+Convert each selected block group to the given profile name identified by
+parameters.
++
+NOTE: starting with kernel 4.5, the 'data' chunks can be converted to/from the
+'DUP' profile on a single device.
+
+*limit=<number>*::
+*limit=<range>*::
+Process only given number of chunks, after all filters are applied. This can be
+used to specifically target a chunk in connection with other filters ('drange',
+'vrange') or just simply limit the amount of work done by a single balance run.
++
+The argument may be a single value or a range. The single value 'N' means 'at
+most N chunks', equivalent to '..N' range syntax. Kernels prior to 4.4 accept
+only the single value format. The range minimum and maximum are inclusive.
+
+*stripes=<range>*::
+Balance only block groups which have the given number of stripes. The parameter
+is a range specified as 'start..end'. Makes sense for block group profiles that
+utilize striping, ie. RAID0/10/5/6. The range minimum and maximum are
+inclusive.
+
+*soft*::
+Takes no parameters. Only has meaning when converting between profiles.
+When doing convert from one profile to another and soft mode is on,
+chunks that already have the target profile are left untouched.
+This is useful e.g. when half of the filesystem was converted earlier but got
+cancelled.
++
+The soft mode switch is (like every other filter) per-type.
+For example, this means that we can convert metadata chunks the "hard" way
+while converting data chunks selectively with soft switch.
+
+Profile names, used in 'profiles' and 'convert' are one of: 'raid0', 'raid1',
+'raid10', 'raid5', 'raid6', 'dup', 'single'. The mixed data/metadata profiles
+can be converted in the same way, but conversion between mixed and non-mixed
+is not implemented. For the constraints of the profiles please refer to `mkfs.btrfs`(8),
+section 'PROFILES'.
+
+ENOSPC
+------
+
+The way balance operates, it usually needs to temporarily create a new block
+group and move the old data there. For that it needs work space, otherwise
+it fails for ENOSPC reasons.
+This is not the same ENOSPC as if the free space is exhausted. This refers to
+the space on the level of block groups.
+
+The free work space can be calculated from the output of the *btrfs filesystem show*
+command:
+
+------------------------------
+ Label: 'BTRFS' uuid: 8a9d72cd-ead3-469d-b371-9c7203276265
+ Total devices 2 FS bytes used 77.03GiB
+ devid 1 size 53.90GiB used 51.90GiB path /dev/sdc2
+ devid 2 size 53.90GiB used 51.90GiB path /dev/sde1
+------------------------------
+
+'size' - 'used' = 'free work space' +
+'53.90GiB' - '51.90GiB' = '2.00GiB'
+
+An example of a filter that does not require workspace is 'usage=0'. This will
+scan through all unused block groups of a given type and will reclaim the
+space. After that it might be possible to run other filters.
+
+**CONVERSIONS ON MULTIPLE DEVICES**
+
+Conversion to profiles based on striping (RAID0, RAID5/6) requires the work
+space on each device. An interrupted balance may leave partially filled block
+groups that might consume the work space.
+
+EXIT STATUS
+-----------
+*btrfs balance* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-device`(8)
diff --git a/Documentation/btrfs-check.asciidoc b/Documentation/btrfs-check.asciidoc
new file mode 100644
index 00000000..327a45d6
--- /dev/null
+++ b/Documentation/btrfs-check.asciidoc
@@ -0,0 +1,60 @@
+btrfs-check(8)
+==============
+
+NAME
+----
+btrfs-check - check or repair an unmounted btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs check* [options] <device>
+
+DESCRIPTION
+-----------
+*btrfs check* is used to check or repair an unmounted btrfs filesystem.
+
+NOTE: Since btrfs is under development, the *btrfs check* capabilities are
+continuously enhanced. It's highly recommended to read the following btrfs
+wiki before executing *btrfs check* with '--repair' option: +
+https://btrfs.wiki.kernel.org/index.php/Btrfsck
+
+*btrfsck* is an alias of *btrfs check* command and is now deprecated.
+
+OPTIONS
+-------
+-s|--super <superblock>::
+use <superblock>th superblock copy, valid values are 0 up to 2 if the
+respective superblock offset is within the filesystem
+--repair::
+try to repair the filesystem
+--init-csum-tree::
+create a new CRC tree and recalculate all checksums
+--init-extent-tree::
+create a new extent tree
+--check-data-csum::
+verify checksums of data blocks
+-p|--progress::
+indicate progress at various checking phases
+--qgroup-report::
+verify qgroup accounting and compare against filesystem accounting
+--subvol-extents <subvolid>::
+show extent state for a subvolume
+--tree-root <bytenr>::
+use the given bytenr for the tree root
+
+EXIT STATUS
+-----------
+*btrfs check* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-scrub`(8),
+`btrfs-rescue`(8)
diff --git a/Documentation/btrfs-convert.asciidoc b/Documentation/btrfs-convert.asciidoc
new file mode 100644
index 00000000..ca3417f4
--- /dev/null
+++ b/Documentation/btrfs-convert.asciidoc
@@ -0,0 +1,98 @@
+btrfs-convert(8)
+================
+
+NAME
+----
+btrfs-convert - convert from ext2/3/4 filesystem to btrfs
+
+SYNOPSIS
+--------
+*btrfs-convert* [options] <device>
+
+DESCRIPTION
+-----------
+*btrfs-convert* is used to convert existing ext2/3/4 filesystem image to a
+btrfs filesystem in-place. The original filesystem image is accessible in the
+subvolume named 'ext2_saved' as file 'image'.
+
+WARNING: If you are going to perform rollback to ext2/3/4, you should not
+execute *btrfs balance* command on the converted filesystem. This will change
+the extent layout and make *btrfs-convert* unable to rollback.
+
+The conversion utilizes free space of the original filesystem. The exact
+estimate of the required space cannot be foretold. The final btrfs metadata
+might occupy several gigabytes on a hundreds-gigabyte filesystem.
+
+If you decide not to rollback anymore, it is recommended to perform a few more
+steps to transform the btrfs filesystem to a more compact layout. The
+conversion inherits the original data block fragmentation and the metadata
+blocks are bound to the original free space layout.
+
+**REMOVE THE ORIGINAL FILESYSTEM METADATA**
+
+By removing the 'ext2_saved' subvolume, all metadata of the original filesystem
+will be removed:
+
+ # btrfs subvolume delete /mnt/ext2_saved
+
+At this point it's not possible to do rollback. The filesystem is usable but may
+be impacted by the fragmentation.
+
+**MAKE FILE DATA MORE CONTIGUOUS**
+
+An optional but recommended step is to run defragmentation on the entire
+filesystem. This will attempt to make file extents more contiguous.
+
+ # btrfs filesystem defrag -v -r -f -t 32M /mnt/btrfs
+
+Verbose recursive defragmentation ('-v', '-r'), flush data per-file ('-f') with target
+extent size 32M ('-t').
+
+**ATTEMPT TO MAKE BTRFS METADATA MORE COMPACT**
+
+Optional but recommended step.
+
+The metadata block groups after conversion may be smaller than the default size
+(256MiB or 1GiB). Running a balance will attempt to merge the block groups.
+This depends on the free space layout (and fragmentation) and may fail. This is
+a soft error leaving the filesystem usable but the block group layout may
+remain unchanged.
+
+Note that balance operation takes a lot of time.
+
+ # btrfs balance start -m /mnt/btrfs
+
+OPTIONS
+-------
+-d|--no-datasum::
+disable data checksum calculations and set NODATASUM file flag, this can speed
+up the conversion
+-i|--no-xattr::
+ignore xattrs and ACLs of files
+-n|--no-inline::
+disable inlining of small files to metadata blocks, this will decrease the metadata
+consumption and may help to convert a filesystem with low free space
+-N|--nodesize <SIZE>::
+set filesystem nodesize, the tree block size in which btrfs stores its metadata.
+The default value is 16KB (16384) or the page size, whichever is bigger.
+Must be a multiple of the sectorsize, but not larger than 65536. See
+`mkfs.btrfs`(8) for more details.
+-r|--rollback::
+rollback to the original ext2/3/4 filesystem if possible
+-l|--label <LABEL>::
+set filesystem label during conversion
+-L|--copy-label::
+use label from the converted filesystem
+-p|--progress::
+show progress of conversion, on by default
+--no-progress::
+disable detailed progress and show only the main phases of conversion
+
+EXIT STATUS
+-----------
+*btrfs-convert* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-debug-tree.asciidoc b/Documentation/btrfs-debug-tree.asciidoc
new file mode 100644
index 00000000..23fc1156
--- /dev/null
+++ b/Documentation/btrfs-debug-tree.asciidoc
@@ -0,0 +1,38 @@
+btrfs-debug-tree(8)
+===================
+
+NAME
+----
+btrfs-debug-tree - dump btrfs filesystem metadata into stdout
+
+SYNOPSIS
+--------
+*btrfs-debug-tree* [options] <device>
+
+DESCRIPTION
+-----------
+*btrfs-debug-tree* is used to dump the whole tree of the given device.
+
+This may be useful for analyzing filesystem state or inconsistencies and has
+a positive educational effect on understanding the internal structure.
+<device> is the device file where the filesystem is stored.
+
+OPTIONS
+-------
+-e::
+Print detailed extents info.
+-d::
+Print info of btrfs device and root tree dirs only.
+-r::
+Print info of roots only.
+-b <block_num>::
+Print info of the specified block only.
+
+EXIT STATUS
+-----------
+*btrfs-debug-tree* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-device.asciidoc b/Documentation/btrfs-device.asciidoc
new file mode 100644
index 00000000..2827598a
--- /dev/null
+++ b/Documentation/btrfs-device.asciidoc
@@ -0,0 +1,145 @@
+btrfs-device(8)
+===============
+
+NAME
+----
+btrfs-device - control btrfs devices
+
+SYNOPSIS
+--------
+*btrfs device* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs device* is used to control the btrfs devices, since btrfs can be used
+across several devices, *btrfs device* is used for multiple device management.
+
+DEVICE MANAGEMENT
+-----------------
+Btrfs filesystem is capable of managing multiple devices.
+
+Btrfs filesystem uses different profiles to manage different RAID levels, and
+uses balance to rebuild chunks. Devices can also be added/removed/replaced
+online.
+
+Profile::
+Btrfs filesystem uses data/metadata profiles to manage allocation/duplication
+mechanism. +
+Profiles like RAID level can be assigned to data and metadata separately.
++
+See `mkfs.btrfs`(8) for more details.
+
+RAID level::
+Btrfs filesystem supports most of the standard RAID levels: 0/1/5/6/10. +
+RAID levels can be assigned at mkfs time or online.
++
+See `mkfs.btrfs`(8) for assigning the RAID level at mkfs time and
+`btrfs-balance`(8) for assigning it online.
++
+NOTE: Since btrfs is under heavy development especially the RAID5/6 support,
+it is *highly* recommended to read the following btrfs wiki page to get more
+updated details on RAID5/6: +
+https://btrfs.wiki.kernel.org/index.php/RAID56
+
+Balance::
+`btrfs-balance`(8) subcommand can be used to balance or rebuild chunks to the
+desired profile.
++
+Balance can rebuild/recover chunks according to their RAID duplication if
+possible, so when using RAID1/5/6/10 with some devices failed and you have
+just added a new device to btrfs using `btrfs-device`(8), you should
+run `btrfs-balance`(8) to rebuild the chunks.
++
+See `btrfs-balance`(8) for more details.
+
+Device add/remove/replace::
+Device can be added/removed using `btrfs-device`(8) subcommand and replaced
+using `btrfs-replace`(8).
++
+When device is removed or replaced, btrfs will do the chunk rebuild if needed.
++
+See `btrfs-replace`(8) man page for more details on device replace.
+
+SUBCOMMAND
+----------
+*add* [-Kf] <dev> [<dev>...] <path>::
+Add device(s) to the filesystem identified by <path>.
++
+If applicable, a whole device discard (TRIM) operation is performed.
++
+`Options`
++
+-K|--nodiscard::::
+do not perform discard by default
+-f|--force::::
+force overwrite of existing filesystem on the given disk(s)
+
+*remove* <dev> [<dev>...] <path>::
+Remove device(s) from a filesystem identified by <path>.
+
+*delete* <dev> [<dev>...] <path>::
+Alias of remove kept for backwards compatibility
+
+*ready* <device>::
+Check device to see if it has all of its devices in cache for mounting.
+
+*scan* [(--all-devices|-d)|<device> [<device>...]]::
+Scan devices for a btrfs filesystem.
++
+If one or more devices are passed, these are scanned for a btrfs filesystem.
+If no devices are passed, btrfs uses block devices containing btrfs
+filesystem as listed by blkid.
+Finally, if '--all-devices' or '-d' is passed, all the devices under /dev are
+scanned.
+
+*stats* [-z] <path>|<device>::
+Read and print the device IO stats for all mounted devices of the filesystem
+identified by <path> or for a single <device>.
++
+`Options`
++
+-z::::
+Reset stats to zero after reading them.
+
+*usage* [options] <path> [<path>...]::
+Show detailed information about internal allocations in devices.
++
+`Options`
++
+-b|--raw::::
+raw numbers in bytes, without the 'B' suffix
+-h|--human-readable::::
+print human friendly numbers, base 1024, this is the default
+-H::::
+print human friendly numbers, base 1000
+--iec::::
+select the 1024 base for the following options, according to the IEC standard
+--si::::
+select the 1000 base for the following options, according to the SI standard
+-k|--kbytes::::
+show sizes in KiB, or kB with --si
+-m|--mbytes::::
+show sizes in MiB, or MB with --si
+-g|--gbytes::::
+show sizes in GiB, or GB with --si
+-t|--tbytes::::
+show sizes in TiB, or TB with --si
+
+If conflicting options are passed, the last one takes precedence.
+
+EXIT STATUS
+-----------
+*btrfs device* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-replace`(8),
+`btrfs-balance`(8)
diff --git a/Documentation/btrfs-filesystem.asciidoc b/Documentation/btrfs-filesystem.asciidoc
new file mode 100644
index 00000000..26126175
--- /dev/null
+++ b/Documentation/btrfs-filesystem.asciidoc
@@ -0,0 +1,338 @@
+btrfs-filesystem(8)
+===================
+
+NAME
+----
+btrfs-filesystem - command group of btrfs that usually works on the whole filesystem
+
+SYNOPSIS
+--------
+*btrfs filesystem* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs filesystem* is used to do the whole filesystem level tasks, including
+all the regular filesystem operations like resizing, space stats, label
+setting/getting, and defragmentation.
+
+SUBCOMMAND
+----------
+*df* [options] <path>::
+Show terse summary information about allocation of block group types of a given
+mount point. The original purpose of this command was a debugging helper. The
+output needs to be further interpreted and is not suitable for quick overview.
++
+--
+An example with description:
+
+* device size: '1.9TiB', one device, no RAID
+* filesystem size: '1.9TiB'
+* created with: 'mkfs.btrfs -d single -m single'
+--
++
+------------------------------
+$ btrfs filesystem df /path
+Data, single: total=1.15TiB, used=1.13TiB
+System, single: total=32.00MiB, used=144.00KiB
+Metadata, single: total=12.00GiB, used=6.45GiB
+GlobalReserve, single: total=512.00MiB, used=0.00B
+------------------------------
++
+--
+* 'Data', 'System' and 'Metadata' are separate block group types.
+'GlobalReserve' is an artificial and internal emergency space, see below.
+* 'single' -- the allocation profile, defined at mkfs time
+* 'total' -- sum of space reserved for
+all allocation profiles of the given type, ie. all Data/single. Note that it's
+not total size of filesystem.
+* 'used' -- sum of used space of the above, ie. file extents, metadata blocks
+--
++
+'GlobalReserve' is an artificial and internal emergency space. It is used eg.
+when the filesystem is full. Its 'total' size is dynamic based on the
+filesystem size, usually not larger than 512MiB, 'used' may fluctuate.
++
+The global block reserve is accounted within Metadata. In case the filesystem
+metadata are exhausted, 'GlobalReserve/total + Metadata/used = Metadata/total'.
++
+`Options`
++
+-b|--raw::::
+raw numbers in bytes, without the 'B' suffix
+-h|--human-readable::::
+print human friendly numbers, base 1024, this is the default
+-H::::
+print human friendly numbers, base 1000
+--iec::::
+select the 1024 base for the following options, according to the IEC standard
+--si::::
+select the 1000 base for the following options, according to the SI standard
+-k|--kbytes::::
+show sizes in KiB, or kB with --si
+-m|--mbytes::::
+show sizes in MiB, or MB with --si
+-g|--gbytes::::
+show sizes in GiB, or GB with --si
+-t|--tbytes::::
+show sizes in TiB, or TB with --si
++
+If conflicting options are passed, the last one takes precedence.
+
+*defragment* [options] <file>|<dir> [<file>|<dir>...]::
+Defragment file data on a mounted filesystem.
++
+If '-r' is passed, files in dir will be defragmented recursively.
+The start position and the number of bytes to defragment can be specified by
+start and len using '-s' and '-l' options below.
+Extents bigger than value given by '-t' will be skipped, otherwise this value
+is used as a target extent size, but is only advisory and may not be reached
+if the free space is too fragmented.
+Use 0 to take the kernel default, which is 256kB but may change in the future.
+You can also turn on compression in defragment operations.
++
+WARNING: Defragmenting with Linux kernel versions < 3.9 or ≥ 3.14-rc2 as well as
+with Linux stable kernel versions ≥ 3.10.31, ≥ 3.12.12 or ≥ 3.13.4 will break up
+the ref-links of COW data (for example files copied with `cp --reflink`,
+snapshots or de-duplicated data).
+This may cause considerable increase of space usage depending on the broken up
+ref-links.
++
+`Options`
++
+-v::::
+be verbose, print file names as they're submitted for defragmentation
+-c[<algo>]::::
+compress file contents while defragmenting. Optional argument selects the compression
+algorithm, 'zlib' (default) or 'lzo'. Currently it's not possible to select no
+compression. See also section 'EXAMPLES'.
+-r::::
+defragment files recursively in given directories
+-f::::
+flush data for each file before going to the next file. This will limit the amount
+of dirty data to current file, otherwise the amount cumulates from several files
+and may increase system load.
+-s <start>[kKmMgGtTpPeE]::::
+defragmentation will start from the given offset, default is beginning of a file
+-l <len>[kKmMgGtTpPeE]::::
+defragment only up to 'len' bytes, default is the file size
+-t <size>[kKmMgGtTpPeE]::::
+target extent size, do not touch extents bigger than 'size'
++
+For 'start', 'len', 'size' it is possible to append
+units designator: \'K', \'M', \'G', \'T', \'P', or \'E', which represent
+KiB, MiB, GiB, TiB, PiB, or EiB, respectively (case does not matter).
++
+NOTE: Directory arguments without '-r' do not defragment files recursively but will
+defragment certain internal trees (extent tree and the subvolume tree). This has been
+confusing and could be removed in the future.
+
+*label* [<dev>|<mountpoint>] [<newlabel>]::
+Show or update the label of a filesystem. This works on a mounted filesystem or
+a filesystem image.
++
+The 'newlabel' argument is optional. Current label is printed if the argument
+is omitted.
++
+NOTE: the maximum allowable length shall be less than 256 chars and must not contain
+a newline. The trailing newline is stripped automatically.
+
+// Some wording is taken from the resize2fs man page
+*resize* [<devid>:][+/-]<size>[kKmMgGtTpPeE]|[<devid>:]max <path>::
+Resize a mounted filesystem identified by 'path'. A particular device
+can be resized by specifying a 'devid'.
++
+WARNING: If 'path' is a file containing a BTRFS image then resize does not work
+as expected and does not resize the image. This would resize the underlying
+filesystem instead.
++
+The 'devid' can be found in the output of *btrfs filesystem show* and
+defaults to 1 if not specified.
+The 'size' parameter specifies the new size of the filesystem.
+If the prefix '+' or '-' is present the size is increased or decreased
+by the quantity 'size'.
+If no units are specified, bytes are assumed for 'size'.
+Optionally, the size parameter may be suffixed by one of the following
+units designators: \'K', \'M', \'G', \'T', \'P', or \'E', which represent
+KiB, MiB, GiB, TiB, PiB, or EiB, respectively (case does not matter).
++
+If 'max' is passed, the filesystem will occupy all available space on the
+device respecting 'devid' (remember, devid 1 by default).
++
+The resize command does not manipulate the size of underlying
+partition. If you wish to enlarge/reduce a filesystem, you must make sure you
+can expand the partition before enlarging the filesystem and shrink the
+partition after reducing the size of the filesystem. This can be done using
+`fdisk`(8) or `parted`(8) to delete the existing partition and recreate
+it with the new desired size. When recreating the partition make sure to use
+the same starting partition offset as before.
++
+Growing is usually instant as it only updates the size. However, shrinking could
+take a long time if there are data in the device area that's beyond the new
+end. Relocation of the data takes time.
++
+See also section 'EXAMPLES'.
+
+*show* [options] [<path>|<uuid>|<device>|<label>]::
+Show the btrfs filesystem with some additional info about devices and space
+allocation.
++
+If none of 'path'/'uuid'/'device'/'label' is passed, information
+about all the BTRFS filesystems is shown, both mounted and unmounted.
++
+`Options`
++
+-m|--mounted::::
+probe kernel for mounted BTRFS filesystems
+-d|--all-devices::::
+scan all devices under /dev, otherwise the devices list is extracted from the
+/proc/partitions file. This is a fallback option if there's no device node
+manager (like udev) available in the system.
+--raw::::
+raw numbers in bytes, without the 'B' suffix
+--human-readable::::
+print human friendly numbers, base 1024, this is the default
+--iec::::
+select the 1024 base for the following options, according to the IEC standard
+--si::::
+select the 1000 base for the following options, according to the SI standard
+--kbytes::::
+show sizes in KiB, or kB with --si
+--mbytes::::
+show sizes in MiB, or MB with --si
+--gbytes::::
+show sizes in GiB, or GB with --si
+--tbytes::::
+show sizes in TiB, or TB with --si
+
+*sync* <path>::
+Force a sync of the filesystem at 'path'. This is done via a special ioctl and
+will also trigger cleaning of deleted subvolumes. Besides that it's equivalent
+to the `sync`(1) command.
+
+*usage* [options] <path> [<path>...]::
+Show detailed information about internal filesystem usage. This is supposed to
+replace the *btrfs filesystem df* command in the long run.
++
+The level of detail can differ if the command is run under a regular or the
+root user (due to use of restricted ioctl). For both there's a summary section
+with information about space usage:
++
+-------------------------
+$ btrfs fi usage /path
+WARNING: cannot read detailed chunk info, RAID5/6 numbers will be incorrect, run as root
+Overall:
+ Device size: 1.82TiB
+ Device allocated: 1.17TiB
+ Device unallocated: 669.99GiB
+ Device missing: 0.00B
+ Used: 1.14TiB
+ Free (estimated): 692.57GiB (min: 692.57GiB)
+ Data ratio: 1.00
+ Metadata ratio: 1.00
+ Global reserve: 512.00MiB (used: 0.00B)
+-------------------------
++
+The root user will also see stats broken down by block group types:
++
+-------------------------
+Data,single: Size:1.15TiB, Used:1.13TiB
+ /dev/sdb 1.15TiB
+
+Metadata,single: Size:12.00GiB, Used:6.45GiB
+ /dev/sdb 12.00GiB
+
+System,single: Size:32.00MiB, Used:144.00KiB
+ /dev/sdb 32.00MiB
+
+Unallocated:
+ /dev/sdb 669.99GiB
+-------------------------
++
+`Options`
++
+-b|--raw::::
+raw numbers in bytes, without the 'B' suffix
+-h|--human-readable::::
+print human friendly numbers, base 1024, this is the default
+-H::::
+print human friendly numbers, base 1000
+--iec::::
+select the 1024 base for the following options, according to the IEC standard
+--si::::
+select the 1000 base for the following options, according to the SI standard
+-k|--kbytes::::
+show sizes in KiB, or kB with --si
+-m|--mbytes::::
+show sizes in MiB, or MB with --si
+-g|--gbytes::::
+show sizes in GiB, or GB with --si
+-t|--tbytes::::
+show sizes in TiB, or TB with --si
+-T::::
+show data in tabular format
++
+If conflicting options are passed, the last one takes precedence.
+
+EXAMPLES
+--------
+
+*$ btrfs filesystem defrag -v -r dir/*
+
+Recursively defragment files under 'dir/', print files as they are processed.
+The file names will be printed in batches, similarly the amount of data triggered
+by defragmentation will be proportional to last N printed files. The system dirty
+memory throttling will slow down the defragmentation but there can still be a lot
+of IO load and the system may stall for a moment.
+
+*$ btrfs filesystem defrag -v -r -f dir/*
+
+Recursively defragment files under 'dir/', be verbose and wait until all blocks
+are flushed before processing next file. You can note slower progress of the
+output and lower IO load (proportional to currently defragmented file).
+
+*$ btrfs filesystem defrag -v -r -f -clzo dir/*
+
+Recursively defragment files under 'dir/', be verbose, wait until all blocks are
+flushed and force file compression.
+
+*$ btrfs filesystem defrag -v -r -t 64M dir/*
+
+Recursively defragment files under 'dir/', be verbose and try to merge extents
+to be about 64MiB. As stated above, the success rate depends on actual free
+space fragmentation and the final result is not guaranteed to meet the target
+even if run repeatedly.
+
+*$ btrfs filesystem resize -1G /path*
+
+*$ btrfs filesystem resize 1:-1G /path*
+
+Shrink size of the filesystem's device id 1 by 1GiB. The first syntax expects a
+device with id 1 to exist, otherwise fails. The second is equivalent and more
+explicit. For a single-device filesystem it's typically not necessary to
+specify the devid though.
+
+*$ btrfs filesystem resize max /path*
+
+*$ btrfs filesystem resize 1:max /path*
+
+Let's assume that devid 1 exists, the filesystem does not occupy the whole block
+device, eg. it has been enlarged and we want to grow the filesystem. Simply using
+'max' as the size will achieve that.
+
+NOTE: There are two ways to minimize the filesystem on a given device. The
+*btrfs inspect-internal min-dev-size* command, or iteratively shrink in steps.
+
+EXIT STATUS
+-----------
+*btrfs filesystem* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-find-root.asciidoc b/Documentation/btrfs-find-root.asciidoc
new file mode 100644
index 00000000..e04cd3e8
--- /dev/null
+++ b/Documentation/btrfs-find-root.asciidoc
@@ -0,0 +1,35 @@
+btrfs-find-root(8)
+==================
+
+NAME
+----
+btrfs-find-root - filter to find btrfs root
+
+SYNOPSIS
+--------
+*btrfs-find-root* [options] <dev>
+
+DESCRIPTION
+-----------
+*btrfs-find-root* is used to find the satisfied root, you can filter by
+root tree's objectid, generation, level.
+
+OPTIONS
+-------
+-a::
+Search through all the metadata extents, even if the root is already found.
+-g <generation>::
+Filter root tree by its original transaction id, tree root's generation by default.
+-o <objectid>::
+Filter root tree by its objectid, tree root's objectid by default.
+-l <level>::
+Filter root tree by B-+ tree's level, level 0 by default.
+
+EXIT STATUS
+-----------
+*btrfs-find-root* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-image.asciidoc b/Documentation/btrfs-image.asciidoc
new file mode 100644
index 00000000..38265171
--- /dev/null
+++ b/Documentation/btrfs-image.asciidoc
@@ -0,0 +1,66 @@
+btrfs-image(8)
+==============
+
+NAME
+----
+btrfs-image - create/restore an image of the filesystem
+
+SYNOPSIS
+--------
+*btrfs-image* [options] <source> <target>
+
+DESCRIPTION
+-----------
+*btrfs-image* is used to create an image of a btrfs filesystem.
+All data will be zeroed, but metadata and the like is preserved.
+Mainly used for debugging purposes.
+
+In the dump mode, source is the btrfs device/file and target is the output
+file (use '-' for stdout).
+
+In the restore mode (option -r), source is the dumped image and target is the btrfs device/file.
+
+
+OPTIONS
+-------
+-r::
+Restore metadump image. By default, this fixes super's chunk tree, by
+using 1 stripe pointing to primary device, so that file system can be
+restored by running tree log replay if possible. To restore without
+changing number of stripes in chunk tree check -o option.
+
+-c <value>::
+Compression level (0 ~ 9).
+
+-t <value>::
+Number of threads (1 ~ 32) to be used to process the image dump or restore.
+
+-o::
+Use the old restore method, this does not fixup the chunk tree so the restored
+file system will not be able to be mounted.
+
+-s::
+Sanitize the file names when generating the image. One -s means just
+generate random garbage, which means that the directory indexes won't match up
+since the hashes won't match with the garbage filenames. Using -ss will
+calculate a collision for the filename so that the hashes match, and if it
+can't calculate a collision then it will just generate garbage. The collision
+calculator is very time and CPU intensive so only use it if you are having
+problems with your file system tree and need to have it mostly working.
+
+-w::
+Walk all the trees manually and copy any blocks that are referenced. Use this
+option if your extent tree is corrupted to make sure that all of the metadata is
+captured.
+
+-m::
+Restore for multiple devices, more than 1 device should be provided.
+
+EXIT STATUS
+-----------
+*btrfs-image* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-inspect-internal.asciidoc b/Documentation/btrfs-inspect-internal.asciidoc
new file mode 100644
index 00000000..1c7c3611
--- /dev/null
+++ b/Documentation/btrfs-inspect-internal.asciidoc
@@ -0,0 +1,84 @@
+btrfs-inspect-internal(8)
+=========================
+
+NAME
+----
+btrfs-inspect-internal - query various internal information
+
+SYNOPSIS
+--------
+*btrfs inspect-internal* <subcommand> <args>
+
+DESCRIPTION
+-----------
+
+This command group provides an interface to query internal information. The
+functionality ranges from a simple UI to an ioctl or a more complex query that
+assembles the result from several internal structures. The latter usually
+requires calls to privileged ioctls.
+
+SUBCOMMAND
+----------
+*inode-resolve* [-v] <ino> <path>::
+(needs root privileges)
++
+resolve paths to all files with given inode number 'ino' in a given subvolume
+at 'path', ie. all hardlinks
++
+`Options`
++
+-v::::
+verbose mode, print count of returned paths and ioctl() return value
+
+*logical-resolve* [-Pv] [-s <bufsize>] <logical> <path>::
+(needs root privileges)
++
+resolve paths to all files at given 'logical' address in the linear filesystem space
++
+`Options`
++
+-P::::
+skip the path resolving and print the inodes instead
+-v::::
+verbose mode, print count of returned paths and all ioctl() return values
+-s <bufsize>::::
+set internal buffer for storing the file names to 'bufsize', default is 4096, maximum 64k
+
+*min-dev-size* [options] <path>::
+(needs root privileges)
++
+return the minimum size the device can be shrunk to, without performing any
+resize operation, this may be useful before executing the actual resize operation
++
+`Options`
++
+--id <id>::::
+specify the device 'id' to query, default is 1 if this option is not used
+
+*rootid* <path>::
+for a given file or directory, return the containing tree root id, for a
+subvolume itself return its own tree id (ie. subvol id)
++
+NOTE: The result is undefined for the so-called empty subvolumes (identified by
+inode number 2), but such subvolume does not contain any files anyway
+
+*subvolid-resolve* <subvolid> <path>::
+(needs root privileges)
++
+resolve the absolute path of the subvolume id 'subvolid'
+
+EXIT STATUS
+-----------
+*btrfs inspect-internal* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-debug-tree`(8)
diff --git a/Documentation/btrfs-man5.asciidoc b/Documentation/btrfs-man5.asciidoc
new file mode 100644
index 00000000..d4323917
--- /dev/null
+++ b/Documentation/btrfs-man5.asciidoc
@@ -0,0 +1,406 @@
+btrfs-man5(5)
+==============
+
+NAME
+----
+btrfs-man5 - topics about the BTRFS filesystem (mount options, supported file attributes and other)
+
+DESCRIPTION
+-----------
+This document describes topics related to BTRFS that are not specific to the
+tools. Currently covers:
+
+1. mount options
+
+2. file attributes
+
+MOUNT OPTIONS
+-------------
+
+This section describes mount options specific to BTRFS. For the generic mount
+options please refer to `mount`(8) manpage. The options are sorted alphabetically
+(discarding the 'no' prefix).
+
+*acl*::
+*noacl*::
+(default: on)
++
+Enable/disable support for Posix Access Control Lists (ACLs). See the
+`acl`(5) manual page for more information about ACLs.
+
+*alloc_start='bytes'*::
+(default: 1M, minimum: 1M)
++
+Debugging option to force all block allocations above a certain
+byte threshold on each block device. The value is specified in
+bytes, optionally with a K, M, or G suffix (case insensitive).
++
+This option was used for testing and has no practical use, it's slated to be
+removed in the future.
+
+*autodefrag*::
+*noautodefrag*::
+(since: 3.0, default: off)
++
+Enable automatic file defragmentation.
+When enabled, small random writes into files (in a range of tens of kilobytes,
+currently it's 64K) are detected and queued up for the defragmentation process.
+Not well suited for large database workloads.
++
+The read latency may increase due to reading the adjacent blocks that make up the
+range for defragmentation, successive write will merge the blocks in the new
+location.
++
+WARNING: Defragmenting with Linux kernel versions < 3.9 or ≥ 3.14-rc2 as
+well as with Linux stable kernel versions ≥ 3.10.31, ≥ 3.12.12 or
+≥ 3.13.4 will break up the ref-links of CoW data (for example files
+copied with `cp --reflink`, snapshots or de-duplicated data).
+This may cause considerable increase of space usage depending on the
+broken up ref-links.
+
+*barrier*::
+*nobarrier*::
+(default: on)
++
+Ensure that all IO write operations make it through the device cache and are stored
+permanently when the filesystem is at its consistency checkpoint. This
+typically means that a flush command is sent to the device that will
+synchronize all pending data and ordinary metadata blocks, then writes the
+superblock and issues another flush.
++
+The write flushes incur a slight hit and also prevent the IO block
+scheduler from reordering requests in a more effective way. Disabling barriers gets
+rid of that penalty but will most certainly lead to a corrupted filesystem in
+case of a crash or power loss. The ordinary metadata blocks could be yet
+unwritten at the time the new superblock is stored permanently, expecting that
+the block pointers to metadata were stored permanently before.
++
+On a device with a volatile battery-backed write-back cache, the 'nobarrier'
+option will not lead to filesystem corruption as the pending blocks are
+supposed to make it to the permanent storage.
+
+*check_int*::
+*check_int_data*::
+*check_int_print_mask='value'*::
+(since: 3.0, default: off)
++
+These debugging options control the behavior of the integrity checking
+module (the BTRFS_FS_CHECK_INTEGRITY config option required). +
++
+`check_int` enables the integrity checker module, which examines all
+block write requests to ensure on-disk consistency, at a large
+memory and CPU cost. +
++
+`check_int_data` includes extent data in the integrity checks, and
+implies the check_int option. +
++
+`check_int_print_mask` takes a bitmask of BTRFSIC_PRINT_MASK_* values
+as defined in 'fs/btrfs/check-integrity.c', to control the integrity
+checker module behavior. +
++
+See comments at the top of 'fs/btrfs/check-integrity.c'
+for more info.
+
+*clear_cache*::
+Force clearing and rebuilding of the disk space cache if something
+has gone wrong. See also: 'space_cache'.
+
+*commit='seconds'*::
+(since: 3.12, default: 30)
++
+Set the interval of periodic commit. Higher
+values defer data being synced to permanent storage with obvious
+consequences when the system crashes. The upper bound is not forced,
+but a warning is printed if it's more than 300 seconds (5 minutes).
+
+*compress*::
+*compress='type'*::
+*compress-force*::
+*compress-force='type'*::
+(default: off)
++
+Control BTRFS file data compression. Type may be specified as 'zlib',
+'lzo' or 'no' (for no compression, used for remounting). If no type
+is specified, 'zlib' is used. If compress-force is specified,
+all files will be compressed, whether or not they compress well.
++
+NOTE: If compression is enabled, 'nodatacow' and 'nodatasum' are disabled.
+
+*datacow*::
+*nodatacow*::
+(default: on)
++
+Enable data copy-on-write for newly created files.
+'Nodatacow' implies 'nodatasum', and disables 'compression'. All files created
+under 'nodatacow' are also set the NOCOW file attribute (see `chattr`(1)).
+
+*datasum*::
+*nodatasum*::
+(default: on)
++
+Enable data checksumming for newly created files.
+'Datasum' implies 'datacow', ie. the normal mode of operation. All files created
+under 'nodatasum' inherit the "no checksums" property, however there's no
+corresponding file attribute (see `chattr`(1)).
+
+*degraded*::
+(default: off)
++
+Allow mounts with fewer devices than the raid profile constraints
+require. A read-write mount (or remount) may fail with too many devices
+missing, for example if a stripe member is completely missing from RAID0.
+
+*device='devicepath'*::
+Specify a path to a device that will be scanned for BTRFS filesystem during
+mount. This is usually done automatically by a device manager (like udev) or
+using the *btrfs device scan* command (eg. run from the initial ramdisk). In
+cases where this is not possible the 'device' mount option can help.
++
+NOTE: booting eg. a RAID1 system may fail even if all filesystem's 'device'
+paths are provided as the actual device nodes may not be discovered by the
+system at that point.
+
+*discard*::
+*nodiscard*::
+(default: off)
++
+Enable discarding of freed file blocks using TRIM operation. This is useful
+for SSD devices, thinly provisioned LUNs or virtual machine images where the
+backing device understands the operation. Depending on support of the
+underlying device, the operation may severely hurt performance in case the TRIM
+operation is synchronous (eg. with SATA devices up to revision 3.0).
++
+If discarding is not necessary to be done at the block freeing time, there's
+*fstrim* tool that lets the filesystem discard all free blocks in a batch,
+possibly not much interfering with other operations.
+
+*enospc_debug*::
+*noenospc_debug*::
+(default: off)
++
+Enable verbose output for some ENOSPC conditions. It's safe to use but can
+be noisy if the system reaches near-full state.
+
+*fatal_errors='action'*::
+(since: 3.4, default: bug)
++
+Action to take when encountering a fatal error.
++
+*bug*::::
+'BUG()' on a fatal error, the system will stay in the crashed state and may be
+still partially usable, but reboot is required for full operation
++
+*panic*::::
+'panic()' on a fatal error, depending on other system configuration, this may
+be followed by a reboot. Please refer to the documentation of kernel boot
+parameters, eg. 'panic', 'oops' or 'crashkernel'.
+
+*flushoncommit*::
+*noflushoncommit*::
+(default: on)
++
+This option forces any data dirtied by a write in a prior transaction to commit
+as part of the current commit. This makes the committed state a fully
+consistent view of the file system from the application's perspective (i.e., it
+includes all completed file system operations). This was previously the
+behavior only when a snapshot was created.
++
+Disabling flushing may improve performance but is not crash-safe.
+
+*fragment='type'*::
+(depends on compile-time option BTRFS_DEBUG, since: 4.4, default: off)
++
+A debugging helper to intentionally fragment given 'type' of block groups. The
+type can be 'data', 'metadata' or 'all'. This mount option should not be used
+outside of debugging environments and is not recognized if the kernel config
+option 'BTRFS_DEBUG' is not enabled.
+
+*inode_cache*::
+*noinode_cache*::
+(since: 3.0, default: off)
++
+Enable free inode number caching. Not recommended to use unless files on your
+filesystem get assigned inode numbers that are approaching 2^64^. Normally, new
+files in each subvolume get assigned incrementally (plus one from the last
+time) and are not reused. The mount option turns on caching of the existing
+inode numbers and reuse of inode numbers of deleted files.
++
+This option may slow down your system at first run, or after mounting without
+the option.
++
+NOTE: Defaults to off due to a potential overflow problem when the free space
+checksums don't fit inside a single page.
+
+*max_inline='bytes'*::
+(default: min(8192, page size) )
++
+Specify the maximum amount of space, in bytes, that can be inlined in
+a metadata B-tree leaf. The value is specified in bytes, optionally
+with a K suffix (case insensitive). In practice, this value
+is limited by the filesystem block size (named 'sectorsize' at mkfs time),
+and memory page size of the system. In case of sectorsize limit, there's
+some space unavailable due to leaf headers. For example, a 4k sectorsize, max
+inline data is ~3900 bytes.
++
+Inlining can be completely turned off specifying 0. This will increase data
+block slack if file sizes are much smaller than block size but will reduce
+metadata consumption in return.
+
+*metadata_ratio='value'*::
+(default: 0, internal logic)
++
+Specifies that 1 metadata chunk should be allocated after every 'value' data
+chunks. Default behaviour depends on internal logic, some percent of unused
+metadata space is attempted to be maintained but is not always possible if
+there's no space left for chunk allocation. The option could be useful to
+override the internal logic in favor of the metadata allocation if the expected
+workload is supposed to be metadata intense (snapshots, reflinks, xattrs,
+inlined files).
+
+*recovery*::
+(since: 3.2, default: off)
++
+Enable autorecovery attempts if a bad tree root is found at mount time.
+Currently this scans a backup list of several previous tree roots and tries to
+use the first readable. This can be used with read-only mounts as well.
+
+*rescan_uuid_tree*::
+(since: 3.12, default: off)
++
+Force check and rebuild procedure of the UUID tree. This should not
+normally be needed.
+
+*skip_balance*::
+(since: 3.3, default: off)
++
+Skip automatic resume of interrupted balance operation after mount.
+May be resumed with *btrfs balance resume* or the paused state can be removed
+by *btrfs balance cancel*.
+
+*space_cache*::
+*nospace_cache*::
+('nospace_cache' since: 3.2, default: on)
++
+Disable freespace cache loading without clearing the cache and the free space
+cache will not be used during the mount. This affects performance as searching
+for new free blocks could take longer. On the other hand, managing the space
+cache consumes some resources.
+
+*ssd*::
+*nossd*::
+*ssd_spread*::
+(default: SSD autodetected)
++
+Options to control SSD allocation schemes. By default, BTRFS will
+enable or disable SSD allocation heuristics depending on whether a
+rotational or nonrotational disk is in use. The 'ssd' and 'nossd' options
+can override this autodetection.
++
+The 'ssd_spread' mount option attempts to allocate into bigger and aligned
+chunks of unused space, and may perform better on low-end SSDs. 'ssd_spread'
+implies 'ssd', enabling all other SSD heuristics as well.
+
+*subvol='path'*::
+Mount subvolume from 'path' rather than the toplevel subvolume. The
+'path' is absolute (ie. starts at the toplevel subvolume).
+This mount option overrides the default subvolume set for the given filesystem.
+
+*subvolid='subvolid'*::
+Mount subvolume specified by a 'subvolid' number rather than the toplevel
+subvolume. You can use *btrfs subvolume list* to see subvolume ID numbers.
+This mount option overrides the default subvolume set for the given filesystem.
+
+*subvolrootid='objectid'*::
+(irrelevant since: 3.2, formally deprecated since: 3.10)
++
+A workaround option from times (pre 3.2) when it was not possible to mount a
+subvolume that did not reside directly under the toplevel subvolume.
+
+*thread_pool='number'*::
+(default: min(NRCPUS + 2, 8) )
++
+The number of worker threads to allocate. NRCPUS is number of on-line CPUs
+detected at the time of mount. Small number leads to less parallelism in
+processing data and metadata, higher numbers could lead to a performance hit due to
+increased locking contention, cache-line bouncing or costly data transfers
+between local CPU memories.
+
+*treelog*::
+*notreelog*::
+(default: on)
++
+Enable the tree logging used for 'fsync' and 'O_SYNC' writes. The tree log
+stores changes without the need of a full filesystem sync. The log operations
+are flushed at sync and transaction commit. If the system crashes between two
+such syncs, the pending tree log operations are replayed during mount.
++
+WARNING: currently, the tree log is replayed even with a read-only mount!
++
+The tree log could contain new files/directories, these would not exist on
+a mounted filesystem if the log is not replayed.
+
+*user_subvol_rm_allowed*::
+(default: off)
++
+Allow subvolumes to be deleted by their respective owner. Otherwise, only the
+root user can do that.
+
+FILE ATTRIBUTES
+---------------
+The btrfs filesystem supports setting the following file attributes using the
+`chattr`(1) utility:
+
+*a*::
+'append only', new writes are always written at the end of the file
+
+*A*::
+'no atime updates'
+
+*c*::
+'compress data', all data written after this attribute is set will be compressed.
+Please note that compression is also affected by the mount options or the parent
+directory attributes.
++
+When set on a directory, all newly created files will inherit this attribute.
+
+*C*::
+'no copy-on-write', file modifications are done in-place
++
+When set on a directory, all newly created files will inherit this attribute.
++
+NOTE: due to implementation limitations, this flag can be set/unset only on
+empty files.
+
+*d*::
+'no dump', makes sense with 3rd party tools like `dump`(8), on BTRFS the
+attribute can be set/unset but no other special handling is done
+
+*D*::
+'synchronous directory updates', for more details search `open`(2) for 'O_SYNC'
+and 'O_DSYNC'
+
+*i*::
+'immutable', no file data and metadata changes allowed even to the root user as
+long as this attribute is set (obviously the exception is unsetting the attribute)
+
+*S*::
+'synchronous updates', for more details search `open`(2) for 'O_SYNC' and
+'O_DSYNC'
+
+*X*::
+'no compression', permanently turn off compression on the given file, other
+compression mount options will not affect that
++
+When set on a directory, all newly created files will inherit this attribute.
+
+No other attributes are supported. For the complete list please refer to the
+`chattr`(1) manual page.
+
+SEE ALSO
+--------
+`acl`(5),
+`btrfs`(8),
+`chattr`(1),
+`fstrim`(8),
+`mkfs.btrfs`(8),
+`mount`(8)
diff --git a/Documentation/btrfs-map-logical.asciidoc b/Documentation/btrfs-map-logical.asciidoc
new file mode 100644
index 00000000..a3d110cb
--- /dev/null
+++ b/Documentation/btrfs-map-logical.asciidoc
@@ -0,0 +1,37 @@
+btrfs-map-logical(8)
+====================
+
+NAME
+----
+btrfs-map-logical - map btrfs logical extent to physical extent
+
+SYNOPSIS
+--------
+*btrfs-map-logical* <options> <device>
+
+DESCRIPTION
+-----------
+*btrfs-map-logical* can be used to find out what the physical offsets are
+on the mirrors, the result is dumped into stdout by default.
+
+Mainly used for debugging purposes.
+
+OPTIONS
+-------
+-l|--logical <logical_num>::
+Logical extent to map.
+-c|--copy <copy>::
+Copy of the extent to read (usually 1 or 2).
+-o|--output <filename>::
+Output file to hold the extent.
+-b|--bytes <bytes>::
+Number of bytes to read.
+
+EXIT STATUS
+-----------
+*btrfs-map-logical* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-property.asciidoc b/Documentation/btrfs-property.asciidoc
new file mode 100644
index 00000000..8b9b7f03
--- /dev/null
+++ b/Documentation/btrfs-property.asciidoc
@@ -0,0 +1,73 @@
+btrfs-property(8)
+=================
+
+NAME
+----
+btrfs-property - get/set/list properties for given btrfs object.
+
+SYNOPSIS
+--------
+*btrfs property* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs property* is used to get/set/list property for given btrfs object.
+See the description of *get* subcommand for more information about
+both btrfs object and property.
+
+*btrfs property* provides a unified and user-friendly method to tune different
+btrfs properties instead of using the traditional method like `chattr`(1) or
+`lsattr`(1).
+
+SUBCOMMAND
+----------
+*get* [-t <type>] <object> [<name>]::
+Gets a property from a btrfs object.
++
+A btrfs object, which is set by <object>, can be a btrfs filesystem
+itself, a btrfs subvolume, an inode(file or directory) inside btrfs,
+or a device on which a btrfs exists.
++
+The '-t <type>' option can be used to explicitly
+specify what type of object you meant. This is only needed when a
+property could be set for more than one object type.
++
+Possible types are 's[ubvol]', 'f[ilesystem]', 'i[node]' and 'd[evice]'.
++
+Set the name of property by '<name>'. If no '<name>' is specified,
+all properties for the given object are printed. '<name>' is one of
+the following.
+
+ro::::
+read-only flag of subvolume: true or false
+label::::
+label of device
+compression::::
+compression setting for an inode: lzo, zlib, or "" (empty string)
+
+*list* [-t <type>] <object>::
+Lists available properties with their descriptions for the given object.
++
+See the description of *get* subcommand for the meaning of each option.
+
+*set* [-t <type>] <object> <name> <value>::
+Sets a property on a btrfs object.
++
+See the description of *get* subcommand for the meaning of each option.
+
+EXIT STATUS
+-----------
+*btrfs property* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`lsattr`(1),
+`chattr`(1)
diff --git a/Documentation/btrfs-qgroup.asciidoc b/Documentation/btrfs-qgroup.asciidoc
new file mode 100644
index 00000000..57cf012d
--- /dev/null
+++ b/Documentation/btrfs-qgroup.asciidoc
@@ -0,0 +1,145 @@
+btrfs-qgroup(8)
+===============
+
+NAME
+----
+btrfs-qgroup - control the quota group of a btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs qgroup* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs qgroup* is used to control quota group (qgroup) of a btrfs filesystem.
+
+NOTE: To use qgroup you need to enable quota first using *btrfs quota enable*
+command.
+
+WARNING: Qgroup is not stable yet and will impact performance in current mainline
+kernel (v3.14 so far).
+
+QGROUP
+------
+Quota groups or qgroup in btrfs make a tree hierarchy, the leaf qgroups are
+attached to subvolumes. The size limits are set per qgroup and apply when any
+limit is reached in the tree that contains a given subvolume.
+
+The limits are separated between shared and exclusive and reflect the extent
+ownership. For example a fresh snapshot shares almost all the blocks with the
+original subvolume, new writes to either subvolume will raise towards the
+exclusive limit.
+
+The qgroup identifiers conform to 'level/id' where level 0 is reserved to the
+qgroups associated with subvolumes. Such qgroups are created automatically.
+
+The qgroup hierarchy is built by commands *create* and *assign*.
+
+NOTE: If the qgroup of a subvolume is destroyed, quota about the subvolume
+will not be functional until qgroup '0/<subvolume id>' is created again.
+
+SUBCOMMAND
+----------
+*assign* [options] <src> <dst> <path>::
+Assign qgroup <src> as the child qgroup of <dst> in the btrfs filesystem
+identified by <path>.
++
+`Options`
++
+--rescan::::
+Automatically schedule quota rescan if the new qgroup assignment leads to
+quota inconsistency.
+--no-rescan::::
+Explicitly ask not to do a rescan.
+
+*create* <qgroupid> <path>::
+Create a subvolume quota group.
++
+For the '0/<subvolume id>' qgroup, a qgroup can be created even before the
+subvolume is created.
+
+*destroy* <qgroupid> <path>::
+Destroy a qgroup.
++
+If a qgroup is not isolated, which means it is a parent or child qgroup, it
+can't be destroyed.
+
+*limit* [options] <size>|none [<qgroupid>] <path>::
+Limit the size of a qgroup to <size> or no limit in the btrfs filesystem
+identified by <path>.
++
+If <qgroupid> is not given, qgroup of the subvolume identified by <path>
+is used if possible.
++
+`Options`
++
+-c::::
+limit amount of data after compression. This is the default, it is currently not
+possible to turn off this option.
++
+-e::::
+limit space exclusively assigned to this qgroup.
+
+*remove* <src> <dst> <path>::
+Remove the relationship between child qgroup <src> and parent qgroup <dst> in
+the btrfs filesystem identified by <path>.
+
+*show* [options] <path>::
+Show all qgroups in the btrfs filesystem identified by <path>.
++
+`Options`
++
+-p::::
+print parent qgroup id.
+-c::::
+print child qgroup id.
+-r::::
+print limit of referenced size of qgroup.
+-e::::
+print limit of exclusive size of qgroup.
+-F::::
+list all qgroups which impact the given path (include ancestral qgroups)
+-f::::
+list all qgroups which impact the given path (exclude ancestral qgroups)
+--raw::::
+raw numbers in bytes, without the 'B' suffix.
+--human-readable::::
+print human friendly numbers, base 1024, this is the default
+--iec::::
+select the 1024 base for the following options, according to the IEC standard.
+--si::::
+select the 1000 base for the following options, according to the SI standard.
+--kbytes::::
+show sizes in KiB, or kB with --si.
+--mbytes::::
+show sizes in MiB, or MB with --si.
+--gbytes::::
+show sizes in GiB, or GB with --si.
+--tbytes::::
+show sizes in TiB, or TB with --si.
+--sort=[\+/-]<attr>[,[+/-]<attr>]...::::
+list qgroups in order of <attr>.
++
+<attr> can be one or more of qgroupid,rfer,excl,max_rfer,max_excl.
++
+Prefix \'+' means ascending order and \'-' means descending order of <attr>.
+If no prefix is given, use ascending order by default.
++
+If multiple <attr>s are given, use comma to separate.
+
+EXIT STATUS
+-----------
+*btrfs qgroup* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-subvolume`(8),
+`btrfs-quota`(8)
diff --git a/Documentation/btrfs-quota.asciidoc b/Documentation/btrfs-quota.asciidoc
new file mode 100644
index 00000000..00e09c61
--- /dev/null
+++ b/Documentation/btrfs-quota.asciidoc
@@ -0,0 +1,56 @@
+btrfs-quota(8)
+==============
+
+NAME
+----
+btrfs-quota - control the quota of a btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs quota* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs quota* is used to enable/disable or rescan subvolume quota of a btrfs
+filesystem.
+
+For setting quota or other quota operations on a btrfs filesystem, please see
+`btrfs-qgroup`(8) for details.
+
+WARNING: Quota and qgroup in btrfs filesystem are not stable yet and impact
+performance in mainline kernel (v3.14 so far).
+
+SUBCOMMAND
+----------
+*disable* <path>::
+Disable subvolume quota support for a filesystem.
+
+*enable* <path>::
+Enable subvolume quota support for a filesystem.
+
+*rescan* [-sw] <path>::
+Trash all qgroup numbers and scan the metadata again with the current config.
++
+`Options`
++
+-s::::
+show status of a running rescan operation.
+-w::::
+wait for rescan operation to finish (can be already in progress).
+
+EXIT STATUS
+-----------
+*btrfs quota* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-subvolume`(8),
+`btrfs-qgroup`(8)
diff --git a/Documentation/btrfs-receive.asciidoc b/Documentation/btrfs-receive.asciidoc
new file mode 100644
index 00000000..84b85c1c
--- /dev/null
+++ b/Documentation/btrfs-receive.asciidoc
@@ -0,0 +1,67 @@
+btrfs-receive(8)
+================
+
+NAME
+----
+btrfs-receive - receive subvolumes from stdin/file.
+
+SYNOPSIS
+--------
+*btrfs receive* [options] <mount>
+
+DESCRIPTION
+-----------
+Receives one or more subvolumes that were previously
+sent with *btrfs send*. The received subvolumes are stored
+into <mount>.
+
+*btrfs receive* will fail in the following cases:
+
+1. a receiving subvolume already exists.
+
+2. a previously received subvolume was changed after it was received.
+
+3. default subvolume is changed or you don't mount btrfs filesystem with
+fs tree.
+
+After receiving a subvolume, it is immediately set to read only.
+
+`Options`
+
+-v::
+Enable verbose debug output. Each occurrence of this option increases the
+verbose level more.
+-f <infile>::
+By default, btrfs receive uses stdin to receive the subvolumes.
+Use this option to specify a file to use instead.
+-C|--chroot::
+Confine the process to <mount> using chroot.
+-e::
+Terminate after receiving an <end cmd> in the data stream.
+Without this option, the receiver terminates only if an error is recognized
+or on EOF.
+--max-errors <N>::
+Terminate as soon as N errors happened while processing commands from the send
+stream. Default value is 1. A value of 0 means no limit.
+-m::
+The root mount point of the destination fs.
++
+By default the mountpoint is searched in /proc/self/mounts.
+If you do not have /proc, eg. in a chroot environment, use this option to tell
+us where this filesystem is mounted.
+
+EXIT STATUS
+-----------
+*btrfs receive* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-send`(8)
diff --git a/Documentation/btrfs-replace.asciidoc b/Documentation/btrfs-replace.asciidoc
new file mode 100644
index 00000000..5a14a40a
--- /dev/null
+++ b/Documentation/btrfs-replace.asciidoc
@@ -0,0 +1,76 @@
+btrfs-replace(8)
+===============
+
+NAME
+----
+btrfs-replace - replace devices managed by btrfs with other device.
+
+SYNOPSIS
+--------
+*btrfs replace* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs replace* is used to replace btrfs managed devices with other device.
+
+SUBCOMMAND
+----------
+*cancel* <mount_point>::
+Cancel a running device replace operation.
+
+*start* [-Bfr] <srcdev>|<devid> <targetdev> <path>::
+Replace device of a btrfs filesystem.
++
+On a live filesystem, duplicate the data to the target device which
+is currently stored on the source device.
+If the source device is not available anymore, or if the -r option is set,
+the data is built only using the RAID redundancy mechanisms.
+After completion of the operation, the source device is removed from the
+filesystem.
+If the <srcdev> is a numerical value, it is assumed to be the device id
+of the filesystem which is mounted at <path>, otherwise it is
+the path to the source device. If the source device is disconnected
+from the system, you have to use the devid parameter format.
+The <targetdev> needs to be same size or larger than the <srcdev>.
++
+`Options`
++
+-r::::
+only read from <srcdev> if no other zero-defect mirror exists.
+(enable this if your drive has lots of read errors, the access would be very
+slow)
+-f::::
+force using and overwriting <targetdev> even if it looks like
+containing a valid btrfs filesystem.
++
+A valid filesystem is assumed if a btrfs superblock is found which contains a
+correct checksum. Devices which are currently mounted are
+never allowed to be used as the <targetdev>.
++
+-B::::
+no background replace.
+
+*status* [-1] <mount_point>::
+Print status and progress information of a running device replace operation.
++
+`Options`
++
+-1::::
+print once instead of print continuously until the replace
+operation finishes (or is canceled)
+
+EXIT STATUS
+-----------
+*btrfs replace* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-device`(8)
diff --git a/Documentation/btrfs-rescue.asciidoc b/Documentation/btrfs-rescue.asciidoc
new file mode 100644
index 00000000..42aca645
--- /dev/null
+++ b/Documentation/btrfs-rescue.asciidoc
@@ -0,0 +1,90 @@
+btrfs-rescue(8)
+==============
+
+NAME
+----
+btrfs-rescue - Recover a damaged btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs rescue* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs rescue* is used to try to recover a damaged btrfs filesystem.
+
+SUBCOMMAND
+----------
+*chunk-recover* [options] <device>::
+Recover the chunk tree by scanning the devices
++
+`Options`
++
+-y::::
+assume an answer of 'yes' to all questions.
+-v::::
+verbose mode.
+-h::::
+help.
+
+NOTE: Since *chunk-recover* will scan the whole device, it will be *VERY* slow
+especially when executed on a large device.
+
+*super-recover* [options] <device>::
+Recover bad superblocks from good copies.
++
+`Options`
++
+-y::::
+assume an answer of 'yes' to all questions.
+-v::::
+verbose mode.
+
+*zero-log* <device>::
+clear the filesystem log tree
+
+This command will clear the filesystem log tree. This may fix a specific
+set of problems when the filesystem mount fails due to the log replay. See below
+for sample stacktraces that may show up in system log.
+
+The common case where this happens has been fixed a long time ago,
+so it is unlikely that you will see this particular problem, but the utility is
+kept around.
+
+NOTE: clearing the log may lead to loss of changes that were made since the
+last transaction commit. This may be up to 30 seconds (default commit period)
+or less if the commit was implied by other filesystem activity.
+
+One can determine whether *zero-log* is needed according to the kernel
+backtrace:
+----
+? replay_one_dir_item+0xb5/0xb5 [btrfs]
+? walk_log_tree+0x9c/0x19d [btrfs]
+? btrfs_read_fs_root_no_radix+0x169/0x1a1 [btrfs]
+? btrfs_recover_log_trees+0x195/0x29c [btrfs]
+? replay_one_dir_item+0xb5/0xb5 [btrfs]
+? btree_read_extent_buffer_pages+0x76/0xbc [btrfs]
+? open_ctree+0xff6/0x132c [btrfs]
+----
+
+If the errors are like above, then *zero-log* should be used to clear
+the log and the filesystem may be mounted normally again. The keywords to look
+for are 'open_ctree' which says that it's during mount and function names
+that contain 'replay', 'recover' or 'log_tree'.
+
+EXIT STATUS
+-----------
+*btrfs rescue* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-scrub`(8),
+`btrfs-check`(8)
diff --git a/Documentation/btrfs-restore.asciidoc b/Documentation/btrfs-restore.asciidoc
new file mode 100644
index 00000000..ec3a08bc
--- /dev/null
+++ b/Documentation/btrfs-restore.asciidoc
@@ -0,0 +1,91 @@
+btrfs-restore(8)
+================
+
+NAME
+----
+btrfs-restore - try to restore files from a damaged btrfs filesystem image
+
+SYNOPSIS
+--------
+*btrfs restore* [options] <device> <path> | -l <device>
+
+DESCRIPTION
+-----------
+*btrfs restore* is used to try to salvage files from a damaged filesystem and
+restore them into <path> or just list the tree roots.
+
+Since current `btrfs-check`(8) or `btrfs-rescue`(8) only has very limited usage,
+*btrfs restore* is normally a better choice.
+
+NOTE: It is recommended to read the following btrfs wiki page if your data is
+not salvaged with default option: +
+https://btrfs.wiki.kernel.org/index.php/Restore
+
+OPTIONS
+-------
+-s|--snapshots::
+get snapshots, btrfs restore skips snapshots by default.
+
+-x|--xattr::
+get extended attributes.
+
+-m|--metadata::
+restore owner, mode and times.
+
+-S|--symlinks::
+restore symbolic links as well as normal files.
+
+-v|--verbose::
+verbose.
+
+-i|--ignore-errors::
+ignore errors.
+
+-o|--overwrite::
+overwrite directories/files in <path>.
+
+-t <bytenr>::
+use <bytenr> to read root tree.
+
+-f <bytenr>::
+only restore files that are under specified root whose root bytenr is <bytenr>.
+
+-u|--super <mirror>::
+use given superblock mirror identified by <mirror>, it can be 0,1,2.
+
+-r|--root <rootid>::
+only restore files that are under specified root whose objectid is <rootid>.
+
+-d::
+find dir.
+
+-l|--list-roots::
+list tree roots.
+
+-D|--dry-run::
+dry run (only list files that would be recovered).
+
+--path-regex <regex>::
+restore only filenames matching regex, you have to use following syntax (possibly quoted):
++
++^/(|home(|/username(|/Desktop(|/.*))))$+
+
+-c::
+ignore case (--path-regex only).
+
+EXIT STATUS
+-----------
+*btrfs restore* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-rescue`(8),
+`btrfs-check`(8)
diff --git a/Documentation/btrfs-scrub.asciidoc b/Documentation/btrfs-scrub.asciidoc
new file mode 100644
index 00000000..7750868d
--- /dev/null
+++ b/Documentation/btrfs-scrub.asciidoc
@@ -0,0 +1,94 @@
+btrfs-scrub(8)
+==============
+
+NAME
+----
+btrfs-scrub - scrub btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs scrub* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs scrub* is used to scrub a btrfs filesystem, which will read all data
+from all disks and verify checksums.
+
+SUBCOMMAND
+----------
+*cancel* <path>|<device>::
+If a scrub is running on the filesystem identified by <path>, cancel it.
++
+Progress is saved in the scrub progress file and scrubbing can be resumed later
+using the scrub resume command.
+If a <device> is given, the corresponding filesystem is found and
+scrub cancel behaves as if it was called on that filesystem.
+
+*resume* [-BdqrR] [-c <ioprio_class> -n <ioprio_classdata>] <path>|<device>::
+Resume a canceled or interrupted scrub cycle on the filesystem identified by
+<path> or on a given <device>.
++
+Does not start a new scrub if the last scrub finished successfully.
++
+`Options`
++
+see *scrub start*.
+
+*start* [-BdqrRf] [-c <ioprio_class> -n <ioprio_classdata>] <path>|<device>::
+Start a scrub on all devices of the filesystem identified by <path> or on
+a single <device>. If a scrub is already running, the new one fails.
++
+Without options, scrub is started as a background process.
+Progress can be obtained with the *scrub status* command. Scrubbing
+involves reading all data from all disks and verifying checksums. Errors are
+corrected along the way if possible.
++
+The default IO priority of scrub is the idle class. The priority can be
+configured similar to the `ionice`(1) syntax using '-c' and '-n' options.
++
+`Options`
++
+-B::::
+Do not background and print scrub statistics when finished.
+-d::::
+Print separate statistics for each device of the filesystem (-B only).
+-q::::
+Quiet. Omit error messages and statistics.
+-r::::
+Read only mode. Do not attempt to correct anything.
+-R::::
+Raw print mode. Print full data instead of summary.
+-c <ioprio_class>::::
+Set IO priority class (see `ionice`(1) manpage).
+-n <ioprio_classdata>::::
+Set IO priority classdata (see `ionice`(1) manpage).
+-f::::
+Force starting new scrub even if a scrub is already running.
+This is useful when scrub stat record file is damaged.
+
+*status* [-d] <path>|<device>::
+Show status of a running scrub for the filesystem identified by <path> or
+for the specified <device>.
++
+If no scrub is running, show statistics of the last finished or canceled scrub
+for that filesystem or device.
++
+`Options`
++
+-d::::
+Print separate statistics for each device of the filesystem.
+
+EXIT STATUS
+-----------
+*btrfs scrub* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
diff --git a/Documentation/btrfs-select-super.asciidoc b/Documentation/btrfs-select-super.asciidoc
new file mode 100644
index 00000000..a8d7ef00
--- /dev/null
+++ b/Documentation/btrfs-select-super.asciidoc
@@ -0,0 +1,29 @@
+btrfs-select-super(8)
+=====================
+
+NAME
+----
+btrfs-select-super - overwrite superblock with a backup
+
+SYNOPSIS
+--------
+*btrfs-select-super* -s number dev
+
+DESCRIPTION
+-----------
+*btrfs-select-super* destructively overwrites all copies of the superblock
+with a specified copy. This helps with certain cases of damage, especially
+when barriers were disabled during a power failure. You can find a valid
+copy of the superblock with *btrfs check -s*.
+
+The filesystem specified by `dev` must not be mounted.
+
+OPTIONS
+-------
+-s|--super <superblock>::
+use <superblock>th superblock copy, valid values are 0 up to 2 if the
+respective superblock offset is within the filesystem
+
+SEE ALSO
+--------
+`btrfs-check`(8)
diff --git a/Documentation/btrfs-send.asciidoc b/Documentation/btrfs-send.asciidoc
new file mode 100644
index 00000000..e05342ff
--- /dev/null
+++ b/Documentation/btrfs-send.asciidoc
@@ -0,0 +1,63 @@
+btrfs-send(8)
+=============
+
+NAME
+----
+btrfs-send - send data of subvolume(s) to stdout/file.
+
+SYNOPSIS
+--------
+*btrfs send* [-ve] [-p <parent>] [-c <clone-src>] [-f <outfile>] <subvol> [<subvol>...]
+
+DESCRIPTION
+-----------
+Sends the subvolume(s) specified by <subvol> to stdout.
+<subvol> should be read-only here.
+
+By default, this will send the whole subvolume. To do an incremental
+send, use '-p <parent>'.
+
+If you want to allow btrfs to clone from any additional local snapshots,
+use '-c <clone-src>' (multiple times where applicable).
+
+You must not specify clone sources unless you guarantee that these snapshots
+are exactly in the same state on both sides, the sender and the receiver.
+
+It is allowed to omit the '-p <parent>' option when '-c <clone-src>' options
+are given, in which case *btrfs send* will determine a suitable parent among the
+clone sources itself.
+
+`Options`
+
+-v::
+Enable verbose debug output. Each occurrence of this option increases the
+verbose level more.
+-e::
+If sending multiple subvols at once, use the new format and omit the <end cmd> between the subvols.
+-p <parent>::
+Send an incremental stream from <parent> to <subvol>.
+-c <clone-src>::
+Use this snapshot as a clone source for an incremental send (multiple allowed).
+-f <outfile>::
+Output is normally written to stdout. To write to a file, use this option.
+An alternative would be to use pipes.
+--no-data::
+Send in NO_FILE_DATA mode. The output stream does not contain any file
+data and thus cannot be used to transfer changes. This mode is faster and
+useful to show the differences in metadata.
+
+EXIT STATUS
+-----------
+*btrfs send* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-receive`(8)
diff --git a/Documentation/btrfs-show-super.asciidoc b/Documentation/btrfs-show-super.asciidoc
new file mode 100644
index 00000000..8866c940
--- /dev/null
+++ b/Documentation/btrfs-show-super.asciidoc
@@ -0,0 +1,54 @@
+btrfs-show-super(8)
+====================
+
+NAME
+----
+btrfs-show-super - show btrfs superblock information stored in devices
+
+SYNOPSIS
+--------
+*btrfs-show-super* [options] <dev> [<dev>...]
+
+DESCRIPTION
+-----------
+*btrfs-show-super* is used to print the information of superblock,
+you can specify which mirror to print out.
+
+By default, every device's first superblock will be printed out.
+
+Mainly used for debugging purposes.
+
+OPTIONS
+-------
+-f::
+Print full superblock information.
++
+Including the system chunk array and backup roots.
+
+-a::
+Print information of all superblocks.
++
+If this option is given, '-i' option will be ignored.
+
+-i <super_mirror>::
+Specify which mirror to print out.
++
+<super_mirror> is between 0 and 2.
+If several '-i <super_mirror>' are given, only the last one is valid.
+
+-F::
+Attempt to print the superblock even if no superblock magic is found. May end
+badly.
+
+-s <bytenr>::
+specify offset to a superblock in a non-standard location at 'bytenr', useful
+for debugging (disables the '-f' option)
+
+EXIT STATUS
+-----------
+*btrfs-show-super* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-subvolume.asciidoc b/Documentation/btrfs-subvolume.asciidoc
new file mode 100644
index 00000000..96cfe4ac
--- /dev/null
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -0,0 +1,182 @@
+btrfs-subvolume(8)
+==================
+
+NAME
+----
+btrfs-subvolume - control btrfs subvolume(s)
+
+SYNOPSIS
+--------
+*btrfs subvolume* <subcommand> [<args>]
+
+DESCRIPTION
+-----------
+*btrfs subvolume* is used to control the filesystem to create/delete/list/show
+subvolumes and snapshots.
+
+SUBVOLUME AND SNAPSHOT
+----------------------
+A subvolume in btrfs is not like an LVM logical volume. LVM logical volumes are
+independent from each other, whereas a btrfs subvolume has its hierarchy and
+relations with other subvolumes.
+
+A subvolume in btrfs can be accessed in two ways.
+
+1. From the parent subvolume +
+When accessing from the parent subvolume, the subvolume can be used just
+like a directory. It can have child subvolumes and its own files/directories.
+
+2. Separate mounted filesystem +
+When `mount`(8) using 'subvol' or 'subvolid' mount option, one can access
+files/directories/subvolumes inside it, but nothing in parent subvolumes.
+
+Also every btrfs filesystem has a default subvolume as its initial top-level
+subvolume, whose subvolume id is 5. (0 is also acceptable as an alias.)
+
+A btrfs snapshot is much like a subvolume, but shares its data(and metadata)
+with other subvolume/snapshot. Due to the capabilities of COW, modifications
+inside a snapshot will only show in a snapshot but not in its source subvolume.
+
+Although in btrfs, subvolumes/snapshots are treated as directories, only
+subvolume/snapshot can be the source of a snapshot, snapshot can not be made
+from normal directories.
+
+SUBCOMMAND
+-----------
+*create* [-i <qgroupid>] [<dest>/]<name>::
+Create a subvolume <name> in <dest>.
++
+If <dest> is not given, subvolume <name> will be created in the current
+directory.
++
+`Options`
++
+-i <qgroupid>::::
+Add the newly created subvolume to a qgroup. This option can be given multiple
+times.
+
+*delete* [options] <subvolume> [<subvolume>...]::
+Delete the subvolume(s) from the filesystem.
++
+If <subvolume> is not a subvolume, btrfs returns an error but continues if
+there are more arguments to process.
++
+The corresponding directory is removed instantly but the data blocks are
+removed later. The deletion does not involve full commit by default due to
+performance reasons (as a consequence, the subvolume may appear again after a
+crash). Use one of the '--commit' options to wait until the operation is safely
+stored on the media.
++
+`Options`
++
+-c|--commit-after::::
+wait for transaction commit at the end of the operation
++
+-C|--commit-each::::
+wait for transaction commit after deleting each subvolume
+
+*find-new* <subvolume> <last_gen>::
+List the recently modified files in a subvolume, after <last_gen> ID.
+
+*get-default* <path>::
+Get the default subvolume of the filesystem <path>.
++
+The output format is similar to *subvolume list* command.
+
+*list* [options] [-G [\+|-]<value>] [-C [+|-]<value>] [--sort=rootid,gen,ogen,path] <path>::
+List the subvolumes present in the filesystem <path>.
++
+For every subvolume the following information is shown by default. +
+ID <ID> top level <ID> path <path> +
+where path is the relative path of the subvolume to the top level subvolume.
+The subvolume's ID may be used by the subvolume set-default command,
+or at mount time via the subvolid= option.
+If `-p` is given, then parent <ID> is added to the output between ID
+and top level. The parent's ID may be used at mount time via the
+`subvolrootid=` option.
++
+`Options`
++
+-p::::
+print parent ID.
+-a::::
+print all the subvolumes in the filesystem and distinguish between
+absolute and relative path with respect to the given <path>.
+-c::::
+print the ogeneration of the subvolume, aliases: ogen or origin generation.
+-g::::
+print the generation of the subvolume.
+-o::::
+print only subvolumes below specified <path>.
+-u::::
+print the UUID of the subvolume.
+-q::::
+print the parent uuid of subvolumes (and snapshots).
+-R::::
+print the UUID of the sent subvolume, where the subvolume is the result of a receive operation
+-t::::
+print the result as a table.
+-s::::
+only snapshot subvolumes in the filesystem will be listed.
+-r::::
+only readonly subvolumes in the filesystem will be listed.
+-G [+|-]<value>::::
+list subvolumes in the filesystem that its generation is
+>=, \<= or = value. \'\+' means >= value, \'-' means \<= value. If there is
+neither \'+' nor \'-', it means = value.
+-C [+|-]<value>::::
+list subvolumes in the filesystem that its ogeneration is
+>=, \<= or = value. The usage is the same as for the '-G' option.
+--sort=rootid,gen,ogen,path::::
+list subvolumes in order by specified items.
+you can add \'\+' or \'-' in front of each items, \'+' means ascending,
+\'-' means descending. The default is ascending.
++
+for --sort you can combine some items together by \',', just like
+--sort=+ogen,-gen,path,rootid.
+
+*set-default* <id> <path>::
+Set the subvolume of the filesystem <path> which is mounted as
+default.
++
+The subvolume is identified by <id>, which is returned by the *subvolume list*
+command.
+
+*show* <path>::
+Show information of a given subvolume in the <path>.
+
+*snapshot* [-r] <source> <dest>|[<dest>/]<name>::
+Create a writable/readonly snapshot of the subvolume <source> with the
+name <name> in the <dest> directory.
++
+If only <dest> is given, the subvolume will be named the basename of <source>.
+If <source> is not a subvolume, btrfs returns an error.
+If '-r' is given, the snapshot will be readonly.
+
+*sync* <path> [subvolid...]::
+Wait until given subvolume(s) are completely removed from the filesystem
+after deletion. If no subvolume id is given, wait until all current deletion
+requests are completed, but do not wait for subvolumes deleted meanwhile.
+The status of subvolume ids is checked periodically.
++
+`Options`
++
+-s <N>::::
+sleep N seconds between checks (default: 1)
+
+EXIT STATUS
+-----------
+*btrfs subvolume* returns a zero exit status if it succeeds. A non-zero value is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-quota`(8),
+`btrfs-qgroup`(8),
diff --git a/Documentation/btrfs.asciidoc b/Documentation/btrfs.asciidoc
new file mode 100644
index 00000000..abf1ff89
--- /dev/null
+++ b/Documentation/btrfs.asciidoc
@@ -0,0 +1,127 @@
+btrfs(8)
+========
+
+NAME
+----
+btrfs - control a btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs* <command> [<args>]
+
+DESCRIPTION
+-----------
+The *btrfs* utility is a toolbox for managing btrfs filesystems. There are
+command groups to work with subvolumes, devices, for whole filesystem or other
+specific actions. See section *COMMANDS*.
+
+COMMAND SYNTAX
+--------------
+
+Any command name can be shortened as far as it stays unambiguous,
+however it is recommended to use full command names in scripts.
+All command groups have their manual page named *btrfs-<group>*.
+
+For example: it is possible to run *btrfs sub snaps* instead of
+*btrfs subvolume snapshot*.
+But *btrfs file s* is not allowed, because *file s* may be interpreted
+both as *filesystem show* and as *filesystem sync*.
+
+If the command name is ambiguous, the list of conflicting options is
+printed.
+
+For an overview of a given command use 'btrfs command --help'
+or 'btrfs [command...] --help --full' to print all available options.
+
+COMMANDS
+--------
+*balance*::
+ Balance btrfs filesystem chunks across single or several devices. +
+ See `btrfs-balance`(8) for details.
+
+*check*::
+ Do off-line check on a btrfs filesystem. +
+ See `btrfs-check`(8) for details.
+
+*device*::
+ Manage devices managed by btrfs, including add/delete/scan and so
+ on. +
+ See `btrfs-device`(8) for details.
+
+*filesystem*::
+ Manage a btrfs filesystem, including label setting/sync and so on. +
+ See `btrfs-filesystem`(8) for details.
+
+*inspect-internal*::
+ Debug tools for developers/hackers. +
+ See `btrfs-inspect-internal`(8) for details.
+
+*property*::
+ Get/set a property from/to a btrfs object. +
+ See `btrfs-property`(8) for details.
+
+*qgroup*::
+ Manage quota groups (qgroups) for a btrfs filesystem. +
+ See `btrfs-qgroup`(8) for details.
+
+*quota*::
+ Manage quota on a btrfs filesystem, e.g. enabling/rescan etc. +
+ See `btrfs-quota`(8) and `btrfs-qgroup`(8) for details.
+
+*receive*::
+ Receive subvolume data from stdin/file for restore etc. +
+ See `btrfs-receive`(8) for details.
+
+*replace*::
+ Replace btrfs devices. +
+ See `btrfs-replace`(8) for details.
+
+*rescue*::
+ Try to rescue damaged btrfs filesystem. +
+ See `btrfs-rescue`(8) for details.
+
+*restore*::
+ Try to restore files from a damaged btrfs filesystem. +
+ See `btrfs-restore`(8) for details.
+
+*scrub*::
+ Scrub a btrfs filesystem. +
+ See `btrfs-scrub`(8) for details.
+
+*send*::
+ Send subvolume data to stdout/file for backup etc. +
+ See `btrfs-send`(8) for details.
+
+*subvolume*::
+ Create/delete/list/manage btrfs subvolume. +
+ See `btrfs-subvolume`(8) for details.
+
+EXIT STATUS
+-----------
+*btrfs* returns a zero exit status if it succeeds. A non-zero value is returned
+in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8), `ionice`(1),
+`btrfs-balance`(8),
+`btrfs-check`(8),
+`btrfs-device`(8),
+`btrfs-filesystem`(8),
+`btrfs-inspect-internal`(8),
+`btrfs-property`(8),
+`btrfs-qgroup`(8),
+`btrfs-quota`(8),
+`btrfs-receive`(8),
+`btrfs-replace`(8),
+`btrfs-rescue`(8),
+`btrfs-restore`(8),
+`btrfs-scrub`(8),
+`btrfs-send`(8),
+`btrfs-subvolume`(8),
diff --git a/Documentation/btrfstune.asciidoc b/Documentation/btrfstune.asciidoc
new file mode 100644
index 00000000..f5cf15e7
--- /dev/null
+++ b/Documentation/btrfstune.asciidoc
@@ -0,0 +1,75 @@
+btrfstune(8)
+============
+
+NAME
+----
+btrfstune - tune various filesystem parameters
+
+SYNOPSIS
+--------
+*btrfstune* [options] <dev> [<dev>...]
+
+DESCRIPTION
+-----------
+*btrfstune* can be used to enable, disable or set various filesystem
+parameters. The filesystem must be unmounted.
+
+The common usecase is to enable features that were not enabled at mkfs time.
+Please make sure that you have kernel support for the features. You can find a
+complete list of features and kernel version of their introduction at
+https://btrfs.wiki.kernel.org/index.php/Changelog#By_feature .
+
+OPTIONS
+-------
+-S <0|1>::
+Enable seeding on a given device. Value 1 will enable seeding, 0 will disable it. +
+A seeding filesystem is forced to be mounted read-only. A new device can be added
+to the filesystem and will capture all writes keeping the seeding device intact.
+-r::
+Enable extended inode refs (hardlink limit per file in a directory is 65536),
+enabled by mkfs feature 'extref'. Since kernel 3.7.
+-x::
+Enable skinny metadata extent refs (more efficient representation of extents),
+enabled by mkfs feature 'skinny-metadata'. Since kernel 3.10.
+-n::
+Enable no-holes feature (more efficient representation of file holes), enabled
+by mkfs feature 'no-holes'. Since kernel 3.14.
+-f::
+Allow dangerous changes, e.g. clear the seeding flag or change fsid. Make sure
+that you are aware of the dangers.
+-u::
+Change fsid to a randomly generated UUID or continue previous fsid change
+operation in case it was interrupted.
+-U <UUID>::
+Change fsid to 'UUID'.
++
+The 'UUID' should be a 36-byte string in `printf`(3) format
+'"%08x-%04x-%04x-%04x-%012x"'.
+If there is a previous unfinished fsid change, it will continue only if the
+'UUID' matches the unfinished one or if you use the option '-u'.
+
+WARNING: Cancelling or interrupting a UUID change operation will make the
+filesystem temporarily unmountable. To fix it, rerun 'btrfstune -u' to restore
+the UUID and let it complete.
+
+WARNING: Clearing the seeding flag on a device may be dangerous.
+If a previously-seeding device is changed, all filesystems that used that
+device will become unmountable. Setting the seeding flag back will not fix
+that. +
+A valid usecase is 'seeding device as a base image'. Clear the seeding
+flag, update the filesystem and make it seeding again, provided that it's ok
+to throw away all filesystems built on top of the previous base.
+
+EXIT STATUS
+-----------
+*btrfstune* returns 0 if no error happened, 1 otherwise.
+
+COMPATIBILITY NOTE
+------------------
+This tool exists for historical reasons but is still in use today. The
+functionality is about to be merged to the main tool someday and *btrfstune*
+will become deprecated and removed afterwards.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/fsck.btrfs.asciidoc b/Documentation/fsck.btrfs.asciidoc
new file mode 100644
index 00000000..0bad075b
--- /dev/null
+++ b/Documentation/fsck.btrfs.asciidoc
@@ -0,0 +1,51 @@
+fsck.btrfs(8)
+=============
+
+NAME
+----
+fsck.btrfs - do nothing, successfully
+
+SYNOPSIS
+--------
+*fsck.btrfs* [-aApy] [<device>...]
+
+DESCRIPTION
+-----------
+*fsck.btrfs* is a type of utility that should exist for any filesystem and is
+called during system setup when the corresponding `/etc/fstab` entries
+contain a non-zero value for `fs_passno`, see `fstab`(5) for more.
+
+Traditional filesystems need to run their respective fsck utility in case the
+filesystem was not unmounted cleanly and the log needs to be replayed before
+mount. This is not needed for BTRFS. You should set fs_passno to 0.
+
+If you wish to check the consistency of a BTRFS filesystem or repair a damaged
+filesystem, see `btrfs-check`(8). By default the filesystem
+consistency is checked, the repair mode is enabled via '--repair' option (use
+with care!).
+
+OPTIONS
+-------
+The options are all the same and detect whether *fsck.btrfs* is executed in
+non-interactive mode, in which case it exits with success;
+otherwise it prints a message about btrfs check.
+
+EXIT STATUS
+-----------
+There are two possible exit codes returned:
+
+0::
+No error
+
+8::
+Operational error, e.g. device does not exist
+
+FILES
+-----
+`/etc/fstab`
+
+SEE ALSO
+--------
+`btrfs`(8),
+`fsck`(8),
+`fstab`(5),
diff --git a/Documentation/manpage-base.xsl b/Documentation/manpage-base.xsl
new file mode 100644
index 00000000..a264fa61
--- /dev/null
+++ b/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+ git.docbook.backslash and git.docbook.dot params
+ must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB(',
+ substring-after(@id,'-'),')',
+ $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>sp&#10;</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB',
+ substring-after(@arearefs,'-'),
+ '. ',$git.docbook.backslash,'fR')"/>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-bold-literal.xsl b/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 00000000..608eb5df
--- /dev/null
+++ b/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+ this makes literal text easier to distinguish in manpages
+ viewed on a tty -->
+<xsl:template match="literal">
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fB</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-normal.xsl b/Documentation/manpage-normal.xsl
new file mode 100644
index 00000000..a48f5b11
--- /dev/null
+++ b/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot" >.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/Documentation/mkfs.btrfs.asciidoc b/Documentation/mkfs.btrfs.asciidoc
new file mode 100644
index 00000000..6a492658
--- /dev/null
+++ b/Documentation/mkfs.btrfs.asciidoc
@@ -0,0 +1,341 @@
+mkfs.btrfs(8)
+=============
+
+NAME
+----
+mkfs.btrfs - create a btrfs filesystem
+
+SYNOPSIS
+--------
+*mkfs.btrfs*
+$$[-A|--alloc-start <alloc-start>]$$
+$$[-b|--byte-count <byte-count>]$$
+$$[-d|--data <data-profile>]$$
+$$[-m|--metadata <metadata profile>]$$
+$$[-M|--mixed]$$
+$$[-l|--leafsize <leafsize>]$$
+$$[-n|--nodesize <nodesize>]$$
+$$[-s|--sectorsize <sectorsize>]$$
+$$[-L|--label <label>]$$
+$$[-K|--nodiscard]$$
+$$[-r|--rootdir <rootdir>]$$
+$$[-O|--features <feature1>[,<feature2>...]]$$
+$$[-U|--uuid <UUID>]$$
+$$[-f|--force]$$
+$$[-q|--quiet]$$
+$$[--help]$$
+$$[-V|--version]$$
+$$<device> [<device>...]$$
+
+DESCRIPTION
+-----------
+*mkfs.btrfs* is used to create the btrfs filesystem on a single or multiple
+devices. <device> is typically a block device but can be a file-backed image
+as well. Multiple devices are grouped by UUID of the filesystem.
+
+Before mounting such filesystem, the kernel module must know all the devices
+either via preceding execution of *btrfs device scan* or using the *device*
+mount option. See section *MULTIPLE DEVICES* for more details.
+
+OPTIONS
+-------
+*-A|--alloc-start <offset>*::
+(An option to help debugging chunk allocator.)
+Specify the (physical) offset from the start of the device at which allocations
+start. The default value is zero.
+
+*-b|--byte-count <size>*::
+Specify the size of the filesystem. If this option is not used,
+mkfs.btrfs uses the entire device space for the filesystem.
+
+*-d|--data <profile>*::
+Specify the profile for the data block groups. Valid values are 'raid0',
+'raid1', 'raid5', 'raid6', 'raid10', 'single' or 'dup' (case does not matter).
++
+See 'DUP PROFILES ON A SINGLE DEVICE' for more.
+
+*-m|--metadata <profile>*::
+Specify the profile for the metadata block groups.
+Valid values are 'raid0', 'raid1', 'raid5', 'raid6', 'raid10', 'single' or
+'dup' (case does not matter).
++
+A single device filesystem will default to 'DUP', unless an SSD is detected. Then
+it will default to 'single'. The detection is based on the value of
+`/sys/block/DEV/queue/rotational`, where 'DEV' is the short name of the device.
++
+Note that the rotational status can be arbitrarily set by the underlying block
+device driver and may not reflect the true status (network block device, memory-backed
+SCSI devices etc). Use the options '--data/--metadata' to avoid confusion.
++
+See 'DUP PROFILES ON A SINGLE DEVICE' for more details.
+
+*-M|--mixed*::
+Normally the data and metadata block groups are isolated. The 'mixed' mode
+will remove the isolation and store both types in the same block group type.
+This helps to utilize the free space regardless of the purpose and is suitable
+for small devices. The separate allocation of block groups leads to a situation
+where the space is reserved for the other block group type, is not available for
+allocation and can lead to ENOSPC state.
++
+The recommended size for the mixed mode is for filesystems less than 1GiB. The
+soft recommendation is to use it for filesystems smaller than 5GiB. The mixed
+mode may lead to degraded performance on larger filesystems, but is otherwise
+usable, even on multiple devices.
++
+The 'nodesize' and 'sectorsize' must be equal, and the block group types must
+match.
++
+NOTE: versions up to 4.2.x forced the mixed mode for devices smaller than 1GiB.
+This has been removed in 4.3+ as it caused some usability issues.
+
+*-l|--leafsize <size>*::
+Alias for --nodesize. Deprecated.
+
+*-n|--nodesize <size>*::
+Specify the nodesize, the tree block size in which btrfs stores metadata. The
+default value is 16KiB (16384) or the page size, whichever is bigger. Must be a
+multiple of the sectorsize, but not larger than 64KiB (65536). Leafsize always
+equals nodesize and the options are aliases.
++
+Smaller node size increases fragmentation but leads to taller b-trees which in
+turn leads to lower locking contention. Higher node sizes give better packing
+and less fragmentation at the cost of more expensive memory operations while
+updating the metadata blocks.
++
+NOTE: versions up to 3.11 set the nodesize to 4k.
+
+*-s|--sectorsize <size>*::
+Specify the sectorsize, the minimum data block allocation unit.
++
+The default value is the page size and is autodetected. If the sectorsize
+differs from the page size, the created filesystem may not be mountable by the
+kernel. Therefore it is not recommended to use this option unless you are going
+to mount it on a system with the appropriate page size.
+
+*-L|--label <string>*::
+Specify a label for the filesystem. The 'string' should be less than 256
+bytes and must not contain newline characters.
+
+*-K|--nodiscard*::
+Do not perform whole device TRIM operation on devices that are capable of that.
+
+*-r|--rootdir <rootdir>*::
+Populate the toplevel subvolume with files from 'rootdir'. This does not
+require root permissions and does not mount the filesystem.
+
+*-O|--features <feature1>[,<feature2>...]*::
+A list of filesystem features turned on at mkfs time. Not all features are
+supported by old kernels. To disable a feature, prefix it with '^'.
++
+See section *FILESYSTEM FEATURES* for more details. To see all available
+features that mkfs.btrfs supports run:
++
++mkfs.btrfs -O list-all+
+
+*-f|--force*::
+Forcibly overwrite the block devices when an existing filesystem is detected.
+By default, mkfs.btrfs will utilize 'libblkid' to check for any known
+filesystem on the devices. Alternatively you can use the `wipefs` utility
+to clear the devices.
+
+*-q|--quiet*::
+Print only error or warning messages. Options --features or --help are unaffected.
+
+*-U|--uuid <UUID>*::
+Create the filesystem with the given 'UUID'. The UUID must not exist on any
+filesystem currently present.
+
+*-V|--version*::
+Print the *mkfs.btrfs* version and exit.
+
+*--help*::
+Print help.
+
+SIZE UNITS
+----------
+The default unit is 'byte'. All size parameters accept suffixes in the 1024
+base. The recognized suffixes are: 'k', 'm', 'g', 't', 'p', 'e', both uppercase
+and lowercase.
+
+MULTIPLE DEVICES
+----------------
+
+Before mounting a multiple device filesystem, the kernel module must know the
+association of the block devices that are attached to the filesystem UUID.
+
+There is typically no action needed from the user. On a system that utilizes a
+udev-like daemon, any new block device is automatically registered. The rules
+call *btrfs device scan*.
+
+The same command can be used to trigger the device scanning if the btrfs kernel
+module is reloaded (naturally all previous information about the device
+registration is lost).
+
+Another possibility is to use the mount options *device* to specify the list of
+devices to scan at the time of mount.
+
+ # mount -o device=/dev/sdb,device=/dev/sdc /dev/sda /mnt
+
+NOTE: this means only scanning; if the devices do not exist in the system,
+mount will fail anyway. This can happen on systems without initramfs/initrd and
+root partition created with RAID1/10/5/6 profiles. The mount action can happen
+before all block devices are discovered. The waiting is usually done on the
+initramfs/initrd systems.
+
+FILESYSTEM FEATURES
+-------------------
+
+*mixed-bg*::
+mixed data and metadata block groups, also set by option '--mixed'
+
+*extref*::
+(default since btrfs-progs 3.12, kernel support since 3.7)
++
+increased hardlink limit per file in a directory to 65536, older kernels
+supported a varying number of hardlinks depending on the sum of all file name
+sizes that can be stored into one metadata block
+
+*raid56*::
+extended format for RAID5/6, also enabled if raid5 or raid6 block groups
+are selected
+
+*skinny-metadata*::
+(default since btrfs-progs 3.18, kernel support since 3.10)
++
+reduced-size metadata for extent references, saves a few percent of metadata
+
+*no-holes*::
+improved representation of file extents where holes are not explicitly
+stored as an extent, saves a few percent of metadata if sparse files are used
+
+BLOCK GROUPS, CHUNKS, RAID
+--------------------------
+
+The highlevel organizational units of a filesystem are block groups of three types:
+data, metadata and system.
+
+*DATA*::
+store data blocks and nothing else
+
+*METADATA*::
+store internal metadata in b-trees, can store file data if they fit into the
+inline limit
+
+*SYSTEM*::
+store structures that describe the mapping between the physical devices and the
+linear logical space representing the filesystem
+
+Other terms commonly used:
+
+*block group*::
+*chunk*::
+a logical range of space of a given profile, stores data, metadata or both;
+sometimes the terms are used interchangeably
++
+A typical size of metadata block group is 256MiB (filesystem smaller than
+50GiB) and 1GiB (larger than 50GiB), for data it's 1GiB. The system block group
+size is a few megabytes.
+
+*RAID*::
+a block group profile type that utilizes RAID-like features on multiple
+devices: striping, mirroring, parity
+
+*profile*::
+when used in connection with block groups refers to the allocation strategy
+and constraints, see the section 'PROFILES' for more details
+
+PROFILES
+--------
+
+There are the following block group types available:
+
+[ cols="^,^,^,^,^",width="60%" ]
+|=============================================================
+.2+^.<h| Profile 3+^.^h| Redundancy .2+^.<h| Min/max devices
+ ^.^h| Copies ^.^h| Parity ^.<h| Striping
+| single | 1 | | | 1/any
+| DUP | 2 / 1 device | | | 1/1 ^(see note)^
+| RAID0 | | | 1 to N | 2/any
+| RAID1 | 2 | | | 2/any
+| RAID10 | 2 | | 1 to N | 4/any
+| RAID5 | 1 | 1 | 2 to N - 1 | 2/any
+| RAID6 | 1 | 2 | 3 to N - 2 | 3/any
+|=============================================================
+
+'Note:' DUP may exist on more than 1 device if it starts on a single device and
+another one is added, but *mkfs.btrfs* will not let you create DUP on multiple
+devices.
+
+DUP PROFILES ON A SINGLE DEVICE
+-------------------------------
+
+The mkfs utility will let the user create a filesystem with profiles that write
+the logical blocks to 2 physical locations. Whether there are really 2
+physical copies highly depends on the underlying device type.
+
+For example, an SSD drive can remap the blocks internally to a single copy thus
+deduplicating them. This negates the purpose of increased redundancy and just
+wastes space.
+
+The duplicated data/metadata may still be useful to statistically improve the
+chances on a device that might perform some internal optimizations. The actual
+details are not usually disclosed by vendors. As another example, the widely
+used USB flash or SD cards use a translation layer. The data lifetime may
+be affected by frequent plugging. The memory cells could get damaged, hopefully
+not destroying both copies of particular data.
+
+The traditional rotational hard drives usually fail at the sector level.
+
+In any case, a device that starts to misbehave and repairs from the DUP copy
+should be replaced! *DUP is not backup*.
+
+KNOWN ISSUES
+------------
+
+**SMALL FILESYSTEMS AND LARGE NODESIZE**
+
+The combination of small filesystem size and large nodesize is not recommended
+in general and can lead to various ENOSPC-related issues during mount time or runtime.
+
+Since mixed block group creation is optional, we allow small
+filesystem instances with differing values for 'sectorsize' and 'nodesize'
+to be created and could end up in the following situation:
+
+ # mkfs.btrfs -f -n 65536 /dev/loop0
+ btrfs-progs v3.19-rc2-405-g976307c
+ See http://btrfs.wiki.kernel.org for more information.
+
+ Performing full device TRIM (512.00MiB) ...
+ Label: (null)
+ UUID: 49fab72e-0c8b-466b-a3ca-d1bfe56475f0
+ Node size: 65536
+ Sector size: 4096
+ Filesystem size: 512.00MiB
+ Block group profiles:
+ Data: single 8.00MiB
+ Metadata: DUP 40.00MiB
+ System: DUP 12.00MiB
+ SSD detected: no
+ Incompat features: extref, skinny-metadata
+ Number of devices: 1
+ Devices:
+ ID SIZE PATH
+ 1 512.00MiB /dev/loop0
+
+ # mount /dev/loop0 /mnt/
+ mount: mount /dev/loop0 on /mnt failed: No space left on device
+
+The ENOSPC occurs during the creation of the UUID tree. This is caused
+by large metadata blocks and space reservation strategy that allocates more
+than can fit into the filesystem.
+
+
+AVAILABILITY
+------------
+*mkfs.btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`btrfs`(8), `wipefs`(8)
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 00000000..85a839f1
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,79 @@
+Installation instructions
+=========================
+
+The Btrfs utility programs require the following libraries/tools to build:
+
+- libuuid - provided by util-linux, e2fsprogs/e2fslibs or libuuid
+- libblkid - block device id library
+- liblzo2 - LZO data compression library
+- zlib - ZLIB data compression library
+
+For the btrfs-convert utility:
+
+- e2fsprogs - ext2/ext3/ext4 file system libraries, also called e2fslibs
+
+Generating documentation:
+
+- asciidoc - text document format tool
+- xmlto - text document format tool
+
+XATTR library should be provided by the standard C library or by
+
+- libattr - extended attribute library
+
+Please note that the package names may differ according to the distribution.
+See https://btrfs.wiki.kernel.org/index.php/Btrfs_source_repositories#Dependencies .
+
+
+Building from sources
+---------------------
+
+To build from git sources you need to generate the configure script using the
+autotools:
+
+ $ ./autogen.sh
+
+To build from the released tarballs:
+
+ $ ./configure
+ $ make
+ $ make install
+
+You may disable building some parts like documentation, btrfs-convert or
+backtrace support. See ./configure --help for more.
+
+Specific CFLAGS or LDFLAGS should be set like
+
+ $ CFLAGS=... LDFLAGS=... ./configure --prefix=/usr
+
+and not as arguments to make. You can specify additional flags to build via
+variables EXTRA_CFLAGS and EXTRA_LDFLAGS that get appended to the predefined
+values of the respective variables.
+
+ $ make EXTRA_CFLAGS=-ggdb3
+
+The build utilizes autotools, dependencies for generating the configure
+scripts are:
+
+* autoconf, autoheader
+* automake, aclocal
+* pkg-config
+
+
+Statically built binaries
+-------------------------
+
+The makefiles are ready to let you build static binaries of the utilities. This
+may be handy in rescue environments. Your system has to provide static versions
+of the libraries.
+
+$ make static
+$ make btrfs.static
+$ make btrfs-convert.static
+
+The resulting static binaries have the '.static' suffix, the intermediate object
+files do not conflict with the normal (dynamic) build.
+
+
+References:
+* https://btrfs.wiki.kernel.org
diff --git a/Makefile.extrawarn b/Makefile.extrawarn
new file mode 100644
index 00000000..1f4bda94
--- /dev/null
+++ b/Makefile.extrawarn
@@ -0,0 +1,90 @@
+# From linux.git/scripts/Kbuild.include
+#
+# try-run
+# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
+# Exit code chooses option. "$$TMP" can be used as a temporary file and
+# is automatically cleaned up.
+try-run = $(shell set -e; \
+ TMP="$(TMPOUT).$$$$.tmp"; \
+ TMPO="$(TMPOUT).$$$$.o"; \
+ if ($(1)) >/dev/null 2>&1; \
+ then echo "$(2)"; \
+ else echo "$(3)"; \
+ fi; \
+ rm -f "$$TMP" "$$TMPO")
+
+ # cc-option
+ # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
+
+ cc-option = $(call try-run,\
+ $(CC) $(CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+
+# From linux.git/scripts/Makefile.extrawarn
+# ==========================================================================
+#
+# make W=... settings
+#
+# W=1 - warnings that may be relevant and do not occur too often
+# W=2 - warnings that occur quite often but may still be relevant
+# W=3 - the more obscure warnings, can most likely be ignored
+#
+# $(call cc-option, -W...) handles gcc -W.. options which
+# are not supported by all versions of the compiler
+# ==========================================================================
+
+ifeq ("$(origin W)", "command line")
+ export BUILD_ENABLE_EXTRA_GCC_CHECKS := $(W)
+endif
+
+ifdef BUILD_ENABLE_EXTRA_GCC_CHECKS
+warning- := $(empty)
+
+warning-1 := -Wextra -Wunused -Wno-unused-parameter
+warning-1 += -Wmissing-declarations
+warning-1 += -Wmissing-format-attribute
+warning-1 += $(call cc-option, -Wmissing-prototypes)
+warning-1 += -Wold-style-definition
+warning-1 += $(call cc-option, -Wmissing-include-dirs)
+warning-1 += $(call cc-option, -Wunused-but-set-variable)
+warning-1 += $(call cc-disable-warning, missing-field-initializers)
+
+warning-2 := -Waggregate-return
+warning-2 += -Wcast-align
+warning-2 += -Wdisabled-optimization
+warning-2 += -Wnested-externs
+warning-2 += -Wshadow
+warning-2 += $(call cc-option, -Wlogical-op)
+warning-2 += $(call cc-option, -Wmissing-field-initializers)
+
+warning-3 := -Wbad-function-cast
+warning-3 += -Wcast-qual
+warning-3 += -Wconversion
+warning-3 += -Wpacked
+warning-3 += -Wpadded
+warning-3 += -Wpointer-arith
+warning-3 += -Wredundant-decls
+warning-3 += -Wswitch-default
+warning-3 += $(call cc-option, -Wpacked-bitfield-compat)
+warning-3 += $(call cc-option, -Wvla)
+
+warning := $(warning-$(findstring 1, $(BUILD_ENABLE_EXTRA_GCC_CHECKS)))
+warning += $(warning-$(findstring 2, $(BUILD_ENABLE_EXTRA_GCC_CHECKS)))
+warning += $(warning-$(findstring 3, $(BUILD_ENABLE_EXTRA_GCC_CHECKS)))
+
+ifeq ("$(strip $(warning))","")
+ $(error W=$(BUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown)
+endif
+
+EXTRAWARN_CFLAGS += $(warning)
+else
+
+ifeq ($(COMPILER),clang)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, initializer-overrides)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, unused-value)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, format)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, unknown-warning-option)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, sign-compare)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, format-zero-length)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, uninitialized)
+endif
+endif
diff --git a/Makefile.in b/Makefile.in
new file mode 100644
index 00000000..91847896
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,399 @@
+# btrfs-progs
+#
+# Basic build targets:
+# all all main tools
+# static build static binaries, requires static version of the libraries
+# test run the full testsuite
+# install install to default location (/usr/local)
+# clean clean built binaries (not the documentation)
+#
+# Tuning by variables (environment or make arguments):
+# V=1 verbose, print command lines (default: quiet)
+# C=1 run checker before compilation (default checker: sparse)
+# W=123 build with warnings (default: off)
+# EXTRA_CFLAGS additional compiler flags
+# EXTRA_LDFLAGS additional linker flags
+#
+# Static checkers:
+# CHECKER static checker binary to be called (default: sparse)
+# CHECKER_FLAGS flags to pass to CHECKER, can override CFLAGS
+#
+
+# Export all variables to sub-makes by default
+export
+
+include Makefile.extrawarn
+
+CC = @CC@
+LN_S = @LN_S@
+AR = @AR@
+RM = @RM@
+RMDIR = @RMDIR@
+INSTALL = @INSTALL@
+DISABLE_DOCUMENTATION = @DISABLE_DOCUMENTATION@
+DISABLE_BTRFSCONVERT = @DISABLE_BTRFSCONVERT@
+
+EXTRA_CFLAGS :=
+EXTRA_LDFLAGS :=
+
+# Common build flags
+CFLAGS = @CFLAGS@ \
+ -include config.h \
+ -DBTRFS_FLAT_INCLUDES \
+ -D_XOPEN_SOURCE=700 \
+ -fno-strict-aliasing \
+ -fPIC $(EXTRAWARN_CFLAGS) $(EXTRA_CFLAGS)
+
+LDFLAGS = @LDFLAGS@ \
+ -rdynamic $(EXTRA_LDFLAGS)
+
+LIBS = @UUID_LIBS@ @BLKID_LIBS@ @ZLIB_LIBS@ @LZO2_LIBS@ -L. -pthread
+LIBBTRFS_LIBS = $(LIBS)
+
+# Static compilation flags
+STATIC_CFLAGS = $(CFLAGS) -ffunction-sections -fdata-sections
+STATIC_LDFLAGS = -static -Wl,--gc-sections
+STATIC_LIBS = @UUID_LIBS_STATIC@ @BLKID_LIBS_STATIC@ \
+ @ZLIB_LIBS_STATIC@ @LZO2_LIBS_STATIC@ -L. -pthread
+
+# don't use FORTIFY with sparse because glibc with FORTIFY can
+# generate so many sparse errors that sparse stops parsing,
+# which masks real errors that we want to see.
+CHECKER := sparse
+check_defs := .cc-defines.h
+CHECKER_FLAGS := -include $(check_defs) -D__CHECKER__ \
+ -D__CHECK_ENDIAN__ -Wbitwise -Wuninitialized -Wshadow -Wundef \
+ -U_FORTIFY_SOURCE
+
+objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
+ root-tree.o dir-item.o file-item.o inode-item.o inode-map.o \
+ extent-cache.o extent_io.o volumes.o utils.o repair.o \
+ qgroup.o raid6.o free-space-cache.o list_sort.o props.o \
+ ulist.o qgroup-verify.o backref.o string-table.o task-utils.o \
+ inode.o file.o find-root.o free-space-tree.o help.o
+cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \
+ cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \
+ cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o \
+ cmds-restore.o cmds-rescue.o chunk-recover.o super-recover.o \
+ cmds-property.o cmds-fi-usage.o
+libbtrfs_objects = send-stream.o send-utils.o rbtree.o btrfs-list.o crc32c.o \
+ uuid-tree.o utils-lib.o rbtree-utils.o
+libbtrfs_headers = send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \
+ crc32c.h list.h kerncompat.h radix-tree.h extent-cache.h \
+ extent_io.h ioctl.h ctree.h btrfsck.h version.h
+TESTS = fsck-tests.sh convert-tests.sh
+
+prefix ?= @prefix@
+exec_prefix = @exec_prefix@
+bindir = @bindir@
+libdir ?= @libdir@
+incdir = @includedir@/btrfs
+
+ifeq ("$(origin V)", "command line")
+ BUILD_VERBOSE = $(V)
+endif
+ifndef BUILD_VERBOSE
+ BUILD_VERBOSE = 0
+endif
+
+ifeq ($(BUILD_VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+MAKEOPTS = --no-print-directory Q=$(Q)
+
+# build all by default
+progs = $(progs_install) btrfsck btrfs-corrupt-block btrfs-calc-size
+
+# install only selected
+progs_install = btrfs mkfs.btrfs btrfs-debug-tree \
+ btrfs-map-logical btrfs-image btrfs-zero-log \
+ btrfs-find-root btrfstune btrfs-show-super \
+ btrfs-select-super
+
+progs_extra = btrfs-fragments
+
+progs_static = $(foreach p,$(progs),$(p).static)
+
+ifneq ($(DISABLE_BTRFSCONVERT),1)
+progs_install += btrfs-convert
+endif
+
+# external libs required by various binaries; for btrfs-foo,
+# specify btrfs_foo_libs = <list of libs>; see $($(subst...)) rules below
+btrfs_convert_libs = @EXT2FS_LIBS@ @COM_ERR_LIBS@
+btrfs_fragments_libs = -lgd -lpng -ljpeg -lfreetype
+
+SUBDIRS =
+BUILDDIRS = $(patsubst %,build-%,$(SUBDIRS))
+INSTALLDIRS = $(patsubst %,install-%,$(SUBDIRS))
+CLEANDIRS = $(patsubst %,clean-%,$(SUBDIRS))
+
+ifneq ($(DISABLE_DOCUMENTATION),1)
+BUILDDIRS += build-Documentation
+INSTALLDIRS += install-Documentation
+endif
+
+.PHONY: $(SUBDIRS)
+.PHONY: $(BUILDDIRS)
+.PHONY: $(INSTALLDIRS)
+.PHONY: $(TESTDIRS)
+.PHONY: $(CLEANDIRS)
+.PHONY: all install clean
+
+# Create all the static targets
+static_objects = $(patsubst %.o, %.static.o, $(objects))
+static_cmds_objects = $(patsubst %.o, %.static.o, $(cmds_objects))
+static_libbtrfs_objects = $(patsubst %.o, %.static.o, $(libbtrfs_objects))
+
+libs_shared = libbtrfs.so.0.1
+libs_static = libbtrfs.a
+libs = $(libs_shared) $(libs_static)
+lib_links = libbtrfs.so.0 libbtrfs.so
+headers = $(libbtrfs_headers)
+
+# make C=1 to enable sparse
+ifdef C
+ # We're trying to use sparse against glibc headers which go wild
+ # trying to use internal compiler macros to test features. We
+ # copy gcc's and give them to sparse. But not __SIZE_TYPE__
+ # 'cause sparse defines that one.
+ #
+ dummy := $(shell $(CC) -dM -E -x c - < /dev/null | \
+ grep -v __SIZE_TYPE__ > $(check_defs))
+ check = $(CHECKER)
+ check_echo = echo
+else
+ check = true
+ check_echo = true
+endif
+
+%.o.d: %.c
+ $(Q)$(CC) -MM -MG -MF $@ -MT $(@:.o.d=.o) -MT $(@:.o.d=.static.o) -MT $@ $(CFLAGS) $<
+
+.c.o:
+ @$(check_echo) " [SP] $<"
+ $(Q)$(check) $(CFLAGS) $(CHECKER_FLAGS) $<
+ @echo " [CC] $@"
+ $(Q)$(CC) $(CFLAGS) -c $<
+
+%.static.o: %.c
+ @echo " [CC] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -c $< -o $@
+
+all: $(progs) $(BUILDDIRS)
+$(SUBDIRS): $(BUILDDIRS)
+$(BUILDDIRS):
+ @echo "Making all in $(patsubst build-%,%,$@)"
+ $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst build-%,%,$@)
+
+test-convert: btrfs btrfs-convert
+ @echo " [TEST] convert-tests.sh"
+ $(Q)bash tests/convert-tests.sh
+
+test-fsck: btrfs btrfs-image btrfs-corrupt-block btrfs-debug-tree mkfs.btrfs
+ @echo " [TEST] fsck-tests.sh"
+ $(Q)bash tests/fsck-tests.sh
+
+test-misc: btrfs btrfs-image btrfs-corrupt-block btrfs-debug-tree mkfs.btrfs btrfstune
+ @echo " [TEST] misc-tests.sh"
+ $(Q)bash tests/misc-tests.sh
+
+test-mkfs: btrfs mkfs.btrfs
+ @echo " [TEST] mkfs-tests.sh"
+ $(Q)bash tests/mkfs-tests.sh
+
+test-fuzz: btrfs
+ @echo " [TEST] fuzz-tests.sh"
+ $(Q)bash tests/fuzz-tests.sh
+
+test-clean:
+ @echo "Cleaning tests"
+ $(Q)bash tests/clean-tests.sh
+
+test: test-fsck test-mkfs test-convert test-misc test-fuzz
+
+#
+# NOTE: For static compiles, you need to have static equivalents of all
+# the required libs available
+#
+static: $(progs_static)
+
+version.h: version.sh version.h.in configure.ac
+ @echo " [SH] $@"
+ $(Q)bash ./config.status --silent $@
+
+$(libs_shared): $(libbtrfs_objects) $(lib_links) send.h
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) $(libbtrfs_objects) $(LDFLAGS) $(LIBBTRFS_LIBS) \
+ -shared -Wl,-soname,libbtrfs.so.0 -o libbtrfs.so.0.1
+
+$(libs_static): $(libbtrfs_objects)
+ @echo " [AR] $@"
+ $(Q)$(AR) cr libbtrfs.a $(libbtrfs_objects)
+
+$(lib_links):
+ @echo " [LN] $@"
+ $(Q)$(LN_S) -f libbtrfs.so.0.1 $@
+
+# keep intermediate files from the below implicit rules around
+.PRECIOUS: $(addsuffix .o,$(progs))
+
+# Make any btrfs-foo out of btrfs-foo.o, with appropriate libs.
+# The $($(subst...)) bits below take the btrfs_*_libs definitions above and
+# turns them into a list of libraries to link against if they exist
+#
+# For static variants, use an extra $(subst) to get rid of the ".static"
+# from the target name before translating to list of libs
+
+btrfs-%.static: $(static_objects) btrfs-%.static.o $(static_libbtrfs_objects)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -o $@ $@.o $(static_objects) \
+ $(static_libbtrfs_objects) $(STATIC_LDFLAGS) \
+ $($(subst -,_,$(subst .static,,$@)-libs)) $(STATIC_LIBS)
+
+btrfs-%: $(objects) $(libs_static) btrfs-%.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o $@ $(objects) $@.o $(libs_static) \
+ $(LDFLAGS) $(LIBS) $($(subst -,_,$@-libs))
+
+btrfs: $(objects) btrfs.o $(cmds_objects) $(libs_static)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o btrfs btrfs.o $(cmds_objects) \
+ $(objects) $(libs_static) $(LDFLAGS) $(LIBS)
+
+btrfs.static: $(static_objects) btrfs.static.o $(static_cmds_objects) $(static_libbtrfs_objects)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -o btrfs.static btrfs.static.o $(static_cmds_objects) \
+ $(static_objects) $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS)
+
+# For backward compatibility, 'btrfs' changes behaviour to fsck if it's named 'btrfsck'
+btrfsck: btrfs
+ @echo " [LN] $@"
+ $(Q)$(LN_S) -f btrfs btrfsck
+
+btrfsck.static: btrfs.static
+ @echo " [LN] $@"
+ $(Q)$(LN_S) -f $^ $@
+
+mkfs.btrfs: $(objects) $(libs_static) mkfs.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o mkfs.btrfs $(objects) $(libs_static) mkfs.o $(LDFLAGS) $(LIBS)
+
+mkfs.btrfs.static: $(static_objects) mkfs.static.o $(static_libbtrfs_objects)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -o mkfs.btrfs.static mkfs.static.o $(static_objects) \
+ $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS)
+
+btrfstune: $(objects) $(libs_static) btrfstune.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o btrfstune $(objects) btrfstune.o $(libs_static) $(LDFLAGS) $(LIBS)
+
+btrfstune.static: $(static_objects) btrfstune.static.o $(static_libbtrfs_objects)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -o $@ btrfstune.static.o $(static_objects) \
+ $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS)
+
+dir-test: $(objects) $(libs) dir-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o dir-test $(objects) $(libs) dir-test.o $(LDFLAGS) $(LIBS)
+
+quick-test: $(objects) $(libs) quick-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o quick-test $(objects) $(libs) quick-test.o $(LDFLAGS) $(LIBS)
+
+ioctl-test: $(objects) $(libs) ioctl-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o ioctl-test $(objects) $(libs) ioctl-test.o $(LDFLAGS) $(LIBS)
+
+send-test: $(objects) $(libs) send-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o send-test $(objects) $(libs) send-test.o $(LDFLAGS) $(LIBS)
+
+library-test: $(libs_shared) library-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o library-test library-test.o $(LDFLAGS) -lbtrfs
+
+library-test.static: $(libs_static) library-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o library-test-static library-test.o $(LDFLAGS) $(libs_static)
+
+test-build: test-build-pre test-build-real
+
+test-build-pre:
+ $(MAKE) clean-all
+ ./autogen.sh
+ ./configure
+
+test-build-real:
+ $(MAKE) library-test
+ -$(MAKE) library-test.static
+ $(MAKE) -j 8 all
+ -$(MAKE) -j 8 static
+ $(MAKE) -j 8 $(progs_extra)
+
+manpages:
+ $(Q)$(MAKE) $(MAKEOPTS) -C Documentation
+
+
+clean-all: clean clean-doc clean-gen
+
+clean: $(CLEANDIRS)
+ @echo "Cleaning"
+ $(Q)$(RM) -f $(progs) cscope.out *.o *.o.d \
+ dir-test ioctl-test quick-test send-test library-test library-test-static \
+ btrfs.static mkfs.btrfs.static \
+ $(check_defs) \
+ $(libs) $(lib_links) \
+ $(progs_static) $(progs_extra)
+
+clean-doc:
+ @echo "Cleaning Documentation"
+ $(Q)$(MAKE) $(MAKEOPTS) -C Documentation clean
+
+# Remove the files generated by autogen.sh and configure
+clean-gen:
+	@echo "Cleaning Generated Files"
+	$(Q)$(RM) -rf version.h config.status config.cache config.log \
+		configure.lineno config.status.lineno Makefile \
+		Documentation/Makefile config.h config.h.in~ aclocal.m4 \
+		configure autom4te.cache/ config/
+
+$(CLEANDIRS):
+ @echo "Cleaning $(patsubst clean-%,%,$@)"
+ $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst clean-%,%,$@) clean
+
+install: $(libs) $(progs_install) $(INSTALLDIRS)
+ $(INSTALL) -m755 -d $(DESTDIR)$(bindir)
+ $(INSTALL) $(progs_install) $(DESTDIR)$(bindir)
+ $(INSTALL) fsck.btrfs $(DESTDIR)$(bindir)
+ # btrfsck is a link to btrfs in the src tree, make it so for installed file as well
+ $(LN_S) -f btrfs $(DESTDIR)$(bindir)/btrfsck
+ $(INSTALL) -m755 -d $(DESTDIR)$(libdir)
+ $(INSTALL) $(libs) $(DESTDIR)$(libdir)
+ cp -a $(lib_links) $(DESTDIR)$(libdir)
+ $(INSTALL) -m755 -d $(DESTDIR)$(incdir)
+ $(INSTALL) -m644 $(headers) $(DESTDIR)$(incdir)
+
+install-static: $(progs_static) $(INSTALLDIRS)
+ $(INSTALL) -m755 -d $(DESTDIR)$(bindir)
+ $(INSTALL) $(progs_static) $(DESTDIR)$(bindir)
+ # btrfsck is a link to btrfs in the src tree, make it so for installed file as well
+ $(LN_S) -f btrfs.static $(DESTDIR)$(bindir)/btrfsck.static
+
+$(INSTALLDIRS):
+ @echo "Making install in $(patsubst install-%,%,$@)"
+ $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst install-%,%,$@) install
+
+uninstall:
+ $(Q)$(MAKE) $(MAKEOPTS) -C Documentation uninstall
+ cd $(DESTDIR)$(incdir); $(RM) -f $(headers)
+ $(RMDIR) -p --ignore-fail-on-non-empty $(DESTDIR)$(incdir)
+ cd $(DESTDIR)$(libdir); $(RM) -f $(lib_links) $(libs)
+ cd $(DESTDIR)$(bindir); $(RM) -f btrfsck fsck.btrfs $(progs_install)
+
+ifneq ($(MAKECMDGOALS),clean)
+-include $(objects:.o=.o.d) $(cmds_objects:.o=.o.d) $(subst .btrfs,, $(filter-out btrfsck.o.d, $(progs:=.o.d)))
+endif
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..2f9d4e7e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,41 @@
+Btrfs-progs
+===========
+
+Userspace utilities to manage btrfs filesystems.
+License: GPLv2.
+
+Btrfs is a copy on write (COW) filesystem for Linux aimed at implementing
+advanced features while focusing on fault tolerance, repair and easy
+administration.
+
+
+This repository hosts the following utilities:
+
+* **btrfs** &mdash; the main administration tool ([manual page](https://btrfs.wiki.kernel.org/index.php/Manpage/btrfs))
+* **mkfs.btrfs** &mdash; utility to create the filesystem ([manual page](https://btrfs.wiki.kernel.org/index.php/Manpage/mkfs.btrfs))
+
+See INSTALL for build instructions.
+
+Release cycle
+-------------
+
+The major version releases are time-based and follow the cycle of the linux
+kernel releases. The cycle usually takes 2 months. Minor version releases may
+happen in the meantime if there are queued bug fixes or minor useful
+improvements.
+
+Development
+-----------
+
+Patch submissions, development and general discussions take place on the
+*linux-btrfs@vger.kernel.org* mailing list; subscription is not required.
+
+References
+----------
+
+* [Wiki with more information](https://btrfs.wiki.kernel.org)
+* [Btrfs-progs changelogs](https://btrfs.wiki.kernel.org/index.php/Changelog#By_version_.28btrfs-progs.29)
+* [wiki/FAQ](https://btrfs.wiki.kernel.org/index.php/FAQ)
+* [wiki/Getting started](https://btrfs.wiki.kernel.org/index.php/Getting_started)
+* [wiki/TODO](https://btrfs.wiki.kernel.org/index.php/Project_ideas#Userspace_tools_projects)
+* [wiki/Developer's FAQ](https://btrfs.wiki.kernel.org/index.php/Developer's_FAQ)
diff --git a/androidcompat.h b/androidcompat.h
new file mode 100644
index 00000000..eec76dad
--- /dev/null
+++ b/androidcompat.h
@@ -0,0 +1,28 @@
+/*
+ * Compatibility layer for Android.
+ *
+ * Stub calls or alternate functions for pthreads.
+ */
+
+#ifndef __ANDROID_H__
+#define __ANDROID_H__
+
+#ifdef ANDROID
+
+#define pthread_setcanceltype(type, oldtype) (0)
+#define pthread_setcancelstate(state, oldstate) (0)
+
+#define pthread_cancel(ret) pthread_kill((ret), SIGUSR1)
+
+typedef struct blkid_struct_probe *blkid_probe;
+
+#include <dirent.h>
+#define direct dirent
+
+#else /* !ANDROID */
+
+#include <sys/dir.h>
+
+#endif /* !ANDROID */
+
+#endif /* __ANDROID_H__ */
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 00000000..96698502
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+#
+# Helps generate autoconf stuff, when code is checked out from SCM.
+#
+# Copyright (C) 2006-2014 - Karel Zak <kzak@redhat.com>
+#
+
+srcdir=`dirname "$0"`
+test -z "$srcdir" && srcdir=.
+# remember where we were called from; we cd back there when done
+THEDIR=`pwd`
+cd "$srcdir"
+DIE=0
+
+test -f btrfs.c || {
+ echo
+ echo "You must run this script in the top-level btrfs-progs directory"
+ echo
+ DIE=1
+}
+
+(autoconf --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have autoconf installed to generate btrfs-progs build system."
+ echo
+ DIE=1
+}
+(autoheader --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have autoheader installed to generate btrfs-progs build system."
+ echo "The autoheader command is part of the GNU autoconf package."
+ echo
+ DIE=1
+}
+
+(automake --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have automake installed to generate btrfs-progs build system."
+ echo
+ DIE=1
+}
+
+(pkg-config --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have pkg-config installed to use btrfs-progs build system."
+ echo "The pkg-config utility was not found in the standard location, set"
+ echo "the PKG_CONFIG/PKG_CONFIG_PATH/PKG_CONFIG_LIBDIR variables at the"
+ echo "configure time."
+ echo
+}
+
+if test "$DIE" -eq 1; then
+ exit 1
+fi
+
+echo
+echo "Generate build-system by:"
+echo " aclocal: $(aclocal --version | head -1)"
+echo " autoconf: $(autoconf --version | head -1)"
+echo " autoheader: $(autoheader --version | head -1)"
+echo " automake: $(automake --version | head -1)"
+
+chmod +x version.sh
+rm -rf autom4te.cache
+
+aclocal $AL_OPTS
+autoconf $AC_OPTS
+autoheader $AH_OPTS
+
+# it's better to use helper files from automake installation than
+# maintain copies in git tree
+find_autofile() {
+ if [ -f "$1" ]; then
+ return
+ fi
+ for HELPER_DIR in $(automake --print-libdir 2>/dev/null) \
+ /usr/share/libtool \
+ /usr/share/automake-* ; do
+ f="$HELPER_DIR/$1"
+ if [ -f "$f" ]; then
+ cp "$f" config/
+ return
+ fi
+ done
+ echo "Cannot find "$1" in known locations"
+ exit 1
+}
+
+mkdir -p config/
+find_autofile config.guess
+find_autofile config.sub
+find_autofile install-sh
+
+cd "$THEDIR"
+
+echo
+echo "Now type '$srcdir/configure' and 'make' to compile."
+echo
diff --git a/backref.c b/backref.c
new file mode 100644
index 00000000..8f41f829
--- /dev/null
+++ b/backref.c
@@ -0,0 +1,1651 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "backref.h"
+#include "ulist.h"
+#include "transaction.h"
+
+#define pr_debug(...) do { } while (0)
+
+struct extent_inode_elem {
+ u64 inum;
+ u64 offset;
+ struct extent_inode_elem *next;
+};
+
+static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
+ struct btrfs_file_extent_item *fi,
+ u64 extent_item_pos,
+ struct extent_inode_elem **eie)
+{
+ u64 offset = 0;
+ struct extent_inode_elem *e;
+
+ if (!btrfs_file_extent_compression(eb, fi) &&
+ !btrfs_file_extent_encryption(eb, fi) &&
+ !btrfs_file_extent_other_encoding(eb, fi)) {
+ u64 data_offset;
+ u64 data_len;
+
+ data_offset = btrfs_file_extent_offset(eb, fi);
+ data_len = btrfs_file_extent_num_bytes(eb, fi);
+
+ if (extent_item_pos < data_offset ||
+ extent_item_pos >= data_offset + data_len)
+ return 1;
+ offset = extent_item_pos - data_offset;
+ }
+
+ e = kmalloc(sizeof(*e), GFP_NOFS);
+ if (!e)
+ return -ENOMEM;
+
+ e->next = *eie;
+ e->inum = key->objectid;
+ e->offset = key->offset + offset;
+ *eie = e;
+
+ return 0;
+}
+
+static void free_inode_elem_list(struct extent_inode_elem *eie)
+{
+ struct extent_inode_elem *eie_next;
+
+ for (; eie; eie = eie_next) {
+ eie_next = eie->next;
+ kfree(eie);
+ }
+}
+
+static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
+ u64 extent_item_pos,
+ struct extent_inode_elem **eie)
+{
+ u64 disk_byte;
+ struct btrfs_key key;
+ struct btrfs_file_extent_item *fi;
+ int slot;
+ int nritems;
+ int extent_type;
+ int ret;
+
+ /*
+ * from the shared data ref, we only have the leaf but we need
+ * the key. thus, we must look into all items and see that we
+ * find one (some) with a reference to our extent item.
+ */
+ nritems = btrfs_header_nritems(eb);
+ for (slot = 0; slot < nritems; ++slot) {
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(eb, fi);
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+ continue;
+ /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+ if (disk_byte != wanted_disk_byte)
+ continue;
+
+ ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * this structure records all encountered refs on the way up to the root
+ */
+struct __prelim_ref {
+ struct list_head list;
+ u64 root_id;
+ struct btrfs_key key_for_search;
+ int level;
+ int count;
+ struct extent_inode_elem *inode_list;
+ u64 parent;
+ u64 wanted_disk_byte;
+};
+
+/*
+ * the rules for all callers of this function are:
+ * - obtaining the parent is the goal
+ * - if you add a key, you must know that it is a correct key
+ * - if you cannot add the parent or a correct key, then we will look into the
+ * block later to set a correct key
+ *
+ * delayed refs
+ * ============
+ * backref type | shared | indirect | shared | indirect
+ * information | tree | tree | data | data
+ * --------------------+--------+----------+--------+----------
+ * parent logical | y | - | - | -
+ * key to resolve | - | y | y | y
+ * tree block logical | - | - | - | -
+ * root for resolving | y | y | y | y
+ *
+ * - column 1: we've the parent -> done
+ * - column 2, 3, 4: we use the key to find the parent
+ *
+ * on disk refs (inline or keyed)
+ * ==============================
+ * backref type | shared | indirect | shared | indirect
+ * information | tree | tree | data | data
+ * --------------------+--------+----------+--------+----------
+ * parent logical | y | - | y | -
+ * key to resolve | - | - | - | y
+ * tree block logical | y | y | y | y
+ * root for resolving | - | y | y | y
+ *
+ * - column 1, 3: we've the parent -> done
+ * - column 2: we take the first key from the block to find the parent
+ * (see __add_missing_keys)
+ * - column 4: we use the key to find the parent
+ *
+ * additional information that's available but not required to find the parent
+ * block might help in merging entries to gain some speed.
+ */
+
+static int __add_prelim_ref(struct list_head *head, u64 root_id,
+ struct btrfs_key *key, int level,
+ u64 parent, u64 wanted_disk_byte, int count,
+ gfp_t gfp_mask)
+{
+ struct __prelim_ref *ref;
+
+ if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return 0;
+
+ ref = kmalloc(sizeof(*ref), gfp_mask);
+ if (!ref)
+ return -ENOMEM;
+
+ ref->root_id = root_id;
+ if (key)
+ ref->key_for_search = *key;
+ else
+ memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
+
+ ref->inode_list = NULL;
+ ref->level = level;
+ ref->count = count;
+ ref->parent = parent;
+ ref->wanted_disk_byte = wanted_disk_byte;
+ list_add_tail(&ref->list, head);
+
+ return 0;
+}
+
+static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
+ struct ulist *parents, struct __prelim_ref *ref,
+ int level, u64 time_seq, const u64 *extent_item_pos,
+ u64 total_refs)
+{
+ int ret = 0;
+ int slot;
+ struct extent_buffer *eb;
+ struct btrfs_key key;
+ struct btrfs_key *key_for_search = &ref->key_for_search;
+ struct btrfs_file_extent_item *fi;
+ struct extent_inode_elem *eie = NULL, *old = NULL;
+ u64 disk_byte;
+ u64 wanted_disk_byte = ref->wanted_disk_byte;
+ u64 count = 0;
+
+ if (level != 0) {
+ eb = path->nodes[level];
+ ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
+ if (ret < 0)
+ return ret;
+ return 0;
+ }
+
+ /*
+ * We normally enter this function with the path already pointing to
+ * the first item to check. But sometimes, we may enter it with
+ * slot==nritems. In that case, go to the next leaf before we continue.
+ */
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
+ ret = btrfs_next_leaf(root, path);
+
+ while (!ret && count < total_refs) {
+ eb = path->nodes[0];
+ slot = path->slots[0];
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+
+ if (key.objectid != key_for_search->objectid ||
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+
+ if (disk_byte == wanted_disk_byte) {
+ eie = NULL;
+ old = NULL;
+ count++;
+ if (extent_item_pos) {
+ ret = check_extent_in_eb(&key, eb, fi,
+ *extent_item_pos,
+ &eie);
+ if (ret < 0)
+ break;
+ }
+ if (ret > 0)
+ goto next;
+ ret = ulist_add_merge_ptr(parents, eb->start,
+ eie, (void **)&old, GFP_NOFS);
+ if (ret < 0)
+ break;
+ if (!ret && extent_item_pos) {
+ while (old->next)
+ old = old->next;
+ old->next = eie;
+ }
+ eie = NULL;
+ }
+next:
+ ret = btrfs_next_item(root, path);
+ }
+
+ if (ret > 0)
+ ret = 0;
+ else if (ret < 0)
+ free_inode_elem_list(eie);
+ return ret;
+}
+
+/*
+ * resolve an indirect backref in the form (root_id, key, level)
+ * to a logical address
+ */
+static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 time_seq,
+ struct __prelim_ref *ref,
+ struct ulist *parents,
+ const u64 *extent_item_pos, u64 total_refs)
+{
+ struct btrfs_root *root;
+ struct btrfs_key root_key;
+ struct extent_buffer *eb;
+ int ret = 0;
+ int root_level;
+ int level = ref->level;
+
+ root_key.objectid = ref->root_id;
+ root_key.type = BTRFS_ROOT_ITEM_KEY;
+ root_key.offset = (u64)-1;
+
+ root = btrfs_read_fs_root(fs_info, &root_key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+
+ root_level = btrfs_root_level(&root->root_item);
+
+ if (root_level + 1 == level)
+ goto out;
+
+ path->lowest_level = level;
+ ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, 0, 0);
+
+ pr_debug("search slot in root %llu (level %d, ref count %d) returned "
+ "%d for key (%llu %u %llu)\n",
+ ref->root_id, level, ref->count, ret,
+ ref->key_for_search.objectid, ref->key_for_search.type,
+ ref->key_for_search.offset);
+ if (ret < 0)
+ goto out;
+
+ eb = path->nodes[level];
+ while (!eb) {
+ if (!level) {
+ ret = 1;
+ WARN_ON(1);
+ goto out;
+ }
+ level--;
+ eb = path->nodes[level];
+ }
+
+ ret = add_all_parents(root, path, parents, ref, level, time_seq,
+ extent_item_pos, total_refs);
+out:
+ path->lowest_level = 0;
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
+ * resolve all indirect backrefs from the list
+ */
+static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 time_seq,
+ struct list_head *head,
+ const u64 *extent_item_pos, u64 total_refs)
+{
+ int err;
+ int ret = 0;
+ struct __prelim_ref *ref;
+ struct __prelim_ref *ref_safe;
+ struct __prelim_ref *new_ref;
+ struct ulist *parents;
+ struct ulist_node *node;
+ struct ulist_iterator uiter;
+
+ parents = ulist_alloc(GFP_NOFS);
+ if (!parents)
+ return -ENOMEM;
+
+ /*
+ * _safe allows us to insert directly after the current item without
+ * iterating over the newly inserted items.
+ * we're also allowed to re-assign ref during iteration.
+ */
+ list_for_each_entry_safe(ref, ref_safe, head, list) {
+ if (ref->parent) /* already direct */
+ continue;
+ if (ref->count == 0)
+ continue;
+ err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
+ parents, extent_item_pos,
+ total_refs);
+ /*
+ * we can only tolerate ENOENT; any other error is caught and
+ * returned directly.
+ */
+ if (err == -ENOENT) {
+ continue;
+ } else if (err) {
+ ret = err;
+ goto out;
+ }
+
+ /* we put the first parent into the ref at hand */
+ ULIST_ITER_INIT(&uiter);
+ node = ulist_next(parents, &uiter);
+ ref->parent = node ? node->val : 0;
+ ref->inode_list = node ?
+ (struct extent_inode_elem *)(uintptr_t)node->aux : NULL;
+
+ /* additional parents require new refs being added here */
+ while ((node = ulist_next(parents, &uiter))) {
+ new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
+ if (!new_ref) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memcpy(new_ref, ref, sizeof(*ref));
+ new_ref->parent = node->val;
+ new_ref->inode_list = (struct extent_inode_elem *)
+ (uintptr_t)node->aux;
+ list_add(&new_ref->list, &ref->list);
+ }
+ ulist_reinit(parents);
+ }
+out:
+ ulist_free(parents);
+ return ret;
+}
+
+static inline int ref_for_same_block(struct __prelim_ref *ref1,
+ struct __prelim_ref *ref2)
+{
+ if (ref1->level != ref2->level)
+ return 0;
+ if (ref1->root_id != ref2->root_id)
+ return 0;
+ if (ref1->key_for_search.type != ref2->key_for_search.type)
+ return 0;
+ if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
+ return 0;
+ if (ref1->key_for_search.offset != ref2->key_for_search.offset)
+ return 0;
+ if (ref1->parent != ref2->parent)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * read tree blocks and add keys where required.
+ */
+static int __add_missing_keys(struct btrfs_fs_info *fs_info,
+ struct list_head *head)
+{
+ struct list_head *pos;
+ struct extent_buffer *eb;
+
+ list_for_each(pos, head) {
+ struct __prelim_ref *ref;
+ ref = list_entry(pos, struct __prelim_ref, list);
+
+ if (ref->parent)
+ continue;
+ if (ref->key_for_search.type)
+ continue;
+ BUG_ON(!ref->wanted_disk_byte);
+ eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
+ fs_info->tree_root->leafsize, 0);
+ if (!extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ return -EIO;
+ }
+ if (btrfs_header_level(eb) == 0)
+ btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
+ else
+ btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
+ free_extent_buffer(eb);
+ }
+ return 0;
+}
+
+/*
+ * merge two lists of backrefs and adjust counts accordingly
+ *
+ * mode = 1: merge identical keys, if key is set
+ * FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
+ * additionally, we could even add a key range for the blocks we
+ * looked into to merge even more (-> replace unresolved refs by those
+ * having a parent).
+ * mode = 2: merge identical parents
+ */
+static void __merge_refs(struct list_head *head, int mode)
+{
+ struct list_head *pos1;
+
+ list_for_each(pos1, head) {
+ struct list_head *n2;
+ struct list_head *pos2;
+ struct __prelim_ref *ref1;
+
+ ref1 = list_entry(pos1, struct __prelim_ref, list);
+
+ for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
+ pos2 = n2, n2 = pos2->next) {
+ struct __prelim_ref *ref2;
+ struct __prelim_ref *xchg;
+ struct extent_inode_elem *eie;
+
+ ref2 = list_entry(pos2, struct __prelim_ref, list);
+
+ if (mode == 1) {
+ if (!ref_for_same_block(ref1, ref2))
+ continue;
+ if (!ref1->parent && ref2->parent) {
+ xchg = ref1;
+ ref1 = ref2;
+ ref2 = xchg;
+ }
+ } else {
+ if (ref1->parent != ref2->parent)
+ continue;
+ }
+
+ eie = ref1->inode_list;
+ while (eie && eie->next)
+ eie = eie->next;
+ if (eie)
+ eie->next = ref2->inode_list;
+ else
+ ref1->inode_list = ref2->inode_list;
+ ref1->count += ref2->count;
+
+ list_del(&ref2->list);
+ kfree(ref2);
+ }
+
+ }
+}
+
+/*
+ * add all inline backrefs for bytenr to the list
+ *
+ * path must point at the extent item (EXTENT_ITEM or METADATA_ITEM) whose
+ * inline refs are to be walked. The reference count stored in that item is
+ * added to *total_refs and, for tree blocks, the block level is stored in
+ * *info_level. Every inline ref found is queued on prefs through
+ * __add_prelim_ref(). fs_info is not used here.
+ *
+ * returns 0 on success or the first non-zero value returned by
+ * __add_prelim_ref()
+ */
+static int __add_inline_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ int *info_level, struct list_head *prefs,
+ u64 *total_refs)
+{
+ int ret = 0;
+ int slot;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ unsigned long ptr;
+ unsigned long end;
+ struct btrfs_extent_item *ei;
+ u64 flags;
+ u64 item_size;
+
+ /*
+ * enumerate all inline refs
+ */
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ item_size = btrfs_item_size_nr(leaf, slot);
+ BUG_ON(item_size < sizeof(*ei));
+
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+ flags = btrfs_extent_flags(leaf, ei);
+ *total_refs += btrfs_extent_refs(leaf, ei);
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+ /* the inline refs live between the end of the extent item and end */
+ ptr = (unsigned long)(ei + 1);
+ end = (unsigned long)ei + item_size;
+
+ if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
+ flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ struct btrfs_tree_block_info *info;
+
+ /* a tree block EXTENT_ITEM carries a tree_block_info first */
+ info = (struct btrfs_tree_block_info *)ptr;
+ *info_level = btrfs_tree_block_level(leaf, info);
+ ptr += sizeof(struct btrfs_tree_block_info);
+ BUG_ON(ptr > end);
+ } else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
+ /* a METADATA_ITEM has no tree_block_info, the level is the key offset */
+ *info_level = found_key.offset;
+ } else {
+ BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
+ }
+
+ while (ptr < end) {
+ struct btrfs_extent_inline_ref *iref;
+ u64 offset;
+ int type;
+
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ type = btrfs_extent_inline_ref_type(leaf, iref);
+ offset = btrfs_extent_inline_ref_offset(leaf, iref);
+
+ /*
+ * NOTE(review): the meaning of the __add_prelim_ref() arguments
+ * (presumably root id, search key, level, parent bytenr, wanted
+ * bytenr, count) is not visible here - confirm at its definition.
+ */
+ switch (type) {
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, 0, NULL,
+ *info_level + 1, offset,
+ bytenr, 1, GFP_NOFS);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_shared_data_ref *sdref;
+ int count;
+
+ /* the shared data ref payload follows the inline ref header */
+ sdref = (struct btrfs_shared_data_ref *)(iref + 1);
+ count = btrfs_shared_data_ref_count(leaf, sdref);
+ ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
+ bytenr, count, GFP_NOFS);
+ break;
+ }
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, offset, NULL,
+ *info_level + 1, 0,
+ bytenr, 1, GFP_NOFS);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_extent_data_ref *dref;
+ int count;
+ u64 root;
+
+ /* the extent data ref overlays the offset field of the inline ref */
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ count = btrfs_extent_data_ref_count(leaf, dref);
+ key.objectid = btrfs_extent_data_ref_objectid(leaf,
+ dref);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+ bytenr, count, GFP_NOFS);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+ if (ret)
+ return ret;
+ /* the size of an inline ref depends on its type */
+ ptr += btrfs_extent_inline_ref_size(type);
+ }
+
+ return 0;
+}
+
+/*
+ * add all non-inline backrefs for bytenr to the list
+ *
+ * Starting from the item path currently points at, this walks forward through
+ * the extent tree and queues every backref item keyed by bytenr on prefs.
+ * info_level is the tree level of the block at bytenr, as used for the tree
+ * block ref types below.
+ *
+ * returns 0 when the walk ends normally, <0 if btrfs_next_item() fails, or the
+ * first non-zero value returned by __add_prelim_ref()
+ */
+static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ int info_level, struct list_head *prefs)
+{
+ struct btrfs_root *extent_root = fs_info->extent_root;
+ int ret;
+ int slot;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+
+ while (1) {
+ ret = btrfs_next_item(extent_root, path);
+ if (ret < 0)
+ break;
+ /* a positive return is not an error: the walk simply ends here */
+ if (ret) {
+ ret = 0;
+ break;
+ }
+
+ slot = path->slots[0];
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ /* stop at the first item that belongs to another bytenr */
+ if (key.objectid != bytenr)
+ break;
+ /* only key types in the backref range are of interest */
+ if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
+ continue;
+ if (key.type > BTRFS_SHARED_DATA_REF_KEY)
+ break;
+
+ switch (key.type) {
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, 0, NULL,
+ info_level + 1, key.offset,
+ bytenr, 1, GFP_NOFS);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_shared_data_ref *sdref;
+ int count;
+
+ sdref = btrfs_item_ptr(leaf, slot,
+ struct btrfs_shared_data_ref);
+ count = btrfs_shared_data_ref_count(leaf, sdref);
+ ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
+ bytenr, count, GFP_NOFS);
+ break;
+ }
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, key.offset, NULL,
+ info_level + 1, 0,
+ bytenr, 1, GFP_NOFS);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_extent_data_ref *dref;
+ int count;
+ u64 root;
+
+ dref = btrfs_item_ptr(leaf, slot,
+ struct btrfs_extent_data_ref);
+ count = btrfs_extent_data_ref_count(leaf, dref);
+ /* key is reused to describe the referencing EXTENT_DATA item */
+ key.objectid = btrfs_extent_data_ref_objectid(leaf,
+ dref);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+ bytenr, count, GFP_NOFS);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+ if (ret)
+ return ret;
+
+ }
+
+ return ret;
+}
+
+/*
+ * this adds all existing backrefs (inline backrefs, backrefs and delayed
+ * refs) for the given bytenr to the refs list, merges duplicates and resolves
+ * indirect refs to their parent bytenr.
+ * When roots are found, they're added to the roots list
+ *
+ * NOTE(review): no delayed refs are collected by the code below; only inline
+ * and keyed backrefs from the extent tree are read.
+ *
+ * roots may be NULL, in which case no roots are recorded. When
+ * extent_item_pos is non-NULL, inode lists are attached to the entries added
+ * to refs.
+ *
+ * returns <0 on error; otherwise the value of the last helper call
+ *
+ * FIXME some caching might speed things up
+ */
+static int find_parent_nodes(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist *refs,
+ struct ulist *roots, const u64 *extent_item_pos)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ int info_level = 0;
+ int ret;
+ struct list_head prefs;
+ struct __prelim_ref *ref;
+ struct extent_inode_elem *eie = NULL;
+ u64 total_refs = 0;
+
+ INIT_LIST_HEAD(&prefs);
+
+ /*
+ * search with the largest possible offset: an exact match is not
+ * expected (see the BUG_ON below), the wanted item is the one right
+ * before the slot the search returns
+ */
+ key.objectid = bytenr;
+ key.offset = (u64)-1;
+ if (btrfs_fs_incompat(fs_info,
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ else
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret == 0);
+
+ if (path->slots[0]) {
+ struct extent_buffer *leaf;
+ int slot;
+
+ /* step back to the item preceding the insertion position */
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid == bytenr &&
+ (key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY)) {
+ ret = __add_inline_refs(fs_info, path, bytenr,
+ &info_level, &prefs,
+ &total_refs);
+ if (ret)
+ goto out;
+ ret = __add_keyed_refs(fs_info, path, bytenr,
+ info_level, &prefs);
+ if (ret)
+ goto out;
+ }
+ }
+ btrfs_release_path(path);
+
+ ret = __add_missing_keys(fs_info, &prefs);
+ if (ret)
+ goto out;
+
+ /* first pass: merge refs that describe the same block */
+ __merge_refs(&prefs, 1);
+
+ ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
+ extent_item_pos, total_refs);
+ if (ret)
+ goto out;
+
+ /* second pass: merge refs that ended up with the same parent */
+ __merge_refs(&prefs, 2);
+
+ /* consume the list, handing results over to roots and refs */
+ while (!list_empty(&prefs)) {
+ ref = list_first_entry(&prefs, struct __prelim_ref, list);
+ WARN_ON(ref->count < 0);
+ if (roots && ref->count && ref->root_id && ref->parent == 0) {
+ /* no parent == root of tree */
+ ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
+ if (ret < 0)
+ goto out;
+ }
+ if (ref->count && ref->parent) {
+ /*
+ * the caller wants inode information but this leaf level
+ * ref has none yet: read the parent block and look the
+ * extent up in it
+ */
+ if (extent_item_pos && !ref->inode_list &&
+ ref->level == 0) {
+ u32 bsz;
+ struct extent_buffer *eb;
+ bsz = btrfs_level_size(fs_info->extent_root,
+ ref->level);
+ eb = read_tree_block(fs_info->extent_root,
+ ref->parent, bsz, 0);
+ if (!extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ ret = -EIO;
+ goto out;
+ }
+ ret = find_extent_in_eb(eb, bytenr,
+ *extent_item_pos, &eie);
+ free_extent_buffer(eb);
+ if (ret < 0)
+ goto out;
+ ref->inode_list = eie;
+ }
+ /* eie receives the aux pointer already stored for this parent, if any */
+ ret = ulist_add_merge_ptr(refs, ref->parent,
+ ref->inode_list,
+ (void **)&eie, GFP_NOFS);
+ if (ret < 0)
+ goto out;
+ if (!ret && extent_item_pos) {
+ /*
+ * we've recorded that parent, so we must extend
+ * its inode list here
+ */
+ BUG_ON(!eie);
+ while (eie->next)
+ eie = eie->next;
+ eie->next = ref->inode_list;
+ }
+ eie = NULL;
+ }
+ list_del(&ref->list);
+ kfree(ref);
+ }
+
+out:
+ btrfs_free_path(path);
+ /* on the error paths some prelim refs may still be queued */
+ while (!list_empty(&prefs)) {
+ ref = list_first_entry(&prefs, struct __prelim_ref, list);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+ if (ret < 0)
+ free_inode_elem_list(eie);
+ return ret;
+}
+
+/*
+ * Release a ulist of leafs together with the inode element list that each
+ * node may carry in its aux value.
+ */
+static void free_leaf_list(struct ulist *blocks)
+{
+	struct ulist_iterator it;
+	struct ulist_node *cur;
+
+	ULIST_ITER_INIT(&it);
+	for (cur = ulist_next(blocks, &it); cur;
+	     cur = ulist_next(blocks, &it)) {
+		if (cur->aux) {
+			/* aux stores the head of an extent_inode_elem chain */
+			free_inode_elem_list(
+				(struct extent_inode_elem *)(uintptr_t)cur->aux);
+			cur->aux = 0;
+		}
+	}
+
+	ulist_free(blocks);
+}
+
+/*
+ * Finds all leafs with a reference to the specified combination of bytenr and
+ * offset. The leafs are stored in a ulist that is newly allocated into *leafs.
+ * The aux value of its nodes may point to inode element lists, so the ulist
+ * must be released with free_leaf_list() (as done below on failure) rather
+ * than with a plain ulist_free().
+ *
+ * -ENOENT from find_parent_nodes() is not treated as an error.
+ *
+ * returns 0 on success, <0 on error
+ */
+static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist **leafs,
+ const u64 *extent_item_pos)
+{
+ int ret;
+
+ *leafs = ulist_alloc(GFP_NOFS);
+ if (!*leafs)
+ return -ENOMEM;
+
+ /* roots is NULL: only the referencing leafs are collected here */
+ ret = find_parent_nodes(trans, fs_info, bytenr,
+ time_seq, *leafs, NULL, extent_item_pos);
+ if (ret < 0 && ret != -ENOENT) {
+ free_leaf_list(*leafs);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * walk all backrefs for a given extent to find all roots that reference this
+ * extent. Walking a backref means finding all extents that reference this
+ * extent and in turn walk the backrefs of those, too. Naturally this is a
+ * recursive process, but here it is implemented in an iterative fashion: We
+ * find all referencing extents for the extent in question and put them on a
+ * list. In turn, we find all referencing extents for those, further appending
+ * to the list. The way we iterate the list allows adding more elements after
+ * the current while iterating. The process stops when we reach the end of the
+ * list. Found roots are added to the roots list.
+ *
+ * On success *roots points to a newly allocated ulist that the caller must
+ * release with ulist_free(). If the roots list cannot be allocated or the
+ * walk fails, *roots is set to NULL so that the caller is never left with a
+ * pointer to freed memory.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *fs_info, u64 bytenr,
+				u64 time_seq, struct ulist **roots)
+{
+	struct ulist *tmp;
+	struct ulist_node *node = NULL;
+	struct ulist_iterator uiter;
+	int ret;
+
+	/* tmp is the work list of blocks whose parents still need a visit */
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		return -ENOMEM;
+	*roots = ulist_alloc(GFP_NOFS);
+	if (!*roots) {
+		ulist_free(tmp);
+		return -ENOMEM;
+	}
+
+	ULIST_ITER_INIT(&uiter);
+	while (1) {
+		ret = find_parent_nodes(trans, fs_info, bytenr,
+					time_seq, tmp, *roots, NULL);
+		/* -ENOENT is tolerated: the walk continues with the next block */
+		if (ret < 0 && ret != -ENOENT) {
+			ulist_free(tmp);
+			ulist_free(*roots);
+			/* do not hand a dangling pointer back to the caller */
+			*roots = NULL;
+			return ret;
+		}
+		node = ulist_next(tmp, &uiter);
+		if (!node)
+			break;
+		bytenr = node->val;
+		cond_resched();
+	}
+
+	ulist_free(tmp);
+	return 0;
+}
+
+/*
+ * Exported entry point for __btrfs_find_all_roots(): all arguments are passed
+ * through unchanged and its return value is returned as is.
+ */
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist **roots)
+{
+ return __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots);
+}
+
+/*
+ * this makes the path point to (inum INODE_ITEM ioff)
+ *
+ * The lookup is done by btrfs_find_item() and its return value is passed on
+ * unchanged. The key it reports is stored in a local and discarded.
+ */
+int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+ struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ return btrfs_find_item(fs_root, path, inum, ioff,
+ BTRFS_INODE_ITEM_KEY, &key);
+}
+
+/*
+ * Looks up an INODE_REF item for inum using ioff as the key offset. The key
+ * reported by btrfs_find_item() is stored in found_key and the return value
+ * of btrfs_find_item() is passed on unchanged.
+ */
+static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+ struct btrfs_path *path,
+ struct btrfs_key *found_key)
+{
+ return btrfs_find_item(fs_root, path, inum, ioff,
+ BTRFS_INODE_REF_KEY, found_key);
+}
+
+/*
+ * Find the first INODE_EXTREF item of inode_objectid whose key offset is at
+ * or after start_off. On success path points at the item, *ret_extref points
+ * into the leaf at the item data and, if found_off is not NULL, *found_off
+ * holds the key offset of the item.
+ *
+ * returns 0 if an item was found, -ENOENT if there is none, or another
+ * negative value if the tree search fails
+ */
+int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
+			  u64 start_off, struct btrfs_path *path,
+			  struct btrfs_inode_extref **ret_extref,
+			  u64 *found_off)
+{
+	struct btrfs_key search_key;
+	struct btrfs_key cur_key;
+	struct extent_buffer *leaf;
+	int slot;
+	int ret;
+
+	search_key.objectid = inode_objectid;
+	btrfs_set_key_type(&search_key, BTRFS_INODE_EXTREF_KEY);
+	search_key.offset = start_off;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * When no exact match exists, the search leaves us at the insertion
+	 * position, which may be one past the last item of a leaf. Move on
+	 * to the following leaf until the slot refers to a real item.
+	 */
+	for (;;) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		if (slot < btrfs_header_nritems(leaf))
+			break;
+
+		ret = btrfs_next_leaf(root, path);
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
+			return -ENOENT;
+	}
+
+	btrfs_item_key_to_cpu(leaf, &cur_key, slot);
+
+	/*
+	 * An item of another inode or of another type means that no further
+	 * extended ref exists for this inode.
+	 */
+	if (cur_key.objectid != inode_objectid ||
+	    btrfs_key_type(&cur_key) != BTRFS_INODE_EXTREF_KEY)
+		return -ENOENT;
+
+	*ret_extref = (struct btrfs_inode_extref *)
+			btrfs_item_ptr_offset(leaf, slot);
+	if (found_off)
+		*found_off = cur_key.offset;
+
+	return 0;
+}
+
+/*
+ * this iterates to turn a name (from iref/extref) into a full filesystem path.
+ * Elements of the path are separated by '/' and the path is guaranteed to be
+ * 0-terminated. the path is only given within the current file system.
+ * Therefore, it never starts with a '/'. the caller is responsible to provide
+ * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
+ * the start point of the resulting string is returned. this pointer is within
+ * dest, normally.
+ * in case the path buffer would overflow, the pointer is decremented further
+ * as if output was written to the buffer, though no more output is actually
+ * generated. that way, the caller can determine how much space would be
+ * required for the path to fit into the buffer. in that case, the returned
+ * value will be smaller than dest. callers must check this!
+ */
+char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
+ u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb_in, u64 parent,
+ char *dest, u32 size)
+{
+ int slot;
+ u64 next_inum;
+ int ret;
+ /* signed on purpose: it goes negative once the buffer is exhausted */
+ s64 bytes_left = ((s64)size) - 1;
+ struct extent_buffer *eb = eb_in;
+ struct btrfs_key found_key;
+ struct btrfs_inode_ref *iref;
+
+ /* the terminating 0 byte occupies the last byte of dest */
+ if (bytes_left >= 0)
+ dest[bytes_left] = '\0';
+
+ while (1) {
+ /* prepend the current name, but only while it still fits */
+ bytes_left -= name_len;
+ if (bytes_left >= 0)
+ read_extent_buffer(eb, dest + bytes_left,
+ name_off, name_len);
+ /* drop the reference taken in the previous iteration */
+ if (eb != eb_in)
+ free_extent_buffer(eb);
+ ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret)
+ break;
+
+ /* the key offset of the found INODE_REF is used as the next inode up */
+ next_inum = found_key.offset;
+
+ /* regular exit ahead */
+ if (parent == next_inum)
+ break;
+
+ slot = path->slots[0];
+ eb = path->nodes[0];
+ /* make sure we can use eb after releasing the path */
+ if (eb != eb_in)
+ eb->refs++;
+ btrfs_release_path(path);
+ iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
+
+ /* the name is stored directly behind the inode ref structure */
+ name_len = btrfs_inode_ref_name_len(eb, iref);
+ name_off = (unsigned long)(iref + 1);
+
+ parent = next_inum;
+ /* account for the separator even when it can no longer be written */
+ --bytes_left;
+ if (bytes_left >= 0)
+ dest[bytes_left] = '/';
+ }
+
+ btrfs_release_path(path);
+
+ if (ret)
+ return ERR_PTR(ret);
+
+ /* lies before dest when the buffer was too small */
+ return dest + bytes_left;
+}
+
+/*
+ * this makes the path point to (logical EXTENT_ITEM *)
+ * On success 0 is returned, found_key holds the key of the extent item and
+ * *flags_ret is set to BTRFS_EXTENT_FLAG_DATA for data or to
+ * BTRFS_EXTENT_FLAG_TREE_BLOCK for tree blocks.
+ * returns <0 on error: -ENOENT if logical does not lie within any extent
+ * item, -EIO if flags_ret is NULL.
+ */
+int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
+ struct btrfs_path *path, struct btrfs_key *found_key,
+ u64 *flags_ret)
+{
+ int ret;
+ u64 flags;
+ u64 size = 0;
+ u32 item_size;
+ struct extent_buffer *eb;
+ struct btrfs_extent_item *ei;
+ struct btrfs_key key;
+
+ if (btrfs_fs_incompat(fs_info,
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ else
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.objectid = logical;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ /* a positive return means that no previous extent item exists */
+ ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
+ if (ret) {
+ if (ret > 0)
+ ret = -ENOENT;
+ return ret;
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
+ /* a METADATA_ITEM key carries no length, so leafsize is used as the size */
+ if (found_key->type == BTRFS_METADATA_ITEM_KEY)
+ size = fs_info->extent_root->leafsize;
+ else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
+ size = found_key->offset;
+
+ /* logical must lie within [objectid, objectid + size) */
+ if (found_key->objectid > logical ||
+ found_key->objectid + size <= logical) {
+ pr_debug("logical %llu is not within any extent\n", logical);
+ return -ENOENT;
+ }
+
+ eb = path->nodes[0];
+ item_size = btrfs_item_size_nr(eb, path->slots[0]);
+ BUG_ON(item_size < sizeof(*ei));
+
+ ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
+ flags = btrfs_extent_flags(eb, ei);
+
+ pr_debug("logical %llu is at position %llu within the extent (%llu "
+ "EXTENT_ITEM %llu) flags %#llx size %u\n",
+ logical, logical - found_key->objectid, found_key->objectid,
+ found_key->offset, flags, item_size);
+
+ if (flags_ret) {
+ /* reduce the on-disk flags to exactly one of the two extent kinds */
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+ *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK;
+ else if (flags & BTRFS_EXTENT_FLAG_DATA)
+ *flags_ret = BTRFS_EXTENT_FLAG_DATA;
+ else
+ BUG_ON(1);
+ return 0;
+ } else {
+ /* a caller that passes no flags_ret gets a warning and an error */
+ WARN_ON(1);
+ return -EIO;
+ }
+}
+
+/*
+ * helper function to iterate extent inline refs. ptr must point to a 0 value
+ * for the first call and may be modified. it is used to track state.
+ * if more refs exist, 0 is returned and the next call to
+ * __get_extent_inline_ref must pass the modified ptr parameter to get the
+ * next ref. after the last ref was processed, 1 is returned.
+ * returns <0 on error
+ */
+static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
+ struct btrfs_key *key,
+ struct btrfs_extent_item *ei, u32 item_size,
+ struct btrfs_extent_inline_ref **out_eiref,
+ int *out_type)
+{
+ unsigned long end;
+ u64 flags;
+ struct btrfs_tree_block_info *info;
+
+ if (!*ptr) {
+ /* first call */
+ flags = btrfs_extent_flags(eb, ei);
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ if (key->type == BTRFS_METADATA_ITEM_KEY) {
+ /* a skinny metadata extent */
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(ei + 1);
+ } else {
+ /* a tree_block_info sits between ei and the first inline ref */
+ WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(info + 1);
+ }
+ } else {
+ /* data extent: the inline refs directly follow the extent item */
+ *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ }
+ *ptr = (unsigned long)*out_eiref;
+ /* the item holds no inline ref at all */
+ if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
+ return -ENOENT;
+ }
+
+ end = (unsigned long)ei + item_size;
+ *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
+ *out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
+
+ /* advance the cursor past the ref that is being returned */
+ *ptr += btrfs_extent_inline_ref_size(*out_type);
+ WARN_ON(*ptr > end);
+ if (*ptr == end)
+ return 1; /* last */
+
+ return 0;
+}
+
+/*
+ * reads the tree block backref for an extent. tree level and root are returned
+ * through out_level and out_root. ptr must point to a 0 value for the first
+ * call and may be modified (see __get_extent_inline_ref comment).
+ *
+ * key is the key of the extent item ei. It decides where the level is read
+ * from: a skinny METADATA_ITEM has no btrfs_tree_block_info and stores the
+ * level in its key offset, whereas an EXTENT_ITEM carries the level in the
+ * btrfs_tree_block_info that follows the extent item.
+ *
+ * returns 0 if data was provided, 1 if there was no more data to provide or
+ * <0 on error.
+ */
+int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
+			    struct btrfs_key *key, struct btrfs_extent_item *ei,
+			    u32 item_size, u64 *out_root, u8 *out_level)
+{
+	int ret;
+	int type;
+	struct btrfs_extent_inline_ref *eiref;
+
+	/* the previous call delivered the last ref */
+	if (*ptr == (unsigned long)-1)
+		return 1;
+
+	/* skip inline refs that are not tree block backrefs */
+	while (1) {
+		ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
+					      &eiref, &type);
+		if (ret < 0)
+			return ret;
+
+		if (type == BTRFS_TREE_BLOCK_REF_KEY ||
+		    type == BTRFS_SHARED_BLOCK_REF_KEY)
+			break;
+
+		if (ret == 1)
+			return 1;
+	}
+
+	/* we can treat both ref types equally here */
+	*out_root = btrfs_extent_inline_ref_offset(eb, eiref);
+
+	if (key->type == BTRFS_METADATA_ITEM_KEY) {
+		/*
+		 * skinny metadata: the bytes after ei are already inline refs,
+		 * reading them as a btrfs_tree_block_info would yield garbage
+		 */
+		*out_level = (u8)key->offset;
+	} else {
+		struct btrfs_tree_block_info *info;
+
+		info = (struct btrfs_tree_block_info *)(ei + 1);
+		*out_level = btrfs_tree_block_level(eb, info);
+	}
+
+	/* remember that this was the last ref so the next call returns 1 */
+	if (ret == 1)
+		*ptr = (unsigned long)-1;
+
+	return 0;
+}
+
+/*
+ * Call iterate() once for every element of inode_list, passing the inode
+ * number and offset of the element together with root and ctx. The walk ends
+ * early as soon as iterate() returns a non-zero value.
+ *
+ * returns 0 if the whole list was processed, otherwise the non-zero value
+ * returned by iterate()
+ */
+static int iterate_leaf_refs(struct extent_inode_elem *inode_list,
+				u64 root, u64 extent_item_objectid,
+				iterate_extent_inodes_t *iterate, void *ctx)
+{
+	struct extent_inode_elem *cur = inode_list;
+	int ret = 0;
+
+	while (cur) {
+		pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), "
+			 "root %llu\n", extent_item_objectid,
+			 cur->inum, cur->offset, root);
+		ret = iterate(cur->inum, cur->offset, root, ctx);
+		if (ret) {
+			pr_debug("stopping iteration for %llu due to ret=%d\n",
+				 extent_item_objectid, ret);
+			return ret;
+		}
+		cur = cur->next;
+	}
+
+	return ret;
+}
+
+/*
+ * calls iterate() for every inode that references the extent identified by
+ * the given parameters.
+ * when the iterator function returns a non-zero value, iteration stops.
+ *
+ * search_commit_root is accepted but not used in this implementation, and no
+ * transaction handle is involved (trans stays NULL).
+ *
+ * returns 0 on success, otherwise the non-zero value returned by a lookup
+ * helper or by iterate()
+ */
+int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
+ u64 extent_item_objectid, u64 extent_item_pos,
+ int search_commit_root,
+ iterate_extent_inodes_t *iterate, void *ctx)
+{
+ int ret;
+ struct btrfs_trans_handle *trans = NULL;
+ struct ulist *refs = NULL;
+ struct ulist *roots = NULL;
+ struct ulist_node *ref_node = NULL;
+ struct ulist_node *root_node = NULL;
+ struct ulist_iterator ref_uiter;
+ struct ulist_iterator root_uiter;
+
+ pr_debug("resolving all inodes for extent %llu\n",
+ extent_item_objectid);
+
+ /* step 1: collect the leafs that reference the extent */
+ ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
+ 0, &refs, &extent_item_pos);
+ if (ret)
+ goto out;
+
+ ULIST_ITER_INIT(&ref_uiter);
+ while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
+ /* step 2: find every root through which this leaf is reachable */
+ ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val,
+ 0, &roots);
+ if (ret)
+ break;
+ ULIST_ITER_INIT(&root_uiter);
+ /* step 3: report the inodes of the leaf once per root */
+ while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
+ pr_debug("root %llu references leaf %llu, data list "
+ "%#llx\n", root_node->val, ref_node->val,
+ ref_node->aux);
+ ret = iterate_leaf_refs((struct extent_inode_elem *)
+ (uintptr_t)ref_node->aux,
+ root_node->val,
+ extent_item_objectid,
+ iterate, ctx);
+ }
+ ulist_free(roots);
+ }
+
+ /* also frees the inode lists stored in the aux values of refs */
+ free_leaf_list(refs);
+out:
+ return ret;
+}
+
+/*
+ * Resolve the data extent that contains the logical address and call
+ * iterate() for every inode that references it. Tree blocks are rejected
+ * with -EINVAL. path is only used for the extent lookup and is released
+ * again before the inodes are iterated.
+ *
+ * returns <0 if the extent lookup fails, otherwise the result of
+ * iterate_extent_inodes()
+ */
+int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+				struct btrfs_path *path,
+				iterate_extent_inodes_t *iterate, void *ctx)
+{
+	struct btrfs_key extent_key;
+	u64 extent_flags = 0;
+	int ret;
+
+	ret = extent_from_logical(fs_info, logical, path, &extent_key,
+				  &extent_flags);
+	btrfs_release_path(path);
+	if (ret < 0)
+		return ret;
+
+	if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+		return -EINVAL;
+
+	/* position of logical relative to the start of its extent */
+	return iterate_extent_inodes(fs_info, extent_key.objectid,
+				     logical - extent_key.objectid, 0,
+				     iterate, ctx);
+}
+
+/*
+ * callback type used by iterate_inode_refs() and iterate_inode_extrefs(): it
+ * is called once per name with the parent inode number, the length of the
+ * name, the position of the name within eb and the caller supplied ctx.
+ */
+typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb, void *ctx);
+
+/*
+ * calls iterate() for every name stored in the INODE_REF items of inum.
+ * returns 0 if at least one item was found and all callbacks returned 0,
+ * -ENOENT if no INODE_REF item exists, -ENOMEM if the leaf cannot be cloned,
+ * or the first non-zero value returned by the lookup or by iterate().
+ */
+static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
+ struct btrfs_path *path,
+ iterate_irefs_t *iterate, void *ctx)
+{
+ int ret = 0;
+ int slot;
+ u32 cur;
+ u32 len;
+ u32 name_len;
+ u64 parent = 0;
+ int found = 0;
+ struct extent_buffer *eb;
+ struct btrfs_item *item;
+ struct btrfs_inode_ref *iref;
+ struct btrfs_key found_key;
+
+ while (!ret) {
+ /* continue the search right after the parent handled last */
+ ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
+ &found_key);
+ if (ret < 0)
+ break;
+ if (ret) {
+ ret = found ? 0 : -ENOENT;
+ break;
+ }
+ ++found;
+
+ parent = found_key.offset;
+ slot = path->slots[0];
+ /* work on a private copy so that the path can be released */
+ eb = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!eb) {
+ ret = -ENOMEM;
+ break;
+ }
+ extent_buffer_get(eb);
+ btrfs_release_path(path);
+
+ item = btrfs_item_nr(slot);
+ iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
+
+ /* one item may hold several refs, each followed by its name */
+ for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
+ name_len = btrfs_inode_ref_name_len(eb, iref);
+ /* path must be released before calling iterate()! */
+ pr_debug("following ref at offset %u for inode %llu in "
+ "tree %llu\n", cur, found_key.objectid,
+ fs_root->objectid);
+ ret = iterate(parent, name_len,
+ (unsigned long)(iref + 1), eb, ctx);
+ if (ret)
+ break;
+ len = sizeof(*iref) + name_len;
+ iref = (struct btrfs_inode_ref *)((char *)iref + len);
+ }
+ free_extent_buffer(eb);
+ }
+
+ btrfs_release_path(path);
+
+ return ret;
+}
+
+/*
+ * calls iterate() for every name stored in the INODE_EXTREF items of inum.
+ *
+ * Each leaf is cloned before the path is released, and all item accesses go
+ * through that clone: path->nodes[0] must not be used once the path has been
+ * released. As in iterate_inode_refs(), a non-zero return value of iterate()
+ * ends the whole iteration and is handed back to the caller.
+ *
+ * returns 0 if at least one item was found and all callbacks returned 0,
+ * -ENOENT if no INODE_EXTREF item exists, -ENOMEM if the leaf cannot be
+ * cloned, or the first non-zero value returned by the lookup or by iterate().
+ */
+static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
+				 struct btrfs_path *path,
+				 iterate_irefs_t *iterate, void *ctx)
+{
+	int ret;
+	int slot;
+	u64 offset = 0;
+	u64 parent;
+	int found = 0;
+	struct extent_buffer *eb;
+	struct btrfs_inode_extref *extref;
+	u32 item_size;
+	u32 cur_offset;
+	unsigned long ptr;
+
+	while (1) {
+		ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref,
+					    &offset);
+		if (ret < 0)
+			break;
+		if (ret) {
+			ret = found ? 0 : -ENOENT;
+			break;
+		}
+		++found;
+
+		slot = path->slots[0];
+		/* work on a private copy so that the path can be released */
+		eb = btrfs_clone_extent_buffer(path->nodes[0]);
+		if (!eb) {
+			ret = -ENOMEM;
+			break;
+		}
+		extent_buffer_get(eb);
+
+		btrfs_release_path(path);
+
+		/* the path is gone: read the item from the clone only */
+		item_size = btrfs_item_size_nr(eb, slot);
+		ptr = btrfs_item_ptr_offset(eb, slot);
+		cur_offset = 0;
+
+		/* one item may hold several extrefs, each followed by its name */
+		while (cur_offset < item_size) {
+			u32 name_len;
+
+			extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
+			parent = btrfs_inode_extref_parent(eb, extref);
+			name_len = btrfs_inode_extref_name_len(eb, extref);
+			ret = iterate(parent, name_len,
+				      (unsigned long)&extref->name, eb, ctx);
+			if (ret)
+				break;
+
+			cur_offset += name_len;
+			cur_offset += sizeof(*extref);
+		}
+		free_extent_buffer(eb);
+
+		/* do not let the next lookup overwrite the callback's result */
+		if (ret)
+			break;
+
+		offset++;
+	}
+
+	btrfs_release_path(path);
+
+	return ret;
+}
+
+/*
+ * Run iterate() over all names of inum, first those stored in INODE_REF
+ * items and then those stored in INODE_EXTREF items. A missing extref item
+ * is not an error as long as regular refs were found.
+ */
+static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
+			 struct btrfs_path *path, iterate_irefs_t *iterate,
+			 void *ctx)
+{
+	int have_refs;
+	int ret;
+
+	ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
+	if (ret && ret != -ENOENT)
+		return ret;
+	have_refs = (ret == 0);
+
+	ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
+
+	return (ret == -ENOENT && have_refs) ? 0 : ret;
+}
+
+/*
+ * iterate_irefs_t callback that resolves one name of an inode to a path and
+ * records the result in the struct inode_fs_paths passed as ctx.
+ *
+ * returns 0 if the path could be dumped (probably truncated)
+ * returns <0 in case of an error
+ */
+static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb, void *ctx)
+{
+ struct inode_fs_paths *ipath = ctx;
+ char *fspath;
+ char *fspath_min;
+ int i = ipath->fspath->elem_cnt;
+ const int s_ptr = sizeof(char *);
+ u32 bytes_left;
+
+ /* reserve room for the val[] slot that will point to this path */
+ bytes_left = ipath->fspath->bytes_left > s_ptr ?
+ ipath->fspath->bytes_left - s_ptr : 0;
+
+ /* string space starts behind the val[] slots, including the new one */
+ fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
+ fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
+ name_off, eb, inum, fspath_min, bytes_left);
+ if (IS_ERR(fspath))
+ return PTR_ERR(fspath);
+
+ if (fspath > fspath_min) {
+ /* the path fit: record it, the space in front of it remains free */
+ ipath->fspath->val[i] = (u64)(unsigned long)fspath;
+ ++ipath->fspath->elem_cnt;
+ ipath->fspath->bytes_left = fspath - fspath_min;
+ } else {
+ /* the path did not fit: account for the shortfall instead */
+ ++ipath->fspath->elem_missed;
+ ipath->fspath->bytes_missing += fspath_min - fspath;
+ ipath->fspath->bytes_left = 0;
+ }
+
+ return 0;
+}
+
+/*
+ * this dumps all file system paths to the inode into the ipath struct, provided
+ * it has been created large enough. each path is zero-terminated and accessed
+ * from ipath->fspath->val[i].
+ * when it returns, there are ipath->fspath->elem_cnt number of paths available
+ * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the
+ * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise,
+ * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
+ * have been needed to return all paths.
+ *
+ * the return value is that of iterate_irefs().
+ */
+int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
+{
+ return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
+ inode_to_path, ipath);
+}
+
+/*
+ * Allocate a data container of total_bytes, but never less than the size of
+ * the container header itself. bytes_left is set to the space that remains
+ * behind the header; if total_bytes does not even cover the header, the
+ * shortfall is stored in bytes_missing instead.
+ *
+ * returns the container or ERR_PTR(-ENOMEM)
+ */
+struct btrfs_data_container *init_data_container(u32 total_bytes)
+{
+	const size_t hdr_bytes = sizeof(struct btrfs_data_container);
+	struct btrfs_data_container *data;
+
+	data = vmalloc(max_t(size_t, total_bytes, hdr_bytes));
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	data->elem_cnt = 0;
+	data->elem_missed = 0;
+
+	if (total_bytes < hdr_bytes) {
+		data->bytes_missing = hdr_bytes - total_bytes;
+		data->bytes_left = 0;
+	} else {
+		data->bytes_left = total_bytes - hdr_bytes;
+		data->bytes_missing = 0;
+	}
+
+	return data;
+}
+
+/*
+ * allocates space to return multiple file system paths for an inode.
+ * total_bytes to allocate are passed, note that space usable for actual path
+ * information will be total_bytes - sizeof(struct inode_fs_paths).
+ * the returned pointer must be freed with free_ipath() in the end.
+ *
+ * fs_root and path are only stored in the returned structure.
+ *
+ * returns the new structure or an ERR_PTR() value on allocation failure
+ */
+struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
+					struct btrfs_path *path)
+{
+	struct inode_fs_paths *ifp;
+	struct btrfs_data_container *fspath;
+
+	fspath = init_data_container(total_bytes);
+	if (IS_ERR(fspath))
+		return (void *)fspath;
+
+	ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
+	if (!ifp) {
+		/* the container comes from vmalloc(), release it accordingly */
+		vfree(fspath);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ifp->btrfs_path = path;
+	ifp->fspath = fspath;
+	ifp->fs_root = fs_root;
+
+	return ifp;
+}
+
+/*
+ * Release an inode_fs_paths structure together with its data container.
+ * A NULL ipath is accepted and ignored.
+ */
+void free_ipath(struct inode_fs_paths *ipath)
+{
+	if (ipath) {
+		vfree(ipath->fspath);
+		kfree(ipath);
+	}
+}
diff --git a/backref.h b/backref.h
new file mode 100644
index 00000000..3d2ed458
--- /dev/null
+++ b/backref.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_BACKREF_H__
+#define __BTRFS_BACKREF_H__
+
+#include "ulist.h"
+#include "extent_io.h"
+
+/*
+ * state used to resolve an inode to its file system paths, created by
+ * init_ipath() and released by free_ipath()
+ */
+struct inode_fs_paths {
+ /* path used for the tree lookups */
+ struct btrfs_path *btrfs_path;
+ /* root of the tree the inode is looked up in */
+ struct btrfs_root *fs_root;
+ /* container that receives the resulting paths */
+ struct btrfs_data_container *fspath;
+};
+
+/*
+ * callback type of iterate_extent_inodes(): called with the inode number,
+ * offset and root of each reference found, plus the caller supplied ctx.
+ * a non-zero return value stops the iteration.
+ */
+typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
+ void *ctx);
+
+/* makes path point to the INODE_ITEM of inum */
+int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+ struct btrfs_path *path);
+
+/*
+ * makes path point to the extent item containing logical; on success the
+ * kind of extent (data or tree block) is stored in *flags
+ */
+int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
+ struct btrfs_path *path, struct btrfs_key *found_key,
+ u64 *flags);
+
+/*
+ * iterates the tree block backrefs stored inline in an extent item; *ptr must
+ * be 0 for the first call
+ */
+int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level);
+
+/* calls iterate() for every inode that references the given extent */
+int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
+ u64 extent_item_objectid,
+ u64 extent_offset, int search_commit_root,
+ iterate_extent_inodes_t *iterate, void *ctx);
+
+/* like iterate_extent_inodes(), but starting from a logical address */
+int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+ iterate_extent_inodes_t *iterate, void *ctx);
+
+/* dumps all file system paths of inum into ipath */
+int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
+
+/* collects all roots referencing bytenr in a newly allocated ulist */
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist **roots);
+/*
+ * turns a name into a full path, filling dest backwards; the result lies
+ * before dest if size was too small, or is an ERR_PTR() value on error
+ */
+char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
+ u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb_in, u64 parent,
+ char *dest, u32 size);
+
+/* allocation and release of the result buffers used by paths_from_inode() */
+struct btrfs_data_container *init_data_container(u32 total_bytes);
+struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
+ struct btrfs_path *path);
+void free_ipath(struct inode_fs_paths *ipath);
+
+/*
+ * finds the first INODE_EXTREF item of inode_objectid at or after start_off
+ */
+int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
+ u64 start_off, struct btrfs_path *path,
+ struct btrfs_inode_extref **ret_extref,
+ u64 *found_off);
+#endif
diff --git a/bcp b/bcp
new file mode 100755
index 00000000..e7ca641a
--- /dev/null
+++ b/bcp
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+# Copyright (C) 2007 Oracle. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public
+# License v2 as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+#
+import sys, os, stat, fcntl
+from optparse import OptionParser
+
+def copylink(srcname, dst, filename, statinfo, force_name):
+ dstname = os.path.join(dst, force_name or filename)
+ if not os.path.exists(dstname):
+ link_target = os.readlink(srcname)
+ os.symlink(link_target, dstname)
+
+def copydev(srcname, dst, filename, statinfo, force_name):
+ devbits = statinfo.st_mode & (stat.S_IFBLK | stat.S_IFCHR)
+ mode = stat.S_IMODE(statinfo.st_mode) | devbits
+ dstname = os.path.join(dst, force_name or filename)
+ if not os.path.exists(dstname):
+ os.mknod(dstname, mode, statinfo.st_rdev)
+
+def copyfile(srcname, dst, filename, statinfo, force_name):
+ written = 0
+ dstname = os.path.join(dst, force_name or filename)
+
+ st_mode = statinfo.st_mode
+ if stat.S_ISLNK(st_mode):
+ copylink(srcname, dst, part, statinfo, None)
+ return
+ elif stat.S_ISBLK(st_mode) or stat.S_ISCHR(st_mode):
+ copydev(srcname, dst, part, statinfo, None)
+ return
+ elif not stat.S_ISREG(st_mode):
+ return
+
+ try:
+ os.unlink(dstname)
+ except:
+ pass
+
+ if options.link:
+ os.link(srcname, dstname)
+ return
+
+ dstf = file(dstname, 'w')
+ srcf = file(srcname, 'r')
+
+ ret = 1
+
+ try:
+ if not options.copy:
+ ret = fcntl.ioctl(dstf.fileno(), 1074041865, srcf.fileno())
+ except:
+ pass
+
+ if ret != 0:
+ while True:
+ buf = srcf.read(256 * 1024)
+ if not buf:
+ break
+ written += len(buf)
+ dstf.write(buf)
+
+ os.chmod(dstname, stat.S_IMODE(statinfo.st_mode))
+ os.chown(dstname, statinfo.st_uid, statinfo.st_gid)
+
+
+# Command line: any number of sources followed by one destination.
+usage = "usage: %prog [options]"
+parser = OptionParser(usage=usage)
+parser.add_option("-l", "--link", help="Create hard links", default=False,
+ action="store_true")
+parser.add_option("-c", "--copy", help="Copy file bytes (don't cow)",
+ default=False, action="store_true")
+
+(options,args) = parser.parse_args()
+
+if len(args) < 2:
+ sys.stderr.write("source or destination not specified\n")
+ sys.exit(1)
+
+# -c wins when both modes are requested.
+if options.link and options.copy:
+ sys.stderr.write("Both -l and -c specified, using copy mode\n")
+ options.link = False
+
+
+# The last positional argument is the destination; the rest are sources.
+total_args = len(args)
+src_args = total_args - 1
+orig_dst = args[-1]
+
+# With several sources the destination must be (or become) a directory.
+if src_args > 1:
+ if not os.path.exists(orig_dst):
+ os.makedirs(orig_dst)
+ if not os.path.isdir(orig_dst):
+ sys.stderr.write("Destination %s is not a directory\n" % orig_dst)
+ exit(1)
+
+for srci in xrange(0, src_args):
+ src = args[srci]
+ # Plain-file source (os.path.isfile follows symlinks; lstat does not).
+ if os.path.isfile(src):
+ statinfo = os.lstat(src)
+ force_name = None
+ if src_args == 1:
+ # Single source and a non-directory destination: the destination's
+ # basename becomes the new file name.
+ if not os.path.isdir(orig_dst):
+ force_name = os.path.basename(orig_dst)
+ orig_dst = os.path.dirname(orig_dst) or '.'
+ copyfile(src, orig_dst, os.path.basename(src), statinfo, force_name)
+ continue
+
+ # Otherwise the source is walked as a tree; pick the directory that will
+ # receive its contents.
+ if src_args > 1 or os.path.exists(orig_dst):
+ dst = os.path.join(orig_dst, os.path.basename(src))
+ else:
+ dst = orig_dst
+
+ if not os.path.exists(dst):
+ os.makedirs(dst)
+ statinfo = os.stat(src)
+ os.chmod(dst, stat.S_IMODE(statinfo.st_mode))
+ os.chown(dst, statinfo.st_uid, statinfo.st_gid)
+
+ iter = os.walk(src, topdown=True)
+
+ for (dirpath, dirnames, filenames) in iter:
+ # Subdirectories: symlinks are recreated as links, real directories are
+ # created and given the source's mode and ownership.
+ for x in dirnames:
+ srcname = os.path.join(dirpath, x)
+ statinfo = os.lstat(srcname)
+
+ # Path of this entry relative to the source root.
+ part = os.path.relpath(srcname, src)
+
+ if stat.S_ISLNK(statinfo.st_mode):
+ copylink(srcname, dst, part, statinfo, None)
+ continue
+
+ dst_dir = os.path.join(dst, part)
+ if not os.path.exists(dst_dir):
+ os.makedirs(dst_dir)
+
+ os.chmod(dst_dir, stat.S_IMODE(statinfo.st_mode))
+ os.chown(dst_dir, statinfo.st_uid, statinfo.st_gid)
+
+ # Non-directory entries are handed to copyfile() under the same
+ # relative path.
+ for f in filenames:
+ srcname = os.path.join(dirpath, f)
+ part = os.path.relpath(srcname, src)
+
+ statinfo = os.lstat(srcname)
+ copyfile(srcname, dst, part, statinfo, None)
+
+
diff --git a/bitops.h b/bitops.h
new file mode 100644
index 00000000..5b35f9fc
--- /dev/null
+++ b/bitops.h
@@ -0,0 +1,224 @@
+#ifndef _PERF_LINUX_BITOPS_H_
+#define _PERF_LINUX_BITOPS_H_
+
+#include <linux/kernel.h>
+
+/* Integer division of n by d, rounded up; only defined if not already. */
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#endif
+
+#define BITS_PER_BYTE 8
+/* Number of long / u64 / u32 words needed to hold nr bits. */
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
+
+/*
+ * Loop header that assigns to bit, in increasing order, the index of every
+ * set bit among the first size bits at addr.  bit, addr and size are each
+ * evaluated more than once.
+ */
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* Set bit nr in the bitmap at addr using a plain read-modify-write. */
+static inline void set_bit(int nr, unsigned long *addr)
+{
+	unsigned long *word = &addr[nr / BITS_PER_LONG];
+
+	*word = *word | (1UL << (nr % BITS_PER_LONG));
+}
+
+/* Clear bit nr in the bitmap at addr using a plain read-modify-write. */
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+	unsigned long *word = &addr[nr / BITS_PER_LONG];
+
+	*word = *word & ~(1UL << (nr % BITS_PER_LONG));
+}
+
+/**
+ * hweightN - returns the hamming weight of a N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+static inline unsigned int hweight32(unsigned int w)
+{
+	unsigned int sum;
+
+	/* Sum neighbouring 1-, 2- and 4-bit fields into per-byte counts. */
+	sum = w - ((w >> 1) & 0x55555555);
+	sum = ((sum >> 2) & 0x33333333) + (sum & 0x33333333);
+	sum = (sum + (sum >> 4)) & 0x0F0F0F0F;
+	/* Accumulate the four byte counts into the low byte. */
+	sum += sum >> 8;
+	sum += sum >> 16;
+	return sum & 0x000000FF;
+}
+
+/* Population count of a 64-bit value; implementation chosen by word size. */
+static inline unsigned long hweight64(__u64 w)
+{
+#if BITS_PER_LONG == 32
+	unsigned int hi = (unsigned int)(w >> 32);
+	unsigned int lo = (unsigned int)w;
+
+	return hweight32(hi) + hweight32(lo);
+#elif BITS_PER_LONG == 64
+	__u64 sum = w - ((w >> 1) & 0x5555555555555555ul);
+
+	sum = ((sum >> 2) & 0x3333333333333333ul) + (sum & 0x3333333333333333ul);
+	sum = (sum + (sum >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
+	sum += sum >> 8;
+	sum += sum >> 16;
+	sum += sum >> 32;
+	return sum & 0x00000000000000FFul;
+#endif
+}
+
+/* Population count of an unsigned long, dispatching on its size. */
+static inline unsigned long hweight_long(unsigned long w)
+{
+	if (sizeof(w) == 4)
+		return hweight32(w);
+	return hweight64(w);
+}
+
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/**
+ * __ffs - index of the least significant set bit in a word.
+ * @word: The word to search
+ *
+ * Halves the search window at each step.  The result is meaningless when
+ * @word is 0, so callers must test for that first.
+ */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+	int pos = 0;
+
+#if BITS_PER_LONG == 64
+	if (!(word & 0xffffffff)) {
+		word >>= 32;
+		pos += 32;
+	}
+#endif
+	if (!(word & 0xffff)) {
+		word >>= 16;
+		pos += 16;
+	}
+	if (!(word & 0xff)) {
+		word >>= 8;
+		pos += 8;
+	}
+	if (!(word & 0xf)) {
+		word >>= 4;
+		pos += 4;
+	}
+	if (!(word & 0x3)) {
+		word >>= 2;
+		pos += 2;
+	}
+	if (!(word & 0x1))
+		pos++;
+	return pos;
+}
+
+#define ffz(x) __ffs(~(x))
+
+/*
+ * Find the first set bit in a memory region.
+ *
+ * @addr: start of the bitmap
+ * @size: number of valid bits in the bitmap
+ *
+ * Returns the index of the lowest set bit, or @size if none of the first
+ * @size bits is set.
+ */
+static inline unsigned long
+find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ /* Scan whole words while at least BITS_PER_LONG bits remain. */
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+
+ /* Partial last word: ignore the bits at and above the remaining size. */
+ tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found:
+ return result + __ffs(tmp);
+}
+
+/*
+ * Find the next set bit in a memory region.
+ *
+ * @addr:   start of the bitmap
+ * @size:   number of valid bits in the bitmap
+ * @offset: bit index at which to start searching (inclusive)
+ *
+ * Returns the index of the first set bit at or after @offset, or @size if
+ * there is none (including when @offset >= @size).
+ */
+static inline unsigned long
+find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ /* Bit index of the first bit of the word containing offset. */
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ /* From here on size counts the bits left starting at result. */
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ /* Start is mid-word: drop the bits below offset. */
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ /* Scan whole words. */
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ /* Partial last word: ignore the bits at and above the remaining size. */
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __ffs(tmp);
+}
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ *
+ * Returns the index of the first clear bit at or after @offset among the
+ * first @size bits at @addr, or @size if there is none (including when
+ * @offset >= @size).
+ */
+static inline unsigned long
+find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ /* Bit index of the first bit of the word containing offset. */
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ /* From here on size counts the bits left starting at result. */
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ /* Start is mid-word: force the bits below offset to 1 so they are skipped. */
+ tmp = *(p++);
+ tmp |= ~0UL >> (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ /* Scan whole words for one that is not all ones. */
+ while (size & ~(BITS_PER_LONG-1)) {
+ if (~(tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ /* Partial last word: force the bits at and above the remaining size to 1. */
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + ffz(tmp);
+}
+#endif
diff --git a/btrfs-calc-size.c b/btrfs-calc-size.c
new file mode 100644
index 00000000..e3f02d87
--- /dev/null
+++ b/btrfs-calc-size.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (C) 2011 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <zlib.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "volumes.h"
+#include "utils.h"
+
+static int verbose = 0;
+static int no_pretty = 0;
+
+/*
+ * One histogram bucket: how many times a seek of exactly `distance` bytes
+ * was seen.  Buckets live in an rb-tree ordered by distance (see add_seek()).
+ */
+struct seek {
+ u64 distance;
+ u64 count;
+ struct rb_node n;
+};
+
+/*
+ * Statistics gathered while walking one tree (filled in by walk_nodes() and
+ * walk_leaf(), printed by calc_root_size()).
+ */
+struct root_stats {
+ /* Counts of visited nodes/leaves and their summed sizes in bytes. */
+ u64 total_nodes;
+ u64 total_leaves;
+ u64 total_bytes;
+ /* Summed length of inline file extents (only when requested). */
+ u64 total_inline;
+ /* A seek is recorded whenever a child block does not directly follow the previous one. */
+ u64 total_seeks;
+ u64 forward_seeks;
+ u64 backward_seeks;
+ u64 total_seek_len;
+ u64 max_seek_len;
+ /* A cluster is a run of contiguous blocks longer than a single leaf. */
+ u64 total_clusters;
+ u64 total_cluster_size;
+ u64 min_cluster_size;
+ u64 max_cluster_size;
+ /* Lowest and highest block addresses encountered. */
+ u64 lowest_bytenr;
+ u64 highest_bytenr;
+ /* rb-tree of struct seek, keyed by distance. */
+ struct rb_root seek_root;
+ /* NOTE(review): not referenced by the code visible in this file section. */
+ int total_levels;
+};
+
+/*
+ * Record one seek of length dist in the histogram tree.
+ *
+ * Bumps the counter of an existing bucket with the same distance, otherwise
+ * inserts a new bucket with count 1.  Returns 0 on success or -ENOMEM if a
+ * new bucket cannot be allocated.
+ */
+static int add_seek(struct rb_root *root, u64 dist)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct seek *entry;
+
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct seek, n);
+
+		if (dist == entry->distance) {
+			entry->count++;
+			return 0;
+		}
+		if (dist < entry->distance)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	entry = malloc(sizeof(*entry));
+	if (!entry)
+		return -ENOMEM;
+	entry->distance = dist;
+	entry->count = 1;
+	rb_link_node(&entry->n, parent, link);
+	rb_insert_color(&entry->n, root);
+	return 0;
+}
+
+/*
+ * Account for the leaf at path->nodes[0].
+ *
+ * Always adds one leaf of root->leafsize bytes to the totals.  When
+ * find_inline is set, also sums the length of every inline file extent in
+ * the leaf into stat->total_inline.  Always returns 0.
+ */
+static int walk_leaf(struct btrfs_root *root, struct btrfs_path *path,
+		     struct root_stats *stat, int find_inline)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_file_extent_item *extent;
+	struct btrfs_key key;
+	int slot;
+
+	stat->total_leaves++;
+	stat->total_bytes += root->leafsize;
+
+	if (find_inline) {
+		for (slot = 0; slot < btrfs_header_nritems(leaf); slot++) {
+			btrfs_item_key_to_cpu(leaf, &key, slot);
+			if (key.type == BTRFS_EXTENT_DATA_KEY) {
+				extent = btrfs_item_ptr(leaf, slot,
+						struct btrfs_file_extent_item);
+				if (btrfs_file_extent_type(leaf, extent) ==
+				    BTRFS_FILE_EXTENT_INLINE)
+					stat->total_inline +=
+						btrfs_file_extent_inline_item_len(leaf,
+							btrfs_item_nr(slot));
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Absolute difference between two block addresses. */
+static u64 calc_distance(u64 block1, u64 block2)
+{
+	return block1 < block2 ? block2 - block1 : block1 - block2;
+}
+
+/*
+ * Recursively walk the node at path->nodes[level] and everything below it,
+ * accumulating size, seek and cluster statistics into stat.
+ *
+ * Child blocks are only read when they are needed: interior nodes always,
+ * leaves only when find_inline is set.  A child that cannot be read is
+ * reported on stderr and skipped.  A "seek" is recorded whenever a child
+ * does not start exactly one leafsize after the previous block; runs of
+ * contiguous children form a cluster.
+ *
+ * Returns 0 on success, -ENOMEM if a seek could not be recorded, or the
+ * error returned by the walk of a child.
+ */
+static int walk_nodes(struct btrfs_root *root, struct btrfs_path *path,
+		      struct root_stats *stat, int level, int find_inline)
+{
+	struct extent_buffer *b = path->nodes[level];
+	u64 last_block;
+	u64 cluster_size = root->leafsize;
+	int i;
+	int ret = 0;
+
+	stat->total_bytes += root->nodesize;
+	stat->total_nodes++;
+
+	last_block = btrfs_header_bytenr(b);
+	for (i = 0; i < btrfs_header_nritems(b); i++) {
+		struct extent_buffer *tmp = NULL;
+		u64 cur_blocknr = btrfs_node_blockptr(b, i);
+
+		path->slots[level] = i;
+		if ((level - 1) > 0 || find_inline) {
+			tmp = read_tree_block(root, cur_blocknr,
+					      btrfs_level_size(root, level - 1),
+					      btrfs_node_ptr_generation(b, i));
+			if (!extent_buffer_uptodate(tmp)) {
+				fprintf(stderr, "Failed to read blocknr %Lu\n",
+					btrfs_node_blockptr(b, i));
+				continue;
+			}
+			path->nodes[level - 1] = tmp;
+		}
+		if (level - 1)
+			ret = walk_nodes(root, path, stat, level - 1,
+					 find_inline);
+		else
+			ret = walk_leaf(root, path, stat, find_inline);
+		if (last_block + root->leafsize != cur_blocknr) {
+			u64 distance = calc_distance(last_block +
+						     root->leafsize,
+						     cur_blocknr);
+			stat->total_seeks++;
+			stat->total_seek_len += distance;
+			if (stat->max_seek_len < distance)
+				stat->max_seek_len = distance;
+			if (add_seek(&stat->seek_root, distance)) {
+				fprintf(stderr, "Error adding new seek\n");
+				ret = -ENOMEM;
+				/* Don't leak the child buffer on this early exit. */
+				free_extent_buffer(tmp);
+				break;
+			}
+
+			if (last_block < cur_blocknr)
+				stat->forward_seeks++;
+			else
+				stat->backward_seeks++;
+			/* Close the cluster that ended just before this seek. */
+			if (cluster_size != root->leafsize) {
+				stat->total_cluster_size += cluster_size;
+				stat->total_clusters++;
+				if (cluster_size < stat->min_cluster_size)
+					stat->min_cluster_size = cluster_size;
+				if (cluster_size > stat->max_cluster_size)
+					stat->max_cluster_size = cluster_size;
+			}
+			cluster_size = root->leafsize;
+		} else {
+			cluster_size += root->leafsize;
+		}
+		last_block = cur_blocknr;
+		if (cur_blocknr < stat->lowest_bytenr)
+			stat->lowest_bytenr = cur_blocknr;
+		if (cur_blocknr > stat->highest_bytenr)
+			stat->highest_bytenr = cur_blocknr;
+		free_extent_buffer(tmp);
+		if (ret) {
+			fprintf(stderr, "Error walking down path\n");
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Print one histogram row for the distance range [start, end] with its seek
+ * count, followed by one '#' per tick, or a single '|' when ticks is 0.
+ */
+static void print_seek_row(int digits, u64 start, u64 end, u64 count,
+			   u64 ticks)
+{
+	u64 i;
+
+	printf("\t\t%*Lu - %*Lu: %*Lu ", digits, start,
+	       digits, end, digits, count);
+	if (!ticks) {
+		printf("|\n");
+		return;
+	}
+	for (i = 0; i < ticks; i++)
+		printf("#");
+	printf("\n");
+}
+
+/*
+ * Print a text histogram of the seek distances collected in stat.
+ *
+ * Nothing is printed for fewer than 20 seeks.  One tick ('#') stands for 5%
+ * of all seeks.  Distances whose own count is worth at most 2 ticks are
+ * merged into a running group until that group exceeds 2 ticks; distances
+ * worth more than 2 ticks get a row of their own.
+ */
+static void print_seek_histogram(struct root_stats *stat)
+{
+	struct rb_node *n = rb_first(&stat->seek_root);
+	struct seek *seek;
+	u64 tick_interval;
+	u64 group_start = 0;
+	u64 group_count = 0;
+	u64 group_end = 0;
+	u64 max_seek = stat->max_seek_len;
+	int digits = 1;
+
+	if (stat->total_seeks < 20)
+		return;
+
+	/* Column width: number of decimal digits of the longest seek. */
+	while ((max_seek /= 10))
+		digits++;
+
+	/* Make a tick count as 5% of the total seeks */
+	tick_interval = stat->total_seeks / 20;
+	printf("\tSeek histogram\n");
+	for (; n; n = rb_next(n)) {
+		u64 ticks, gticks;
+
+		seek = rb_entry(n, struct seek, n);
+		ticks = seek->count / tick_interval;
+		gticks = group_count / tick_interval;
+
+		if (ticks <= 2 && gticks <= 2) {
+			if (group_count == 0)
+				group_start = seek->distance;
+			group_end = seek->distance;
+			group_count += seek->count;
+			continue;
+		}
+
+		if (group_count) {
+			print_seek_row(digits, group_start, group_end,
+				       group_count, gticks);
+			group_count = 0;
+		}
+
+		if (ticks <= 2) {
+			/*
+			 * The full group was just flushed; this small entry
+			 * starts the next group instead of being dropped.
+			 */
+			group_start = seek->distance;
+			group_end = seek->distance;
+			group_count = seek->count;
+			continue;
+		}
+
+		print_seek_row(digits, seek->distance, seek->distance,
+			       seek->count, ticks);
+	}
+	if (group_count)
+		print_seek_row(digits, group_start, group_end, group_count,
+			       group_count / tick_interval);
+}
+
+/*
+ * Store x - y in result.
+ *
+ * y is normalised in place first (seconds are moved into or out of its
+ * microsecond field), so the caller's *y is modified.
+ */
+static void timeval_subtract(struct timeval *result,struct timeval *x,
+			     struct timeval *y)
+{
+	/* x has fewer microseconds: move whole seconds from y's usec to sec. */
+	if (x->tv_usec < y->tv_usec) {
+		int borrow = (y->tv_usec - x->tv_usec) / 1000000 + 1;
+
+		y->tv_usec -= 1000000 * borrow;
+		y->tv_sec += borrow;
+	}
+
+	/* Microsecond gap exceeds a second: move the excess the other way. */
+	if (x->tv_usec - y->tv_usec > 1000000) {
+		int carry = (x->tv_usec - y->tv_usec) / 1000000;
+
+		y->tv_usec += 1000000 * carry;
+		y->tv_sec -= carry;
+	}
+
+	result->tv_sec = x->tv_sec - y->tv_sec;
+	result->tv_usec = x->tv_usec - y->tv_usec;
+}
+
+/*
+ * Walk the tree identified by key and print its size and layout statistics.
+ *
+ * @tree_root:   any root of the opened filesystem (only its fs_info is used)
+ * @key:         key handed to btrfs_read_fs_root() to locate the tree
+ * @find_inline: when non-zero, leaves are read and inline extent data summed
+ *
+ * Output is raw byte counts when the global no_pretty is set, human readable
+ * sizes otherwise.  Returns 0 on success, 1 if the root cannot be read, the
+ * path cannot be allocated or the clock cannot be read, or the error
+ * returned by the tree walk.
+ */
+static int calc_root_size(struct btrfs_root *tree_root, struct btrfs_key *key,
+				int find_inline)
+{
+	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct rb_node *n;
+	struct timeval start, end, diff = {0};
+	struct root_stats stat;
+	int level;
+	int ret = 0;
+
+	root = btrfs_read_fs_root(tree_root->fs_info, key);
+	if (IS_ERR(root)) {
+		fprintf(stderr, "Failed to read root %Lu\n", key->objectid);
+		return 1;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Could not allocate path\n");
+		return 1;
+	}
+
+	memset(&stat, 0, sizeof(stat));
+	level = btrfs_header_level(root->node);
+	stat.lowest_bytenr = btrfs_header_bytenr(root->node);
+	stat.highest_bytenr = stat.lowest_bytenr;
+	/* (u64)-1 marks "no cluster seen yet", checked at out_print. */
+	stat.min_cluster_size = (u64)-1;
+	stat.max_cluster_size = root->leafsize;
+	path->nodes[level] = root->node;
+	if (gettimeofday(&start, NULL)) {
+		fprintf(stderr, "Error getting time: %d\n", errno);
+		/* Nothing was measured or printed: report the failure. */
+		ret = 1;
+		goto out;
+	}
+	if (!level) {
+		/* The root is itself a leaf; the read time stays at zero. */
+		ret = walk_leaf(root, path, &stat, find_inline);
+		if (ret)
+			goto out;
+		goto out_print;
+	}
+
+	ret = walk_nodes(root, path, &stat, level, find_inline);
+	if (ret)
+		goto out;
+	if (gettimeofday(&end, NULL)) {
+		fprintf(stderr, "Error getting time: %d\n", errno);
+		ret = 1;
+		goto out;
+	}
+	timeval_subtract(&diff, &end, &start);
+out_print:
+	if (stat.min_cluster_size == (u64)-1) {
+		/* No cluster recorded; also avoids dividing by zero below. */
+		stat.min_cluster_size = 0;
+		stat.total_clusters = 1;
+	}
+
+	if (no_pretty) {
+		printf("\tTotal size: %Lu\n", stat.total_bytes);
+		printf("\t\tInline data: %Lu\n", stat.total_inline);
+		printf("\tTotal seeks: %Lu\n", stat.total_seeks);
+		printf("\t\tForward seeks: %Lu\n", stat.forward_seeks);
+		printf("\t\tBackward seeks: %Lu\n", stat.backward_seeks);
+		printf("\t\tAvg seek len: %llu\n", stat.total_seeks ?
+			stat.total_seek_len / stat.total_seeks : 0);
+		print_seek_histogram(&stat);
+		printf("\tTotal clusters: %Lu\n", stat.total_clusters);
+		printf("\t\tAvg cluster size: %Lu\n", stat.total_cluster_size /
+		       stat.total_clusters);
+		printf("\t\tMin cluster size: %Lu\n", stat.min_cluster_size);
+		printf("\t\tMax cluster size: %Lu\n", stat.max_cluster_size);
+		printf("\tTotal disk spread: %Lu\n", stat.highest_bytenr -
+		       stat.lowest_bytenr);
+		printf("\tTotal read time: %d s %d us\n", (int)diff.tv_sec,
+		       (int)diff.tv_usec);
+		printf("\tLevels: %d\n", level + 1);
+	} else {
+		printf("\tTotal size: %s\n", pretty_size(stat.total_bytes));
+		printf("\t\tInline data: %s\n", pretty_size(stat.total_inline));
+		printf("\tTotal seeks: %Lu\n", stat.total_seeks);
+		printf("\t\tForward seeks: %Lu\n", stat.forward_seeks);
+		printf("\t\tBackward seeks: %Lu\n", stat.backward_seeks);
+		printf("\t\tAvg seek len: %s\n", stat.total_seeks ?
+			pretty_size(stat.total_seek_len / stat.total_seeks) :
+			pretty_size(0));
+		print_seek_histogram(&stat);
+		printf("\tTotal clusters: %Lu\n", stat.total_clusters);
+		printf("\t\tAvg cluster size: %s\n",
+				pretty_size((stat.total_cluster_size /
+						stat.total_clusters)));
+		printf("\t\tMin cluster size: %s\n",
+				pretty_size(stat.min_cluster_size));
+		printf("\t\tMax cluster size: %s\n",
+				pretty_size(stat.max_cluster_size));
+		printf("\tTotal disk spread: %s\n",
+				pretty_size(stat.highest_bytenr -
+					stat.lowest_bytenr));
+		printf("\tTotal read time: %d s %d us\n", (int)diff.tv_sec,
+		       (int)diff.tv_usec);
+		printf("\tLevels: %d\n", level + 1);
+	}
+out:
+	while ((n = rb_first(&stat.seek_root)) != NULL) {
+		struct seek *seek = rb_entry(n, struct seek, n);
+		rb_erase(n, &stat.seek_root);
+		free(seek);
+	}
+
+	/*
+	 * We only use path to save node data in iterating,
+	 * without holding eb's ref_cnt in path.
+	 * Don't use btrfs_free_path() here, it will free these
+	 * eb again, and cause many problems, as negative ref_cnt
+	 * or invalid memory access.
+	 */
+	free(path);
+	return ret;
+}
+
+/* Write the one-line command synopsis to stderr. */
+static void usage(void)
+{
+	fputs("Usage: calc-size [-v] [-b] <device>\n", stderr);
+}
+
+/*
+ * calc-size entry point: open the filesystem on <device> and print size
+ * statistics for the root, extent, csum and fs trees in that order.
+ *
+ * Options: -v increases the verbose counter, -b prints raw byte counts.
+ * Returns 0 on success or the first non-zero calc_root_size() result;
+ * exits with status 1 on usage errors or if the filesystem cannot be opened.
+ */
+int main(int argc, char **argv)
+{
+	/*
+	 * Zero the whole key: only objectid (and later offset) are assigned
+	 * below, yet the complete struct is handed to btrfs_read_fs_root().
+	 */
+	struct btrfs_key key = {0};
+	struct btrfs_root *root;
+	int opt;
+	int ret = 0;
+
+	while ((opt = getopt(argc, argv, "vb")) != -1) {
+		switch (opt) {
+		case 'v':
+			verbose++;
+			break;
+		case 'b':
+			no_pretty = 1;
+			break;
+		default:
+			usage();
+			exit(1);
+		}
+	}
+
+	set_argv0(argv);
+	argc = argc - optind;
+	if (check_argc_min(argc, 1)) {
+		usage();
+		exit(1);
+	}
+
+	/*
+	if ((ret = check_mounted(argv[optind])) < 0) {
+		fprintf(stderr, "Could not check mount status: %d\n", ret);
+		if (ret == -EACCES)
+			fprintf(stderr, "Maybe you need to run as root?\n");
+		return ret;
+	} else if (ret) {
+		fprintf(stderr, "%s is currently mounted. Aborting.\n",
+			argv[optind]);
+		return -EBUSY;
+	}
+	*/
+
+	root = open_ctree(argv[optind], 0, 0);
+	if (!root) {
+		fprintf(stderr, "Couldn't open ctree\n");
+		exit(1);
+	}
+
+	printf("Calculating size of root tree\n");
+	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	printf("Calculating size of extent tree\n");
+	key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	printf("Calculating size of csum tree\n");
+	key.objectid = BTRFS_CSUM_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	/* Only the fs tree walk also sums inline extent data. */
+	key.objectid = BTRFS_FS_TREE_OBJECTID;
+	key.offset = (u64)-1;
+	printf("Calculating size of fs tree\n");
+	ret = calc_root_size(root, &key, 1);
+out:
+	close_ctree(root);
+	btrfs_close_all_devices();
+	return ret;
+}
diff --git a/btrfs-completion b/btrfs-completion
new file mode 100644
index 00000000..a34191bd
--- /dev/null
+++ b/btrfs-completion
@@ -0,0 +1,152 @@
+# original by Alfredo Esteban <aedelatorre at xxxxxxxxx>
+# (http://www.spinics.net/lists/linux-btrfs/msg15899.html)
+# edited by Joseph Wang <joequant at gmail.com>
+# (http://lists.alioth.debian.org/pipermail/bash-completion-devel/2013-June/004868.html)
+# edited by John C F <john.ch.fr at gmail.com> on 2015-02-02
+
+_btrfs_devs()
+{
+ local DEVS
+ DEVS=''; while read dev; do DEVS+="$dev "; done < <(lsblk -pnro name)
+ COMPREPLY+=( $( compgen -W "$DEVS" -- "$cur" ) )
+}
+
+_btrfs_mnts()
+{
+ local MNTS
+ MNTS=''
+ while read mnt; do MNTS+="$mnt "
+ done < <(mount | awk '{print $3}')
+ COMPREPLY+=( $( compgen -W "$MNTS" -- "$cur" ) )
+}
+
+_btrfs()
+{
+ local cur prev words cword
+ _init_completion || return
+
+ COMPREPLY=()
+
+ local cmd=${words[1]}
+
+ commands='subvolume filesystem balance device scrub check rescue restore inspect-internal property send receive quota qgroup replace help version'
+ commands_subvolume='create delete list snapshot find-new get-default set-default show sync'
+ commands_filesystem='defragment sync resize show df label usage'
+ commands_balance='start pause cancel resume status'
+ commands_device='scan add delete remove ready stats usage'
+ commands_scrub='start cancel resume status'
+ commands_rescue='chunk-recover super-recover'
+ commands_inspect_internal='inode-resolve logical-resolve subvolid-resolve rootid min-dev-size'
+ commands_property='get set list'
+ commands_quota='enable disable rescan'
+ commands_qgroup='assign remove create destroy show limit'
+ commands_replace='start status cancel'
+
+ if [[ "$cur" == -* && $cword -le 3 && "$cmd" != "help" ]]; then
+ COMPREPLY=( $( compgen -W '--help' -- "$cur" ) )
+ return 0
+ fi
+
+ if [[ $cword -eq 1 ]]; then
+ COMPREPLY=( $( compgen -W "$commands" -- "$cur" ) )
+ return 0
+ elif [[ $cword -eq 2 ]]; then
+ case $cmd in
+ subvolume)
+ opts="$commands_subvolume"
+ ;;
+ filesystem)
+ opts="$commands_filesystem"
+ ;;
+ balance)
+ opts="$commands_balance"
+ ;;
+ device)
+ opts="$commands_device"
+ ;;
+ scrub)
+ opts="$commands_scrub"
+ ;;
+ check)
+ _btrfs_devs
+ return 0
+ ;;
+ rescue)
+ opts="$commands_rescue"
+ ;;
+ restore)
+ _btrfs_devs
+ return 0
+ ;;
+ inspect-internal)
+ opts="$commands_inspect_internal"
+ ;;
+ property)
+ opts="$commands_property"
+ ;;
+ send|receive)
+ _filedir -d
+ return 0
+ ;;
+ quota)
+ opts="$commands_quota"
+ ;;
+ qgroup)
+ opts="$commands_qgroup"
+ ;;
+ replace)
+ opts="$commands_replace"
+ ;;
+ help)
+ opts="--full"
+ ;;
+ version)
+ return 0
+ ;;
+ esac
+ COMPREPLY=( $( compgen -W "$opts" -- "$cur" ) )
+ return 0
+ elif [[ $cword -eq 3 ]]; then
+ case $cmd in
+ filesystem)
+ case $prev in
+ defragment)
+ _filedir
+ return 0
+ ;;
+ label)
+ _btrfs_mnts
+ _btrfs_devs
+ return 0
+ ;;
+ esac
+ ;;
+ device|rescue)
+ _btrfs_devs
+ return 0
+ ;;
+ replace)
+ case $prev in
+ status|cancel)
+ _btrfs_mnts
+ return 0
+ ;;
+ start)
+ _btrfs_devs
+ return 0
+ ;;
+ esac
+ ;;
+ esac
+ fi
+
+ if [[ "$cmd" == "receive" && "$prev" == "-f" ]]; then
+ _filedir
+ return 0
+ fi
+
+ _filedir -d
+ return 0
+}
+
+complete -F _btrfs btrfs
diff --git a/btrfs-convert.c b/btrfs-convert.c
new file mode 100644
index 00000000..4baa68ec
--- /dev/null
+++ b/btrfs-convert.c
@@ -0,0 +1,3206 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "kerncompat.h"
+
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+#include <linux/limits.h>
+#include <getopt.h>
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "transaction.h"
+#include "crc32c.h"
+#include "utils.h"
+#include "task-utils.h"
+#include <ext2fs/ext2_fs.h>
+#include <ext2fs/ext2fs.h>
+#include <ext2fs/ext2_ext_attr.h>
+
+/*
+ * Value added to an ext2 inode number to obtain the btrfs objectid, chosen so
+ * that EXT2_ROOT_INO maps onto BTRFS_FIRST_FREE_OBJECTID.
+ */
+#define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO)
+/* NOTE(review): presumably the objectid of the saved-image subvolume -- confirm at the use sites. */
+#define CONV_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
+
+/* Progress state shared between the inode copy loop and print_copied_inodes(). */
+struct task_ctx {
+ uint32_t max_copy_inodes; /* total printed as the denominator of the progress line */
+ uint32_t cur_copy_inodes; /* running count printed as the numerator */
+ struct task_info *info; /* handle handed to task_period_start()/task_period_wait() */
+};
+
+/*
+ * Progress reporter: once per period, print a spinner character followed by
+ * the number of inodes copied so far and the expected total.
+ *
+ * @p: pointer to the struct task_ctx holding the counters and the task info.
+ *
+ * The loop never terminates on its own; the trailing return only satisfies
+ * the void * signature.
+ */
+static void *print_copied_inodes(void *p)
+{
+	struct task_ctx *priv = p;
+	static const char work_indicator[] = { '.', 'o', 'O', 'o' };
+	uint32_t count = 0;
+
+	task_period_start(priv->info, 1000 /* 1s */);
+	while (1) {
+		count++;
+		/* The counters are unsigned, so they must be printed with %u. */
+		printf("copy inodes [%c] [%10u/%10u]\r",
+		       work_indicator[count % 4],
+		       (unsigned int)priv->cur_copy_inodes,
+		       (unsigned int)priv->max_copy_inodes);
+		fflush(stdout);
+		task_period_wait(priv->info);
+	}
+
+	return NULL;
+}
+
+/*
+ * Terminate the progress line: write a newline to stdout and flush it.
+ * The @p argument is not used. Always returns 0.
+ */
+static int after_copied_inodes(void *p)
+{
+	fputs("\n", stdout);
+	fflush(stdout);
+	return 0;
+}
+
+struct btrfs_convert_context;
+/*
+ * Callback table implemented by a source-filesystem backend. The inline
+ * convert_*() helpers and copy_inodes() in this file dispatch through it via
+ * cctx->convert_ops.
+ */
+struct btrfs_convert_operations {
+ const char *name;
+ int (*open_fs)(struct btrfs_convert_context *cctx, const char *devname);
+ int (*alloc_block)(struct btrfs_convert_context *cctx, u64 goal,
+ u64 *block_ret);
+ int (*alloc_block_range)(struct btrfs_convert_context *cctx, u64 goal,
+ int num, u64 *block_ret);
+ int (*test_block)(struct btrfs_convert_context *cctx, u64 block);
+ void (*free_block)(struct btrfs_convert_context *cctx, u64 block);
+ void (*free_block_range)(struct btrfs_convert_context *cctx, u64 block,
+ int num);
+ int (*copy_inodes)(struct btrfs_convert_context *cctx,
+ struct btrfs_root *root, int datacsum,
+ int packing, int noxattr, struct task_ctx *p);
+ void (*close_fs)(struct btrfs_convert_context *cctx);
+};
+
+/*
+ * Per-conversion state. The geometry fields, volume_name and fs_data are
+ * filled in by the backend's open_fs callback (see ext2_open_fs()).
+ */
+struct btrfs_convert_context {
+ u32 blocksize;
+ u32 first_data_block;
+ u32 block_count;
+ u32 inodes_count;
+ u32 free_inodes_count;
+ u64 total_bytes;
+ char *volume_name; /* heap copy of the label; released by the close_fs callback */
+ const struct btrfs_convert_operations *convert_ops;
+
+ /* The accurate used space of old filesystem */
+ struct cache_tree used;
+
+ /* Batched ranges which must be covered by data chunks */
+ struct cache_tree data_chunks;
+
+ /* Free space which is not covered by data_chunks */
+ struct cache_tree free;
+
+ void *fs_data; /* backend-private handle (the ext2_filsys for the ext2 backend) */
+};
+
+/* Initialize the three cache trees (used, data_chunks, free) embedded in @cctx. */
+static void init_convert_context(struct btrfs_convert_context *cctx)
+{
+	struct cache_tree *trees[] = {
+		&cctx->used,
+		&cctx->data_chunks,
+		&cctx->free,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(trees); i++)
+		cache_tree_init(trees[i]);
+}
+
+/* Release the contents of the three cache trees (used, data_chunks, free) in @cctx. */
+static void clean_convert_context(struct btrfs_convert_context *cctx)
+{
+	struct cache_tree *trees[] = {
+		&cctx->used,
+		&cctx->data_chunks,
+		&cctx->free,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(trees); i++)
+		free_extent_cache_tree(trees[i]);
+}
+
+/*
+ * Forward a single-block allocation request (hint @goal) to the backend's
+ * alloc_block callback. The block number comes back through @ret; the return
+ * value is whatever the callback returns.
+ */
+static inline int convert_alloc_block(struct btrfs_convert_context *cctx,
+				      u64 goal, u64 *ret)
+{
+	const struct btrfs_convert_operations *ops = cctx->convert_ops;
+
+	return ops->alloc_block(cctx, goal, ret);
+}
+
+/*
+ * Forward a request for @num blocks (hint @goal) to the backend's
+ * alloc_block_range callback. The first block number comes back through @ret;
+ * the return value is whatever the callback returns.
+ */
+static inline int convert_alloc_block_range(struct btrfs_convert_context *cctx,
+					    u64 goal, int num, u64 *ret)
+{
+	const struct btrfs_convert_operations *ops = cctx->convert_ops;
+
+	return ops->alloc_block_range(cctx, goal, num, ret);
+}
+
+/* Query the backend's test_block callback for @block and return its result. */
+static inline int convert_test_block(struct btrfs_convert_context *cctx,
+				     u64 block)
+{
+	const struct btrfs_convert_operations *ops = cctx->convert_ops;
+
+	return ops->test_block(cctx, block);
+}
+
+/* Hand @block to the backend's free_block callback. */
+static inline void convert_free_block(struct btrfs_convert_context *cctx,
+				      u64 block)
+{
+	const struct btrfs_convert_operations *ops = cctx->convert_ops;
+
+	ops->free_block(cctx, block);
+}
+
+/* Hand the @num blocks starting at @block to the backend's free_block_range callback. */
+static inline void convert_free_block_range(struct btrfs_convert_context *cctx,
+					    u64 block, int num)
+{
+	const struct btrfs_convert_operations *ops = cctx->convert_ops;
+
+	ops->free_block_range(cctx, block, num);
+}
+
+/*
+ * Run the backend's copy_inodes callback with the given options and progress
+ * context, returning its result unchanged.
+ */
+static inline int copy_inodes(struct btrfs_convert_context *cctx,
+			      struct btrfs_root *root, int datacsum,
+			      int packing, int noxattr, struct task_ctx *p)
+{
+	const struct btrfs_convert_operations *ops = cctx->convert_ops;
+
+	return ops->copy_inodes(cctx, root, datacsum, packing, noxattr, p);
+}
+
+/* Invoke the backend's close_fs callback for @cctx. */
+static inline void convert_close_fs(struct btrfs_convert_context *cctx)
+{
+	const struct btrfs_convert_operations *ops = cctx->convert_ops;
+
+	ops->close_fs(cctx);
+}
+
+/*
+ * Open Ext2fs in readonly mode, read block allocation bitmap and
+ * inode bitmap into memory.
+ *
+ * On success the libext2fs handle, geometry and a copy of the volume label
+ * are stored in @cctx and 0 is returned. On failure -1 is returned and, if
+ * the filesystem had already been opened, its handle is closed again.
+ */
+static int ext2_open_fs(struct btrfs_convert_context *cctx, const char *name)
+{
+	errcode_t ret;
+	ext2_filsys ext2_fs = NULL;
+	ext2_ino_t ino;
+
+	ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs);
+	if (ret) {
+		fprintf(stderr, "ext2fs_open: %s\n", error_message(ret));
+		/* nothing was opened, so there is nothing to close */
+		return -1;
+	}
+	ret = ext2fs_read_inode_bitmap(ext2_fs);
+	if (ret) {
+		fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n",
+			error_message(ret));
+		goto fail;
+	}
+	ret = ext2fs_read_block_bitmap(ext2_fs);
+	if (ret) {
+		fprintf(stderr, "ext2fs_read_block_bitmap: %s\n",
+			error_message(ret));
+		goto fail;
+	}
+	/*
+	 * search each block group for a free inode. this set up
+	 * uninit block/inode bitmaps appropriately.
+	 */
+	ino = 1;
+	while (ino <= ext2_fs->super->s_inodes_count) {
+		ext2_ino_t foo;
+		ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo);
+		ino += EXT2_INODES_PER_GROUP(ext2_fs->super);
+	}
+
+	if (!(ext2_fs->super->s_feature_incompat &
+	      EXT2_FEATURE_INCOMPAT_FILETYPE)) {
+		fprintf(stderr, "filetype feature is missing\n");
+		goto fail;
+	}
+
+	cctx->fs_data = ext2_fs;
+	cctx->blocksize = ext2_fs->blocksize;
+	cctx->block_count = ext2_fs->super->s_blocks_count;
+	/* Widen before multiplying: both operands are 32 bits wide. */
+	cctx->total_bytes = (u64)ext2_fs->blocksize *
+			    ext2_fs->super->s_blocks_count;
+	cctx->volume_name = strndup(ext2_fs->super->s_volume_name, 16);
+	cctx->first_data_block = ext2_fs->super->s_first_data_block;
+	cctx->inodes_count = ext2_fs->super->s_inodes_count;
+	cctx->free_inodes_count = ext2_fs->super->s_free_inodes_count;
+	return 0;
+fail:
+	/* Do not leak the handle (and its bitmaps) on the error paths. */
+	ext2fs_close(ext2_fs);
+	return -1;
+}
+
+/* Drop the copied volume label and close the libext2fs handle kept in @cctx. */
+static void ext2_close_fs(struct btrfs_convert_context *cctx)
+{
+	/* free(NULL) is a no-op, so no NULL test is needed here */
+	free(cctx->volume_name);
+	cctx->volume_name = NULL;
+	ext2fs_close(cctx->fs_data);
+}
+
+/*
+ * Ask libext2fs for a new block starting the search at @goal, mark it in the
+ * in-memory block bitmap and return its number through @block_ret.
+ * Returns 0 on success, -ENOSPC if ext2fs_new_block() fails.
+ */
+static int ext2_alloc_block(struct btrfs_convert_context *cctx,
+			    u64 goal, u64 *block_ret)
+{
+	ext2_filsys fs = cctx->fs_data;
+	blk_t found;
+
+	if (ext2fs_new_block(fs, goal, NULL, &found))
+		return -ENOSPC;
+
+	ext2fs_fast_mark_block_bitmap(fs->block_map, found);
+	*block_ret = found;
+	return 0;
+}
+
+/*
+ * Scan the in-memory block bitmap, starting at max(@goal, bitmap start), for
+ * the first position where ext2fs_fast_test_block_bitmap_range() accepts a
+ * run of @num blocks. That run is marked and its first block is returned
+ * through @block_ret. Returns 0 on success, -ENOSPC if no run is found.
+ */
+static int ext2_alloc_block_range(struct btrfs_convert_context *cctx, u64 goal,
+				  int num, u64 *block_ret)
+{
+	ext2_filsys fs = cctx->fs_data;
+	ext2fs_block_bitmap bitmap = fs->block_map;
+	blk_t first = ext2fs_get_block_bitmap_start(bitmap);
+	blk_t last = ext2fs_get_block_bitmap_end(bitmap);
+	blk_t cur = max_t(u64, goal, first);
+
+	while (cur + num < last) {
+		if (!ext2fs_fast_test_block_bitmap_range(bitmap, cur, num)) {
+			cur++;
+			continue;
+		}
+		ext2fs_fast_mark_block_bitmap_range(bitmap, cur, num);
+		*block_ret = cur;
+		return 0;
+	}
+	return -ENOSPC;
+}
+
+/*
+ * Clear @block in the in-memory ext2 block bitmap. The block number must fit
+ * in blk_t; anything wider trips BUG_ON.
+ */
+static void ext2_free_block(struct btrfs_convert_context *cctx, u64 block)
+{
+	ext2_filsys fs = cctx->fs_data;
+	blk_t blk = (blk_t)block;
+
+	BUG_ON(blk != block);
+	ext2fs_fast_unmark_block_bitmap(fs->block_map, blk);
+}
+
+/*
+ * Clear @num blocks starting at @block in the in-memory ext2 block bitmap.
+ * The starting block number must fit in blk_t; anything wider trips BUG_ON.
+ */
+static void ext2_free_block_range(struct btrfs_convert_context *cctx, u64 block, int num)
+{
+	ext2_filsys fs = cctx->fs_data;
+	blk_t first = (blk_t)block;
+
+	BUG_ON(first != block);
+	ext2fs_fast_unmark_block_bitmap_range(fs->block_map, first, num);
+}
+
+/*
+ * Populate root->fs_info->free_space_cache from the source filesystem:
+ *
+ *  1. every block for which convert_test_block() returns 0 is set dirty,
+ *  2. the BTRFS_STRIPE_LEN-aligned stripe around each btrfs superblock mirror
+ *     that lies inside the filesystem is cleared again,
+ *  3. the range [0, BTRFS_SUPER_INFO_OFFSET) is cleared.
+ *
+ * Always returns 0; a failing set_extent_dirty() trips BUG_ON.
+ */
+static int cache_free_extents(struct btrfs_root *root,
+			      struct btrfs_convert_context *cctx)
+{
+	u64 blocksize = cctx->blocksize;
+	u64 bytenr;
+	blk_t block = cctx->first_data_block;
+	int mirror;
+	int ret;
+
+	while (block < cctx->block_count) {
+		if (!convert_test_block(cctx, block)) {
+			bytenr = block * blocksize;
+			ret = set_extent_dirty(&root->fs_info->free_space_cache,
+					       bytenr, bytenr + blocksize - 1,
+					       0);
+			BUG_ON(ret);
+		}
+		block++;
+	}
+
+	for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+		bytenr = btrfs_sb_offset(mirror);
+		bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1);
+		/* stop at the first mirror that starts beyond the filesystem */
+		if (bytenr >= blocksize * cctx->block_count)
+			break;
+		clear_extent_dirty(&root->fs_info->free_space_cache, bytenr,
+				   bytenr + BTRFS_STRIPE_LEN - 1, 0);
+	}
+
+	clear_extent_dirty(&root->fs_info->free_space_cache,
+			   0, BTRFS_SUPER_INFO_OFFSET - 1, 0);
+
+	return 0;
+}
+
+/*
+ * Extent allocator hook used during conversion. Searches
+ * root->fs_info->free_space_cache for an EXTENT_DIRTY range of @num_bytes,
+ * starting at @hint_byte and wrapping around to offset 0 once.
+ *
+ * A candidate is rejected when it overlaps a pinned extent, lies in a SYSTEM
+ * block group, extends past the end of its block group, or (for @metadata)
+ * fails the check_crossing_stripes() test.
+ *
+ * On success the chosen range is cleared from the free space cache, @ins is
+ * filled in as an extent item key and 0 is returned. If the second pass also
+ * finds nothing, a message is printed and -ENOSPC is returned.
+ */
+static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
+ u64 hint_byte, struct btrfs_key *ins,
+ int metadata)
+{
+ u64 start;
+ u64 end;
+ u64 last = hint_byte;
+ int ret;
+ int wrapped = 0;
+ struct btrfs_block_group_cache *cache;
+
+ while(1) {
+ ret = find_first_extent_bit(&root->fs_info->free_space_cache,
+ last, &start, &end, EXTENT_DIRTY);
+ if (ret) {
+ /* nothing at or after 'last': retry once from the beginning */
+ if (wrapped++ == 0) {
+ last = 0;
+ continue;
+ } else {
+ goto fail;
+ }
+ }
+
+ start = max(last, start);
+ last = end + 1;
+ /* free range too short, continue the search after it */
+ if (last - start < num_bytes)
+ continue;
+
+ last = start + num_bytes;
+ if (test_range_bit(&root->fs_info->pinned_extents,
+ start, last - 1, EXTENT_DIRTY, 0))
+ continue;
+
+ cache = btrfs_lookup_block_group(root->fs_info, start);
+ BUG_ON(!cache);
+ /* skip SYSTEM block groups and ranges running past the group end */
+ if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM ||
+ last > cache->key.objectid + cache->key.offset) {
+ last = cache->key.objectid + cache->key.offset;
+ continue;
+ }
+
+ if (metadata) {
+ BUG_ON(num_bytes != root->nodesize);
+ if (check_crossing_stripes(start, num_bytes)) {
+ /* resume at the BTRFS_STRIPE_LEN boundary inside the candidate */
+ last = round_down(start + num_bytes,
+ BTRFS_STRIPE_LEN);
+ continue;
+ }
+ }
+ clear_extent_dirty(&root->fs_info->free_space_cache,
+ start, start + num_bytes - 1, 0);
+
+ ins->objectid = start;
+ ins->offset = num_bytes;
+ ins->type = BTRFS_EXTENT_ITEM_KEY;
+ return 0;
+ }
+fail:
+ fprintf(stderr, "not enough free space\n");
+ return -ENOSPC;
+}
+
+/*
+ * Return 1 if [@bytenr, @bytenr + @num_bytes) overlaps the
+ * BTRFS_STRIPE_LEN-aligned stripe that contains any btrfs superblock mirror,
+ * 0 otherwise.
+ */
+static int intersect_with_sb(u64 bytenr, u64 num_bytes)
+{
+	int mirror = 0;
+
+	while (mirror < BTRFS_SUPER_MIRROR_MAX) {
+		u64 stripe_start = btrfs_sb_offset(mirror);
+
+		stripe_start &= ~((u64)BTRFS_STRIPE_LEN - 1);
+		if (bytenr < stripe_start + BTRFS_STRIPE_LEN &&
+		    bytenr + num_bytes > stripe_start)
+			return 1;
+		mirror++;
+	}
+	return 0;
+}
+
+/*
+ * Extent free hook used during conversion: performs no freeing itself and
+ * returns 1 if the range overlaps a superblock stripe, 0 otherwise.
+ * The @root argument is not used.
+ */
+static int custom_free_extent(struct btrfs_root *root, u64 bytenr,
+			      u64 num_bytes)
+{
+	int overlaps_sb = intersect_with_sb(bytenr, num_bytes);
+
+	return overlaps_sb;
+}
+
+/* Extent allocation/free hooks backed by custom_alloc_extent() and custom_free_extent(). */
+static struct btrfs_extent_ops extent_ops = {
+ .alloc_extent = custom_alloc_extent,
+ .free_extent = custom_free_extent,
+};
+
+/*
+ * Link @objectid into directory @dir under @name: insert the dir item and the
+ * matching inode ref, then grow the stack inode size of @inode by
+ * 2 * @name_len. Returns 0 on success or the first insertion error.
+ */
+static int convert_insert_dirent(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 const char *name, size_t name_len,
+				 u64 dir, u64 objectid,
+				 u8 file_type, u64 index_cnt,
+				 struct btrfs_inode_item *inode)
+{
+	struct btrfs_key location;
+	int ret;
+
+	location.objectid = objectid;
+	location.type = BTRFS_INODE_ITEM_KEY;
+	location.offset = 0;
+
+	ret = btrfs_insert_dir_item(trans, root, name, name_len,
+				    dir, &location, file_type, index_cnt);
+	if (!ret)
+		ret = btrfs_insert_inode_ref(trans, root, name, name_len,
+					     objectid, dir, index_cnt);
+	if (ret)
+		return ret;
+
+	btrfs_set_stack_inode_size(inode,
+			btrfs_stack_inode_size(inode) + name_len * 2);
+	return 0;
+}
+
+/* State handed to dir_iterate_proc() while walking one ext2 directory. */
+struct dir_iterate_data {
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root;
+ struct btrfs_inode_item *inode; /* stack inode of the directory being filled */
+ u64 objectid; /* btrfs objectid of the directory being filled */
+ u64 index_cnt; /* dir index for the next entry; starts at 2 */
+ u64 parent; /* objectid taken from the ".." entry, 0 until seen */
+ int errcode; /* first error from convert_insert_dirent() */
+};
+
+/* Maps an ext2 directory-entry file type (EXT2_FT_*) to the btrfs BTRFS_FT_* value. */
+static u8 filetype_conversion_table[EXT2_FT_MAX] = {
+ [EXT2_FT_UNKNOWN] = BTRFS_FT_UNKNOWN,
+ [EXT2_FT_REG_FILE] = BTRFS_FT_REG_FILE,
+ [EXT2_FT_DIR] = BTRFS_FT_DIR,
+ [EXT2_FT_CHRDEV] = BTRFS_FT_CHRDEV,
+ [EXT2_FT_BLKDEV] = BTRFS_FT_BLKDEV,
+ [EXT2_FT_FIFO] = BTRFS_FT_FIFO,
+ [EXT2_FT_SOCK] = BTRFS_FT_SOCK,
+ [EXT2_FT_SYMLINK] = BTRFS_FT_SYMLINK,
+};
+
+/*
+ * ext2fs_dir_iterate2() callback: convert one ext2 directory entry into a
+ * btrfs dir item / inode ref pair.
+ *
+ * "." and ".." are not copied; the inode of ".." is remembered in
+ * idata->parent. Entries pointing at inodes below EXT2_GOOD_OLD_FIRST_INO are
+ * skipped. On an insertion error the code is stored in idata->errcode and
+ * BLOCK_ABORT is returned; otherwise 0.
+ */
+static int dir_iterate_proc(ext2_ino_t dir, int entry,
+			    struct ext2_dir_entry *dirent,
+			    int offset, int blocksize,
+			    char *buf,void *priv_data)
+{
+	struct dir_iterate_data *idata = priv_data;
+	const int name_len = dirent->name_len & 0xFF;
+	const u64 objectid = dirent->inode + INO_OFFSET;
+	int file_type;
+	int ret;
+
+	/* the length-limited compare matches "", "." and ".." */
+	if (strncmp(dirent->name, "..", name_len) == 0) {
+		if (name_len == 2) {
+			BUG_ON(idata->parent != 0);
+			idata->parent = objectid;
+		}
+		return 0;
+	}
+	if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO)
+		return 0;
+
+	/* the file type lives in the high byte of name_len */
+	file_type = dirent->name_len >> 8;
+	BUG_ON(file_type > EXT2_FT_SYMLINK);
+
+	ret = convert_insert_dirent(idata->trans, idata->root, dirent->name,
+				    name_len, idata->objectid, objectid,
+				    filetype_conversion_table[file_type],
+				    idata->index_cnt, idata->inode);
+	if (ret < 0) {
+		idata->errcode = ret;
+		return BLOCK_ABORT;
+	}
+
+	idata->index_cnt++;
+	return 0;
+}
+
+/*
+ * Walk ext2 directory @ext2_ino and insert every entry into btrfs directory
+ * @objectid. If the directory is its own parent, an additional ".." inode ref
+ * pointing at itself is inserted.
+ *
+ * Returns 0 on success, -1 if the libext2fs iteration fails, or the error
+ * recorded by dir_iterate_proc().
+ */
+static int create_dir_entries(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 objectid,
+			      struct btrfs_inode_item *btrfs_inode,
+			      ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
+{
+	errcode_t err;
+	struct dir_iterate_data data = {
+		.trans		= trans,
+		.root		= root,
+		.inode		= btrfs_inode,
+		.objectid	= objectid,
+		.index_cnt	= 2,
+		.parent		= 0,
+		.errcode	= 0,
+	};
+
+	err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL,
+				  dir_iterate_proc, &data);
+	if (err) {
+		fprintf(stderr, "ext2fs_dir_iterate2: %s\n",
+			error_message(err));
+		return -1;
+	}
+
+	if (data.errcode)
+		return data.errcode;
+	if (data.parent != objectid)
+		return 0;
+
+	return btrfs_insert_inode_ref(trans, root, "..", 2,
+				      objectid, objectid, 0);
+}
+
+/*
+ * Read exactly @num_bytes at device offset @bytenr into @buffer.
+ *
+ * Returns 0 only when the whole range was read. An error, a short read or an
+ * unexpected end of device (pread() returning 0) all yield -1.
+ */
+static int read_disk_extent(struct btrfs_root *root, u64 bytenr,
+			    u32 num_bytes, char *buffer)
+{
+	struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
+	ssize_t nread;
+
+	nread = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
+	/* anything other than a complete read, including EOF, is a failure */
+	if (nread < 0 || (size_t)nread != num_bytes)
+		return -1;
+	return 0;
+}
+
+/*
+ * Checksum the on-disk range [@disk_bytenr, @disk_bytenr + @num_bytes) one
+ * sector at a time: each sector is read back from the device and passed to
+ * btrfs_csum_file_block() for the csum root.
+ *
+ * Returns 0 on success, -ENOMEM if the sector buffer cannot be allocated, or
+ * the first read/checksum error.
+ */
+static int csum_disk_extent(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    u64 disk_bytenr, u64 num_bytes)
+{
+	u32 sectorsize = root->sectorsize;
+	u64 done = 0;
+	char *sector;
+	int ret = 0;
+
+	sector = malloc(sectorsize);
+	if (!sector)
+		return -ENOMEM;
+
+	while (done < num_bytes) {
+		ret = read_disk_extent(root, disk_bytenr + done,
+				       sectorsize, sector);
+		if (!ret)
+			ret = btrfs_csum_file_block(trans,
+						    root->fs_info->csum_root,
+						    disk_bytenr + num_bytes,
+						    disk_bytenr + done,
+						    sector, sectorsize);
+		if (ret)
+			break;
+		done += sectorsize;
+	}
+
+	free(sector);
+	return ret;
+}
+
+/*
+ * State for turning a stream of (disk block, file block) pairs into btrfs
+ * file extents; see block_iterate_proc(). first_block/disk_block/num_blocks
+ * describe the contiguous run collected so far but not yet recorded.
+ */
+struct blk_iterate_data {
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root;
+ struct btrfs_inode_item *inode;
+ u64 objectid;
+ u64 first_block; /* file block at which the pending run starts */
+ u64 disk_block; /* disk block at which the pending run starts */
+ u64 num_blocks; /* length of the pending run, in blocks */
+ u64 boundary; /* disk block at which the pending run must be cut */
+ int checksum; /* non-zero: checksum recorded data extents */
+ int errcode; /* first error seen by the iterate callback */
+};
+
+/*
+ * Reset @data for a new inode: remember the transaction, root, inode and
+ * objectid, start with an empty pending run, no boundary ((u64)-1) and no
+ * error.
+ */
+static void init_blk_iterate_data(struct blk_iterate_data *data,
+				  struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_inode_item *inode,
+				  u64 objectid, int checksum)
+{
+	*data = (struct blk_iterate_data) {
+		.trans		= trans,
+		.root		= root,
+		.inode		= inode,
+		.objectid	= objectid,
+		.first_block	= 0,
+		.disk_block	= 0,
+		.num_blocks	= 0,
+		.boundary	= (u64)-1,
+		.checksum	= checksum,
+		.errcode	= 0,
+	};
+}
+
+/*
+ * Record @num_blocks blocks of the file, starting at @file_block and stored
+ * at @disk_block, as a btrfs file extent. A @disk_block of 0 is recorded but
+ * never checksummed; otherwise the data is checksummed when data->checksum is
+ * set.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int record_file_blocks(struct blk_iterate_data *data,
+			      u64 file_block, u64 disk_block, u64 num_blocks)
+{
+	u64 sectorsize = data->root->sectorsize;
+	u64 disk_bytenr = disk_block * sectorsize;
+	u64 num_bytes = num_blocks * sectorsize;
+	int ret;
+
+	ret = btrfs_record_file_extent(data->trans, data->root,
+				       data->objectid, data->inode,
+				       file_block * sectorsize,
+				       disk_bytenr, num_bytes);
+	if (ret)
+		return ret;
+	if (!data->checksum || disk_bytenr == 0)
+		return 0;
+
+	return csum_disk_extent(data->trans, data->root, disk_bytenr,
+				num_bytes);
+}
+
+/*
+ * Feed one (disk_block, file_block) mapping into the pending run kept in
+ * @idata.
+ *
+ * The pending run is flushed with record_file_blocks() when the new block
+ * does not extend it contiguously (on disk or in the file), when the block
+ * lies in a superblock stripe, or when the previously computed boundary is
+ * reached. A gap in the file is recorded as an extent with disk block 0.
+ *
+ * Returns 0 on success or the error from record_file_blocks().
+ */
+static int block_iterate_proc(u64 disk_block, u64 file_block,
+ struct blk_iterate_data *idata)
+{
+ int ret = 0;
+ int sb_region;
+ int do_barrier;
+ struct btrfs_root *root = idata->root;
+ struct btrfs_block_group_cache *cache;
+ u64 bytenr = disk_block * root->sectorsize;
+
+ sb_region = intersect_with_sb(bytenr, root->sectorsize);
+ do_barrier = sb_region || disk_block >= idata->boundary;
+ if ((idata->num_blocks > 0 && do_barrier) ||
+ (file_block > idata->first_block + idata->num_blocks) ||
+ (disk_block != idata->disk_block + idata->num_blocks)) {
+ /* flush the run collected so far */
+ if (idata->num_blocks > 0) {
+ ret = record_file_blocks(idata, idata->first_block,
+ idata->disk_block,
+ idata->num_blocks);
+ if (ret)
+ goto fail;
+ idata->first_block += idata->num_blocks;
+ idata->num_blocks = 0;
+ }
+ /* file blocks skipped by the iterator: record them with disk block 0 */
+ if (file_block > idata->first_block) {
+ ret = record_file_blocks(idata, idata->first_block,
+ 0, file_block - idata->first_block);
+ if (ret)
+ goto fail;
+ }
+
+ /* next boundary: end of the superblock stripe, or end of the block group */
+ if (sb_region) {
+ bytenr += BTRFS_STRIPE_LEN - 1;
+ bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1);
+ } else {
+ cache = btrfs_lookup_block_group(root->fs_info, bytenr);
+ BUG_ON(!cache);
+ bytenr = cache->key.objectid + cache->key.offset;
+ }
+
+ /* start a new run at the current block */
+ idata->first_block = file_block;
+ idata->disk_block = disk_block;
+ idata->boundary = bytenr / root->sectorsize;
+ }
+ idata->num_blocks++;
+fail:
+ return ret;
+}
+
+/*
+ * ext2fs_block_iterate2() adapter around block_iterate_proc(). On failure the
+ * error is stored in the blk_iterate_data passed as @priv_data and the
+ * iteration is aborted with BLOCK_ABORT.
+ */
+static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
+				e2_blkcnt_t blockcnt, blk_t ref_block,
+				int ref_offset, void *priv_data)
+{
+	struct blk_iterate_data *idata = priv_data;
+	int ret = block_iterate_proc(*blocknr, blockcnt, idata);
+
+	if (!ret)
+		return 0;
+
+	idata->errcode = ret;
+	return BLOCK_ABORT;
+}
+
+/*
+ * traverse file's data blocks, record these data blocks as file extents.
+ *
+ * With @packing set, a file whose only run starts at file block 0 and whose
+ * size fits BTRFS_MAX_INLINE_DATA_SIZE() is stored as an inline extent
+ * instead. Any part of the file past the last recorded block, up to the
+ * inode size, is recorded with disk block 0.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -1 if the libext2fs
+ * iteration fails, or the first error from the helpers.
+ */
+static int create_file_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ struct btrfs_inode_item *btrfs_inode,
+ ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
+ int datacsum, int packing)
+{
+ int ret;
+ char *buffer = NULL;
+ errcode_t err;
+ u32 last_block;
+ u32 sectorsize = root->sectorsize;
+ u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
+ struct blk_iterate_data data;
+
+ init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid,
+ datacsum);
+
+ err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
+ NULL, __block_iterate_proc, &data);
+ if (err)
+ goto error;
+ ret = data.errcode;
+ if (ret)
+ goto fail;
+ /* after the iteration 'data' still holds the last, unrecorded run */
+ if (packing && data.first_block == 0 && data.num_blocks > 0 &&
+ inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
+ u64 num_bytes = data.num_blocks * sectorsize;
+ u64 disk_bytenr = data.disk_block * sectorsize;
+ u64 nbytes;
+
+ buffer = malloc(num_bytes);
+ if (!buffer)
+ return -ENOMEM;
+ ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer);
+ if (ret)
+ goto fail;
+ /* only the bytes up to the inode size go into the inline extent */
+ if (num_bytes > inode_size)
+ num_bytes = inode_size;
+ ret = btrfs_insert_inline_extent(trans, root, objectid,
+ 0, buffer, num_bytes);
+ if (ret)
+ goto fail;
+ nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes;
+ btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes);
+ } else if (data.num_blocks > 0) {
+ ret = record_file_blocks(&data, data.first_block,
+ data.disk_block, data.num_blocks);
+ if (ret)
+ goto fail;
+ }
+ data.first_block += data.num_blocks;
+ /* number of blocks needed to cover the inode size, rounded up */
+ last_block = (inode_size + sectorsize - 1) / sectorsize;
+ if (last_block > data.first_block) {
+ ret = record_file_blocks(&data, data.first_block, 0,
+ last_block - data.first_block);
+ }
+fail:
+ free(buffer);
+ return ret;
+error:
+ fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err));
+ return -1;
+}
+
+/*
+ * Convert a symlink inode.
+ *
+ * When the target is stored inside the inode's i_block array (no data
+ * blocks), it is inserted directly as an inline extent including the
+ * terminating NUL. Otherwise the target is copied through
+ * create_file_extents() with the inode size temporarily increased by one.
+ */
+static int create_symbol_link(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 objectid,
+			      struct btrfs_inode_item *btrfs_inode,
+			      ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
+			      struct ext2_inode *ext2_inode)
+{
+	u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
+	char *pathname;
+	int ret;
+
+	if (!ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) {
+		/* target is embedded in the inode itself */
+		pathname = (char *)&(ext2_inode->i_block[0]);
+		BUG_ON(pathname[inode_size] != 0);
+		ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
+						 pathname, inode_size + 1);
+		btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1);
+		return ret;
+	}
+
+	/* target lives in data blocks: copy size + 1 bytes, then restore size */
+	btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1);
+	ret = create_file_extents(trans, root, objectid, btrfs_inode,
+				  ext2_fs, ext2_ino, 1, 1);
+	btrfs_set_stack_inode_size(btrfs_inode, inode_size);
+	return ret;
+}
+
+/*
+ * Following xattr/acl related codes are based on codes in
+ * fs/ext3/xattr.c and fs/ext3/acl.c
+ *
+ * EXT2_XATTR_BHDR:   view a buffer as an xattr block header.
+ * EXT2_XATTR_BFIRST: first entry of an xattr block, right after the header.
+ * EXT2_XATTR_IHDR:   in-inode xattr header, located after the good-old inode
+ *                    body plus i_extra_isize.
+ * EXT2_XATTR_IFIRST: first in-inode entry, right after the header's h_magic.
+ */
+#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr))
+#define EXT2_XATTR_BFIRST(ptr) \
+ ((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1))
+#define EXT2_XATTR_IHDR(inode) \
+ ((struct ext2_ext_attr_header *) ((void *)(inode) + \
+ EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize))
+#define EXT2_XATTR_IFIRST(inode) \
+ ((struct ext2_ext_attr_entry *) ((void *)EXT2_XATTR_IHDR(inode) + \
+ sizeof(EXT2_XATTR_IHDR(inode)->h_magic)))
+
+/*
+ * Walk the xattr entry list starting at @entry and make sure every successor
+ * entry starts before @end. Returns 0 if the list is well formed, -EIO
+ * otherwise.
+ */
+static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry,
+				  const void *end)
+{
+	struct ext2_ext_attr_entry *cur = entry;
+
+	while (!EXT2_EXT_IS_LAST_ENTRY(cur)) {
+		cur = EXT2_EXT_ATTR_NEXT(cur);
+		if ((void *)cur >= end)
+			return -EIO;
+	}
+	return 0;
+}
+
+/*
+ * Validate an xattr block of @size bytes at @buf: the header magic must
+ * match, h_blocks must be 1 and the entry list must stay inside the buffer.
+ * Returns 0 if valid, -EIO otherwise.
+ */
+static int ext2_xattr_check_block(const char *buf, size_t size)
+{
+	struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf);
+
+	if (header->h_magic != EXT2_EXT_ATTR_MAGIC)
+		return -EIO;
+	if (header->h_blocks != 1)
+		return -EIO;
+
+	return ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size);
+}
+
+/*
+ * Validate one xattr entry against a container of @size bytes: the value
+ * must not live in a separate block and must lie entirely inside the
+ * container. Returns 0 if valid, -EIO otherwise.
+ */
+static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry,
+				  size_t size)
+{
+	size_t value_size = entry->e_value_size;
+
+	if (entry->e_value_block != 0)
+		return -EIO;
+	if (value_size > size)
+		return -EIO;
+	if (entry->e_value_offs + value_size > size)
+		return -EIO;
+	return 0;
+}
+
+/* Version expected in the a_version field of an on-disk ext2 ACL header. */
+#define EXT2_ACL_VERSION 0x0001
+
+/* 23.2.5 acl_tag_t values */
+
+#define ACL_UNDEFINED_TAG (0x00)
+#define ACL_USER_OBJ (0x01)
+#define ACL_USER (0x02)
+#define ACL_GROUP_OBJ (0x04)
+#define ACL_GROUP (0x08)
+#define ACL_MASK (0x10)
+#define ACL_OTHER (0x20)
+
+/* 23.2.7 ACL qualifier constants */
+
+/* Stored as e_id for entries that carry no id of their own. */
+#define ACL_UNDEFINED_ID ((id_t)-1)
+
+/* On-disk ext2 ACL entry carrying an id (used for ACL_USER and ACL_GROUP). */
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+} ext2_acl_entry;
+
+/* Short on-disk ext2 ACL entry without an id (USER_OBJ, GROUP_OBJ, MASK, OTHER). */
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+} ext2_acl_entry_short;
+
+/* Header preceding the entries of an on-disk ext2 ACL. */
+typedef struct {
+ __le32 a_version;
+} ext2_acl_header;
+
+/*
+ * Compute the number of entries in an on-disk ext2 ACL of @size bytes
+ * (header included).
+ *
+ * Up to four entries are stored in the short form; any further entries use
+ * the full form. Returns the entry count, or -1 if @size cannot correspond to
+ * a well-formed ACL, including a size smaller than the header.
+ */
+static inline int ext2_acl_count(size_t size)
+{
+	ssize_t s;
+
+	/*
+	 * Reject sizes that cannot even hold the header; otherwise the
+	 * unsigned subtraction below would wrap around.
+	 */
+	if (size < sizeof(ext2_acl_header))
+		return -1;
+
+	size -= sizeof(ext2_acl_header);
+	s = size - 4 * sizeof(ext2_acl_entry_short);
+	if (s < 0) {
+		if (size % sizeof(ext2_acl_entry_short))
+			return -1;
+		return size / sizeof(ext2_acl_entry_short);
+	} else {
+		if (s % sizeof(ext2_acl_entry))
+			return -1;
+		return s / sizeof(ext2_acl_entry) + 4;
+	}
+}
+
+/* Version written into the a_version field of the converted ACL xattr. */
+#define ACL_EA_VERSION 0x0002
+
+/* One entry of the converted ACL xattr value. */
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+} acl_ea_entry;
+
+/* Converted ACL xattr value: a version word followed by the entries. */
+typedef struct {
+ __le32 a_version;
+ acl_ea_entry a_entries[0];
+} acl_ea_header;
+
+/* Size in bytes of a converted ACL xattr value holding @count entries. */
+static inline size_t acl_ea_size(int count)
+{
+	size_t entries_bytes = count * sizeof(acl_ea_entry);
+
+	return sizeof(acl_ea_header) + entries_bytes;
+}
+
+/*
+ * Convert an on-disk ext2 ACL (@src, @src_size bytes) into the acl_ea_*
+ * xattr layout at @dst.
+ *
+ * @dst_size must be at least acl_ea_size(entry count); a smaller buffer trips
+ * BUG_ON. Returns 0 on success, -EINVAL if the source is malformed: too
+ * short, wrong version, bad entry count, unknown tag, or entries that do not
+ * end exactly at the end of the source buffer.
+ */
+static int ext2_acl_to_xattr(void *dst, const void *src,
+ size_t dst_size, size_t src_size)
+{
+ int i, count;
+ const void *end = src + src_size;
+ acl_ea_header *ext_acl = (acl_ea_header *)dst;
+ acl_ea_entry *dst_entry = ext_acl->a_entries;
+ ext2_acl_entry *src_entry;
+
+ if (src_size < sizeof(ext2_acl_header))
+ goto fail;
+ if (((ext2_acl_header *)src)->a_version !=
+ cpu_to_le32(EXT2_ACL_VERSION))
+ goto fail;
+ src += sizeof(ext2_acl_header);
+ count = ext2_acl_count(src_size);
+ if (count <= 0)
+ goto fail;
+
+ BUG_ON(dst_size < acl_ea_size(count));
+ ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION);
+ for (i = 0; i < count; i++, dst_entry++) {
+ src_entry = (ext2_acl_entry *)src;
+ /* at least the short form must fit before tag/perm are read */
+ if (src + sizeof(ext2_acl_entry_short) > end)
+ goto fail;
+ dst_entry->e_tag = src_entry->e_tag;
+ dst_entry->e_perm = src_entry->e_perm;
+ switch (le16_to_cpu(src_entry->e_tag)) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ /* short source entry: no id stored */
+ src += sizeof(ext2_acl_entry_short);
+ dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
+ break;
+ case ACL_USER:
+ case ACL_GROUP:
+ /* full source entry: the id is copied once it is known to fit */
+ src += sizeof(ext2_acl_entry);
+ if (src > end)
+ goto fail;
+ dst_entry->e_id = src_entry->e_id;
+ break;
+ default:
+ goto fail;
+ }
+ }
+ /* the entries must consume the source exactly */
+ if (src != end)
+ goto fail;
+ return 0;
+fail:
+ return -EINVAL;
+}
+
+/*
+ * xattr name prefix for each ext2 e_name_index value. Indexes without an
+ * initializer are NULL and are rejected by copy_single_xattr().
+ */
+static char *xattr_prefix_table[] = {
+ [1] = "user.",
+ [2] = "system.posix_acl_access",
+ [3] = "system.posix_acl_default",
+ [4] = "trusted.",
+ [6] = "security.",
+};
+
+/*
+ * Copy one ext2 extended attribute into btrfs as an xattr item of @objectid.
+ *
+ * The full name is the prefix selected by the entry's name index followed by
+ * the entry's own name. POSIX ACLs (name index 2 and 3) are converted from
+ * the ext2 layout to the xattr layout first. An attribute too large for a
+ * leaf is skipped with a message and does not count as an error.
+ *
+ * Returns 0 on success or skip, -EOPNOTSUPP for an unknown name index,
+ * -ERANGE if the name is too long, -EINVAL for a malformed ACL, -ENOMEM on
+ * allocation failure, or the error from btrfs_insert_xattr_item().
+ */
+static int copy_single_xattr(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 objectid,
+			     struct ext2_ext_attr_entry *entry,
+			     const void *data, u32 datalen)
+{
+	int ret = 0;
+	int name_len;
+	int name_index;
+	void *databuf = NULL;
+	char namebuf[XATTR_NAME_MAX + 1];
+
+	name_index = entry->e_name_index;
+	if (name_index >= ARRAY_SIZE(xattr_prefix_table) ||
+	    xattr_prefix_table[name_index] == NULL)
+		return -EOPNOTSUPP;
+	name_len = strlen(xattr_prefix_table[name_index]) +
+		   entry->e_name_len;
+	if (name_len >= sizeof(namebuf))
+		return -ERANGE;
+
+	if (name_index == 2 || name_index == 3) {
+		size_t bufsize;
+		int count;
+
+		/*
+		 * Validate the ACL size before sizing the output buffer: a
+		 * negative or zero count must not reach acl_ea_size().
+		 */
+		if (datalen < sizeof(ext2_acl_header))
+			return -EINVAL;
+		count = ext2_acl_count(datalen);
+		if (count <= 0)
+			return -EINVAL;
+
+		bufsize = acl_ea_size(count);
+		databuf = malloc(bufsize);
+		if (!databuf)
+			return -ENOMEM;
+		ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen);
+		if (ret)
+			goto out;
+		data = databuf;
+		datalen = bufsize;
+	}
+	/* name_len < sizeof(namebuf) was checked above, so this cannot truncate */
+	snprintf(namebuf, sizeof(namebuf), "%s%.*s",
+		 xattr_prefix_table[name_index],
+		 (int)entry->e_name_len, EXT2_EXT_ATTR_NAME(entry));
+	if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
+	    sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
+		fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n",
+			objectid - INO_OFFSET, name_len, namebuf);
+		goto out;
+	}
+	ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len,
+				      data, datalen, objectid);
+out:
+	free(databuf);
+	return ret;
+}
+
+/*
+ * Copy all extended attributes of ext2 inode @ext2_ino to btrfs object
+ * @objectid. Both storage locations are handled: attributes kept inside a
+ * large inode after its i_extra_isize area, and attributes kept in the
+ * separate block referenced by i_file_acl.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -1 if libext2fs fails
+ * to read the inode or the xattr block, -EIO for malformed xattr data, or the
+ * first error from copy_single_xattr().
+ */
+static int copy_extended_attrs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ struct btrfs_inode_item *btrfs_inode,
+ ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
+{
+ int ret = 0;
+ int inline_ea = 0;
+ errcode_t err;
+ u32 datalen;
+ u32 block_size = ext2_fs->blocksize;
+ u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super);
+ struct ext2_inode_large *ext2_inode;
+ struct ext2_ext_attr_entry *entry;
+ void *data;
+ char *buffer = NULL;
+ char inode_buf[EXT2_GOOD_OLD_INODE_SIZE];
+
+ /* good-old sized inodes fit the stack buffer, larger ones are heap allocated */
+ if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) {
+ ext2_inode = (struct ext2_inode_large *)inode_buf;
+ } else {
+ ext2_inode = (struct ext2_inode_large *)malloc(inode_size);
+ if (!ext2_inode)
+ return -ENOMEM;
+ }
+ err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode,
+ inode_size);
+ if (err) {
+ fprintf(stderr, "ext2fs_read_inode_full: %s\n",
+ error_message(err));
+ ret = -1;
+ goto out;
+ }
+
+ /* look for in-inode attributes only in large inodes above s_first_ino */
+ if (ext2_ino > ext2_fs->super->s_first_ino &&
+ inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
+ if (EXT2_GOOD_OLD_INODE_SIZE +
+ ext2_inode->i_extra_isize > inode_size) {
+ ret = -EIO;
+ goto out;
+ }
+ if (ext2_inode->i_extra_isize != 0 &&
+ EXT2_XATTR_IHDR(ext2_inode)->h_magic ==
+ EXT2_EXT_ATTR_MAGIC) {
+ inline_ea = 1;
+ }
+ }
+ if (inline_ea) {
+ int total;
+ void *end = (void *)ext2_inode + inode_size;
+ entry = EXT2_XATTR_IFIRST(ext2_inode);
+ /* bytes available from the first entry to the end of the inode */
+ total = end - (void *)entry;
+ ret = ext2_xattr_check_names(entry, end);
+ if (ret)
+ goto out;
+ while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
+ ret = ext2_xattr_check_entry(entry, total);
+ if (ret)
+ goto out;
+ /* in-inode value offsets are relative to the first entry */
+ data = (void *)EXT2_XATTR_IFIRST(ext2_inode) +
+ entry->e_value_offs;
+ datalen = entry->e_value_size;
+ ret = copy_single_xattr(trans, root, objectid,
+ entry, data, datalen);
+ if (ret)
+ goto out;
+ entry = EXT2_EXT_ATTR_NEXT(entry);
+ }
+ }
+
+ /* no separate xattr block: done */
+ if (ext2_inode->i_file_acl == 0)
+ goto out;
+
+ buffer = malloc(block_size);
+ if (!buffer) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer);
+ if (err) {
+ fprintf(stderr, "ext2fs_read_ext_attr: %s\n",
+ error_message(err));
+ ret = -1;
+ goto out;
+ }
+ ret = ext2_xattr_check_block(buffer, block_size);
+ if (ret)
+ goto out;
+
+ entry = EXT2_XATTR_BFIRST(buffer);
+ while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
+ ret = ext2_xattr_check_entry(entry, block_size);
+ if (ret)
+ goto out;
+ /* block value offsets are relative to the start of the block */
+ data = buffer + entry->e_value_offs;
+ datalen = entry->e_value_size;
+ ret = copy_single_xattr(trans, root, objectid,
+ entry, data, datalen);
+ if (ret)
+ goto out;
+ entry = EXT2_EXT_ATTR_NEXT(entry);
+ }
+out:
+ free(buffer);
+ /* only free the inode buffer if it came from malloc() */
+ if ((void *)ext2_inode != inode_buf)
+ free(ext2_inode);
+ return ret;
+}
+/* Device number packing used by the decode helpers: major in the bits above MINORBITS, minor below. */
+#define MINORBITS 20
+#define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi))
+
+/* Decode a 16-bit device number: major in bits 8-15, minor in bits 0-7. */
+static inline dev_t old_decode_dev(u16 val)
+{
+	unsigned major = (val >> 8) & 255;
+	unsigned minor = val & 255;
+
+	return MKDEV(major, minor);
+}
+
+/*
+ * Decode a 32-bit device number: major in bits 8-19, minor split between
+ * bits 0-7 (low part) and bits 20-31 (high part).
+ */
+static inline dev_t new_decode_dev(u32 dev)
+{
+	return MKDEV((dev & 0xfff00) >> 8,
+		     (dev & 0xff) | ((dev >> 12) & 0xfff00));
+}
+
+/*
+ * Fill the btrfs stack inode item @dst from ext2 inode @src.
+ *
+ * Generation and transid are set to 1; nbytes, block group, flags and otime
+ * start at zero. Directories get size 0 and link count 1, regular files get
+ * the full 64-bit size, and inodes that are neither regular file, directory
+ * nor symlink get their device number decoded from i_block. The @blocksize
+ * argument is currently unused. Always returns 0.
+ */
+static int copy_inode_item(struct btrfs_inode_item *dst,
+			   struct ext2_inode *src, u32 blocksize)
+{
+	btrfs_set_stack_inode_generation(dst, 1);
+	btrfs_set_stack_inode_sequence(dst, 0);
+	btrfs_set_stack_inode_transid(dst, 1);
+	btrfs_set_stack_inode_size(dst, src->i_size);
+	btrfs_set_stack_inode_nbytes(dst, 0);
+	btrfs_set_stack_inode_block_group(dst, 0);
+	btrfs_set_stack_inode_nlink(dst, src->i_links_count);
+	/*
+	 * The high halves are 16-bit fields; widen them to u32 before the
+	 * shift so it is not carried out on a promoted signed int.
+	 */
+	btrfs_set_stack_inode_uid(dst, src->i_uid |
+				       ((u32)src->i_uid_high << 16));
+	btrfs_set_stack_inode_gid(dst, src->i_gid |
+				       ((u32)src->i_gid_high << 16));
+	btrfs_set_stack_inode_mode(dst, src->i_mode);
+	btrfs_set_stack_inode_rdev(dst, 0);
+	btrfs_set_stack_inode_flags(dst, 0);
+	btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime);
+	btrfs_set_stack_timespec_nsec(&dst->atime, 0);
+	btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime);
+	btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
+	btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime);
+	btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
+	btrfs_set_stack_timespec_sec(&dst->otime, 0);
+	btrfs_set_stack_timespec_nsec(&dst->otime, 0);
+
+	if (S_ISDIR(src->i_mode)) {
+		btrfs_set_stack_inode_size(dst, 0);
+		btrfs_set_stack_inode_nlink(dst, 1);
+	}
+	if (S_ISREG(src->i_mode)) {
+		btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 |
+						(u64)src->i_size);
+	}
+	if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) &&
+	    !S_ISLNK(src->i_mode)) {
+		/* i_block[0] holds the old encoding, i_block[1] the new one */
+		if (src->i_block[0]) {
+			btrfs_set_stack_inode_rdev(dst,
+				old_decode_dev(src->i_block[0]));
+		} else {
+			btrfs_set_stack_inode_rdev(dst,
+				new_decode_dev(src->i_block[1]));
+		}
+	}
+	memset(&dst->reserved, 0, sizeof(dst->reserved));
+
+	return 0;
+}
+
+/*
+ * copy a single inode. do all the required works, such as cloning
+ * inode item, creating file extents and creating directory entries.
+ *
+ * Inodes with a link count of 0 are skipped. Regular files, directories and
+ * symlinks get their contents converted; other inode types only get the
+ * inode item. Extended attributes are copied unless @noxattr is set.
+ *
+ * Returns 0 on success (or skip), otherwise the first error encountered.
+ */
+static int copy_single_inode(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 objectid,
+			     ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
+			     struct ext2_inode *ext2_inode,
+			     int datacsum, int packing, int noxattr)
+{
+	int ret;
+	struct btrfs_inode_item btrfs_inode;
+
+	if (ext2_inode->i_links_count == 0)
+		return 0;
+
+	copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize);
+	if (!datacsum && S_ISREG(ext2_inode->i_mode)) {
+		/* inode flags are 64 bits wide; do not truncate them */
+		u64 flags = btrfs_stack_inode_flags(&btrfs_inode) |
+			    BTRFS_INODE_NODATASUM;
+		btrfs_set_stack_inode_flags(&btrfs_inode, flags);
+	}
+
+	switch (ext2_inode->i_mode & S_IFMT) {
+	case S_IFREG:
+		ret = create_file_extents(trans, root, objectid, &btrfs_inode,
+					  ext2_fs, ext2_ino, datacsum, packing);
+		break;
+	case S_IFDIR:
+		ret = create_dir_entries(trans, root, objectid, &btrfs_inode,
+					 ext2_fs, ext2_ino);
+		break;
+	case S_IFLNK:
+		ret = create_symbol_link(trans, root, objectid, &btrfs_inode,
+					 ext2_fs, ext2_ino, ext2_inode);
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+	if (ret)
+		return ret;
+
+	if (!noxattr) {
+		ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode,
+					  ext2_fs, ext2_ino);
+		if (ret)
+			return ret;
+	}
+	return btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
+}
+
+/*
+ * Copy @num_bytes of raw data from @src_bytenr to @dst_bytenr on the
+ * filesystem's latest block device, going through a temporary buffer.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated and -1
+ * on an I/O error or on any short transfer, including one of 0 bytes.
+ */
+static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr,
+			    u64 src_bytenr, u32 num_bytes)
+{
+	int ret = -1;
+	ssize_t done;
+	char *buffer;
+	struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
+
+	buffer = malloc(num_bytes);
+	if (!buffer)
+		return -ENOMEM;
+
+	/*
+	 * Only a complete transfer counts as success.  In particular a
+	 * return value of 0 (e.g. reading at end of device) must not be
+	 * mistaken for "no error".
+	 */
+	done = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr);
+	if (done != (ssize_t)num_bytes)
+		goto out;
+	done = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr);
+	if (done != (ssize_t)num_bytes)
+		goto out;
+	ret = 0;
+out:
+	free(buffer);
+	return ret;
+}
+/*
+ * Walk all ext2 inodes with an inode scan and copy every one of them into
+ * @root through copy_single_inode().
+ *
+ * Reserved inodes (numbers below EXT2_GOOD_OLD_FIRST_INO) are skipped,
+ * except for the root directory.  The btrfs objectid of an inode is its
+ * ext2 inode number plus INO_OFFSET.  @p->cur_copy_inodes is incremented
+ * for every inode handed to copy_single_inode().  The transaction is
+ * committed and restarted whenever it has used 4096 blocks or more.
+ *
+ * The inode scan is closed on every path once it has been opened.
+ *
+ * Returns 0 on success, -ENOMEM if no transaction can be started, -1 on
+ * an ext2fs error, or the non-zero value of copy_single_inode().
+ */
+static int ext2_copy_inodes(struct btrfs_convert_context *cctx,
+			    struct btrfs_root *root,
+			    int datacsum, int packing, int noxattr, struct task_ctx *p)
+{
+	ext2_filsys ext2_fs = cctx->fs_data;
+	int ret;
+	errcode_t err;
+	ext2_inode_scan ext2_scan;
+	struct ext2_inode ext2_inode;
+	ext2_ino_t ext2_ino;
+	u64 objectid;
+	struct btrfs_trans_handle *trans;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans)
+		return -ENOMEM;
+	err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan);
+	if (err) {
+		fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err));
+		return -1;
+	}
+	while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino,
+					     &ext2_inode))) {
+		/* no more inodes */
+		if (ext2_ino == 0)
+			break;
+		/* skip special inode in ext2fs */
+		if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO &&
+		    ext2_ino != EXT2_ROOT_INO)
+			continue;
+		objectid = ext2_ino + INO_OFFSET;
+		ret = copy_single_inode(trans, root,
+					objectid, ext2_fs, ext2_ino,
+					&ext2_inode, datacsum, packing,
+					noxattr);
+		p->cur_copy_inodes++;
+		if (ret)
+			goto out_close;
+		/* keep transactions bounded in size */
+		if (trans->blocks_used >= 4096) {
+			ret = btrfs_commit_transaction(trans, root);
+			BUG_ON(ret);
+			trans = btrfs_start_transaction(root, 1);
+			BUG_ON(!trans);
+		}
+	}
+	if (err) {
+		fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err));
+		ret = -1;
+		goto out_close;
+	}
+	ret = btrfs_commit_transaction(trans, root);
+	BUG_ON(ret);
+out_close:
+	/* previously leaked on the two error paths above */
+	ext2fs_close_inode_scan(ext2_scan);
+
+	return ret;
+}
+
+/*
+ * Look up @block in the ext2 block allocation bitmap and return the
+ * result of ext2fs_fast_test_block_bitmap().  Block numbers that do not
+ * fit into 32 bits trigger BUG_ON().
+ */
+static int ext2_test_block(struct btrfs_convert_context *cctx, u64 block)
+{
+	ext2_filsys fs = cctx->fs_data;
+	u32 narrowed = (u32)block;
+
+	BUG_ON(narrowed != block);
+	return ext2fs_fast_test_block_bitmap(fs->block_map, block);
+}
+
+/*
+ * Construct a range of the ext2fs image file.
+ * Scan the block allocation bitmap, find all blocks used by the ext2fs
+ * in [start_byte, end_byte) and create file extents that point to these
+ * blocks.  Every used block is handed to block_iterate_proc() with the
+ * same number as disk block and as file block.
+ *
+ * After the scan, a run still pending in the iterate data is recorded,
+ * and whatever remains up to the last block of the range is recorded
+ * with disk block 0 (presumably a hole; confirm in record_file_blocks()).
+ *
+ * Note: Before calling the function, no file extent points to blocks
+ * in this range.
+ *
+ * Returns 0 on success.  Bails out with the helper's return value when
+ * block_iterate_proc() returns a negative value or record_file_blocks()
+ * returns non-zero.
+ */
+static int create_image_file_range(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ struct btrfs_inode_item *inode,
+ u64 start_byte, u64 end_byte,
+ struct btrfs_convert_context *cctx, int datacsum)
+{
+ u32 blocksize = cctx->blocksize;
+ u32 block = start_byte / blocksize; /* NOTE(review): 64-bit quotient stored in a u32; confirm it cannot overflow */
+ u32 last_block = (end_byte + blocksize - 1) / blocksize; /* range end rounded up to a whole block */
+ int ret = 0;
+ struct blk_iterate_data data;
+
+ init_blk_iterate_data(&data, trans, root, inode, objectid, datacsum);
+ data.first_block = block;
+
+ for (; start_byte < end_byte; block++, start_byte += blocksize) {
+ if (!convert_test_block(cctx, block)) /* block not in use by the source fs */
+ continue;
+ ret = block_iterate_proc(block, block, &data);
+ if (ret < 0)
+ goto fail;
+ }
+ if (data.num_blocks > 0) { /* flush the run that is still pending */
+ ret = record_file_blocks(&data, data.first_block,
+ data.disk_block, data.num_blocks);
+ if (ret)
+ goto fail;
+ data.first_block += data.num_blocks;
+ }
+ if (last_block > data.first_block) { /* tail of the range without used blocks */
+ ret = record_file_blocks(&data, data.first_block, 0,
+ last_block - data.first_block);
+ if (ret)
+ goto fail;
+ }
+fail:
+ return ret;
+}
+/*
+ * Create the fs image file.
+ *
+ * A read-only regular file @name (mode 0400, size = total_bytes of the
+ * super block) is created in the top-level directory of @root:
+ *
+ *  1. The area from 0 up to first_free is copied sector by sector into
+ *     newly allocated extents, since it cannot be relocated later.
+ *  2. The extent tree is walked.  Data extents referenced by
+ *     BTRFS_FS_TREE_OBJECTID are added to the image at a file offset
+ *     equal to their disk bytenr; the gaps in between are filled by
+ *     create_image_file_range().
+ *  3. The range behind the last extent up to total_bytes is filled by
+ *     create_image_file_range() as well.
+ *  4. The inode item, the directory item and the inode ref are inserted
+ *     and the size of the parent directory is grown by strlen(name) * 2.
+ *
+ * If @datacsum is 0 the image inode gets BTRFS_INODE_NODATASUM and no
+ * checksums are generated for the copied sectors.
+ *
+ * Returns 0 on success or the non-zero value of the failing helper.
+ */
+static int create_image(struct btrfs_convert_context *cctx,
+ struct btrfs_root *root, const char *name, int datacsum)
+{
+ int ret;
+ struct btrfs_key key;
+ struct btrfs_key location;
+ struct btrfs_path path;
+ struct btrfs_inode_item btrfs_inode;
+ struct btrfs_inode_item *inode_item;
+ struct extent_buffer *leaf;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_extent_item *ei;
+ struct btrfs_extent_inline_ref *iref;
+ struct btrfs_extent_data_ref *dref;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 objectid;
+ u64 last_byte;
+ u64 first_free;
+ u64 total_bytes;
+ u64 flags = BTRFS_INODE_READONLY;
+ u32 sectorsize = root->sectorsize;
+
+ total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+ first_free = BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
+ first_free &= ~((u64)sectorsize - 1); /* round down to a sectorsize boundary */
+ if (!datacsum)
+ flags |= BTRFS_INODE_NODATASUM;
+
+ memset(&btrfs_inode, 0, sizeof(btrfs_inode));
+ btrfs_set_stack_inode_generation(&btrfs_inode, 1);
+ btrfs_set_stack_inode_size(&btrfs_inode, total_bytes);
+ btrfs_set_stack_inode_nlink(&btrfs_inode, 1);
+ btrfs_set_stack_inode_nbytes(&btrfs_inode, 0);
+ btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400);
+ btrfs_set_stack_inode_flags(&btrfs_inode, flags);
+ btrfs_init_path(&path);
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+
+ objectid = btrfs_root_dirid(&root->root_item);
+ ret = btrfs_find_free_objectid(trans, root, objectid, &objectid); /* objectid for the image inode */
+ if (ret)
+ goto fail;
+
+ /*
+ * copy blocks covered by extent #0 to new positions. extent #0 is
+ * special, we can't rely on relocate_extents_range to relocate it.
+ */
+ for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) {
+ ret = custom_alloc_extent(root, sectorsize, 0, &key, 0);
+ if (ret)
+ goto fail;
+ ret = copy_disk_extent(root, key.objectid, last_byte,
+ sectorsize); /* key.objectid is the start of the new extent */
+ if (ret)
+ goto fail;
+ ret = btrfs_record_file_extent(trans, root, objectid,
+ &btrfs_inode, last_byte,
+ key.objectid, sectorsize);
+ if (ret)
+ goto fail;
+ if (datacsum) {
+ ret = csum_disk_extent(trans, root, key.objectid,
+ sectorsize);
+ if (ret)
+ goto fail;
+ }
+ }
+
+ while(1) { /* walk the extent items located at or after last_byte */
+ key.objectid = last_byte;
+ key.offset = 0;
+ btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+ ret = btrfs_search_slot(trans, fs_info->extent_root,
+ &key, &path, 0, 0);
+ if (ret < 0)
+ goto fail;
+next:
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(extent_root, &path);
+ if (ret < 0)
+ goto fail;
+ if (ret > 0) /* no more leaves: the walk is complete */
+ break;
+ leaf = path.nodes[0];
+ }
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (last_byte > key.objectid ||
+ key.type != BTRFS_EXTENT_ITEM_KEY) { /* already covered, or not an extent item */
+ path.slots[0]++;
+ goto next;
+ }
+
+ bytenr = key.objectid;
+ num_bytes = key.offset;
+ ei = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_extent_item);
+ if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) { /* only data extents go into the image */
+ path.slots[0]++;
+ goto next;
+ }
+
+ BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) +
+ btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)); /* exactly one inline data ref expected */
+
+ iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ key.type = btrfs_extent_inline_ref_type(leaf, iref);
+ BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ if (btrfs_extent_data_ref_root(leaf, dref) !=
+ BTRFS_FS_TREE_OBJECTID) { /* extent is not referenced by the fs tree */
+ path.slots[0]++;
+ goto next;
+ }
+
+ if (bytenr > last_byte) { /* fill the gap in front of this extent */
+ ret = create_image_file_range(trans, root, objectid,
+ &btrfs_inode, last_byte,
+ bytenr, cctx,
+ datacsum);
+ if (ret)
+ goto fail;
+ }
+ ret = btrfs_record_file_extent(trans, root, objectid,
+ &btrfs_inode, bytenr, bytenr,
+ num_bytes); /* file offset == disk bytenr */
+ if (ret)
+ goto fail;
+ last_byte = bytenr + num_bytes;
+ btrfs_release_path(&path);
+
+ if (trans->blocks_used >= 4096) { /* keep transactions bounded in size */
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+ }
+ }
+ btrfs_release_path(&path);
+ if (total_bytes > last_byte) { /* range behind the last extent */
+ ret = create_image_file_range(trans, root, objectid,
+ &btrfs_inode, last_byte,
+ total_bytes, cctx,
+ datacsum);
+ if (ret)
+ goto fail;
+ }
+
+ ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
+ if (ret)
+ goto fail;
+
+ location.objectid = objectid;
+ location.offset = 0;
+ btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
+ ret = btrfs_insert_dir_item(trans, root, name, strlen(name),
+ btrfs_root_dirid(&root->root_item),
+ &location, BTRFS_FT_REG_FILE, objectid);
+ if (ret)
+ goto fail;
+ ret = btrfs_insert_inode_ref(trans, root, name, strlen(name),
+ objectid,
+ btrfs_root_dirid(&root->root_item),
+ objectid);
+ if (ret)
+ goto fail;
+ location.objectid = btrfs_root_dirid(&root->root_item); /* now update the parent directory inode */
+ location.offset = 0;
+ btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
+ ret = btrfs_lookup_inode(trans, root, &path, &location, 1);
+ if (ret)
+ goto fail;
+ leaf = path.nodes[0];
+ inode_item = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_inode_item);
+ btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 +
+ btrfs_inode_size(leaf, inode_item)); /* same name-length-times-two accounting as link_subvol() */
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(&path);
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+fail:
+ btrfs_release_path(&path);
+ return ret;
+}
+
+/*
+ * Link the subvolume @root_objectid into the top-level directory of @root.
+ *
+ * A directory entry of type BTRFS_FT_DIR that points at the subvolume's
+ * root item is inserted under the name @base.  If the name is already
+ * taken, "<base>0", "<base>1", ... are tried, 1024 insert attempts at
+ * most.  The directory inode size is then grown by twice the name length,
+ * the root backref and the forward ref are added to the tree root and the
+ * transaction is committed.
+ *
+ * Returns the subvolume root as read by btrfs_read_fs_root(), or NULL if
+ * @base is empty or longer than BTRFS_NAME_LEN, if no directory entry
+ * could be inserted, or if the root cannot be read.
+ */
+static struct btrfs_root * link_subvol(struct btrfs_root *root,
+		const char *base, u64 root_objectid)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *new_root = NULL;
+	struct btrfs_path *path;
+	struct btrfs_inode_item *inode_item;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	u64 dirid = btrfs_root_dirid(&root->root_item);
+	u64 index = 2;
+	char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
+	int len;
+	int i;
+	int ret;
+
+	len = strlen(base);
+	if (len == 0 || len > BTRFS_NAME_LEN)
+		return NULL;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	/* find the highest dir index in use; the new entry gets the next one */
+	key.objectid = dirid;
+	key.type = BTRFS_DIR_INDEX_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	BUG_ON(ret <= 0);
+
+	if (path->slots[0] > 0) {
+		path->slots[0]--;
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
+			index = key.offset + 1;
+	}
+	btrfs_release_path(path);
+
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	key.objectid = dirid;
+	key.offset = 0;
+	key.type = BTRFS_INODE_ITEM_KEY;
+
+	ret = btrfs_lookup_inode(trans, root, path, &key, 1);
+	BUG_ON(ret);
+	leaf = path->nodes[0];
+	inode_item = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_inode_item);
+
+	/* from here on @key is the location of the subvolume's root item */
+	key.objectid = root_objectid;
+	key.offset = (u64)-1;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+
+	/* @buf is used together with @len, so no terminator is needed here */
+	memcpy(buf, base, len);
+	for (i = 0; i < 1024; i++) {
+		ret = btrfs_insert_dir_item(trans, root, buf, len,
+					    dirid, &key, BTRFS_FT_DIR, index);
+		if (ret != -EEXIST)
+			break;
+		/* name taken: retry with a numeric suffix */
+		len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
+		if (len < 1 || len > BTRFS_NAME_LEN) {
+			ret = -EINVAL;
+			break;
+		}
+	}
+	if (ret)
+		goto fail;
+
+	btrfs_set_inode_size(leaf, inode_item, len * 2 +
+			     btrfs_inode_size(leaf, inode_item));
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(path);
+
+	/* add the backref first */
+	ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
+				 BTRFS_ROOT_BACKREF_KEY,
+				 root->root_key.objectid,
+				 dirid, index, buf, len);
+	BUG_ON(ret);
+
+	/* now add the forward ref */
+	ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
+				 BTRFS_ROOT_REF_KEY, root_objectid,
+				 dirid, index, buf, len);
+	/* do not commit a backref without its matching forward ref */
+	BUG_ON(ret);
+
+	ret = btrfs_commit_transaction(trans, root);
+	BUG_ON(ret);
+
+	new_root = btrfs_read_fs_root(fs_info, &key);
+	if (IS_ERR(new_root))
+		new_root = NULL;
+fail:
+	btrfs_free_path(path);
+	return new_root;
+}
+/*
+ * Replace the chunk mapping created by make_btrfs with one chunk per
+ * block group.
+ *
+ * The device extent and the chunk item at offset 0 are deleted first.
+ * Then, for every block group between 0 and the super block's
+ * total_bytes, a device extent and a single-stripe chunk item are
+ * inserted whose device offset equals the logical start of the block
+ * group.  Finally the device's bytes_used is set to total_bytes and the
+ * device item is updated.
+ *
+ * Only the first device in the fs_devices list is used.
+ *
+ * Returns 0 on success or the non-zero value of the failing helper.
+ */
+static int create_chunk_mapping(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_root *chunk_root = info->chunk_root;
+ struct btrfs_root *extent_root = info->extent_root;
+ struct btrfs_device *device;
+ struct btrfs_block_group_cache *cache;
+ struct btrfs_dev_extent *extent;
+ struct extent_buffer *leaf;
+ struct btrfs_chunk chunk;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ u64 cur_start;
+ u64 total_bytes;
+ u64 chunk_objectid;
+ int ret;
+
+ btrfs_init_path(&path);
+
+ total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
+ chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+
+ BUG_ON(list_empty(&info->fs_devices->devices));
+ device = list_entry(info->fs_devices->devices.next,
+ struct btrfs_device, dev_list);
+ BUG_ON(device->devid != info->fs_devices->latest_devid);
+
+ /* delete device extent created by make_btrfs */
+ key.objectid = device->devid;
+ key.offset = 0;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1);
+ if (ret < 0)
+ goto err;
+
+ BUG_ON(ret > 0); /* the item must exist */
+ ret = btrfs_del_item(trans, device->dev_root, &path);
+ if (ret)
+ goto err;
+ btrfs_release_path(&path);
+
+ /* delete chunk item created by make_btrfs */
+ key.objectid = chunk_objectid;
+ key.offset = 0;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+ ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1);
+ if (ret < 0)
+ goto err;
+
+ BUG_ON(ret > 0); /* the item must exist */
+ ret = btrfs_del_item(trans, chunk_root, &path);
+ if (ret)
+ goto err;
+ btrfs_release_path(&path);
+
+ /* for each block group, create device extent and chunk item */
+ cur_start = 0;
+ while (cur_start < total_bytes) {
+ cache = btrfs_lookup_block_group(root->fs_info, cur_start);
+ BUG_ON(!cache);
+
+ /* insert device extent */
+ key.objectid = device->devid;
+ key.offset = cache->key.objectid; /* device offset == logical start of the block group */
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ ret = btrfs_insert_empty_item(trans, device->dev_root, &path,
+ &key, sizeof(*extent));
+ if (ret)
+ goto err;
+
+ leaf = path.nodes[0];
+ extent = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_dev_extent);
+
+ btrfs_set_dev_extent_chunk_tree(leaf, extent,
+ chunk_root->root_key.objectid);
+ btrfs_set_dev_extent_chunk_objectid(leaf, extent,
+ chunk_objectid);
+ btrfs_set_dev_extent_chunk_offset(leaf, extent,
+ cache->key.objectid);
+ btrfs_set_dev_extent_length(leaf, extent, cache->key.offset);
+ write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+ (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
+ BTRFS_UUID_SIZE);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(&path);
+
+ /* insert chunk item */
+ btrfs_set_stack_chunk_length(&chunk, cache->key.offset);
+ btrfs_set_stack_chunk_owner(&chunk,
+ extent_root->root_key.objectid);
+ btrfs_set_stack_chunk_stripe_len(&chunk, BTRFS_STRIPE_LEN);
+ btrfs_set_stack_chunk_type(&chunk, cache->flags);
+ btrfs_set_stack_chunk_io_align(&chunk, device->io_align);
+ btrfs_set_stack_chunk_io_width(&chunk, device->io_width);
+ btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size);
+ btrfs_set_stack_chunk_num_stripes(&chunk, 1);
+ btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
+ btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid);
+ btrfs_set_stack_stripe_offset(&chunk.stripe,
+ cache->key.objectid);
+ memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE);
+
+ key.objectid = chunk_objectid;
+ key.offset = cache->key.objectid;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+
+ ret = btrfs_insert_item(trans, chunk_root, &key, &chunk,
+ btrfs_chunk_item_size(1));
+ if (ret)
+ goto err;
+
+ cur_start = cache->key.objectid + cache->key.offset; /* continue behind this block group */
+ }
+
+ device->bytes_used = total_bytes;
+ ret = btrfs_update_device(trans, device);
+err:
+ btrfs_release_path(&path);
+ return ret;
+}
+
+/*
+ * Create the subvolume @root_objectid as a copy of @root.
+ *
+ * The root node of @root is copied with btrfs_copy_root().  A root item
+ * based on @root's root item, but pointing at the copy and carrying the
+ * current transaction id as generation, is inserted into the tree root.
+ * The new root is then read and its top-level directory is created as
+ * BTRFS_FIRST_FREE_OBJECTID.
+ *
+ * Every failure triggers BUG_ON(); the function returns 0 otherwise.
+ */
+static int create_subvol(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root, u64 root_objectid)
+{
+	struct extent_buffer *tmp;
+	struct btrfs_root *new_root;
+	struct btrfs_key key;
+	struct btrfs_root_item root_item;
+	int ret;
+
+	ret = btrfs_copy_root(trans, root, root->node, &tmp,
+			      root_objectid);
+	BUG_ON(ret);
+
+	memcpy(&root_item, &root->root_item, sizeof(root_item));
+	btrfs_set_root_bytenr(&root_item, tmp->start);
+	btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
+	btrfs_set_root_generation(&root_item, trans->transid);
+	free_extent_buffer(tmp);
+
+	key.objectid = root_objectid;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = trans->transid;
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
+				&key, &root_item);
+	/* checked like every other step here; it used to be ignored */
+	BUG_ON(ret);
+
+	/* offset (u64)-1: look the root item up regardless of its offset */
+	key.offset = (u64)-1;
+	new_root = btrfs_read_fs_root(root->fs_info, &key);
+	BUG_ON(!new_root || IS_ERR(new_root));
+
+	ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
+	BUG_ON(ret);
+
+	return 0;
+}
+/*
+ * Set up the btrfs metadata needed for the conversion in one transaction:
+ * create the block groups, fix the block accounting, build the chunk
+ * mapping, create the root tree directory with a "default" entry that
+ * points at @root, set the fs root's dirid to BTRFS_FIRST_FREE_OBJECTID,
+ * create the image and the data relocation subvolumes, COW the csum root
+ * node and commit.
+ *
+ * Returns 0 on success or the non-zero value of the failing helper; in
+ * that case the transaction is not committed.
+ */
+static int init_btrfs(struct btrfs_root *root)
+{
+ int ret;
+ struct btrfs_key location;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_buffer *tmp;
+
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+ ret = btrfs_make_block_groups(trans, root);
+ if (ret)
+ goto err;
+ ret = btrfs_fix_block_accounting(trans, root);
+ if (ret)
+ goto err;
+ ret = create_chunk_mapping(trans, root);
+ if (ret)
+ goto err;
+ ret = btrfs_make_root_dir(trans, fs_info->tree_root,
+ BTRFS_ROOT_TREE_DIR_OBJECTID);
+ if (ret)
+ goto err;
+ memcpy(&location, &root->root_key, sizeof(location)); /* "default" points at @root */
+ location.offset = (u64)-1;
+ ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
+ btrfs_super_root_dir(fs_info->super_copy),
+ &location, BTRFS_FT_DIR, 0); /* 7 == strlen("default") */
+ if (ret)
+ goto err;
+ ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
+ location.objectid,
+ btrfs_super_root_dir(fs_info->super_copy), 0);
+ if (ret)
+ goto err;
+ btrfs_set_root_dirid(&fs_info->fs_root->root_item,
+ BTRFS_FIRST_FREE_OBJECTID);
+
+ /* subvol for fs image file */
+ ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
+ BUG_ON(ret);
+ /* subvol for data relocation */
+ ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
+ BUG_ON(ret);
+
+ extent_buffer_get(fs_info->csum_root->node); /* extra reference for the COW call below */
+ ret = __btrfs_cow_block(trans, fs_info->csum_root,
+ fs_info->csum_root->node, NULL, 0, &tmp, 0, 0);
+ BUG_ON(ret);
+ free_extent_buffer(tmp);
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+err:
+ return ret;
+}
+
+/*
+ * Migrate the super block from @old_bytenr to its default position
+ * (BTRFS_SUPER_INFO_OFFSET) and zero the device range in front of it,
+ * i.e. [0, BTRFS_SUPER_INFO_OFFSET).
+ *
+ * The super block is read from @old_bytenr, its recorded bytenr is
+ * updated, the checksum is recomputed and the block is written to the
+ * default position and synced.  Zero filling happens afterwards and is
+ * best effort: a failure is reported on stderr but does not change the
+ * return value, because the super block has been migrated by then.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated and -1
+ * on an I/O error or short transfer (including one of 0 bytes) while
+ * moving the super block.
+ */
+static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize)
+{
+	int ret = -1;
+	ssize_t done;
+	struct extent_buffer *buf;
+	struct btrfs_super_block *super;
+	u32 len;
+	u32 bytenr;
+
+	BUG_ON(sectorsize < sizeof(*super));
+	buf = malloc(sizeof(*buf) + sectorsize);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->len = sectorsize;
+	/* a transfer of 0 bytes is a failure as well, not "no error" */
+	done = pread(fd, buf->data, sectorsize, old_bytenr);
+	if (done != (ssize_t)sectorsize)
+		goto fail;
+
+	super = (struct btrfs_super_block *)buf->data;
+	BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
+	btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
+
+	/* the bytenr is covered by the checksum, so recompute it */
+	csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+	done = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET);
+	if (done != (ssize_t)sectorsize)
+		goto fail;
+
+	ret = fsync(fd);
+	if (ret)
+		goto fail;
+
+	memset(buf->data, 0, sectorsize);
+	for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
+		len = BTRFS_SUPER_INFO_OFFSET - bytenr;
+		if (len > sectorsize)
+			len = sectorsize;
+		done = pwrite(fd, buf->data, len, bytenr);
+		if (done != (ssize_t)len) {
+			fprintf(stderr, "unable to zero fill device\n");
+			break;
+		}
+		bytenr += len;
+	}
+	ret = 0;
+	fsync(fd);
+fail:
+	free(buf);
+	return ret;
+}
+
+/*
+ * Fill the system chunk array of @super with exactly one entry: a disk
+ * key followed by a single-stripe SYSTEM chunk that starts at offset 0,
+ * spans total_bytes of the super block and lives on the device described
+ * by the super block's dev_item.
+ *
+ * Always returns 0.
+ */
+static int prepare_system_chunk_sb(struct btrfs_super_block *super)
+{
+	u32 sectorsize = btrfs_super_sectorsize(super);
+	struct btrfs_disk_key *disk_key;
+	struct btrfs_chunk *sys_chunk;
+
+	/* array layout: the key comes first, the chunk follows directly */
+	disk_key = (struct btrfs_disk_key *)(super->sys_chunk_array);
+	sys_chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
+					   sizeof(struct btrfs_disk_key));
+
+	/* key: (FIRST_CHUNK_TREE, CHUNK_ITEM, 0) */
+	btrfs_set_disk_key_offset(disk_key, 0);
+	btrfs_set_disk_key_type(disk_key, BTRFS_CHUNK_ITEM_KEY);
+	btrfs_set_disk_key_objectid(disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+
+	/* chunk geometry */
+	btrfs_set_stack_chunk_type(sys_chunk, BTRFS_BLOCK_GROUP_SYSTEM);
+	btrfs_set_stack_chunk_owner(sys_chunk, BTRFS_EXTENT_TREE_OBJECTID);
+	btrfs_set_stack_chunk_length(sys_chunk, btrfs_super_total_bytes(super));
+	btrfs_set_stack_chunk_stripe_len(sys_chunk, BTRFS_STRIPE_LEN);
+	btrfs_set_stack_chunk_num_stripes(sys_chunk, 1);
+	btrfs_set_stack_chunk_sub_stripes(sys_chunk, 0);
+
+	/* I/O parameters all follow the sector size */
+	btrfs_set_stack_chunk_sector_size(sys_chunk, sectorsize);
+	btrfs_set_stack_chunk_io_width(sys_chunk, sectorsize);
+	btrfs_set_stack_chunk_io_align(sys_chunk, sectorsize);
+
+	/* the only stripe: same device as the super block, offset 0 */
+	memcpy(sys_chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
+	btrfs_set_stack_stripe_offset(&sys_chunk->stripe, 0);
+	sys_chunk->stripe.devid = super->dev_item.devid;
+
+	btrfs_set_super_sys_array_size(super,
+				       sizeof(*disk_key) + sizeof(*sys_chunk));
+	return 0;
+}
+
+/*
+ * Rewrite the super block stored at @sb_bytenr so that its system chunk
+ * array is the one set up by prepare_system_chunk_sb().  The block is
+ * read, patched, checksummed again and written back in place.
+ *
+ * The super block must record @sb_bytenr as its own position and exactly
+ * one device, otherwise BUG_ON() triggers.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated and -1
+ * on an I/O error or short transfer (including one of 0 bytes).  A
+ * failure of prepare_system_chunk_sb() is passed on, with positive values
+ * turned into -1.
+ */
+static int prepare_system_chunk(int fd, u64 sb_bytenr)
+{
+	int ret;
+	ssize_t done;
+	struct extent_buffer *buf;
+	struct btrfs_super_block *super;
+
+	BUG_ON(BTRFS_SUPER_INFO_SIZE < sizeof(*super));
+	buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->len = BTRFS_SUPER_INFO_SIZE;
+	/* a transfer of 0 bytes is a failure as well, not "no error" */
+	done = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
+	if (done != BTRFS_SUPER_INFO_SIZE) {
+		ret = -1;
+		goto fail;
+	}
+
+	super = (struct btrfs_super_block *)buf->data;
+	BUG_ON(btrfs_super_bytenr(super) != sb_bytenr);
+	BUG_ON(btrfs_super_num_devices(super) != 1);
+
+	ret = prepare_system_chunk_sb(super);
+	if (ret)
+		goto fail;
+
+	/* the chunk array is covered by the checksum, so recompute it */
+	csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+	done = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
+	if (done != BTRFS_SUPER_INFO_SIZE) {
+		ret = -1;
+		goto fail;
+	}
+
+	ret = 0;
+fail:
+	free(buf);
+	if (ret > 0)
+		ret = -1;
+	return ret;
+}
+/*
+ * Relocate the data behind one file extent item.
+ *
+ * @extent_key identifies the file extent item in @root.  It has to
+ * describe the disk extent of @extent_size bytes at @extent_start;
+ * otherwise 1 is returned and nothing is changed.  The item is deleted
+ * and its extent reference freed, then the data is moved one sector at a
+ * time: sectors already marked in @reloc_tree reuse the new position
+ * stored there, all others are copied into a newly allocated sector that
+ * is then recorded in @reloc_tree.  The new file extents are built with
+ * block_iterate_proc() / record_file_blocks() and the updated inode item
+ * is written back at the end.
+ *
+ * Returns 0 on success, 1 on a mismatch, or the non-zero value of the
+ * failing helper (a non-zero btrfs_search_slot() result is passed on
+ * unchanged).
+ */
+static int relocate_one_reference(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 extent_start, u64 extent_size,
+ struct btrfs_key *extent_key,
+ struct extent_io_tree *reloc_tree)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ struct btrfs_inode_item inode;
+ struct blk_iterate_data data;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 cur_offset;
+ u64 new_pos;
+ u64 nbytes;
+ u64 sector_end;
+ u32 sectorsize = root->sectorsize;
+ unsigned long ptr;
+ int datacsum;
+ int fd;
+ int ret;
+
+ btrfs_init_path(&path);
+ ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1);
+ if (ret)
+ goto fail;
+
+ leaf = path.nodes[0];
+ fi = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_file_extent_item);
+ BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0);
+ if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) ||
+ extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
+ ret = 1; /* positive: relocate_extents_range() only treats ret < 0 as fatal */
+ goto fail;
+ }
+
+ bytenr = extent_start + btrfs_file_extent_offset(leaf, fi);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+
+ ret = btrfs_del_item(trans, root, &path);
+ if (ret)
+ goto fail;
+
+ ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0,
+ root->root_key.objectid,
+ extent_key->objectid, extent_key->offset);
+ if (ret)
+ goto fail;
+
+ btrfs_release_path(&path);
+
+ key.objectid = extent_key->objectid; /* the inode that owns the file extent */
+ key.offset = 0;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ ret = btrfs_lookup_inode(trans, root, &path, &key, 0);
+ if (ret)
+ goto fail;
+
+ leaf = path.nodes[0];
+ ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
+ read_extent_buffer(leaf, &inode, ptr, sizeof(inode)); /* work on a stack copy, written back below */
+ btrfs_release_path(&path);
+
+ BUG_ON(num_bytes & (sectorsize - 1)); /* length must be a multiple of the sector size */
+ nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes; /* the deleted extent no longer counts */
+ btrfs_set_stack_inode_nbytes(&inode, nbytes);
+ datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM);
+
+ init_blk_iterate_data(&data, trans, root, &inode, extent_key->objectid,
+ datacsum);
+ data.first_block = extent_key->offset;
+
+ cur_offset = extent_key->offset;
+ while (num_bytes > 0) { /* one sector per iteration */
+ sector_end = bytenr + sectorsize - 1;
+ if (test_range_bit(reloc_tree, bytenr, sector_end,
+ EXTENT_LOCKED, 1)) { /* sector was relocated before: reuse its new position */
+ ret = get_state_private(reloc_tree, bytenr, &new_pos);
+ BUG_ON(ret);
+ } else {
+ ret = custom_alloc_extent(root, sectorsize, 0, &key, 0);
+ if (ret)
+ goto fail;
+ new_pos = key.objectid;
+
+ if (cur_offset == extent_key->offset) { /* first sector of the extent only */
+ fd = root->fs_info->fs_devices->latest_bdev;
+ readahead(fd, bytenr, num_bytes);
+ }
+ ret = copy_disk_extent(root, new_pos, bytenr,
+ sectorsize);
+ if (ret)
+ goto fail;
+ ret = set_extent_bits(reloc_tree, bytenr, sector_end,
+ EXTENT_LOCKED, GFP_NOFS); /* mark the sector as relocated ... */
+ BUG_ON(ret);
+ ret = set_state_private(reloc_tree, bytenr, new_pos); /* ... and remember where it went */
+ BUG_ON(ret);
+ }
+
+ ret = block_iterate_proc(new_pos / sectorsize,
+ cur_offset / sectorsize, &data);
+ if (ret < 0)
+ goto fail;
+
+ cur_offset += sectorsize;
+ bytenr += sectorsize;
+ num_bytes -= sectorsize;
+ }
+
+ if (data.num_blocks > 0) { /* flush the run that is still pending */
+ ret = record_file_blocks(&data, data.first_block,
+ data.disk_block, data.num_blocks);
+ if (ret)
+ goto fail;
+ }
+
+ key.objectid = extent_key->objectid;
+ key.offset = 0;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
+ if (ret)
+ goto fail;
+
+ leaf = path.nodes[0];
+ ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
+ write_extent_buffer(leaf, &inode, ptr, sizeof(inode)); /* store the updated inode item */
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(&path);
+
+fail:
+ btrfs_release_path(&path);
+ return ret;
+}
+/*
+ * Relocate all data extents located in [@start_byte, @end_byte).
+ *
+ * If the extent item in front of @start_byte reaches into the range, the
+ * start is moved back to the beginning of that extent.  The extent tree
+ * is then scanned in passes that alternate between @image_root (even
+ * passes) and @fs_root (odd passes).  In each pass every extent in the
+ * range that has an inline data ref owned by the current root is handed
+ * to relocate_one_reference().  Passes are repeated as long as extents
+ * are still found in the range, up to a fixed limit.
+ *
+ * Returns 0 once a pass finds no extent in the range, -1 if extents are
+ * still present after the last pass, or a negative helper error.
+ */
+static int relocate_extents_range(struct btrfs_root *fs_root,
+ struct btrfs_root *image_root,
+ u64 start_byte, u64 end_byte)
+{
+ struct btrfs_fs_info *info = fs_root->fs_info;
+ struct btrfs_root *extent_root = info->extent_root;
+ struct btrfs_root *cur_root = NULL;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_extent_data_ref *dref;
+ struct btrfs_extent_inline_ref *iref;
+ struct btrfs_extent_item *ei;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ struct btrfs_key extent_key;
+ struct btrfs_path path;
+ struct extent_io_tree reloc_tree;
+ unsigned long ptr;
+ unsigned long end;
+ u64 cur_byte;
+ u64 num_bytes;
+ u64 ref_root;
+ u64 num_extents;
+ int pass = 0;
+ int ret;
+
+ btrfs_init_path(&path);
+ extent_io_tree_init(&reloc_tree);
+
+ key.objectid = start_byte;
+ key.offset = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+ if (ret < 0)
+ goto fail;
+ if (ret > 0) {
+ ret = btrfs_previous_item(extent_root, &path, 0,
+ BTRFS_EXTENT_ITEM_KEY);
+ if (ret < 0)
+ goto fail;
+ if (ret == 0) {
+ leaf = path.nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.objectid + key.offset > start_byte) /* previous extent reaches into the range */
+ start_byte = key.objectid;
+ }
+ }
+ btrfs_release_path(&path);
+again:
+ cur_root = (pass % 2 == 0) ? image_root : fs_root; /* alternate between the two roots */
+ num_extents = 0;
+
+ trans = btrfs_start_transaction(cur_root, 1);
+ BUG_ON(!trans);
+
+ cur_byte = start_byte;
+ while (1) {
+ key.objectid = cur_byte;
+ key.offset = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ ret = btrfs_search_slot(trans, extent_root,
+ &key, &path, 0, 0);
+ if (ret < 0)
+ goto fail;
+next:
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(extent_root, &path);
+ if (ret < 0)
+ goto fail;
+ if (ret > 0) /* end of the extent tree */
+ break;
+ leaf = path.nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.objectid < cur_byte ||
+ key.type != BTRFS_EXTENT_ITEM_KEY) { /* in front of the cursor, or not an extent item */
+ path.slots[0]++;
+ goto next;
+ }
+ if (key.objectid >= end_byte) /* past the range: this pass is done */
+ break;
+
+ num_extents++; /* counts every extent still inside the range */
+
+ cur_byte = key.objectid;
+ num_bytes = key.offset;
+ ei = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_extent_item);
+ BUG_ON(!(btrfs_extent_flags(leaf, ei) &
+ BTRFS_EXTENT_FLAG_DATA)); /* only data extents are expected in the range */
+
+ ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
+ end = ptr + btrfs_item_size_nr(leaf, path.slots[0]);
+
+ ptr += sizeof(struct btrfs_extent_item); /* inline refs follow the extent item */
+
+ while (ptr < end) { /* look for a data ref owned by cur_root */
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ key.type = btrfs_extent_inline_ref_type(leaf, iref);
+ BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ ref_root = btrfs_extent_data_ref_root(leaf, dref);
+ extent_key.objectid =
+ btrfs_extent_data_ref_objectid(leaf, dref);
+ extent_key.offset =
+ btrfs_extent_data_ref_offset(leaf, dref);
+ extent_key.type = BTRFS_EXTENT_DATA_KEY;
+ BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1);
+
+ if (ref_root == cur_root->root_key.objectid)
+ break;
+
+ ptr += btrfs_extent_inline_ref_size(key.type);
+ }
+
+ if (ptr >= end) { /* no ref from cur_root: leave it for the other root's pass */
+ path.slots[0]++;
+ goto next;
+ }
+
+ ret = relocate_one_reference(trans, cur_root, cur_byte,
+ num_bytes, &extent_key,
+ &reloc_tree);
+ if (ret < 0) /* a positive result is not treated as an error here */
+ goto fail;
+
+ cur_byte += num_bytes;
+ btrfs_release_path(&path);
+
+ if (trans->blocks_used >= 4096) { /* keep transactions bounded in size */
+ ret = btrfs_commit_transaction(trans, cur_root);
+ BUG_ON(ret);
+ trans = btrfs_start_transaction(cur_root, 1);
+ BUG_ON(!trans);
+ }
+ }
+ btrfs_release_path(&path);
+
+ ret = btrfs_commit_transaction(trans, cur_root);
+ BUG_ON(ret);
+
+ if (num_extents > 0 && pass++ < 16) /* extents left: run another pass, 17 passes at most */
+ goto again;
+
+ ret = (num_extents > 0) ? -1 : 0;
+fail:
+ btrfs_release_path(&path);
+ extent_io_tree_cleanup(&reloc_tree);
+ return ret;
+}
+
+/*
+ * Move data out of the places that have to stay free of it: every block
+ * group flagged BTRFS_BLOCK_GROUP_SYSTEM, and the BTRFS_STRIPE_LEN sized
+ * and aligned window around each of the BTRFS_SUPER_MIRROR_MAX super
+ * block positions.
+ *
+ * Returns 0 on success or the first non-zero value returned by
+ * relocate_extents_range().
+ */
+static int cleanup_sys_chunk(struct btrfs_root *fs_root,
+			     struct btrfs_root *image_root)
+{
+	struct btrfs_block_group_cache *bg;
+	u64 pos = 0;
+	u64 bg_end;
+	u64 sb_start;
+	int mirror;
+	int ret;
+
+	/* pass 1: walk the block groups in order, handle the system ones */
+	for (bg = btrfs_lookup_block_group(fs_root->fs_info, pos); bg;
+	     bg = btrfs_lookup_block_group(fs_root->fs_info, pos)) {
+		bg_end = bg->key.objectid + bg->key.offset;
+		pos = bg_end;
+		if (!(bg->flags & BTRFS_BLOCK_GROUP_SYSTEM))
+			continue;
+		ret = relocate_extents_range(fs_root, image_root,
+					     bg->key.objectid, bg_end);
+		if (ret)
+			return ret;
+	}
+
+	/* pass 2: the stripe around each super block copy */
+	for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+		sb_start = btrfs_sb_offset(mirror);
+		sb_start &= ~((u64)BTRFS_STRIPE_LEN - 1);
+
+		ret = relocate_extents_range(fs_root, image_root,
+					     sb_start,
+					     sb_start + BTRFS_STRIPE_LEN);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+/*
+ * Re-COW the chunk tree and rebuild the system chunk array of the super
+ * block.
+ *
+ * First the chunk tree is searched leaf by leaf with COW enabled (the
+ * comment below states the intent).  Then the sys array size in the
+ * super copy is reset to 0 and every chunk item whose type contains
+ * BTRFS_BLOCK_GROUP_SYSTEM is added again with btrfs_add_system_chunk().
+ * The transaction is committed at the end.
+ *
+ * Returns 0 on success or the non-zero value of the failing helper.
+ */
+static int fixup_chunk_mapping(struct btrfs_root *root)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_root *chunk_root = info->chunk_root;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ struct btrfs_chunk chunk;
+ unsigned long ptr;
+ u32 size;
+ u64 type;
+ int ret;
+
+ btrfs_init_path(&path);
+
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+
+ /*
+ * recow the whole chunk tree. this will move all chunk tree blocks
+ * into system block group.
+ */
+ memset(&key, 0, sizeof(key)); /* start at the smallest possible key */
+ while (1) {
+ ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1); /* last argument 1: COW along the path */
+ if (ret < 0)
+ goto err;
+
+ ret = btrfs_next_leaf(chunk_root, &path);
+ if (ret < 0)
+ goto err;
+ if (ret > 0) /* that was the last leaf */
+ break;
+
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); /* continue from the next leaf */
+ btrfs_release_path(&path);
+ }
+ btrfs_release_path(&path);
+
+ /* fixup the system chunk array in super block */
+ btrfs_set_super_sys_array_size(info->super_copy, 0);
+
+ key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+ key.offset = 0;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+
+ ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0);
+ if (ret < 0)
+ goto err;
+ BUG_ON(ret != 0); /* the chunk at offset 0 must exist */
+ while(1) {
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(chunk_root, &path);
+ if (ret < 0)
+ goto err;
+ if (ret > 0)
+ break;
+ leaf = path.nodes[0];
+ }
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.type != BTRFS_CHUNK_ITEM_KEY)
+ goto next;
+
+ ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
+ size = btrfs_item_size_nr(leaf, path.slots[0]);
+ BUG_ON(size != sizeof(chunk)); /* presumably single-stripe chunk items only */
+ read_extent_buffer(leaf, &chunk, ptr, size);
+ type = btrfs_stack_chunk_type(&chunk);
+
+ if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) /* only system chunks go into the super block */
+ goto next;
+
+ ret = btrfs_add_system_chunk(trans, chunk_root, &key,
+ &chunk, size);
+ if (ret)
+ goto err;
+next:
+ path.slots[0]++;
+ }
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+err:
+ btrfs_release_path(&path);
+ return ret;
+}
+
+/*
+ * Callback table for converting from the "ext2" source filesystem.
+ * convert_open_fs() stores a pointer to this table in the convert context
+ * once its open_fs callback returns 0.
+ */
+static const struct btrfs_convert_operations ext2_convert_ops = {
+ .name = "ext2",
+ .open_fs = ext2_open_fs,
+ .alloc_block = ext2_alloc_block,
+ .alloc_block_range = ext2_alloc_block_range,
+ .copy_inodes = ext2_copy_inodes,
+ .test_block = ext2_test_block,
+ .free_block = ext2_free_block,
+ .free_block_range = ext2_free_block_range,
+ .close_fs = ext2_close_fs,
+};
+
+/*
+ * All known source-filesystem backends; convert_open_fs() tries them in
+ * array order and uses the first one whose open_fs callback succeeds.
+ */
+static const struct btrfs_convert_operations *convert_operations[] = {
+ &ext2_convert_ops,
+};
+
+/*
+ * Zero @cctx, then probe @devname with each entry of convert_operations[]
+ * in order. The first backend whose open_fs callback returns 0 is recorded
+ * in cctx->convert_ops.
+ *
+ * Returns 0 when a backend accepted the device, -1 (after printing a
+ * message to stderr) when none did.
+ */
+static int convert_open_fs(const char *devname,
+			   struct btrfs_convert_context *cctx)
+{
+	const struct btrfs_convert_operations *ops;
+	int idx = 0;
+
+	memset(cctx, 0, sizeof(*cctx));
+
+	while (idx < ARRAY_SIZE(convert_operations)) {
+		ops = convert_operations[idx++];
+		if (ops->open_fs(cctx, devname) != 0)
+			continue;
+
+		cctx->convert_ops = ops;
+		return 0;
+	}
+
+	fprintf(stderr, "No file system found to convert.\n");
+	return -1;
+}
+
+/*
+ * Convert the filesystem on @devname to btrfs in place.
+ *
+ * @datacsum, @packing, @noxattr are forwarded to copy_inodes() (and
+ * @datacsum to create_image()).
+ * @nodesize:  btrfs metadata node size, validated against the source
+ *             block size and @features by btrfs_check_nodesize().
+ * @copylabel: 1 copies the source volume name into the btrfs label,
+ *             -1 uses @fslabel, any other value leaves the label empty.
+ * @progress:  non-zero starts the inode-copy progress task.
+ * @features:  feature bits handed to make_btrfs().
+ *
+ * The btrfs superblock is first written at a temporary location
+ * (blocks[0]); only migrate_super_block() near the end makes the device
+ * look like btrfs, which is what is_btrfs tracks for the failure message.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int do_convert(const char *devname, int datacsum, int packing, int noxattr,
+		u32 nodesize, int copylabel, const char *fslabel, int progress,
+		u64 features)
+{
+	int i, ret, blocks_per_node;
+	int fd = -1;
+	int is_btrfs = 0;
+	u32 blocksize;
+	u64 blocks[7];
+	u64 total_bytes;
+	u64 super_bytenr;
+	struct btrfs_root *root;
+	struct btrfs_root *image_root;
+	struct btrfs_convert_context cctx;
+	char *subvol_name = NULL;
+	struct task_ctx ctx;
+	char features_buf[64];
+	struct btrfs_mkfs_config mkfs_cfg;
+
+	init_convert_context(&cctx);
+	ret = convert_open_fs(devname, &cctx);
+	if (ret)
+		goto fail;
+
+	blocksize = cctx.blocksize;
+	total_bytes = (u64)blocksize * (u64)cctx.block_count;
+	if (blocksize < 4096) {
+		fprintf(stderr, "block size is too small\n");
+		goto fail;
+	}
+	if (btrfs_check_nodesize(nodesize, blocksize, features))
+		goto fail;
+	blocks_per_node = nodesize / blocksize;
+	/*
+	 * ret doubles as the allocation goal for the range allocator below:
+	 * starting at -blocks_per_node makes the first goal 0.
+	 */
+	ret = -blocks_per_node;
+	for (i = 0; i < 7; i++) {
+		if (nodesize == blocksize)
+			ret = convert_alloc_block(&cctx, 0, blocks + i);
+		else
+			ret = convert_alloc_block_range(&cctx,
+					ret + blocks_per_node, blocks_per_node,
+					blocks + i);
+		if (ret) {
+			fprintf(stderr, "not enough free space\n");
+			goto fail;
+		}
+		/* block number -> byte offset */
+		blocks[i] *= blocksize;
+	}
+	super_bytenr = blocks[0];
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "unable to open %s\n", devname);
+		goto fail;
+	}
+	btrfs_parse_features_to_string(features_buf, features);
+	if (features == BTRFS_MKFS_DEFAULT_FEATURES)
+		strcat(features_buf, " (default)");
+
+	printf("create btrfs filesystem:\n");
+	printf("\tblocksize: %u\n", blocksize);
+	printf("\tnodesize: %u\n", nodesize);
+	printf("\tfeatures: %s\n", features_buf);
+
+	mkfs_cfg.label = cctx.volume_name;
+	mkfs_cfg.fs_uuid = NULL;
+	memcpy(mkfs_cfg.blocks, blocks, sizeof(blocks));
+	mkfs_cfg.num_bytes = total_bytes;
+	mkfs_cfg.nodesize = nodesize;
+	mkfs_cfg.sectorsize = blocksize;
+	mkfs_cfg.stripesize = blocksize;
+	mkfs_cfg.features = features;
+
+	ret = make_btrfs(fd, &mkfs_cfg);
+	if (ret) {
+		fprintf(stderr, "unable to create initial ctree: %s\n",
+			strerror(-ret));
+		goto fail;
+	}
+	/* create a system chunk that maps the whole device */
+	ret = prepare_system_chunk(fd, super_bytenr);
+	if (ret) {
+		fprintf(stderr, "unable to update system chunk\n");
+		goto fail;
+	}
+	root = open_ctree_fd(fd, devname, super_bytenr, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "unable to open ctree\n");
+		goto fail;
+	}
+	ret = cache_free_extents(root, &cctx);
+	if (ret) {
+		fprintf(stderr, "error during cache_free_extents %d\n", ret);
+		goto fail;
+	}
+	root->fs_info->extent_ops = &extent_ops;
+	/* recover block allocation bitmap */
+	for (i = 0; i < 7; i++) {
+		blocks[i] /= blocksize;
+		if (nodesize == blocksize)
+			convert_free_block(&cctx, blocks[i]);
+		else
+			convert_free_block_range(&cctx, blocks[i],
+					blocks_per_node);
+	}
+	ret = init_btrfs(root);
+	if (ret) {
+		fprintf(stderr, "unable to setup the root tree\n");
+		goto fail;
+	}
+	printf("creating btrfs metadata.\n");
+	ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
+	ctx.cur_copy_inodes = 0;
+
+	if (progress) {
+		ctx.info = task_init(print_copied_inodes, after_copied_inodes, &ctx);
+		task_start(ctx.info);
+	}
+	ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
+	if (ret) {
+		fprintf(stderr, "error during copy_inodes %d\n", ret);
+		goto fail;
+	}
+	if (progress) {
+		task_stop(ctx.info);
+		task_deinit(ctx.info);
+	}
+
+	printf("creating %s image file.\n", cctx.convert_ops->name);
+	ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name);
+	if (ret < 0) {
+		fprintf(stderr, "error allocating subvolume name: %s_saved\n",
+			cctx.convert_ops->name);
+		goto fail;
+	}
+
+	image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
+
+	free(subvol_name);
+
+	if (!image_root) {
+		fprintf(stderr, "unable to create subvol\n");
+		goto fail;
+	}
+	ret = create_image(&cctx, image_root, "image", datacsum);
+	if (ret) {
+		fprintf(stderr, "error during create_image %d\n", ret);
+		goto fail;
+	}
+	/*
+	 * The label buffer is fully zeroed first and at most
+	 * BTRFS_LABEL_SIZE - 1 bytes are copied into it, so the stored label
+	 * is always NUL-terminated even if the source name fills the buffer.
+	 */
+	memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
+	if (copylabel == 1) {
+		strncpy(root->fs_info->super_copy->label,
+				cctx.volume_name, BTRFS_LABEL_SIZE - 1);
+		fprintf(stderr, "copy label '%s'\n",
+				root->fs_info->super_copy->label);
+	} else if (copylabel == -1) {
+		strncpy(root->fs_info->super_copy->label, fslabel,
+				BTRFS_LABEL_SIZE - 1);
+		fprintf(stderr, "set label to '%s'\n", fslabel);
+	}
+
+	printf("cleaning up system chunk.\n");
+	ret = cleanup_sys_chunk(root, image_root);
+	if (ret) {
+		fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret);
+		goto fail;
+	}
+	ret = close_ctree(root);
+	if (ret) {
+		fprintf(stderr, "error during close_ctree %d\n", ret);
+		goto fail;
+	}
+	convert_close_fs(&cctx);
+	clean_convert_context(&cctx);
+
+	/*
+	 * If this step succeed, we get a mountable btrfs. Otherwise
+	 * the source fs is left unchanged.
+	 */
+	ret = migrate_super_block(fd, super_bytenr, blocksize);
+	if (ret) {
+		fprintf(stderr, "unable to migrate super block\n");
+		goto fail;
+	}
+	is_btrfs = 1;
+
+	root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "unable to open ctree\n");
+		goto fail;
+	}
+	/* move chunk tree into system chunk. */
+	ret = fixup_chunk_mapping(root);
+	if (ret) {
+		fprintf(stderr, "error during fixup_chunk_tree\n");
+		goto fail;
+	}
+	/* do not report success if closing the tree fails */
+	ret = close_ctree(root);
+	if (ret) {
+		fprintf(stderr, "error during close_ctree %d\n", ret);
+		goto fail;
+	}
+	close(fd);
+
+	printf("conversion complete.\n");
+	return 0;
+fail:
+	clean_convert_context(&cctx);
+	if (fd != -1)
+		close(fd);
+	if (is_btrfs)
+		fprintf(stderr,
+			"WARNING: an error occured during chunk mapping fixup, filesystem mountable but not finalized\n");
+	else
+		fprintf(stderr, "conversion aborted\n");
+	return -1;
+}
+
+/*
+ * Check whether the filesystem layout still allows a rollback.
+ *
+ * Walks the logical address space from BTRFS_SUPER_INFO_OFFSET up to the
+ * superblock's total_bytes and requires every mapped range to have exactly
+ * one stripe whose physical address equals its logical address. Ranges for
+ * which btrfs_map_block() reports -ENOENT are skipped.
+ *
+ * Returns 0 if rollback is possible, -1 otherwise (including when the
+ * filesystem does not have exactly one device).
+ */
+static int may_rollback(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_multi_bio *multi = NULL;
+	u64 bytenr = BTRFS_SUPER_INFO_OFFSET;
+	u64 length;
+	u64 physical;
+	u64 total_bytes;
+	int num_stripes;
+	int ret;
+
+	if (btrfs_super_num_devices(info->super_copy) != 1)
+		return -1;
+
+	total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
+
+	for (;;) {
+		ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
+				      &length, &multi, 0, NULL);
+		if (ret && ret != -ENOENT)
+			return -1;
+
+		if (ret == -ENOENT) {
+			/* removed block group at the tail */
+			if (length == (u64)-1)
+				return 0;
+			/* removed block group in the middle: just step over it */
+		} else {
+			num_stripes = multi->num_stripes;
+			physical = multi->stripes[0].physical;
+			kfree(multi);
+
+			if (num_stripes != 1 || physical != bytenr)
+				return -1;
+		}
+
+		bytenr += length;
+		if (bytenr >= total_bytes)
+			return 0;
+	}
+}
+
+/*
+ * Roll a converted btrfs on @devname back to the original filesystem.
+ *
+ * Outline, as implemented below:
+ *  1. open the btrfs and verify the layout with may_rollback();
+ *  2. locate the saved image file ("image") in the conversion subvolume;
+ *  3. walk the image's file extents and record, in io_tree, every range
+ *     whose data now lives at a different disk location than its offset;
+ *  4. re-COW the chunk tree with system allocations disabled, check the
+ *     system block groups are (almost) empty, rewrite the system chunk in
+ *     the super copy, commit and close the btrfs;
+ *  5. zero the btrfs superblock mirrors, copy every relocated range back,
+ *     and finally restore the sector at BTRFS_SUPER_INFO_OFFSET.
+ *
+ * Returns 0 on success, -1 on failure.
+ * NOTE(review): the fail path only closes fd and frees buf; io_tree, any
+ * held path, the open ctree and a started transaction are not cleaned up
+ * there.
+ */
+static int do_rollback(const char *devname)
+{
+ int fd = -1;
+ int ret;
+ int i;
+ struct btrfs_root *root;
+ struct btrfs_root *image_root;
+ struct btrfs_root *chunk_root;
+ struct btrfs_dir_item *dir;
+ struct btrfs_inode_item *inode;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_trans_handle *trans;
+ struct extent_buffer *leaf;
+ struct btrfs_block_group_cache *cache1;
+ struct btrfs_block_group_cache *cache2;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ struct extent_io_tree io_tree;
+ char *buf = NULL;
+ char *name;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 root_dir;
+ u64 objectid;
+ u64 offset;
+ u64 start;
+ u64 end;
+ u64 sb_bytenr;
+ u64 first_free;
+ u64 total_bytes;
+ u32 sectorsize;
+
+ extent_io_tree_init(&io_tree);
+
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "unable to open %s\n", devname);
+ goto fail;
+ }
+ root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
+ if (!root) {
+ fprintf(stderr, "unable to open ctree\n");
+ goto fail;
+ }
+ ret = may_rollback(root);
+ if (ret < 0) {
+ fprintf(stderr, "unable to do rollback\n");
+ goto fail;
+ }
+
+ /* one-sector bounce buffer used for all raw copies below */
+ sectorsize = root->sectorsize;
+ buf = malloc(sectorsize);
+ if (!buf) {
+ fprintf(stderr, "unable to allocate memory\n");
+ goto fail;
+ }
+
+ btrfs_init_path(&path);
+
+ /* the image subvolume must still be linked from the fs tree */
+ key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = BTRFS_FS_TREE_OBJECTID;
+ ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
+ 0);
+ btrfs_release_path(&path);
+ if (ret > 0) {
+ fprintf(stderr,
+ "ERROR: unable to convert ext2 image subvolume, is it deleted?\n");
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: unable to open ext2_subvol, id=%llu: %s\n",
+ (unsigned long long)key.objectid, strerror(-ret));
+ goto fail;
+ }
+
+ key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ image_root = btrfs_read_fs_root(root->fs_info, &key);
+ if (!image_root || IS_ERR(image_root)) {
+ fprintf(stderr, "unable to open subvol %llu\n",
+ (unsigned long long)key.objectid);
+ goto fail;
+ }
+
+ name = "image";
+ /* NOTE(review): dirid is read from root, not image_root - confirm intended */
+ root_dir = btrfs_root_dirid(&root->root_item);
+ dir = btrfs_lookup_dir_item(NULL, image_root, &path,
+ root_dir, name, strlen(name), 0);
+ if (!dir || IS_ERR(dir)) {
+ fprintf(stderr, "unable to find file %s\n", name);
+ goto fail;
+ }
+ leaf = path.nodes[0];
+ btrfs_dir_item_key_to_cpu(leaf, dir, &key);
+ btrfs_release_path(&path);
+
+ /* inode number of the image file */
+ objectid = key.objectid;
+
+ ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
+ if (ret) {
+ fprintf(stderr, "unable to find inode item\n");
+ goto fail;
+ }
+ leaf = path.nodes[0];
+ inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
+ /* from here on total_bytes is the size of the image file */
+ total_bytes = btrfs_inode_size(leaf, inode);
+ btrfs_release_path(&path);
+
+ key.objectid = objectid;
+ key.offset = 0;
+ btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+ ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
+ if (ret != 0) {
+ fprintf(stderr, "unable to find first file extent\n");
+ btrfs_release_path(&path);
+ goto fail;
+ }
+
+ /* build mapping tree for the relocated blocks */
+ for (offset = 0; offset < total_bytes; ) {
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ /* NOTE(review): passes root although path was searched in image_root */
+ ret = btrfs_next_leaf(root, &path);
+ if (ret != 0)
+ break;
+ continue;
+ }
+
+ /* extents must be contiguous: each key.offset equals the running offset */
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.objectid != objectid || key.offset != offset ||
+ btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+ break;
+
+ fi = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+ break;
+ if (btrfs_file_extent_compression(leaf, fi) ||
+ btrfs_file_extent_encryption(leaf, fi) ||
+ btrfs_file_extent_other_encoding(leaf, fi))
+ break;
+
+ bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ /* skip holes and direct mapped extents */
+ if (bytenr == 0 || bytenr == offset)
+ goto next_extent;
+
+ bytenr += btrfs_file_extent_offset(leaf, fi);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+
+ /*
+ * a relocated range is only accepted if it lies inside a single
+ * block group that is a system group or overlaps a superblock
+ */
+ cache1 = btrfs_lookup_block_group(root->fs_info, offset);
+ cache2 = btrfs_lookup_block_group(root->fs_info,
+ offset + num_bytes - 1);
+ if (!cache1 || cache1 != cache2 ||
+ (!(cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ !intersect_with_sb(offset, num_bytes)))
+ break;
+
+ /* remember: original range [offset, +num_bytes) now lives at bytenr */
+ set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
+ EXTENT_LOCKED, GFP_NOFS);
+ set_state_private(&io_tree, offset, bytenr);
+next_extent:
+ offset += btrfs_file_extent_num_bytes(leaf, fi);
+ path.slots[0]++;
+ }
+ btrfs_release_path(&path);
+
+ /* any early break above leaves offset short of the image size */
+ if (offset < total_bytes) {
+ fprintf(stderr, "unable to build extent mapping\n");
+ goto fail;
+ }
+
+ /* first sector-aligned offset at or after BTRFS_SUPER_INFO_OFFSET + sectorsize */
+ first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
+ first_free &= ~((u64)sectorsize - 1);
+ /* backup for extent #0 should exist */
+ if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
+ fprintf(stderr, "no backup for the first extent\n");
+ goto fail;
+ }
+ /* force no allocation from system block group */
+ root->fs_info->system_allocs = -1;
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+ /*
+ * recow the whole chunk tree, this will remove all chunk tree blocks
+ * from system block group
+ */
+ chunk_root = root->fs_info->chunk_root;
+ memset(&key, 0, sizeof(key));
+ while (1) {
+ ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
+ if (ret < 0)
+ break;
+
+ ret = btrfs_next_leaf(chunk_root, &path);
+ if (ret)
+ break;
+
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ btrfs_release_path(&path);
+ }
+ btrfs_release_path(&path);
+
+ /* sum the used bytes of every system block group */
+ offset = 0;
+ num_bytes = 0;
+ while(1) {
+ cache1 = btrfs_lookup_block_group(root->fs_info, offset);
+ if (!cache1)
+ break;
+
+ if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
+ num_bytes += btrfs_block_group_used(&cache1->item);
+
+ offset = cache1->key.objectid + cache1->key.offset;
+ }
+ /* only extent #0 left in system block group? */
+ if (num_bytes > first_free) {
+ fprintf(stderr, "unable to empty system block group\n");
+ goto fail;
+ }
+ /* create a system chunk that maps the whole device */
+ ret = prepare_system_chunk_sb(root->fs_info->super_copy);
+ if (ret) {
+ fprintf(stderr, "unable to update system chunk\n");
+ goto fail;
+ }
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+
+ ret = close_ctree(root);
+ if (ret) {
+ fprintf(stderr, "error during close_ctree %d\n", ret);
+ goto fail;
+ }
+
+ /* zero btrfs super block mirrors */
+ memset(buf, 0, sectorsize);
+ /* starts at mirror 1; the sector at BTRFS_SUPER_INFO_OFFSET is restored last */
+ for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ if (bytenr >= total_bytes)
+ break;
+ ret = pwrite(fd, buf, sectorsize, bytenr);
+ if (ret != sectorsize) {
+ fprintf(stderr,
+ "error during zeroing supreblock %d: %d\n",
+ i, ret);
+ goto fail;
+ }
+ }
+
+ /* will hold the disk location of the saved copy of the superblock sector */
+ sb_bytenr = (u64)-1;
+ /* copy all relocated blocks back */
+ while(1) {
+ ret = find_first_extent_bit(&io_tree, 0, &start, &end,
+ EXTENT_LOCKED);
+ if (ret)
+ break;
+
+ ret = get_state_private(&io_tree, start, &bytenr);
+ BUG_ON(ret);
+
+ /* clearing the bit makes the next find_first_extent_bit() advance */
+ clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
+ GFP_NOFS);
+
+ while (start <= end) {
+ /* defer the superblock sector so the btrfs super stays valid until the end */
+ if (start == BTRFS_SUPER_INFO_OFFSET) {
+ sb_bytenr = bytenr;
+ goto next_sector;
+ }
+ ret = pread(fd, buf, sectorsize, bytenr);
+ if (ret < 0) {
+ fprintf(stderr, "error during pread %d\n", ret);
+ goto fail;
+ }
+ BUG_ON(ret != sectorsize);
+ ret = pwrite(fd, buf, sectorsize, start);
+ if (ret < 0) {
+ fprintf(stderr, "error during pwrite %d\n", ret);
+ goto fail;
+ }
+ BUG_ON(ret != sectorsize);
+next_sector:
+ start += sectorsize;
+ bytenr += sectorsize;
+ }
+ }
+
+ ret = fsync(fd);
+ if (ret) {
+ fprintf(stderr, "error during fsync %d\n", ret);
+ goto fail;
+ }
+ /*
+ * finally, overwrite btrfs super block.
+ */
+ ret = pread(fd, buf, sectorsize, sb_bytenr);
+ if (ret < 0) {
+ fprintf(stderr, "error during pread %d\n", ret);
+ goto fail;
+ }
+ BUG_ON(ret != sectorsize);
+ ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
+ if (ret < 0) {
+ fprintf(stderr, "error during pwrite %d\n", ret);
+ goto fail;
+ }
+ BUG_ON(ret != sectorsize);
+ ret = fsync(fd);
+ if (ret) {
+ fprintf(stderr, "error during fsync %d\n", ret);
+ goto fail;
+ }
+
+ close(fd);
+ free(buf);
+ extent_io_tree_cleanup(&io_tree);
+ printf("rollback complete.\n");
+ return 0;
+
+fail:
+ if (fd != -1)
+ close(fd);
+ free(buf);
+ fprintf(stderr, "rollback aborted.\n");
+ return -1;
+}
+
+/* Print the btrfs-convert command line summary to stdout. */
+static void print_usage(void)
+{
+	static const char * const usage_lines[] = {
+		"usage: btrfs-convert [options] device\n",
+		"options:\n",
+		"\t-d|--no-datasum disable data checksum, sets NODATASUM\n",
+		"\t-i|--no-xattr ignore xattrs and ACLs\n",
+		"\t-n|--no-inline disable inlining of small files to metadata\n",
+		"\t-N|--nodesize SIZE set filesystem metadata nodesize\n",
+		"\t-r|--rollback roll back to the original filesystem\n",
+		"\t-l|--label LABEL set filesystem label\n",
+		"\t-L|--copy-label use label from converted filesystem\n",
+		"\t-p|--progress show converting progress (default)\n",
+		"\t-O|--features LIST comma separated list of filesystem features\n",
+		"\t--no-progress show only overview, not the detailed progress\n",
+		NULL
+	};
+	const char * const *line;
+
+	for (line = usage_lines; *line; line++)
+		printf("%s", *line);
+}
+
+/*
+ * btrfs-convert entry point: parse options, refuse to touch a mounted
+ * device, then either convert the device to btrfs or roll it back.
+ *
+ * Exit status is 0 on success (or after --help / listing features) and 1
+ * on any usage or runtime error.
+ */
+int main(int argc, char *argv[])
+{
+	int ret;
+	int packing = 1;
+	int noxattr = 0;
+	int datacsum = 1;
+	u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
+			BTRFS_MKFS_DEFAULT_NODE_SIZE);
+	int rollback = 0;
+	int copylabel = 0;
+	int usage_error = 0;
+	int progress = 1;
+	char *file;
+	/* zeroed so do_convert() never receives indeterminate bytes */
+	char fslabel[BTRFS_LABEL_SIZE] = { 0 };
+	u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
+
+	while(1) {
+		enum { GETOPT_VAL_NO_PROGRESS = 256 };
+		static const struct option long_options[] = {
+			{ "no-progress", no_argument, NULL,
+				GETOPT_VAL_NO_PROGRESS },
+			{ "no-datasum", no_argument, NULL, 'd' },
+			{ "no-inline", no_argument, NULL, 'n' },
+			{ "no-xattr", no_argument, NULL, 'i' },
+			{ "rollback", no_argument, NULL, 'r' },
+			{ "features", required_argument, NULL, 'O' },
+			{ "progress", no_argument, NULL, 'p' },
+			{ "label", required_argument, NULL, 'l' },
+			{ "copy-label", no_argument, NULL, 'L' },
+			{ "nodesize", required_argument, NULL, 'N' },
+			{ "help", no_argument, NULL, GETOPT_VAL_HELP},
+			{ NULL, 0, NULL, 0 }
+		};
+		int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
+
+		if (c < 0)
+			break;
+		switch(c) {
+		case 'd':
+			datacsum = 0;
+			break;
+		case 'i':
+			noxattr = 1;
+			break;
+		case 'n':
+			packing = 0;
+			break;
+		case 'N':
+			nodesize = parse_size(optarg);
+			break;
+		case 'r':
+			rollback = 1;
+			break;
+		case 'l':
+			copylabel = -1;
+			if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
+				fprintf(stderr,
+				"WARNING: label too long, trimmed to %d bytes\n",
+					BTRFS_LABEL_SIZE - 1);
+			}
+			/* bounded copy, explicitly terminated */
+			strncpy(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
+			fslabel[BTRFS_LABEL_SIZE - 1] = 0;
+			break;
+		case 'L':
+			copylabel = 1;
+			break;
+		case 'p':
+			progress = 1;
+			break;
+		case 'O': {
+			/* parse a private copy rather than optarg itself */
+			char *orig = strdup(optarg);
+			char *tmp = orig;
+
+			if (!orig) {
+				fprintf(stderr, "ERROR: not enough memory\n");
+				exit(1);
+			}
+
+			tmp = btrfs_parse_fs_features(tmp, &features);
+			if (tmp) {
+				fprintf(stderr,
+					"Unrecognized filesystem feature '%s'\n",
+						tmp);
+				free(orig);
+				exit(1);
+			}
+			free(orig);
+			if (features & BTRFS_FEATURE_LIST_ALL) {
+				btrfs_list_all_fs_features(
+					~BTRFS_CONVERT_ALLOWED_FEATURES);
+				exit(0);
+			}
+			if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
+				char buf[64];
+
+				btrfs_parse_features_to_string(buf,
+					features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
+				fprintf(stderr,
+					"ERROR: features not allowed for convert: %s\n",
+					buf);
+				exit(1);
+			}
+
+			break;
+			}
+		case GETOPT_VAL_NO_PROGRESS:
+			progress = 0;
+			break;
+		case GETOPT_VAL_HELP:
+		default:
+			print_usage();
+			/* --help exits 0, an unknown option exits 1 */
+			return c != GETOPT_VAL_HELP;
+		}
+	}
+	argc = argc - optind;
+	set_argv0(argv);
+	if (check_argc_exact(argc, 1)) {
+		print_usage();
+		return 1;
+	}
+
+	if (rollback && (!datacsum || noxattr || !packing)) {
+		fprintf(stderr,
+			"Usage error: -d, -i, -n options do not apply to rollback\n");
+		usage_error++;
+	}
+
+	if (usage_error) {
+		print_usage();
+		return 1;
+	}
+
+	file = argv[optind];
+	ret = check_mounted(file);
+	if (ret < 0) {
+		fprintf(stderr, "Could not check mount status: %s\n",
+			strerror(-ret));
+		return 1;
+	} else if (ret) {
+		fprintf(stderr, "%s is mounted\n", file);
+		return 1;
+	}
+
+	if (rollback) {
+		ret = do_rollback(file);
+	} else {
+		ret = do_convert(file, datacsum, packing, noxattr, nodesize,
+				copylabel, fslabel, progress, features);
+	}
+	if (ret)
+		return 1;
+	return 0;
+}
diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c
new file mode 100644
index 00000000..be5cd7ea
--- /dev/null
+++ b/btrfs-corrupt-block.c
@@ -0,0 +1,1310 @@
+/*
+ * Copyright (C) 2009 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <limits.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "volumes.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "utils.h"
+
+#define FIELD_BUF_LEN 80
+
+/*
+ * Overwrite the on-disk contents of the tree block at logical address
+ * @bytenr (length @blocksize) with zeroes.
+ *
+ * Every mirror of the block is mapped and its location printed. A mirror is
+ * actually zeroed when @copy is 0 (all mirrors) or equals the mirror number.
+ *
+ * Returns the extent buffer used for the I/O, or NULL if it could not be
+ * created.
+ */
+static struct extent_buffer *debug_corrupt_block(struct btrfs_root *root,
+		u64 bytenr, u32 blocksize, u64 copy)
+{
+	int ret;
+	struct extent_buffer *eb;
+	u64 length;
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	int num_copies;
+	int mirror_num = 1;
+
+	eb = btrfs_find_create_tree_block(root->fs_info, bytenr, blocksize);
+	if (!eb)
+		return NULL;
+
+	length = blocksize;
+	while (1) {
+		ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+				      eb->start, &length, &multi,
+				      mirror_num, NULL);
+		BUG_ON(ret);
+		/* point the buffer at this mirror's device and offset */
+		device = multi->stripes[0].dev;
+		eb->fd = device->fd;
+		device->total_ios++;
+		eb->dev_bytenr = multi->stripes[0].physical;
+
+		fprintf(stdout,
+			"mirror %d logical %llu physical %llu device %s\n",
+			mirror_num, (unsigned long long)bytenr,
+			(unsigned long long)eb->dev_bytenr, device->name);
+		kfree(multi);
+
+		if (!copy || mirror_num == copy) {
+			/* the read result is not checked; the block is zeroed regardless */
+			ret = read_extent_from_disk(eb, 0, eb->len);
+			/* cast like the fprintf above so %llu always matches */
+			printf("corrupting %llu copy %d\n",
+			       (unsigned long long)eb->start, mirror_num);
+			memset(eb->data, 0, eb->len);
+			write_extent_to_disk(eb);
+			fsync(eb->fd);
+		}
+
+		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+					      eb->start, eb->len);
+		if (num_copies == 1)
+			break;
+
+		mirror_num++;
+		if (mirror_num > num_copies)
+			break;
+	}
+	return eb;
+}
+
+/* Print the btrfs-corrupt-block option summary to stderr and exit with @ret. */
+static void print_usage(int ret)
+{
+	static const char usage_text[] =
+		"usage: btrfs-corrupt-block [options] device\n"
+		"\t-l Logical extent to be corrupted\n"
+		"\t-c Copy of the extent to be corrupted (usually 1 or 2, default: 0)\n"
+		"\t-b Number of bytes to be corrupted\n"
+		"\t-e Extent to be corrupted\n"
+		"\t-E The whole extent tree to be corrupted\n"
+		"\t-u Given chunk item to be corrupted\n"
+		"\t-U The whole chunk tree to be corrupted\n"
+		"\t-i The inode item to corrupt (must also specify the field to corrupt)\n"
+		"\t-x The file extent item to corrupt (must also specify -i for the inode and -f for the field to corrupt)\n"
+		"\t-m The metadata block to corrupt (must also specify -f for the field to corrupt)\n"
+		"\t-K The key to corrupt in the format <num>,<num>,<num> (must also specify -f for the field)\n"
+		"\t-f The field in the item to corrupt\n"
+		"\t-I An item to corrupt (must also specify the field to corrupt and a root+key for the item)\n"
+		"\t-D Corrupt a dir item, must specify key and field\n"
+		"\t-d Delete this item (must specify -K)\n"
+		"\t-r Operate on this root (only works with -d)\n"
+		"\t-C Delete a csum for the specified bytenr. When used with -b it'll delete that many bytes, otherwise it's just sectorsize\n";
+
+	fputs(usage_text, stderr);
+	exit(ret);
+}
+
+/*
+ * Pick two random slots in @eb and overwrite the key of the first with the
+ * key of the second. Nothing happens for an empty block or when both picks
+ * are the same slot.
+ *
+ * The buffer is marked dirty; when @trans is NULL the block is additionally
+ * re-checksummed and written to disk right away.
+ */
+static void corrupt_keys(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 struct extent_buffer *eb)
+{
+	struct btrfs_disk_key donor_key;
+	int nritems = btrfs_header_nritems(eb);
+	int target;
+	int donor;
+
+	if (nritems == 0)
+		return;
+
+	target = rand() % nritems;
+	donor = rand() % nritems;
+	if (donor == target)
+		return;
+
+	fprintf(stderr,
+		"corrupting keys in block %llu slot %d swapping with %d\n",
+		(unsigned long long)eb->start, target, donor);
+
+	if (btrfs_header_level(eb) != 0) {
+		/* internal node: keys live in the node pointers */
+		btrfs_node_key(eb, &donor_key, donor);
+		btrfs_set_node_key(eb, &donor_key, target);
+	} else {
+		/* leaf: keys live in the item headers */
+		btrfs_item_key(eb, &donor_key, donor);
+		btrfs_set_item_key(eb, &donor_key, target);
+	}
+	btrfs_mark_buffer_dirty(eb);
+
+	if (trans)
+		return;
+
+	/* no transaction will write the block out, so do it here */
+	csum_tree_block_size(eb,
+		btrfs_super_csum_size(root->fs_info->super_copy), 0);
+	write_extent_to_disk(eb);
+}
+
+
+/*
+ * Read the tree block at @bytenr and corrupt its keys outside of any
+ * transaction. Returns 0 on success, -EIO if the block cannot be read.
+ */
+static int corrupt_keys_in_block(struct btrfs_root *root, u64 bytenr)
+{
+	struct extent_buffer *block;
+	int ret = -EIO;
+
+	block = read_tree_block(root, bytenr, root->leafsize, 0);
+	if (extent_buffer_uptodate(block)) {
+		corrupt_keys(NULL, root, block);
+		free_extent_buffer(block);
+		ret = 0;
+	}
+	return ret;
+}
+
+/*
+ * Damage the extent tree records whose key objectid is @bytenr.
+ *
+ * One random decision (should_del, made once per call) selects the mode:
+ * non-zero deletes each matching record, zero overwrites the item payload
+ * with zero bytes. Only extent item and backref key types are touched.
+ *
+ * Returns -ENOMEM if no path can be allocated, otherwise 0.
+ * NOTE(review): a negative result from btrfs_search_slot() ends the loop
+ * but is not propagated; @copy is not used in this function.
+ */
+static int corrupt_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr, u64 copy)
+{
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ u32 item_size;
+ unsigned long ptr;
+ struct btrfs_path *path;
+ int ret;
+ int slot;
+ int should_del = rand() % 3;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /* start from the highest possible key for this objectid and walk down */
+ key.objectid = bytenr;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ while(1) {
+ ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+ &key, path, -1, 1);
+ if (ret < 0)
+ break;
+
+ /* no exact match: step back to the preceding item, if any */
+ if (ret > 0) {
+ if (path->slots[0] == 0)
+ break;
+ path->slots[0]--;
+ ret = 0;
+ }
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != bytenr)
+ break;
+
+ if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+ key.type != BTRFS_EXTENT_DATA_REF_KEY &&
+ key.type != BTRFS_EXTENT_REF_V0_KEY &&
+ key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
+ key.type != BTRFS_SHARED_DATA_REF_KEY)
+ goto next;
+
+ if (should_del) {
+ fprintf(stderr,
+ "deleting extent record: key %llu %u %llu\n",
+ key.objectid, key.type, key.offset);
+
+ if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+ /* make sure this extent doesn't get
+ * reused for other purposes */
+ btrfs_pin_extent(root->fs_info,
+ key.objectid, key.offset);
+ }
+
+ /* NOTE(review): deletes via root while the search used extent_root */
+ btrfs_del_item(trans, root, path);
+ } else {
+ fprintf(stderr,
+ "corrupting extent record: key %llu %u %llu\n",
+ key.objectid, key.type, key.offset);
+ ptr = btrfs_item_ptr_offset(leaf, slot);
+ item_size = btrfs_item_size_nr(leaf, slot);
+ memset_extent_buffer(leaf, 0, ptr, item_size);
+ btrfs_mark_buffer_dirty(leaf);
+ }
+next:
+ btrfs_release_path(path);
+
+ /* continue below the key just handled; stop once offset reaches 0 */
+ if (key.offset > 0)
+ key.offset--;
+ if (key.offset == 0)
+ break;
+ }
+
+ btrfs_free_path(path);
+ return 0;
+}
+
+/*
+ * Pick one random item of leaf @eb and corrupt the extent records that
+ * share its key objectid. An empty leaf is left alone, since there is
+ * nothing to pick and the modulo below would divide by zero.
+ */
+static void btrfs_corrupt_extent_leaf(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct extent_buffer *eb)
+{
+	u32 nr = btrfs_header_nritems(eb);
+	u32 victim;
+	u64 objectid;
+	struct btrfs_key key;
+
+	if (nr == 0)
+		return;
+
+	victim = rand() % nr;
+	btrfs_item_key_to_cpu(eb, &key, victim);
+	objectid = key.objectid;
+	corrupt_extent(trans, root, objectid, 1);
+}
+
+/*
+ * Recursively walk the tree below @eb and corrupt extent records found
+ * through its leaves.
+ *
+ * Leaves are handed to btrfs_corrupt_extent_leaf(). A level-1 node other
+ * than the root node is descended into only when rand() % 5 is 0. Child
+ * blocks that cannot be read are skipped.
+ */
+static void btrfs_corrupt_extent_tree(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct extent_buffer *eb)
+{
+	struct extent_buffer *child;
+	int slot = 0;
+
+	if (!eb)
+		return;
+
+	if (btrfs_is_leaf(eb)) {
+		btrfs_corrupt_extent_leaf(trans, root, eb);
+		return;
+	}
+
+	/* randomly prune most non-root level-1 subtrees */
+	if (btrfs_header_level(eb) == 1 && eb != root->node && rand() % 5)
+		return;
+
+	while (slot < btrfs_header_nritems(eb)) {
+		child = read_tree_block(root, btrfs_node_blockptr(eb, slot),
+					root->leafsize,
+					btrfs_node_ptr_generation(eb, slot));
+		slot++;
+		if (!extent_buffer_uptodate(child))
+			continue;
+		btrfs_corrupt_extent_tree(trans, root, child);
+		free_extent_buffer(child);
+	}
+}
+
+/* Inode item fields selectable by name; see convert_inode_field(). */
+enum btrfs_inode_field {
+ BTRFS_INODE_FIELD_ISIZE, /* "isize" */
+ BTRFS_INODE_FIELD_NBYTES, /* "nbytes" */
+ BTRFS_INODE_FIELD_BAD, /* unrecognized field name */
+};
+
+/* File extent item fields selectable by name; see convert_file_extent_field(). */
+enum btrfs_file_extent_field {
+ BTRFS_FILE_EXTENT_DISK_BYTENR, /* "disk_bytenr" */
+ BTRFS_FILE_EXTENT_BAD, /* unrecognized field name */
+};
+
+/* Dir item fields selectable by name; see convert_dir_item_field(). */
+enum btrfs_dir_item_field {
+ BTRFS_DIR_ITEM_NAME, /* "name" */
+ BTRFS_DIR_ITEM_LOCATION_OBJECTID, /* "location_objectid" */
+ BTRFS_DIR_ITEM_BAD, /* unrecognized field name */
+};
+
+/* Metadata block fields selectable by name; see convert_metadata_block_field(). */
+enum btrfs_metadata_block_field {
+ BTRFS_METADATA_BLOCK_GENERATION, /* "generation" */
+ BTRFS_METADATA_BLOCK_SHIFT_ITEMS, /* "shift_items" */
+ BTRFS_METADATA_BLOCK_BAD, /* unrecognized field name */
+};
+
+/* Item fields selectable by name; see convert_item_field(). */
+enum btrfs_item_field {
+ BTRFS_ITEM_OFFSET, /* "offset" */
+ BTRFS_ITEM_BAD, /* unrecognized field name */
+};
+
+/* Key fields selectable by name; see convert_key_field(). */
+enum btrfs_key_field {
+ BTRFS_KEY_OBJECTID, /* "objectid" */
+ BTRFS_KEY_TYPE, /* "type" */
+ BTRFS_KEY_OFFSET, /* "offset" */
+ BTRFS_KEY_BAD, /* unrecognized field name */
+};
+
+/*
+ * Translate a field name into an inode field selector. At most
+ * FIELD_BUF_LEN characters are compared. Unknown names yield
+ * BTRFS_INODE_FIELD_BAD.
+ */
+static enum btrfs_inode_field convert_inode_field(char *field)
+{
+	enum btrfs_inode_field which = BTRFS_INODE_FIELD_BAD;
+
+	if (strncmp(field, "isize", FIELD_BUF_LEN) == 0)
+		which = BTRFS_INODE_FIELD_ISIZE;
+	else if (strncmp(field, "nbytes", FIELD_BUF_LEN) == 0)
+		which = BTRFS_INODE_FIELD_NBYTES;
+
+	return which;
+}
+
+/*
+ * Translate a field name into a file extent field selector. Unknown names
+ * yield BTRFS_FILE_EXTENT_BAD.
+ */
+static enum btrfs_file_extent_field convert_file_extent_field(char *field)
+{
+	if (strncmp(field, "disk_bytenr", FIELD_BUF_LEN) != 0)
+		return BTRFS_FILE_EXTENT_BAD;
+
+	return BTRFS_FILE_EXTENT_DISK_BYTENR;
+}
+
+/*
+ * Translate a field name into a metadata block field selector. Unknown
+ * names yield BTRFS_METADATA_BLOCK_BAD.
+ */
+static enum btrfs_metadata_block_field
+convert_metadata_block_field(char *field)
+{
+	enum btrfs_metadata_block_field which = BTRFS_METADATA_BLOCK_BAD;
+
+	if (strncmp(field, "generation", FIELD_BUF_LEN) == 0)
+		which = BTRFS_METADATA_BLOCK_GENERATION;
+	else if (strncmp(field, "shift_items", FIELD_BUF_LEN) == 0)
+		which = BTRFS_METADATA_BLOCK_SHIFT_ITEMS;
+
+	return which;
+}
+
+/*
+ * Translate a field name into a key field selector. Unknown names yield
+ * BTRFS_KEY_BAD.
+ */
+static enum btrfs_key_field convert_key_field(char *field)
+{
+	enum btrfs_key_field which = BTRFS_KEY_BAD;
+
+	if (strncmp(field, "objectid", FIELD_BUF_LEN) == 0)
+		which = BTRFS_KEY_OBJECTID;
+	else if (strncmp(field, "type", FIELD_BUF_LEN) == 0)
+		which = BTRFS_KEY_TYPE;
+	else if (strncmp(field, "offset", FIELD_BUF_LEN) == 0)
+		which = BTRFS_KEY_OFFSET;
+
+	return which;
+}
+
+/*
+ * Translate a field name into an item field selector. Unknown names yield
+ * BTRFS_ITEM_BAD.
+ */
+static enum btrfs_item_field convert_item_field(char *field)
+{
+	if (strncmp(field, "offset", FIELD_BUF_LEN) != 0)
+		return BTRFS_ITEM_BAD;
+
+	return BTRFS_ITEM_OFFSET;
+}
+
+/*
+ * Translate a field name into a dir item field selector. Unknown names
+ * yield BTRFS_DIR_ITEM_BAD.
+ */
+static enum btrfs_dir_item_field convert_dir_item_field(char *field)
+{
+	enum btrfs_dir_item_field which = BTRFS_DIR_ITEM_BAD;
+
+	if (strncmp(field, "name", FIELD_BUF_LEN) == 0)
+		which = BTRFS_DIR_ITEM_NAME;
+	else if (strncmp(field, "location_objectid", FIELD_BUF_LEN) == 0)
+		which = BTRFS_DIR_ITEM_LOCATION_OBJECTID;
+
+	return which;
+}
+
+/* Return a rand()-derived value that is guaranteed to differ from @orig. */
+static u64 generate_u64(u64 orig)
+{
+	u64 candidate = rand();
+
+	while (candidate == orig)
+		candidate = rand();
+	return candidate;
+}
+
+/* Return a rand()-derived value that is guaranteed to differ from @orig. */
+static u32 generate_u32(u32 orig)
+{
+	u32 candidate = rand();
+
+	while (candidate == orig)
+		candidate = rand();
+	return candidate;
+}
+
+/*
+ * Return a rand()-derived value, truncated to 8 bits, that is guaranteed to
+ * differ from @orig.
+ */
+static u8 generate_u8(u8 orig)
+{
+	u8 candidate = rand();
+
+	while (candidate == orig)
+		candidate = rand();
+	return candidate;
+}
+
+/*
+ * Corrupt one component of the key of an existing item in the fs tree.
+ *
+ * @root:  any root of the filesystem; the operation always targets
+ *         root->fs_info->fs_root.
+ * @key:   key of the item to find; on success it is updated in place to
+ *         the corrupted key that was written.
+ * @field: "objectid", "type" or "offset".
+ *
+ * The chosen component is replaced with a random value different from its
+ * current one. Returns 0 on success, -EINVAL for an unknown field, -ENOENT
+ * if the key does not exist, or another negative error. The transaction is
+ * committed on both success and failure paths once it has been started.
+ */
+static int corrupt_key(struct btrfs_root *root, struct btrfs_key *key,
+		       char *field)
+{
+	enum btrfs_key_field corrupt_field = convert_key_field(field);
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	root = root->fs_info->fs_root;
+	if (corrupt_field == BTRFS_KEY_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		fprintf(stderr, "Couldn't find the key to corrupt\n");
+		ret = -ENOENT;
+		goto out;
+	}
+
+	switch (corrupt_field) {
+	case BTRFS_KEY_OBJECTID:
+		key->objectid = generate_u64(key->objectid);
+		break;
+	case BTRFS_KEY_TYPE:
+		key->type = generate_u8(key->type);
+		break;
+	case BTRFS_KEY_OFFSET:
+		/* seed with the old offset so the new one is sure to differ */
+		key->offset = generate_u64(key->offset);
+		break;
+	default:
+		fprintf(stderr, "Invalid field %s, %d\n", field,
+			corrupt_field);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* rewrite the item key without the usual ordering checks */
+	btrfs_set_item_key_unsafe(root, path, key);
+out:
+	btrfs_free_path(path);
+	btrfs_commit_transaction(trans, root);
+	return ret;
+}
+
+/*
+ * Corrupt one field of the dir item stored under @key in @root.
+ *
+ * @root:  tree that is searched for the dir item
+ * @key:   exact key of the dir item
+ * @field: "name" (the first byte of the stored name is incremented) or
+ *         "location_objectid" (the objectid of the embedded location key is
+ *         replaced with a different value)
+ *
+ * Returns 0 on success, -EINVAL for an unknown field or a name length that
+ * cannot be handled, -ENOMEM when no path can be allocated, -ENOENT when the
+ * key does not exist, or the error from starting the transaction or from
+ * the search.  Once the transaction has been started it is committed on
+ * every exit path.
+ */
+static int corrupt_dir_item(struct btrfs_root *root, struct btrfs_key *key,
+			    char *field)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_dir_item *di;
+	struct btrfs_path *path;
+	char name[PATH_MAX];
+	struct btrfs_key location;
+	struct btrfs_disk_key disk_key;
+	unsigned long name_ptr;
+	enum btrfs_dir_item_field corrupt_field =
+		convert_dir_item_field(field);
+	u64 bogus;
+	u16 name_len;
+	int ret;
+
+	if (corrupt_field == BTRFS_DIR_ITEM_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
+		fprintf(stderr, "Error searching for dir item %d\n", ret);
+		goto out;
+	}
+
+	di = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_dir_item);
+
+	switch (corrupt_field) {
+	case BTRFS_DIR_ITEM_NAME:
+		name_len = btrfs_dir_name_len(path->nodes[0], di);
+		/*
+		 * The length comes straight from disk: refuse anything that
+		 * does not fit the local buffer, and an empty name, for which
+		 * there is no first byte to change.
+		 */
+		if (name_len == 0 || name_len > sizeof(name)) {
+			fprintf(stderr, "Unsupported dir item name length %u\n",
+				(unsigned int)name_len);
+			ret = -EINVAL;
+			break;
+		}
+		/* The name is stored right behind the dir item header. */
+		name_ptr = (unsigned long)(di + 1);
+		read_extent_buffer(path->nodes[0], name, name_ptr, name_len);
+		name[0]++;
+		write_extent_buffer(path->nodes[0], name, name_ptr, name_len);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+		break;
+	case BTRFS_DIR_ITEM_LOCATION_OBJECTID:
+		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
+		bogus = generate_u64(location.objectid);
+		location.objectid = bogus;
+		btrfs_cpu_key_to_disk(&disk_key, &location);
+		btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+out:
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Replace the size or nbytes value of inode @inode in @root with a
+ * different value.  The caller owns @trans; nothing is committed here.
+ *
+ * Returns 0 on success, -EINVAL for an unknown @field, -ENOMEM when no path
+ * can be allocated, -ENOENT when no item with that objectid is found, or
+ * the negative error from the search.
+ */
+static int corrupt_inode(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root, u64 inode, char *field)
+{
+	enum btrfs_inode_field corrupt_field = convert_inode_field(field);
+	struct btrfs_inode_item *ei;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret;
+
+	if (corrupt_field == BTRFS_INODE_FIELD_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = inode;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		/*
+		 * No exact match for the maximum offset: the candidate is
+		 * the item in the slot before the returned position.
+		 */
+		if (path->slots[0] == 0)
+			goto not_found;
+		path->slots[0]--;
+	}
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	if (key.objectid != inode)
+		goto not_found;
+
+	ret = 0;
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
+	if (corrupt_field == BTRFS_INODE_FIELD_ISIZE)
+		btrfs_set_inode_size(leaf, ei,
+				generate_u64(btrfs_inode_size(leaf, ei)));
+	else if (corrupt_field == BTRFS_INODE_FIELD_NBYTES)
+		btrfs_set_inode_nbytes(leaf, ei,
+				generate_u64(btrfs_inode_nbytes(leaf, ei)));
+	else
+		ret = -EINVAL;
+	/* The leaf is marked dirty whether or not a field was changed. */
+	btrfs_mark_buffer_dirty(leaf);
+	goto out;
+
+not_found:
+	fprintf(stderr, "Couldn't find inode %Lu\n", inode);
+	ret = -ENOENT;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Replace a field of the file extent item (@inode, EXTENT_DATA, @extent) in
+ * @root with a different value.  The caller owns @trans; nothing is
+ * committed here.
+ *
+ * Returns 0 on success, -EINVAL for an unknown @field, -ENOMEM when no path
+ * can be allocated, -ENOENT when the item does not exist, or the negative
+ * error from the search.
+ */
+static int corrupt_file_extent(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root, u64 inode, u64 extent,
+			       char *field)
+{
+	enum btrfs_file_extent_field corrupt_field =
+		convert_file_extent_field(field);
+	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret;
+
+	if (corrupt_field == BTRFS_FILE_EXTENT_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = inode;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = extent;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret > 0) {
+		fprintf(stderr, "Couldn't find extent %llu for inode %llu\n",
+			extent, inode);
+		ret = -ENOENT;
+	}
+	if (ret < 0)
+		goto out;
+
+	leaf = path->nodes[0];
+	fi = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+	if (corrupt_field == BTRFS_FILE_EXTENT_DISK_BYTENR)
+		btrfs_set_file_extent_disk_bytenr(leaf, fi,
+			generate_u64(btrfs_file_extent_disk_bytenr(leaf, fi)));
+	else
+		ret = -EINVAL;
+	/* The leaf is marked dirty whether or not a field was changed. */
+	btrfs_mark_buffer_dirty(leaf);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Deliberately damage the item layout of @eb, which is handled with the leaf
+ * helpers: a range of item data is moved towards lower offsets by half of
+ * the block's free space, and the recorded offsets of the items in slots
+ * [nritems / 2, nritems - 1] are lowered by the same amount.
+ *
+ * NOTE(review): nritems - 1 and slot - 1 are used as item indexes without
+ * checking nritems, so blocks with fewer than two items look unsupported -
+ * confirm before relying on this for small blocks.
+ */
+static void shift_items(struct btrfs_root *root, struct extent_buffer *eb)
+{
+ int nritems = btrfs_header_nritems(eb);
+ int shift_space = btrfs_leaf_free_space(root, eb) / 2;
+ int slot = nritems / 2;
+ int i = 0;
+ /* offset of the last item's data, used as the start of the moved range */
+ unsigned int data_end = btrfs_item_offset_nr(eb, nritems - 1);
+
+ /* Shift the item data up to and including slot back by shift space */
+ memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end - shift_space,
+ btrfs_leaf_data(eb) + data_end,
+ btrfs_item_offset_nr(eb, slot - 1) - data_end);
+
+ /* Now update the item pointers. */
+ for (i = nritems - 1; i >= slot; i--) {
+ u32 offset = btrfs_item_offset_nr(eb, i);
+ offset -= shift_space;
+ btrfs_set_item_offset(eb, btrfs_item_nr(i), offset);
+ }
+}
+
+/*
+ * Corrupt a field of the metadata block at logical address @block.
+ *
+ * The block is first read directly to learn its owner root, its level and
+ * its first key.  The owner root is then searched for that key with
+ * path->lowest_level set to the block's level, and the corruption is applied
+ * to the buffer the search returns at that level.
+ *
+ * @root:  any root of the filesystem, used to read the block and to reach
+ *         fs_info
+ * @block: logical address of the tree block
+ * @field: field name, parsed by convert_metadata_block_field()
+ *
+ * Returns 0 on success, -EINVAL for an unknown field or an unreadable block,
+ * -ENOMEM when no path can be allocated, or the error from reading the
+ * owner root, starting the transaction or searching.  Once the transaction
+ * has been started it is committed on every exit path.
+ */
+static int corrupt_metadata_block(struct btrfs_root *root, u64 block,
+				  char *field)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct extent_buffer *eb;
+	struct btrfs_key key, root_key;
+	enum btrfs_metadata_block_field corrupt_field;
+	u64 root_objectid;
+	u64 orig, bogus;
+	u8 level;
+	int ret;
+
+	corrupt_field = convert_metadata_block_field(field);
+	if (corrupt_field == BTRFS_METADATA_BLOCK_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	eb = read_tree_block(root, block, root->leafsize, 0);
+	if (!extent_buffer_uptodate(eb)) {
+		/* Report the block that failed, not the field name. */
+		fprintf(stderr, "Couldn't read in tree block %llu\n",
+			(unsigned long long)block);
+		return -EINVAL;
+	}
+	root_objectid = btrfs_header_owner(eb);
+	level = btrfs_header_level(eb);
+	/* Nodes and leaves keep their first key in different layouts. */
+	if (level)
+		btrfs_node_key_to_cpu(eb, &key, 0);
+	else
+		btrfs_item_key_to_cpu(eb, &key, 0);
+	free_extent_buffer(eb);
+
+	root_key.objectid = root_objectid;
+	root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root_key.offset = (u64)-1;
+
+	root = btrfs_read_fs_root(root->fs_info, &root_key);
+	if (IS_ERR(root)) {
+		/* The id being looked up is the owner, not the block's key. */
+		fprintf(stderr, "Couldn't find owner root %llu\n",
+			(unsigned long long)root_objectid);
+		return PTR_ERR(root);
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		fprintf(stderr, "Couldn't start transaction %ld\n",
+			PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	path->lowest_level = level;
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching to node %d\n", ret);
+		goto out;
+	}
+	eb = path->nodes[level];
+
+	ret = 0;
+	switch (corrupt_field) {
+	case BTRFS_METADATA_BLOCK_GENERATION:
+		orig = btrfs_header_generation(eb);
+		bogus = generate_u64(orig);
+		btrfs_set_header_generation(eb, bogus);
+		break;
+	case BTRFS_METADATA_BLOCK_SHIFT_ITEMS:
+		shift_items(root, path->nodes[level]);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	btrfs_mark_buffer_dirty(path->nodes[level]);
+out:
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Corrupt a member of the btrfs_item header of the item stored under @key
+ * in @root.  Only the item's data offset can currently be selected.
+ *
+ * @root:  tree that is searched for the item
+ * @key:   exact key of the item
+ * @field: member name, parsed by convert_item_field()
+ *
+ * Returns 0 on success, -EINVAL for an unknown field, -ENOMEM when no path
+ * can be allocated, -ENOENT when the key does not exist, or the error from
+ * starting the transaction or from the search.  Once the transaction has
+ * been started it is committed on every exit path.
+ */
+static int corrupt_btrfs_item(struct btrfs_root *root, struct btrfs_key *key,
+			      char *field)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	enum btrfs_item_field corrupt_field;
+	u32 orig, bogus;
+	int ret;
+
+	corrupt_field = convert_item_field(field);
+	if (corrupt_field == BTRFS_ITEM_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		fprintf(stderr, "Couldn't start transaction %ld\n",
+			PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+	if (ret != 0) {
+		/*
+		 * A positive result means "no such key"; report it as
+		 * -ENOENT like the other helpers in this file do.
+		 */
+		if (ret > 0)
+			ret = -ENOENT;
+		fprintf(stderr, "Error searching to node %d\n", ret);
+		goto out;
+	}
+
+	switch (corrupt_field) {
+	case BTRFS_ITEM_OFFSET:
+		orig = btrfs_item_offset_nr(path->nodes[0], path->slots[0]);
+		bogus = generate_u32(orig);
+		btrfs_set_item_offset(path->nodes[0],
+				      btrfs_item_nr(path->slots[0]), bogus);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+out:
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Remove the item stored under @key from @root inside a transaction of its
+ * own.
+ *
+ * Returns the result of btrfs_del_item() when the key was found, -ENOENT
+ * when it was not, -ENOMEM when no path can be allocated, or the error from
+ * starting the transaction or from the search.
+ */
+static int delete_item(struct btrfs_root *root, struct btrfs_key *key)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path = btrfs_alloc_path();
+	int ret;
+
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		fprintf(stderr, "Couldn't start transaction %ld\n",
+			PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, key, path, -1, 1);
+	if (ret == 0) {
+		ret = btrfs_del_item(trans, root, path);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+	} else {
+		/* Positive means the exact key is not in the tree. */
+		if (ret > 0)
+			ret = -ENOENT;
+		fprintf(stderr, "Error searching to node %d\n", ret);
+	}
+
+	/* The transaction is committed on the failure path as well. */
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Delete the checksums covering @bytes bytes starting at @bytenr from the
+ * filesystem's csum tree, in a transaction of its own.
+ *
+ * Returns the result of btrfs_del_csums(), or the error from starting the
+ * transaction.
+ */
+static int delete_csum(struct btrfs_root *root, u64 bytenr, u64 bytes)
+{
+	struct btrfs_root *csum_root = root->fs_info->csum_root;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	trans = btrfs_start_transaction(csum_root, 1);
+	if (IS_ERR(trans)) {
+		fprintf(stderr, "Couldn't start transaction %ld\n",
+			PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_del_csums(trans, csum_root, bytenr, bytes);
+	if (ret != 0)
+		fprintf(stderr, "Error deleting csums %d\n", ret);
+
+	/* Committed even when the deletion reported an error. */
+	btrfs_commit_transaction(trans, csum_root);
+	return ret;
+}
+
+/*
+ * Corrupt an item without COW.
+ *
+ * Chunk recovery works by scanning the whole partition.  If the item were
+ * changed through COW, recovery could simply pick up the old copy; that
+ * would still succeed, but the point is to test the ability to rebuild
+ * chunks, not merely to restore old ones.
+ *
+ * @trans: running transaction
+ * @root:  tree the leaf belongs to
+ * @path:  path whose level-0 node and slot select the item
+ * @del:   non-zero to delete the item, zero to overwrite its data with
+ *         zeroes; forced to zero for the first item of a leaf
+ *
+ * Returns 0 when the item data was zeroed, otherwise the result of
+ * btrfs_del_item().
+ */
+static int corrupt_item_nocow(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, struct btrfs_path *path,
+			      int del)
+{
+	int ret = 0;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	unsigned long ptr;
+	int slot;
+	u32 item_size;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	/* Not deleting the first item of a leaf to keep leaf structure */
+	if (slot == 0)
+		del = 0;
+	/* Only accept valid eb */
+	BUG_ON(!leaf->data || slot >= btrfs_header_nritems(leaf));
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	if (del) {
+		fprintf(stdout, "Deleting key and data [%llu, %u, %llu].\n",
+			key.objectid, key.type, key.offset);
+		/* Hand a failed deletion back instead of reporting success. */
+		ret = btrfs_del_item(trans, root, path);
+	} else {
+		fprintf(stdout, "Corrupting key and data [%llu, %u, %llu].\n",
+			key.objectid, key.type, key.offset);
+		ptr = btrfs_item_ptr_offset(leaf, slot);
+		item_size = btrfs_item_size_nr(leaf, slot);
+		memset_extent_buffer(leaf, 0, ptr, item_size);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+	return ret;
+}
+/*
+ * Damage the chunk tree in two backward passes that both start from the
+ * largest possible key: the first pass visits the BTRFS_DEV_ITEM_KEY items
+ * that btrfs_previous_item() yields, the second the BTRFS_CHUNK_ITEM_KEY
+ * items.  Each visited item goes to corrupt_item_nocow() with rand() % 3 as
+ * the delete flag.
+ *
+ * Both searches pass cow = 0 and ins_len = 0: chunk recovery scans the
+ * disk, so COW must stay off or the old copy would simply be found again,
+ * and a non-zero ins_len without COW trips a BUG_ON.
+ *
+ * Returns -ENOMEM when no path can be allocated, otherwise the status left
+ * by the last search or corrupt_item_nocow() call.
+ */
+static int corrupt_chunk_tree(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root)
+{
+	struct btrfs_path *path = btrfs_alloc_path();
+	struct btrfs_key last_key;
+	int drop;
+	int ret;
+
+	if (!path)
+		return -ENOMEM;
+
+	last_key.objectid = (u64)-1;
+	last_key.offset = (u64)-1;
+	last_key.type = (u8)-1;
+
+	/* Pass 1: device items. */
+	ret = btrfs_search_slot(trans, root, &last_key, path, 0, 0);
+	BUG_ON(ret == 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching tree\n");
+		goto free_out;
+	}
+	while (btrfs_previous_item(root, path, 0, BTRFS_DEV_ITEM_KEY) == 0) {
+		drop = rand() % 3;
+		/* Never delete the first item to keep the leaf structure */
+		if (path->slots[0] == 0)
+			drop = 0;
+		ret = corrupt_item_nocow(trans, root, path, drop);
+		if (ret != 0)
+			goto free_out;
+	}
+	btrfs_release_path(path);
+
+	/* Pass 2: chunk items, restarting from the end of the tree. */
+	ret = btrfs_search_slot(trans, root, &last_key, path, 0, 0);
+	BUG_ON(ret == 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching tree\n");
+		goto free_out;
+	}
+	while (btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY) == 0) {
+		drop = rand() % 3;
+		ret = corrupt_item_nocow(trans, root, path, drop);
+		if (ret != 0)
+			goto free_out;
+	}
+free_out:
+	btrfs_free_path(path);
+	return ret;
+}
+/*
+ * Position @path on the chunk item whose key offset is @offset.
+ *
+ * The search passes no transaction and uses cow = 0 / ins_len = 0: chunk
+ * recovery scans the disk, so COW must stay off or the old chunk would just
+ * be found and restored, and a non-zero ins_len without COW trips a BUG_ON.
+ *
+ * Returns the raw btrfs_search_slot() result: 0 when found, positive when
+ * no such chunk exists, negative on error.  A message is printed for both
+ * non-zero outcomes.
+ */
+static int find_chunk_offset(struct btrfs_root *root,
+			     struct btrfs_path *path, u64 offset)
+{
+	struct btrfs_key chunk_key;
+	int ret;
+
+	chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
+	chunk_key.offset = offset;
+
+	ret = btrfs_search_slot(NULL, root, &chunk_key, path, 0, 0);
+	if (ret > 0)
+		fprintf(stderr, "Can't find chunk with given offset %llu\n",
+			offset);
+	else if (ret < 0)
+		fprintf(stderr, "Error searching chunk\n");
+	return ret;
+}
+/*
+ * Entry point of btrfs-corrupt-block.
+ *
+ * Parses the command line, opens the filesystem on the given device for
+ * writing and runs the first corruption mode that was requested, in this
+ * order: extent record, extent tree, chunk record, chunk tree, inode / file
+ * extent, metadata block, dir item, csum deletion, item header, item
+ * deletion, key.  When none of those was selected, the blocks covering
+ * [logical, logical + bytes) are corrupted one sector at a time.
+ *
+ * The value returned is the status of the mode that ran.
+ */
+int main(int ac, char **av)
+{
+	struct cache_tree root_cache;
+	struct btrfs_key key;
+	struct btrfs_root *root;
+	struct extent_buffer *eb;
+	char *dev;
+	/* A chunk offset can be 0, so (u64)-1 marks "not given". */
+	u64 logical = (u64)-1;
+	int ret = 0;
+	u64 copy = 0;
+	u64 bytes = 4096;
+	int extent_rec = 0;
+	int extent_tree = 0;
+	int corrupt_block_keys = 0;
+	int chunk_rec = 0;
+	int chunk_tree = 0;
+	int corrupt_item = 0;
+	int corrupt_di = 0;
+	int delete = 0;
+	u64 metadata_block = 0;
+	u64 inode = 0;
+	u64 file_extent = (u64)-1;
+	u64 root_objectid = 0;
+	u64 csum_bytenr = 0;
+	char field[FIELD_BUF_LEN];
+
+	field[0] = '\0';
+	/* Fixed seed: the generated "random" values are reproducible. */
+	srand(128);
+	memset(&key, 0, sizeof(key));
+
+	while(1) {
+		int c;
+		static const struct option long_options[] = {
+			/* { "byte-count", 1, NULL, 'b' }, */
+			{ "logical", required_argument, NULL, 'l' },
+			{ "copy", required_argument, NULL, 'c' },
+			{ "bytes", required_argument, NULL, 'b' },
+			{ "extent-record", no_argument, NULL, 'e' },
+			{ "extent-tree", no_argument, NULL, 'E' },
+			{ "keys", no_argument, NULL, 'k' },
+			{ "chunk-record", no_argument, NULL, 'u' },
+			{ "chunk-tree", no_argument, NULL, 'U' },
+			{ "inode", required_argument, NULL, 'i'},
+			{ "file-extent", required_argument, NULL, 'x'},
+			{ "metadata-block", required_argument, NULL, 'm'},
+			{ "field", required_argument, NULL, 'f'},
+			{ "key", required_argument, NULL, 'K'},
+			{ "item", no_argument, NULL, 'I'},
+			{ "dir-item", no_argument, NULL, 'D'},
+			{ "delete", no_argument, NULL, 'd'},
+			/* -r reads optarg, so the long form needs one too */
+			{ "root", required_argument, NULL, 'r'},
+			{ "csum", required_argument, NULL, 'C'},
+			{ "help", no_argument, NULL, GETOPT_VAL_HELP},
+			{ NULL, 0, NULL, 0 }
+		};
+
+		c = getopt_long(ac, av, "l:c:b:eEkuUi:f:x:m:K:IDdr:C:",
+				long_options, NULL);
+		if (c < 0)
+			break;
+		switch(c) {
+		case 'l':
+			logical = arg_strtou64(optarg);
+			break;
+		case 'c':
+			copy = arg_strtou64(optarg);
+			break;
+		case 'b':
+			bytes = arg_strtou64(optarg);
+			break;
+		case 'e':
+			extent_rec = 1;
+			break;
+		case 'E':
+			extent_tree = 1;
+			break;
+		case 'k':
+			corrupt_block_keys = 1;
+			break;
+		case 'u':
+			chunk_rec = 1;
+			break;
+		case 'U':
+			chunk_tree = 1;
+			break;
+		case 'i':
+			inode = arg_strtou64(optarg);
+			break;
+		case 'f':
+			/* strncpy() alone may leave field unterminated */
+			strncpy(field, optarg, FIELD_BUF_LEN - 1);
+			field[FIELD_BUF_LEN - 1] = '\0';
+			break;
+		case 'x':
+			file_extent = arg_strtou64(optarg);
+			break;
+		case 'm':
+			metadata_block = arg_strtou64(optarg);
+			break;
+		case 'K': {
+			/*
+			 * Scan into correctly sized temporaries: key.type is
+			 * a single byte and must not be the target of %u.
+			 * The match count stays out of ret so it cannot leak
+			 * into the exit status.
+			 */
+			unsigned long long objectid = 0;
+			unsigned long long offset = 0;
+			unsigned int type = 0;
+			int matched;
+
+			matched = sscanf(optarg, "%llu,%u,%llu",
+					 &objectid, &type, &offset);
+			if (matched != 3 || type > 0xff) {
+				fprintf(stderr, "error reading key "
+					"%d\n", errno);
+				print_usage(1);
+			}
+			key.objectid = objectid;
+			key.type = type;
+			key.offset = offset;
+			break;
+		}
+		case 'D':
+			corrupt_di = 1;
+			break;
+		case 'I':
+			corrupt_item = 1;
+			break;
+		case 'd':
+			delete = 1;
+			break;
+		case 'r':
+			root_objectid = arg_strtou64(optarg);
+			break;
+		case 'C':
+			csum_bytenr = arg_strtou64(optarg);
+			break;
+		case GETOPT_VAL_HELP:
+		default:
+			print_usage(c != GETOPT_VAL_HELP);
+		}
+	}
+	set_argv0(av);
+	ac = ac - optind;
+	if (check_argc_min(ac, 1))
+		print_usage(1);
+	dev = av[optind];
+
+	radix_tree_init();
+	cache_tree_init(&root_cache);
+
+	root = open_ctree(dev, 0, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	if (extent_rec) {
+		struct btrfs_trans_handle *trans;
+
+		if (logical == (u64)-1)
+			print_usage(1);
+		trans = btrfs_start_transaction(root, 1);
+		ret = corrupt_extent (trans, root, logical, 0);
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (extent_tree) {
+		struct btrfs_trans_handle *trans;
+		trans = btrfs_start_transaction(root, 1);
+		btrfs_corrupt_extent_tree(trans, root->fs_info->extent_root,
+					  root->fs_info->extent_root->node);
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (chunk_rec) {
+		struct btrfs_trans_handle *trans;
+		struct btrfs_path *path;
+		int del;
+
+		if (logical == (u64)-1)
+			print_usage(1);
+		del = rand() % 3;
+		path = btrfs_alloc_path();
+		if (!path) {
+			fprintf(stderr, "path allocation failed\n");
+			ret = -ENOMEM;
+			goto out_close;
+		}
+
+		/* Keep the lookup status so a miss is not reported as 0. */
+		ret = find_chunk_offset(root->fs_info->chunk_root, path,
+					logical);
+		if (ret != 0) {
+			btrfs_free_path(path);
+			goto out_close;
+		}
+		trans = btrfs_start_transaction(root, 1);
+		ret = corrupt_item_nocow(trans, root->fs_info->chunk_root,
+					 path, del);
+		if (ret < 0)
+			fprintf(stderr, "Failed to corrupt chunk record\n");
+		btrfs_commit_transaction(trans, root);
+		/* Drop the path before the tree is closed. */
+		btrfs_free_path(path);
+		goto out_close;
+	}
+	if (chunk_tree) {
+		struct btrfs_trans_handle *trans;
+		trans = btrfs_start_transaction(root, 1);
+		ret = corrupt_chunk_tree(trans, root->fs_info->chunk_root);
+		if (ret < 0)
+			fprintf(stderr, "Failed to corrupt chunk tree\n");
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (inode) {
+		struct btrfs_trans_handle *trans;
+
+		if (*field == 0)
+			print_usage(1);
+
+		trans = btrfs_start_transaction(root, 1);
+		/* Without -x the inode item itself is the target. */
+		if (file_extent == (u64)-1) {
+			printf("corrupting inode\n");
+			ret = corrupt_inode(trans, root, inode, field);
+		} else {
+			printf("corrupting file extent\n");
+			ret = corrupt_file_extent(trans, root, inode,
+						  file_extent, field);
+		}
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (metadata_block) {
+		if (*field == 0)
+			print_usage(1);
+		ret = corrupt_metadata_block(root, metadata_block, field);
+		goto out_close;
+	}
+	if (corrupt_di) {
+		if (!key.objectid || *field == 0)
+			print_usage(1);
+		ret = corrupt_dir_item(root, &key, field);
+		goto out_close;
+	}
+	if (csum_bytenr) {
+		ret = delete_csum(root, csum_bytenr, bytes);
+		goto out_close;
+	}
+	if (corrupt_item) {
+		if (!key.objectid)
+			print_usage(1);
+		ret = corrupt_btrfs_item(root, &key, field);
+		/*
+		 * Stop here: falling through would reach the key branch
+		 * below and corrupt the key as well.
+		 */
+		goto out_close;
+	}
+	if (delete) {
+		struct btrfs_root *target = root;
+
+		if (!key.objectid)
+			print_usage(1);
+		if (root_objectid) {
+			struct btrfs_key root_key;
+
+			root_key.objectid = root_objectid;
+			root_key.type = BTRFS_ROOT_ITEM_KEY;
+			root_key.offset = (u64)-1;
+
+			target = btrfs_read_fs_root(root->fs_info, &root_key);
+			if (IS_ERR(target)) {
+				fprintf(stderr, "Couldn't find root %llu\n",
+					(unsigned long long)root_objectid);
+				print_usage(1);
+			}
+		}
+		ret = delete_item(target, &key);
+		goto out_close;
+	}
+	if (key.objectid || key.offset || key.type) {
+		if (*field == 0)
+			print_usage(1);
+		ret = corrupt_key(root, &key, field);
+		goto out_close;
+	}
+	/*
+	 * If we made it here and we have extent set then we didn't specify
+	 * inode and we're screwed.
+	 */
+	if (file_extent != (u64)-1)
+		print_usage(1);
+
+	if (logical == (u64)-1)
+		print_usage(1);
+
+	if (bytes == 0)
+		bytes = root->sectorsize;
+
+	/* Round the length up to a whole number of sectors. */
+	bytes = (bytes + root->sectorsize - 1) / root->sectorsize;
+	bytes *= root->sectorsize;
+
+	while (bytes > 0) {
+		if (corrupt_block_keys) {
+			corrupt_keys_in_block(root, logical);
+		} else {
+			eb = debug_corrupt_block(root, logical,
+						 root->sectorsize, copy);
+			free_extent_buffer(eb);
+		}
+		logical += root->sectorsize;
+		bytes -= root->sectorsize;
+	}
+	/*
+	 * NOTE(review): this path returns without close_ctree(); left as is
+	 * because it may be intentional for raw block corruption - confirm.
+	 */
+	return ret;
+out_close:
+	close_ctree(root);
+	return ret;
+}
diff --git a/btrfs-crc.c b/btrfs-crc.c
new file mode 100644
index 00000000..723e0b7a
--- /dev/null
+++ b/btrfs-crc.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "crc32c.h"
+#include "utils.h"
+
+/* Print the command synopsis on stdout and exit with status 1. */
+void usage(void)
+{
+	static const char * const lines[] = {
+		"usage: btrfs-crc filename\n",
+		" print out the btrfs crc for \"filename\"\n",
+		"usage: btrfs-crc filename -c crc [-s seed] [-l length]\n",
+		" brute force search for file names with the given crc\n",
+		" -s seed the random seed (default: random)\n",
+		" -l length the length of the file names (default: 10)\n",
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
+		fputs(lines[i], stdout);
+	exit(1);
+}
+
+/*
+ * Entry point of btrfs-crc.
+ *
+ * Without -c: print the crc32c (seeded with ~1) of the given name and exit.
+ * With -c:    fill a buffer of -l bytes (default 10) with characters drawn
+ *             from rand() and print every buffer whose crc equals the
+ *             requested value.  This search loop never terminates on its
+ *             own.
+ *
+ * Returns 0 after printing a crc, 255 on usage errors, -ENOMEM when the
+ * search buffer cannot be allocated.
+ */
+int main(int argc, char **argv)
+{
+	int c;
+	unsigned long checksum = 0;
+	char *str;
+	char *buf;
+	int length = 10;
+	int seed = getpid() ^ getppid();
+	int loop = 0;
+	int i;
+
+	while ((c = getopt(argc, argv, "l:c:s:h")) != -1) {
+		switch (c) {
+		case 'l':
+			length = atol(optarg);
+			break;
+		case 'c':
+			/*
+			 * Base 0 keeps the decimal/hex/octal input forms and
+			 * covers the full unsigned range of a crc.
+			 */
+			checksum = strtoul(optarg, NULL, 0);
+			loop = 1;
+			break;
+		case 's':
+			seed = atol(optarg);
+			break;
+		case 'h':
+			usage();
+			/* usage() exits; the case below is not reached */
+		case '?':
+			return 255;
+		}
+	}
+
+	set_argv0(argv);
+	str = argv[optind];
+
+	if (!loop) {
+		if (check_argc_min(argc - optind, 1))
+			return 255;
+
+		printf("%12u - %s\n", crc32c(~1, str, strlen(str)), str);
+		return 0;
+	}
+
+	/* A zero or negative length cannot be allocated or filled. */
+	if (length <= 0) {
+		fprintf(stderr, "invalid length %d\n", length);
+		return 255;
+	}
+
+	buf = malloc(length);
+	if (!buf)
+		return -ENOMEM;
+	srand(seed);
+
+	while (1) {
+		/* 94 consecutive character codes starting at 33 ('!') */
+		for (i = 0; i < length; i++)
+			buf[i] = rand() % 94 + 33;
+		if (crc32c(~1, buf, length) == checksum)
+			printf("%12lu - %.*s\n", checksum, length, buf);
+	}
+
+	return 0;
+}
diff --git a/btrfs-debug-tree.c b/btrfs-debug-tree.c
new file mode 100644
index 00000000..266176f3
--- /dev/null
+++ b/btrfs-debug-tree.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "volumes.h"
+#include "utils.h"
+
+/*
+ * Write the option summary and the package version to stderr, then exit
+ * with status @ret.  Does not return to the caller.
+ */
+static int print_usage(int ret)
+{
+	fputs("usage: btrfs-debug-tree [-e] [-d] [-r] [-R] [-u]\n"
+	      " [-b block_num ] device\n"
+	      "\t-e : print detailed extents info\n"
+	      "\t-d : print info of btrfs device and root tree dirs only\n"
+	      "\t-r : print info of roots only\n"
+	      "\t-R : print info of roots and root backups\n"
+	      "\t-u : print info of uuid tree only\n"
+	      "\t-b block_num : print info of the specified block only\n"
+	      "\t-t tree_id : print only the tree with the given id\n",
+	      stderr);
+	fprintf(stderr, "%s\n", PACKAGE_STRING);
+	exit(ret);
+}
+
+/*
+ * Print every leaf below @eb by descending recursively through the tree.
+ * Children that cannot be read are skipped; a child whose level does not
+ * fit its parent triggers BUG().
+ */
+static void print_extents(struct btrfs_root *root, struct extent_buffer *eb)
+{
+	struct extent_buffer *child;
+	u32 child_size;
+	u32 nritems;
+	int idx;
+
+	if (!eb)
+		return;
+
+	if (btrfs_is_leaf(eb)) {
+		btrfs_print_leaf(root, eb);
+		return;
+	}
+
+	child_size = btrfs_level_size(root, btrfs_header_level(eb) - 1);
+	nritems = btrfs_header_nritems(eb);
+	for (idx = 0; idx < nritems; idx++) {
+		child = read_tree_block(root, btrfs_node_blockptr(eb, idx),
+					child_size,
+					btrfs_node_ptr_generation(eb, idx));
+		if (extent_buffer_uptodate(child)) {
+			/* Leaves may only hang off level-1 nodes. */
+			if (btrfs_is_leaf(child) &&
+			    btrfs_header_level(eb) != 1)
+				BUG();
+			/* A child sits exactly one level below its parent. */
+			if (btrfs_header_level(child) !=
+			    btrfs_header_level(eb) - 1)
+				BUG();
+			print_extents(root, child);
+			free_extent_buffer(child);
+		}
+	}
+}
+
+/*
+ * Dump every backup root slot stored in @super: generation and block of the
+ * tree, extent, chunk, device, csum and fs roots, followed by the recorded
+ * space usage and device count.
+ */
+static void print_old_roots(struct btrfs_super_block *super)
+{
+	int slot;
+
+	for (slot = 0; slot < BTRFS_NUM_BACKUP_ROOTS; slot++) {
+		struct btrfs_root_backup *backup = &super->super_roots[slot];
+
+		printf("btrfs root backup slot %d\n", slot);
+		printf("\ttree root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_tree_root_gen(backup),
+		       (unsigned long long)btrfs_backup_tree_root(backup));
+		printf("\t\textent root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_extent_root_gen(backup),
+		       (unsigned long long)btrfs_backup_extent_root(backup));
+		printf("\t\tchunk root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_chunk_root_gen(backup),
+		       (unsigned long long)btrfs_backup_chunk_root(backup));
+		printf("\t\tdevice root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_dev_root_gen(backup),
+		       (unsigned long long)btrfs_backup_dev_root(backup));
+		printf("\t\tcsum root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_csum_root_gen(backup),
+		       (unsigned long long)btrfs_backup_csum_root(backup));
+		printf("\t\tfs root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_fs_root_gen(backup),
+		       (unsigned long long)btrfs_backup_fs_root(backup));
+		printf("\t\t%llu used %llu total %llu devices\n",
+		       (unsigned long long)btrfs_backup_bytes_used(backup),
+		       (unsigned long long)btrfs_backup_total_bytes(backup),
+		       (unsigned long long)btrfs_backup_num_devices(backup));
+	}
+}
+
+/*
+ * Entry point of btrfs-debug-tree.
+ *
+ * Parses the options, opens the filesystem found on the single device or
+ * image argument and prints the requested trees: one block (-b), one tree
+ * (-t), or every tree referenced by a ROOT_ITEM, filtered by -e/-d/-u and
+ * reduced to a summary by -r/-R.  Returns the result of close_ctree(),
+ * or exits with status 1 when the argument cannot be opened.
+ */
+int main(int ac, char **av)
+{
+ struct btrfs_root *root;
+ struct btrfs_fs_info *info;
+ struct btrfs_path path;
+ struct btrfs_key key;
+ struct btrfs_root_item ri;
+ struct extent_buffer *leaf;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_key found_key;
+ char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+ int ret;
+ int slot;
+ int extent_only = 0;
+ int device_only = 0;
+ int uuid_tree_only = 0;
+ int roots_only = 0;
+ int root_backups = 0;
+ u64 block_only = 0;
+ struct btrfs_root *tree_root_scan;
+ u64 tree_id = 0;
+
+ radix_tree_init();
+
+ while(1) {
+ int c;
+ static const struct option long_options[] = {
+ { "help", no_argument, NULL, GETOPT_VAL_HELP},
+ { NULL, 0, NULL, 0 }
+ };
+
+ c = getopt_long(ac, av, "deb:rRut:", long_options, NULL);
+ if (c < 0)
+ break;
+ switch(c) {
+ case 'e':
+ extent_only = 1;
+ break;
+ case 'd':
+ device_only = 1;
+ break;
+ case 'r':
+ roots_only = 1;
+ break;
+ case 'u':
+ uuid_tree_only = 1;
+ break;
+ case 'R':
+ /* -R implies -r and additionally prints the root backups */
+ roots_only = 1;
+ root_backups = 1;
+ break;
+ case 'b':
+ block_only = arg_strtou64(optarg);
+ break;
+ case 't':
+ tree_id = arg_strtou64(optarg);
+ break;
+ case GETOPT_VAL_HELP:
+ default:
+ /* print_usage() exits; status is 0 only for --help */
+ print_usage(c != GETOPT_VAL_HELP);
+ }
+ }
+ set_argv0(av);
+ ac = ac - optind;
+ if (check_argc_exact(ac, 1))
+ print_usage(1);
+
+ ret = check_arg_type(av[optind]);
+ if (ret != BTRFS_ARG_BLKDEV && ret != BTRFS_ARG_REG) {
+ fprintf(stderr, "'%s' is not a block device or regular file\n",
+ av[optind]);
+ exit(1);
+ }
+
+ info = open_ctree_fs_info(av[optind], 0, 0, OPEN_CTREE_PARTIAL);
+ if (!info) {
+ fprintf(stderr, "unable to open %s\n", av[optind]);
+ exit(1);
+ }
+
+ root = info->fs_root;
+ if (!root) {
+ fprintf(stderr, "unable to open %s\n", av[optind]);
+ exit(1);
+ }
+
+ /*
+ * -b: print only the tree block at the given address.  It is read with
+ * the leaf size first; a block that turns out not to be a leaf is
+ * dropped and read again with the node size.
+ */
+ if (block_only) {
+ leaf = read_tree_block(root,
+ block_only,
+ root->leafsize, 0);
+
+ if (extent_buffer_uptodate(leaf) &&
+ btrfs_header_level(leaf) != 0) {
+ free_extent_buffer(leaf);
+ leaf = NULL;
+ }
+
+ if (!leaf) {
+ leaf = read_tree_block(root,
+ block_only,
+ root->nodesize, 0);
+ }
+ if (!extent_buffer_uptodate(leaf)) {
+ fprintf(stderr, "failed to read %llu\n",
+ (unsigned long long)block_only);
+ goto close_root;
+ }
+ btrfs_print_tree(root, leaf, 0);
+ free_extent_buffer(leaf);
+ goto close_root;
+ }
+
+ /*
+ * Unless -e, -u or -t was given, start with the root tree and the chunk
+ * tree: only their location and level with -r/-R, the full trees
+ * otherwise.
+ */
+ if (!(extent_only || uuid_tree_only || tree_id)) {
+ if (roots_only) {
+ printf("root tree: %llu level %d\n",
+ (unsigned long long)info->tree_root->node->start,
+ btrfs_header_level(info->tree_root->node));
+ printf("chunk tree: %llu level %d\n",
+ (unsigned long long)info->chunk_root->node->start,
+ btrfs_header_level(info->chunk_root->node));
+ } else {
+ if (info->tree_root->node) {
+ printf("root tree\n");
+ btrfs_print_tree(info->tree_root,
+ info->tree_root->node, 1);
+ }
+
+ if (info->chunk_root->node) {
+ printf("chunk tree\n");
+ btrfs_print_tree(info->chunk_root,
+ info->chunk_root->node, 1);
+ }
+ }
+ }
+ tree_root_scan = info->tree_root;
+
+ btrfs_init_path(&path);
+ /*
+ * The scan below runs once for the tree root and, when a log root tree
+ * exists, a second time for it (see the jump back after no_node).
+ */
+again:
+ if (!extent_buffer_uptodate(tree_root_scan->node))
+ goto no_node;
+
+ /*
+ * Tree's that are not pointed by the tree of tree roots
+ */
+ if (tree_id && tree_id == BTRFS_ROOT_TREE_OBJECTID) {
+ if (!info->tree_root->node) {
+ error("cannot print root tree, invalid pointer");
+ goto no_node;
+ }
+ printf("root tree\n");
+ btrfs_print_tree(info->tree_root, info->tree_root->node, 1);
+ goto no_node;
+ }
+
+ if (tree_id && tree_id == BTRFS_CHUNK_TREE_OBJECTID) {
+ if (!info->chunk_root->node) {
+ error("cannot print chunk tree, invalid pointer");
+ goto no_node;
+ }
+ printf("chunk tree\n");
+ btrfs_print_tree(info->chunk_root, info->chunk_root->node, 1);
+ goto no_node;
+ }
+
+ /* Walk all items of the scanned tree starting from key (0, ROOT_ITEM, 0) */
+ key.offset = 0;
+ key.objectid = 0;
+ btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+ ret = btrfs_search_slot(NULL, tree_root_scan, &key, &path, 0, 0);
+ BUG_ON(ret < 0);
+ while(1) {
+ leaf = path.nodes[0];
+ slot = path.slots[0];
+ /* past the last item of this leaf: continue in the next one */
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(tree_root_scan, &path);
+ if (ret != 0)
+ break;
+ leaf = path.nodes[0];
+ slot = path.slots[0];
+ }
+ btrfs_item_key(leaf, &disk_key, path.slots[0]);
+ btrfs_disk_key_to_cpu(&found_key, &disk_key);
+ if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
+ unsigned long offset;
+ struct extent_buffer *buf;
+ /*
+ * Any of -e/-d/-u starts out skipping every tree; the cases
+ * below clear skip again for the trees those options cover.
+ */
+ int skip = extent_only | device_only | uuid_tree_only;
+
+ offset = btrfs_item_ptr_offset(leaf, slot);
+ read_extent_buffer(leaf, &ri, offset, sizeof(ri));
+ buf = read_tree_block(tree_root_scan,
+ btrfs_root_bytenr(&ri),
+ btrfs_level_size(tree_root_scan,
+ btrfs_root_level(&ri)),
+ 0);
+ if (!extent_buffer_uptodate(buf))
+ goto next;
+ /* -t: ignore every tree except the requested one */
+ if (tree_id && found_key.objectid != tree_id) {
+ free_extent_buffer(buf);
+ goto next;
+ }
+
+ /* Print the tree's name; " tree " and the key follow below */
+ switch(found_key.objectid) {
+ case BTRFS_ROOT_TREE_OBJECTID:
+ if (!skip)
+ printf("root");
+ break;
+ case BTRFS_EXTENT_TREE_OBJECTID:
+ if (!device_only && !uuid_tree_only)
+ skip = 0;
+ if (!skip)
+ printf("extent");
+ break;
+ case BTRFS_CHUNK_TREE_OBJECTID:
+ if (!skip) {
+ printf("chunk");
+ }
+ break;
+ case BTRFS_DEV_TREE_OBJECTID:
+ if (!uuid_tree_only)
+ skip = 0;
+ if (!skip)
+ printf("device");
+ break;
+ case BTRFS_FS_TREE_OBJECTID:
+ if (!skip) {
+ printf("fs");
+ }
+ break;
+ case BTRFS_ROOT_TREE_DIR_OBJECTID:
+ /* printed regardless of the -e/-d/-u filters */
+ skip = 0;
+ printf("directory");
+ break;
+ case BTRFS_CSUM_TREE_OBJECTID:
+ if (!skip) {
+ printf("checksum");
+ }
+ break;
+ case BTRFS_ORPHAN_OBJECTID:
+ if (!skip) {
+ printf("orphan");
+ }
+ break;
+ case BTRFS_TREE_LOG_OBJECTID:
+ if (!skip) {
+ printf("log");
+ }
+ break;
+ case BTRFS_TREE_LOG_FIXUP_OBJECTID:
+ if (!skip) {
+ printf("log fixup");
+ }
+ break;
+ case BTRFS_TREE_RELOC_OBJECTID:
+ if (!skip) {
+ printf("reloc");
+ }
+ break;
+ case BTRFS_DATA_RELOC_TREE_OBJECTID:
+ if (!skip) {
+ printf("data reloc");
+ }
+ break;
+ case BTRFS_EXTENT_CSUM_OBJECTID:
+ if (!skip) {
+ printf("extent checksum");
+ }
+ break;
+ case BTRFS_QUOTA_TREE_OBJECTID:
+ if (!skip) {
+ printf("quota");
+ }
+ break;
+ case BTRFS_UUID_TREE_OBJECTID:
+ if (!extent_only && !device_only)
+ skip = 0;
+ if (!skip)
+ printf("uuid");
+ break;
+ case BTRFS_FREE_SPACE_TREE_OBJECTID:
+ if (!skip)
+ printf("free space");
+ break;
+ case BTRFS_MULTIPLE_OBJECTIDS:
+ if (!skip) {
+ printf("multiple");
+ }
+ break;
+ default:
+ if (!skip) {
+ printf("file");
+ }
+ }
+ /*
+ * -e prints the tree through print_extents(); otherwise print
+ * the key and either a one-line summary (-r/-R) or the whole
+ * tree.
+ */
+ if (extent_only && !skip) {
+ print_extents(tree_root_scan, buf);
+ } else if (!skip) {
+ printf(" tree ");
+ btrfs_print_key(&disk_key);
+ if (roots_only) {
+ printf(" %llu level %d\n",
+ (unsigned long long)buf->start,
+ btrfs_header_level(buf));
+ } else {
+ printf(" \n");
+ btrfs_print_tree(tree_root_scan, buf, 1);
+ }
+ }
+ free_extent_buffer(buf);
+ }
+next:
+ path.slots[0]++;
+ }
+no_node:
+ btrfs_release_path(&path);
+
+ /* After the tree root, repeat the scan on the log root tree if any */
+ if (tree_root_scan == info->tree_root &&
+ info->log_root_tree) {
+ tree_root_scan = info->log_root_tree;
+ goto again;
+ }
+
+ /* The filtered modes end here without the superblock summary */
+ if (extent_only || device_only || uuid_tree_only)
+ goto close_root;
+
+ if (root_backups)
+ print_old_roots(info->super_copy);
+
+ printf("total bytes %llu\n",
+ (unsigned long long)btrfs_super_total_bytes(info->super_copy));
+ printf("bytes used %llu\n",
+ (unsigned long long)btrfs_super_bytes_used(info->super_copy));
+ uuidbuf[BTRFS_UUID_UNPARSED_SIZE - 1] = '\0';
+ uuid_unparse(info->super_copy->fsid, uuidbuf);
+ printf("uuid %s\n", uuidbuf);
+ printf("%s\n", PACKAGE_STRING);
+close_root:
+ ret = close_ctree(root);
+ btrfs_close_all_devices();
+ return ret;
+}
diff --git a/btrfs-debugfs b/btrfs-debugfs
new file mode 100755
index 00000000..cf1d285c
--- /dev/null
+++ b/btrfs-debugfs
@@ -0,0 +1,296 @@
+#!/usr/bin/env python2
+#
+# Simple python program to print out all the extents of a single file
+# LGPLv2 license
+# Copyright Facebook 2014
+
+import sys,os,struct,fcntl,ctypes,stat
+
+# helpers for max ints: the largest unsigned 64-bit and 32-bit values,
+# used below as "no upper bound" in search keys
+maxu64 = (1L << 64) - 1
+maxu32 = (1L << 32) - 1
+
+# Item type values.  These are what goes into min_type/max_type of a
+# search key and what comes back in the type field of each search header.
+#
+# the inode (like from stat)
+BTRFS_INODE_ITEM_KEY = 1
+# backref to the directory
+BTRFS_INODE_REF_KEY = 12
+# backref to the directory v2
+BTRFS_INODE_EXTREF_KEY = 13
+# xattr items
+BTRFS_XATTR_ITEM_KEY = 24
+# orphans for list files
+BTRFS_ORPHAN_ITEM_KEY = 48
+# treelog items for dirs
+BTRFS_DIR_LOG_ITEM_KEY = 60
+BTRFS_DIR_LOG_INDEX_KEY = 72
+# dir items and dir indexes both hold filenames
+BTRFS_DIR_ITEM_KEY = 84
+BTRFS_DIR_INDEX_KEY = 96
+# these are the file extent pointers
+BTRFS_EXTENT_DATA_KEY = 108
+# csums
+BTRFS_EXTENT_CSUM_KEY = 128
+# root item for subvols and snapshots
+BTRFS_ROOT_ITEM_KEY = 132
+# root item backrefs
+BTRFS_ROOT_BACKREF_KEY = 144
+BTRFS_ROOT_REF_KEY = 156
+# each allocated extent has an extent item
+BTRFS_EXTENT_ITEM_KEY = 168
+# optimized extents for metadata only
+BTRFS_METADATA_ITEM_KEY = 169
+# backrefs for extents
+BTRFS_TREE_BLOCK_REF_KEY = 176
+BTRFS_EXTENT_DATA_REF_KEY = 178
+BTRFS_EXTENT_REF_V0_KEY = 180
+BTRFS_SHARED_BLOCK_REF_KEY = 182
+BTRFS_SHARED_DATA_REF_KEY = 184
+# one of these for each block group
+BTRFS_BLOCK_GROUP_ITEM_KEY = 192
+# dev extents records which part of each device is allocated
+BTRFS_DEV_EXTENT_KEY = 204
+# dev items describe devs
+BTRFS_DEV_ITEM_KEY = 216
+# one for each chunk
+BTRFS_CHUNK_ITEM_KEY = 228
+# qgroup info
+BTRFS_QGROUP_STATUS_KEY = 240
+BTRFS_QGROUP_INFO_KEY = 242
+BTRFS_QGROUP_LIMIT_KEY = 244
+BTRFS_QGROUP_RELATION_KEY = 246
+# records balance progress
+BTRFS_BALANCE_ITEM_KEY = 248
+# stats on device errors
+BTRFS_DEV_STATS_KEY = 249
+BTRFS_DEV_REPLACE_KEY = 250
+BTRFS_STRING_ITEM_KEY = 253
+
+# in the kernel sources, this is flattened
+# btrfs_ioctl_search_args_v2. It includes both the btrfs_ioctl_search_key
+# and the buffer. We're using a 64K buffer size.
+#
+# Layout (packed, no padding): seven 64-bit fields, four 32-bit fields and
+# four more 64-bit fields make up the 104-byte search key; buf_size (8 bytes)
+# follows, then the result buffer itself.
+#
+args_buffer_size = 65536
+class btrfs_ioctl_search_args(ctypes.Structure):
+ _pack_ = 1
+ # min_*/max_* bound the search; nr_items is set by the caller before the
+ # ioctl and read back afterwards as the number of items found
+ _fields_ = [ ("tree_id", ctypes.c_ulonglong),
+ ("min_objectid", ctypes.c_ulonglong),
+ ("max_objectid", ctypes.c_ulonglong),
+ ("min_offset", ctypes.c_ulonglong),
+ ("max_offset", ctypes.c_ulonglong),
+ ("min_transid", ctypes.c_ulonglong),
+ ("max_transid", ctypes.c_ulonglong),
+ ("min_type", ctypes.c_uint),
+ ("max_type", ctypes.c_uint),
+ ("nr_items", ctypes.c_uint),
+ ("unused", ctypes.c_uint),
+ ("unused1", ctypes.c_ulonglong),
+ ("unused2", ctypes.c_ulonglong),
+ ("unused3", ctypes.c_ulonglong),
+ ("unused4", ctypes.c_ulonglong),
+ # size in bytes of buf, followed by the buffer the results land in
+ ("buf_size", ctypes.c_ulonglong),
+ ("buf", ctypes.c_ubyte * args_buffer_size),
+ ]
+
+# the search ioctl returns one header for each item
+#
+# In the result buffer every header is immediately followed by "len" bytes
+# of item data; objectid/type/offset identify the item that was found.
+#
+class btrfs_ioctl_search_header(ctypes.Structure):
+ _pack_ = 1
+ _fields_ = [ ("transid", ctypes.c_ulonglong),
+ ("objectid", ctypes.c_ulonglong),
+ ("offset", ctypes.c_ulonglong),
+ ("type", ctypes.c_uint),
+ ("len", ctypes.c_uint),
+ ]
+
+# the type field in btrfs_file_extent_item
+BTRFS_FILE_EXTENT_INLINE = 0
+BTRFS_FILE_EXTENT_REG = 1
+BTRFS_FILE_EXTENT_PREALLOC = 2
+
+# Packed little-endian view of a file extent item as it is copied out of
+# the search results.  The fields up to and including "type" add up to
+# 21 bytes (8 + 8 + 1 + 1 + 2 + 1); print_one_extent subtracts that
+# 21-byte prefix from the item length to get the inline data length.
+class btrfs_file_extent_item(ctypes.LittleEndianStructure):
+ _pack_ = 1
+ _fields_ = [ ("generation", ctypes.c_ulonglong),
+ ("ram_bytes", ctypes.c_ulonglong),
+ ("compression", ctypes.c_ubyte),
+ ("encryption", ctypes.c_ubyte),
+ ("other_encoding", ctypes.c_ubyte * 2),
+ ("type", ctypes.c_ubyte),
+ # the remaining fields are only read for non-inline extents
+ ("disk_bytenr", ctypes.c_ulonglong),
+ ("disk_num_bytes", ctypes.c_ulonglong),
+ ("offset", ctypes.c_ulonglong),
+ ("num_bytes", ctypes.c_ulonglong),
+ ]
+
+# Wrapper around one btrfs_ioctl_search_args structure plus the ioctl
+# number needed to submit it.  The constructor sets every min to 0 and
+# every max to its largest value; callers narrow the range by assigning
+# to self.args fields before calling search().
+class btrfs_ioctl_search():
+ def __init__(self):
+ self.args = btrfs_ioctl_search_args()
+ self.args.tree_id = 0
+ self.args.min_objectid = 0
+ self.args.max_objectid = maxu64
+ self.args.min_offset = 0
+ self.args.max_offset = maxu64
+ self.args.min_transid = 0
+ self.args.max_transid = maxu64
+ self.args.min_type = 0
+ self.args.max_type = maxu32
+ self.args.nr_items = 0
+ self.args.buf_size = args_buffer_size
+
+ # magic encoded for x86_64 this is the v2 search ioctl
+ # (3228603409 == 0xC0709411; the 0x70 size field equals the 112 bytes
+ # of search key plus buf_size)
+ self.ioctl_num = 3228603409L
+
+ # the results of the search get stored into args.buf
+ # nritems is written to args.nr_items before the call; the trailing 1
+ # passed to fcntl.ioctl is its mutate flag, so self.args is updated
+ # in place.  Errors from the ioctl propagate as exceptions.
+ def search(self, fd, nritems=65536):
+ self.args.nr_items = nritems
+ fcntl.ioctl(fd, self.ioctl_num, self.args, 1)
+
+# this moves the search key forward by one. If the end result is
+# still a valid search key (all mins less than all maxes), we return
+# True. Otherwise False
+#
+def advance_search(search):
+ if search.args.min_offset < maxu64:
+ search.args.min_offset += 1
+ elif search.args.min_type < 255:
+ search.args.min_type += 1
+ elif search.args.min_objectid < maxu64:
+ search.args.min_objectid += 1
+ else:
+ return False
+
+ if search.args.min_offset > search.args.max_offset:
+ return False
+ if search.args.min_type > search.args.max_type:
+ return False
+ if search.args.min_objectid > search.args.max_objectid:
+ return False
+
+ return True
+
+# given one search_header and one file_item, print the details. This
+# also tosses the [disk_bytenr,disk_num_bytes] into extent_hash to record
+# which extents were used by this file
+#
+# header: the btrfs_ioctl_search_header for the item
+# fi: the btrfs_file_extent_item copied from the result buffer
+# extent_hash: dict updated in place; keyed by disk_bytenr, or by -1 for
+# an inline extent, with the on-disk size as the value
+#
+def print_one_extent(header, fi, extent_hash):
+ # inline extents have no disk_bytenr; report them separately
+ if fi.type == BTRFS_FILE_EXTENT_INLINE:
+ # header.len is the length of the item returned. We subtract
+ # the part of the file item header that is actually used (21 bytes)
+ # and we get the length of the inlined data.
+ # this may or may not be compressed
+ inline_len = header.len - 21
+ if fi.compression:
+ ram_bytes = fi.ram_bytes
+ else:
+ ram_bytes = inline_len
+ print "(%Lu %Lu): ram %Lu disk 0 disk_size %Lu -- inline" % \
+ (header.objectid, header.offset, ram_bytes, inline_len)
+ # -1 is the slot used for inline data since it has no disk address
+ extent_hash[-1] = inline_len
+ return
+
+ # a zero disk_bytenr is reported as a hole and not counted below
+ if fi.disk_bytenr == 0:
+ tag = " -- hole"
+ else:
+ tag = ""
+ print "(%Lu %Lu): ram %Lu disk %Lu disk_size %Lu%s" % (header.objectid,
+ header.offset, fi.num_bytes, fi.disk_bytenr, fi.disk_num_bytes, tag)
+
+ # keyed by disk_bytenr, so an extent referenced more than once is only
+ # recorded once
+ if fi.disk_bytenr:
+ extent_hash[fi.disk_bytenr] = fi.disk_num_bytes
+
+# open 'filename' and run the search ioctl against it, printing all the extents
+# we find
+#
+# Returns -1 if the file cannot be opened/stat'ed or the ioctl fails, and
+# 0 otherwise (including when the path is not a regular file).
+#
+# NOTE(review): fd is never passed to os.close() on any return path, so
+# one descriptor is left open per file processed.
+def print_file_extents(filename):
+ extent_hash = {}
+
+ # only file extent items are of interest
+ s = btrfs_ioctl_search()
+ s.args.min_type = BTRFS_EXTENT_DATA_KEY
+ s.args.max_type = BTRFS_EXTENT_DATA_KEY
+
+ try:
+ fd = os.open(filename, os.O_RDONLY)
+ st = os.fstat(fd)
+ except Exception, e:
+ sys.stderr.write("Failed to open %s (%s)\n" % (filename, e))
+ return -1
+
+ if not stat.S_ISREG(st.st_mode):
+ sys.stderr.write("%s not a regular file\n" % filename)
+ return 0
+
+ # restrict the search to this file: the inode number is the objectid
+ s.args.min_objectid = st.st_ino
+ s.args.max_objectid = st.st_ino
+
+ # NOTE(review): size is assigned but not used afterwards
+ size = st.st_size
+
+ # each pass fetches one buffer of results, then resumes the search just
+ # past the last item seen
+ while True:
+ try:
+ s.search(fd)
+ except Exception, e:
+ sys.stderr.write("Search ioctl failed for %s (%s)\n" % (filename, e))
+ return -1
+
+ if s.args.nr_items == 0:
+ break
+
+ # p is the results buffer from the kernel
+ # (p and h are raw addresses; p_left counts the bytes not yet consumed)
+ p = ctypes.addressof(s.args.buf)
+ header = btrfs_ioctl_search_header()
+ header_size = ctypes.sizeof(header)
+ h = ctypes.addressof(header)
+ p_left = args_buffer_size
+
+ for x in xrange(0, s.args.nr_items):
+ # for each item, copy the header from the buffer into
+ # our header struct.
+ ctypes.memmove(h, p, header_size)
+ p += header_size
+ p_left -= header_size
+
+ # this would be a kernel bug it shouldn't be sending malformed
+ # items
+ if p_left <= 0:
+ break
+
+ if header.type == BTRFS_EXTENT_DATA_KEY:
+ fi = btrfs_file_extent_item()
+
+ # this would also be a kernel bug
+ if p_left < ctypes.sizeof(fi):
+ break
+
+ # Copy the file item out of the results buffer
+ ctypes.memmove(ctypes.addressof(fi), p, ctypes.sizeof(fi))
+ print_one_extent(header, fi, extent_hash)
+
+ # skip over the item payload to reach the next header
+ p += header.len
+ p_left -= header.len
+ if p_left <= 0:
+ break
+
+ # resume from the last item seen; advance_search moves one past it
+ s.args.min_offset = header.offset
+
+ if not advance_search(s):
+ break
+
+ # sum the on-disk size of every distinct extent recorded
+ total_on_disk = 0
+ total_extents = 0
+ for x in extent_hash.itervalues():
+ total_on_disk += x
+ total_extents += 1
+
+ # don't divide by zero
+ if total_on_disk == 0:
+ total_on_disk = 1
+
+ print "file: %s extents %Lu disk size %Lu logical size %Lu ratio %.2f" % \
+ (filename, total_extents, total_on_disk, st.st_size,
+ float(st.st_size) / float(total_on_disk))
+ return 0
+
+if len(sys.argv) == 1:
+ sys.stderr.write("Usage: btrfs-debug filename ...\n")
+ sys.exit(1)
+
+for f in sys.argv[1:]:
+ print_file_extents(f)
diff --git a/btrfs-find-root.c b/btrfs-find-root.c
new file mode 100644
index 00000000..2d5bbb2a
--- /dev/null
+++ b/btrfs-find-root.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2011 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <zlib.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "volumes.h"
+#include "utils.h"
+#include "crc32c.h"
+#include "extent-cache.h"
+#include "find-root.h"
+
+/*
+ * Print the command line synopsis to stderr.  The program name shown is
+ * btrfs-find-root, matching this source file, rather than "find-roots".
+ */
+static void usage(void)
+{
+	fprintf(stderr, "Usage: btrfs-find-root [-a] [-o search_objectid] "
+		"[ -g search_generation ] [ -l search_level ] <device>\n");
+}
+
+/*
+ * Report the generation and level the superblock records for a root.
+ *
+ * There are two possible sources of generation/level: the superblock and
+ * the tree root.  The superblock holds the level of:
+ *   root, chunk, log
+ * and the generation of:
+ *   root, chunk, log, uuid
+ * Any other generation/level could only come from the root's entry in the
+ * tree root, which is not consulted here: only the superblock is trusted.
+ *
+ * What was found (or not found) is printed to stdout.  *ret_gen and
+ * *ret_level are written only when the matching value is available and the
+ * pointer is non-NULL; otherwise they are left untouched.
+ */
+static void get_root_gen_and_level(u64 objectid, struct btrfs_fs_info *fs_info,
+				   u64 *ret_gen, u8 *ret_level)
+{
+	const u64 no_gen = (u64)-1;
+	const u8 no_level = (u8)-1;
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	u64 found_gen = no_gen;
+	u8 found_level = no_level;
+
+	if (objectid == BTRFS_ROOT_TREE_OBJECTID) {
+		found_level = btrfs_super_root_level(sb);
+		found_gen = btrfs_super_generation(sb);
+	} else if (objectid == BTRFS_CHUNK_TREE_OBJECTID) {
+		found_level = btrfs_super_chunk_root_level(sb);
+		found_gen = btrfs_super_chunk_root_generation(sb);
+	} else if (objectid == BTRFS_TREE_LOG_OBJECTID) {
+		found_level = btrfs_super_log_root_level(sb);
+		found_gen = btrfs_super_log_root_transid(sb);
+	} else if (objectid == BTRFS_UUID_TREE_OBJECTID) {
+		/* the superblock has a generation but no level for this one */
+		found_gen = btrfs_super_uuid_tree_generation(sb);
+	}
+
+	if (found_gen == no_gen) {
+		printf("Superblock doesn't contain generation info for root %llu\n",
+		       objectid);
+	} else {
+		printf("Superblock thinks the generation is %llu\n", found_gen);
+		if (ret_gen)
+			*ret_gen = found_gen;
+	}
+
+	if (found_level == no_level) {
+		printf("Superblock doesn't contain the level info for root %llu\n",
+		       objectid);
+	} else {
+		printf("Superblock thinks the level is %u\n", found_level);
+		if (ret_level)
+			*ret_level = found_level;
+	}
+}
+
+/*
+ * Print one candidate tree block together with a verdict: whether the
+ * expected generation/level is unknown (either filter field still at its
+ * all-ones sentinel), matches the block, or differs from it.
+ */
+static void print_one_result(struct cache_extent *tree_block,
+			     u8 level, u64 generation,
+			     struct btrfs_find_root_filter *filter)
+{
+	printf("Well block %llu(gen: %llu level: %u) seems good, ",
+	       tree_block->start, generation, level);
+
+	if (filter->match_gen == (u64)-1 || filter->match_level == (u8)-1) {
+		printf("but we are unsure about the correct generation/level\n");
+		return;
+	}
+
+	if (level == filter->match_level && generation == filter->match_gen) {
+		printf("and it matches superblock\n");
+		return;
+	}
+
+	printf("but generation/level doesn't match, want gen: %llu level: %u\n",
+	       filter->match_gen, filter->match_level);
+}
+
+/*
+ * Walk the per-generation result cache from its last entry to its first
+ * and print every tree block recorded under each generation.
+ *
+ * The entry whose level and generation equal the filter's match values is
+ * skipped unless search_all is set, because main() has printed it already.
+ */
+static void print_find_root_result(struct cache_tree *result,
+				   struct btrfs_find_root_filter *filter)
+{
+	struct cache_extent *gen_node = last_cache_extent(result);
+
+	while (gen_node) {
+		struct btrfs_find_root_gen_cache *entry =
+			container_of(gen_node,
+				     struct btrfs_find_root_gen_cache, cache);
+		/* the cache extent's start field holds the generation */
+		u64 generation = gen_node->start;
+		u8 level = entry->highest_level;
+		int already_printed = level == filter->match_level &&
+				      generation == filter->match_gen &&
+				      !filter->search_all;
+
+		if (!already_printed) {
+			struct cache_extent *eb =
+				last_cache_extent(&entry->eb_tree);
+
+			for (; eb; eb = prev_cache_extent(eb))
+				print_one_result(eb, level, generation, filter);
+		}
+		gen_node = prev_cache_extent(gen_node);
+	}
+}
+
+/*
+ * Entry point: parse options into a btrfs_find_root_filter, open the
+ * filesystem on <device>, take the expected generation/level from the
+ * superblock, run the root search and print the results.
+ *
+ * Returns 0 when the search ran (whether or not an exact match was found)
+ * and the negative value from btrfs_find_root_search() when it failed.
+ * Usage errors and a failed open exit with status 1.
+ */
+int main(int argc, char **argv)
+{
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_find_root_filter filter = {0};
+ struct cache_tree result;
+ struct cache_extent *found;
+ int ret;
+
+ /* Default to search root tree */
+ filter.objectid = BTRFS_ROOT_TREE_OBJECTID;
+ /* all-ones means "unknown" until the superblock supplies a value */
+ filter.match_gen = (u64)-1;
+ filter.match_level = (u8)-1;
+ while (1) {
+ static const struct option long_options[] = {
+ { "help", no_argument, NULL, GETOPT_VAL_HELP},
+ { NULL, 0, NULL, 0 }
+ };
+ int c = getopt_long(argc, argv, "al:o:g:", long_options, NULL);
+
+ if (c < 0)
+ break;
+
+ switch (c) {
+ case 'a':
+ filter.search_all = 1;
+ break;
+ case 'o':
+ filter.objectid = arg_strtou64(optarg);
+ break;
+ case 'g':
+ filter.generation = arg_strtou64(optarg);
+ break;
+ case 'l':
+ /*
+ * NOTE(review): a u64 is assigned here; if filter.level is
+ * narrower (levels are u8 elsewhere in this file) large
+ * values are silently truncated - confirm.
+ */
+ filter.level = arg_strtou64(optarg);
+ break;
+ case GETOPT_VAL_HELP:
+ default:
+ usage();
+ /* exit status 0 for --help, 1 for an unknown option */
+ exit(c != GETOPT_VAL_HELP);
+ }
+ }
+
+ set_argv0(argv);
+ argc = argc - optind;
+ if (check_argc_min(argc, 1)) {
+ usage();
+ exit(1);
+ }
+
+ fs_info = open_ctree_fs_info(argv[optind], 0, 0,
+ OPEN_CTREE_CHUNK_ROOT_ONLY |
+ OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR);
+ if (!fs_info) {
+ error("open ctree failed");
+ exit(1);
+ }
+ cache_tree_init(&result);
+
+ /* fill in the expected gen/level for the requested root, if known */
+ get_root_gen_and_level(filter.objectid, fs_info,
+ &filter.match_gen, &filter.match_level);
+ ret = btrfs_find_root_search(fs_info, &filter, &result, &found);
+ if (ret < 0) {
+ fprintf(stderr, "Fail to search the tree root: %s\n",
+ strerror(-ret));
+ goto out;
+ }
+ /* a positive return is reported as an exact match in *found */
+ if (ret > 0) {
+ printf("Found tree root at %llu gen %llu level %u\n",
+ found->start, filter.match_gen, filter.match_level);
+ ret = 0;
+ }
+ print_find_root_result(&result, &filter);
+out:
+ btrfs_find_root_free(&result);
+ close_ctree_fs_info(fs_info);
+ btrfs_close_all_devices();
+ return ret;
+}
diff --git a/btrfs-fragments.c b/btrfs-fragments.c
new file mode 100644
index 00000000..17768c3f
--- /dev/null
+++ b/btrfs-fragments.c
@@ -0,0 +1,452 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <uuid/uuid.h>
+#include <ctype.h>
+
+#include <gd.h>
+
+#undef ULONG_MAX
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+
+/* non-zero when -c was given: extents are drawn in per-tree colors */
+static int use_color;
+/*
+ * Write the image as a PNG to "<dir>/<name>" and destroy it.
+ * A NULL image is ignored.  If the output file cannot be created a
+ * message is printed and the program exits with status 1.
+ */
+static void
+push_im(gdImagePtr im, char *name, char *dir)
+{
+	char path[2000];
+	FILE *out;
+
+	if (im == NULL)
+		return;
+
+	snprintf(path, sizeof(path), "%s/%s", dir, name);
+
+	out = fopen(path, "w");
+	if (out == NULL) {
+		printf("unable to create file %s\n", path);
+		exit(1);
+	}
+
+	gdImagePng(im, out);
+	fclose(out);
+
+	/* the image is consumed by this call; callers must not reuse it */
+	gdImageDestroy(im);
+}
+
+/*
+ * Map block group flags to a human readable type name.  Only the
+ * system/data/metadata bits are looked at; any combination other than a
+ * single type or data+metadata yields "invalid".  The returned string is
+ * a literal and must not be modified or freed.
+ */
+static char *
+chunk_type(u64 flags)
+{
+	const u64 type_mask = BTRFS_BLOCK_GROUP_SYSTEM |
+			      BTRFS_BLOCK_GROUP_DATA |
+			      BTRFS_BLOCK_GROUP_METADATA;
+	const u64 type = flags & type_mask;
+
+	if (type == BTRFS_BLOCK_GROUP_SYSTEM)
+		return "system";
+	if (type == BTRFS_BLOCK_GROUP_DATA)
+		return "data";
+	if (type == BTRFS_BLOCK_GROUP_METADATA)
+		return "metadata";
+	if (type == (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA))
+		return "mixed";
+
+	return "invalid";
+}
+
+/*
+ * Emit the HTML paragraph and <img> tag describing one block group:
+ * its type, start, size, percentage used and a fragmentation figure
+ * derived from the number of separate areas per 4096-byte block.
+ */
+static void
+print_bg(FILE *html, char *name, u64 start, u64 len, u64 used, u64 flags,
+	 u64 areas)
+{
+	u64 blocks = len / 4096;
+	double frag = (double)areas / blocks * 2;
+	double used_pct = 100.0 * used / len;
+	double frag_pct = 100.0 * frag;
+
+	fprintf(html, "<p>%s chunk starts at %lld, size is %s, %.2f%% used, "
+		"%.2f%% fragmented</p>\n", chunk_type(flags), start,
+		pretty_size(len), used_pct, frag_pct);
+	fprintf(html, "<img src=\"%s\" border=\"1\" />\n", name);
+}
+
+/*
+ * Indexes into the colors[] palette used when drawing extents; one slot
+ * per category returned by get_color().  COLOR_MAX is not a color, it is
+ * the number of slots and sizes the palette array.
+ */
+enum tree_colors {
+ COLOR_ROOT = 0,
+ COLOR_EXTENT,
+ COLOR_CHUNK,
+ COLOR_DEV,
+ COLOR_FS,
+ COLOR_CSUM,
+ COLOR_RELOC,
+ COLOR_DATA,
+ COLOR_UNKNOWN,
+ COLOR_MAX
+};
+
+/*
+ * Classify an extent item into one of the tree_colors categories.
+ *
+ * @item: extent item as returned by the search ioctl
+ * @len:  length in bytes of that item
+ *
+ * Data extents are COLOR_DATA and anything referenced more than once is
+ * taken to belong to an fs tree.  Otherwise the first inline reference,
+ * located after the extent item and the tree block info, decides: for a
+ * tree block ref its offset is matched against the known tree objectids.
+ *
+ * @len is checked before each read so that an item too short to hold the
+ * extent item, or too short to hold the inline reference, is reported as
+ * COLOR_UNKNOWN instead of being read past its end.
+ */
+static int
+get_color(struct btrfs_extent_item *item, int len)
+{
+	u64 refs;
+	u64 flags;
+	u8 type;
+	u64 offset;
+	struct btrfs_extent_inline_ref *ref;
+	const size_t ref_off = sizeof(struct btrfs_extent_item) +
+			       sizeof(struct btrfs_tree_block_info);
+
+	if (len < 0 || (size_t)len < sizeof(struct btrfs_extent_item))
+		return COLOR_UNKNOWN;
+
+	refs = btrfs_stack_extent_refs(item);
+	flags = btrfs_stack_extent_flags(item);
+
+	if (flags & BTRFS_EXTENT_FLAG_DATA)
+		return COLOR_DATA;
+	if (refs > 1) {
+		/* this must be an fs tree */
+		return COLOR_FS;
+	}
+
+	/* no room for an inline reference: nothing to classify by */
+	if ((size_t)len < ref_off + sizeof(*ref))
+		return COLOR_UNKNOWN;
+
+	ref = (void *)item + ref_off;
+	type = btrfs_stack_extent_inline_ref_type(ref);
+	offset = btrfs_stack_extent_inline_ref_offset(ref);
+
+	switch (type) {
+	case BTRFS_EXTENT_DATA_REF_KEY:
+		return COLOR_DATA;
+	case BTRFS_SHARED_BLOCK_REF_KEY:
+	case BTRFS_SHARED_DATA_REF_KEY:
+		return COLOR_FS;
+	case BTRFS_TREE_BLOCK_REF_KEY:
+		/* the offset names the owning tree, decoded below */
+		break;
+	default:
+		return COLOR_UNKNOWN;
+	}
+
+	switch (offset) {
+	case BTRFS_ROOT_TREE_OBJECTID:
+		return COLOR_ROOT;
+	case BTRFS_EXTENT_TREE_OBJECTID:
+		return COLOR_EXTENT;
+	case BTRFS_CHUNK_TREE_OBJECTID:
+		return COLOR_CHUNK;
+	case BTRFS_DEV_TREE_OBJECTID:
+		return COLOR_DEV;
+	case BTRFS_FS_TREE_OBJECTID:
+		return COLOR_FS;
+	case BTRFS_CSUM_TREE_OBJECTID:
+		return COLOR_CSUM;
+	case BTRFS_DATA_RELOC_TREE_OBJECTID:
+		return COLOR_RELOC;
+	}
+
+	return COLOR_UNKNOWN;
+}
+
+/*
+ * Allocate one palette entry in the image for every tree_colors category
+ * and store the resulting color index in colors[].  Entries are allocated
+ * in enum order, COLOR_ROOT first and COLOR_UNKNOWN last.
+ */
+static void
+init_colors(gdImagePtr im, int *colors)
+{
+	static const struct {
+		enum tree_colors slot;
+		int red;
+		int green;
+		int blue;
+	} palette[] = {
+		{ COLOR_ROOT,	 255,	0,   0 },
+		{ COLOR_EXTENT,	   0, 255,   0 },
+		{ COLOR_CHUNK,	 255,	0,   0 },
+		{ COLOR_DEV,	 255,	0,   0 },
+		{ COLOR_FS,	   0,	0,   0 },
+		{ COLOR_CSUM,	   0,	0, 255 },
+		{ COLOR_RELOC,	 128, 128, 128 },
+		{ COLOR_DATA,	 100,	0,   0 },
+		{ COLOR_UNKNOWN,  50,  50,  50 },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(palette) / sizeof(palette[0]); i++)
+		colors[palette[i].slot] = gdImageColorAllocate(im,
+				palette[i].red, palette[i].green,
+				palette[i].blue);
+}
+
+/*
+ * Walk tree 2 with the tree search ioctl and, for every block group whose
+ * flags intersect @flags, render a PNG allocation map into @dir plus an
+ * entry in "<dir>/index.html".
+ *
+ * @fd:    open descriptor used for the search ioctl
+ * @flags: block group type bits to include
+ * @dir:   output directory for index.html and the bgN.png images
+ *
+ * Each pixel stands for 4096 bytes; images are 800 pixels wide.  Returns
+ * the result of the last ioctl: negative when the search failed.  The
+ * program exits if index.html cannot be created.
+ *
+ * NOTE(review): the document head is written as <header>, not <head>.
+ * NOTE(review): the return value of gdImageCreate() is not checked.
+ * NOTE(review): when the ioctl fails, a pending image in "im" is not
+ * destroyed before returning.
+ */
+int
+list_fragments(int fd, u64 flags, char *dir)
+{
+ int ret;
+ struct btrfs_ioctl_search_args args;
+ struct btrfs_ioctl_search_key *sk = &args.key;
+ int i;
+ struct btrfs_ioctl_search_header *sh;
+ unsigned long off = 0;
+ int bgnum = 0;
+ /* state of the block group currently being drawn */
+ u64 bgstart = 0;
+ u64 bglen = 0;
+ u64 bgend = 0;
+ u64 bgflags = 0;
+ u64 bgused = 0;
+ /* an extent seen before the block group item it belongs to */
+ u64 saved_extent = 0;
+ u64 saved_len = 0;
+ int saved_color = 0;
+ u64 last_end = 0;
+ u64 areas = 0;
+ long px;
+ char name[1000];
+ FILE *html;
+ int colors[COLOR_MAX];
+
+ gdImagePtr im = NULL;
+ int black = 0;
+ int width = 800;
+
+ snprintf(name, sizeof(name), "%s/index.html", dir);
+ html = fopen(name, "w");
+ if (!html) {
+ printf("unable to create %s\n", name);
+ exit(1);
+ }
+
+ fprintf(html, "<html><header>\n");
+ fprintf(html, "<title>Btrfs Block Group Allocation Map</title>\n");
+ fprintf(html, "<style type=\"text/css\">\n");
+ fprintf(html, "img {margin-left: 1em; margin-bottom: 2em;}\n");
+ fprintf(html, "</style>\n");
+ fprintf(html, "</header><body>\n");
+
+ memset(&args, 0, sizeof(args));
+
+ /* search everything in tree 2 (presumably the extent tree - confirm) */
+ sk->tree_id = 2;
+ sk->max_type = -1;
+ sk->min_type = 0;
+ sk->max_objectid = (u64)-1;
+ sk->max_offset = (u64)-1;
+ sk->max_transid = (u64)-1;
+
+ /* just a big number, doesn't matter much */
+ sk->nr_items = 4096;
+
+ while(1) {
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR: can't perform the search\n");
+ goto out_close;
+ }
+ /* the ioctl returns the number of item it found in nr_items */
+ if (sk->nr_items == 0)
+ break;
+
+ /* off walks args.buf: a search header, then sh->len bytes of item */
+ off = 0;
+ for (i = 0; i < sk->nr_items; i++) {
+ int j;
+
+ sh = (struct btrfs_ioctl_search_header *)(args.buf +
+ off);
+ off += sizeof(*sh);
+ if (sh->type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+ struct btrfs_block_group_item *bg;
+
+ /* a new block group starts: flush the previous image */
+ if (im) {
+ push_im(im, name, dir);
+ im = NULL;
+
+ print_bg(html, name, bgstart, bglen,
+ bgused, bgflags, areas);
+ }
+
+ ++bgnum;
+
+ bg = (struct btrfs_block_group_item *)
+ (args.buf + off);
+ bgflags = btrfs_block_group_flags(bg);
+ bgused = btrfs_block_group_used(bg);
+
+ printf("found block group %lld len %lld "
+ "flags %lld\n", sh->objectid,
+ sh->offset, bgflags);
+ if (!(bgflags & flags)) {
+ /* skip this block group */
+ /* restart the search at the first byte after it */
+ sk->min_objectid = sh->objectid +
+ sh->offset;
+ sk->min_type = 0;
+ sk->min_offset = 0;
+ break;
+ }
+ /* one pixel per 4096 bytes, rows of "width" pixels */
+ im = gdImageCreate(width,
+ (sh->offset / 4096 + 799) / width);
+
+ black = gdImageColorAllocate(im, 0, 0, 0);
+
+ for (j = 0; j < ARRAY_SIZE(colors); ++j)
+ colors[j] = black;
+
+ init_colors(im, colors);
+ bgstart = sh->objectid;
+ bglen = sh->offset;
+ bgend = bgstart + bglen;
+
+ snprintf(name, sizeof(name), "bg%d.png", bgnum);
+
+ last_end = bgstart;
+ /* draw the extent that was stashed before this item arrived */
+ if (saved_len) {
+ px = (saved_extent - bgstart) / 4096;
+ for (j = 0; j < saved_len / 4096; ++j) {
+ int x = (px + j) % width;
+ int y = (px + j) / width;
+ gdImageSetPixel(im, x, y,
+ saved_color);
+ }
+ last_end += saved_len;
+ }
+ areas = 0;
+ saved_len = 0;
+ }
+ if (im && sh->type == BTRFS_EXTENT_ITEM_KEY) {
+ int c;
+ struct btrfs_extent_item *item;
+
+ item = (struct btrfs_extent_item *)
+ (args.buf + off);
+
+ if (use_color)
+ c = colors[get_color(item, sh->len)];
+ else
+ c = black;
+ if (sh->objectid > bgend) {
+ printf("WARN: extent %lld is without "
+ "block group\n", sh->objectid);
+ goto skip;
+ }
+ /*
+ * An extent starting exactly at the end of the current block
+ * group is remembered and drawn once the next block group
+ * item has been seen.
+ */
+ if (sh->objectid == bgend) {
+ saved_extent = sh->objectid;
+ saved_len = sh->offset;
+ saved_color = c;
+ goto skip;
+ }
+ px = (sh->objectid - bgstart) / 4096;
+ for (j = 0; j < sh->offset / 4096; ++j) {
+ int x = (px + j) % width;
+ int y = (px + j) / width;
+ gdImageSetPixel(im, x, y, c);
+ }
+ /* a gap since the previous extent starts a new area */
+ if (sh->objectid != last_end)
+ ++areas;
+ last_end = sh->objectid + sh->offset;
+skip:;
+ }
+ off += sh->len;
+
+ /*
+ * record the mins in sk so we can make sure the
+ * next search doesn't repeat this root
+ */
+ sk->min_objectid = sh->objectid;
+ sk->min_type = sh->type;
+ sk->min_offset = sh->offset;
+ }
+ sk->nr_items = 4096;
+
+ /* increment by one */
+ /* with carry from offset into type into objectid; stop on wrap */
+ if (++sk->min_offset == 0)
+ if (++sk->min_type == 0)
+ if (++sk->min_objectid == 0)
+ break;
+ }
+
+ /* flush the image of the last block group */
+ if (im) {
+ push_im(im, name, dir);
+ print_bg(html, name, bgstart, bglen, bgused, bgflags, areas);
+ }
+
+ if (use_color) {
+ fprintf(html, "<p>");
+ fprintf(html, "data - dark red, ");
+ fprintf(html, "fs tree - black, ");
+ fprintf(html, "extent tree - green, ");
+ fprintf(html, "csum tree - blue, ");
+ fprintf(html, "reloc tree - grey, ");
+ fprintf(html, "other trees - red, ");
+ fprintf(html, "unknown tree - dark grey");
+ fprintf(html, "</p>");
+ }
+ fprintf(html, "</body></html>\n");
+
+out_close:
+ fclose(html);
+
+ return ret;
+}
+
+/*
+ * Print the option summary to stdout and exit with status 1.
+ * This function does not return.
+ */
+void
+usage(void)
+{
+	static const char * const help[] = {
+		"usage: btrfs-fragments [options] <path>\n",
+		" -c use color\n",
+		" -d print data chunks\n",
+		" -m print metadata chunks\n",
+		" -s print system chunks\n",
+		" (default is data+metadata)\n",
+		" -o <dir> output directory, default is html\n",
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
+		printf("%s", help[i]);
+
+	exit(1);
+}
+
+/*
+ * Entry point: parse the options, open <path> and write the block group
+ * allocation maps into the output directory (default "html").
+ *
+ * Without -d, -m or -s both data and metadata block groups are shown.
+ * Exits with status 0 on success and 1 on any failure or usage error.
+ */
+int main(int argc, char **argv)
+{
+	char *path;
+	int fd;
+	int ret;
+	u64 flags = 0;
+	char *dir = "html";
+	DIR *dirstream = NULL;
+
+	while (1) {
+		/*
+		 * Every option handled in the switch has to be listed here;
+		 * 'd' was missing, so the documented -d option was rejected
+		 * by getopt and ended up in usage().
+		 */
+		int c = getopt(argc, argv, "cdmso:h");
+
+		if (c < 0)
+			break;
+		switch (c) {
+		case 'c':
+			use_color = 1;
+			break;
+		case 'd':
+			flags |= BTRFS_BLOCK_GROUP_DATA;
+			break;
+		case 'm':
+			flags |= BTRFS_BLOCK_GROUP_METADATA;
+			break;
+		case 's':
+			flags |= BTRFS_BLOCK_GROUP_SYSTEM;
+			break;
+		case 'o':
+			dir = optarg;
+			break;
+		case 'h':
+		default:
+			/* usage() exits, it does not return */
+			usage();
+		}
+	}
+
+	set_argv0(argv);
+	argc = argc - optind;
+	if (check_argc_min(argc, 1)) {
+		usage();
+		exit(1);
+	}
+
+	path = argv[optind++];
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		exit(1);
+
+	if (flags == 0)
+		flags = BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA;
+
+	ret = list_fragments(fd, flags, dir);
+	close_file_or_dir(fd, dirstream);
+	if (ret)
+		exit(1);
+
+	exit(0);
+}
diff --git a/btrfs-image.c b/btrfs-image.c
new file mode 100644
index 00000000..c7fa18fb
--- /dev/null
+++ b/btrfs-image.c
@@ -0,0 +1,2880 @@
+/*
+ * Copyright (C) 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <zlib.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "crc32c.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "utils.h"
+#include "volumes.h"
+#include "extent_io.h"
+
+/* magic value for the magic field of a meta_cluster_header */
+#define HEADER_MAGIC 0xbd5c25e27295668bULL
+#define MAX_PENDING_SIZE (256 * 1024)
+/* unit the cluster index is sized in; BLOCK_MASK masks an offset within it */
+#define BLOCK_SIZE 1024
+#define BLOCK_MASK (BLOCK_SIZE - 1)
+
+/* values for meta_cluster_header.compress */
+#define COMPRESS_NONE 0
+#define COMPRESS_ZLIB 1
+
+/* one index entry: where a buffer belongs (bytenr) and its stored size */
+struct meta_cluster_item {
+ __le64 bytenr;
+ __le32 size;
+} __attribute__ ((__packed__));
+
+/* fixed-size, packed header at the start of every cluster */
+struct meta_cluster_header {
+ __le64 magic;
+ __le64 bytenr;
+ __le32 nritems;
+ u8 compress;
+} __attribute__ ((__packed__));
+
+/* cluster header + index items + buffers */
+struct meta_cluster {
+ struct meta_cluster_header header;
+ struct meta_cluster_item items[];
+} __attribute__ ((__packed__));
+
+/* how many index items fit in one BLOCK_SIZE block after the header */
+#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
+ sizeof(struct meta_cluster_item))
+
+/*
+ * One chunk mapping: "bytes" bytes starting at logical address "logical"
+ * live at physical address "physical".  The same object is linked into
+ * two rb-trees: through "l" it is ordered by logical address (chunk_cmp)
+ * and through "p" by physical address (physical_cmp).
+ */
+struct fs_chunk {
+ u64 logical;
+ u64 physical;
+ u64 bytes;
+ struct rb_node l;
+ struct rb_node p;
+ struct list_head list;
+};
+
+/*
+ * One unit of work: a byte range (start, size) with an attached buffer
+ * and an error slot.  It can sit on two lists at once ("list" and
+ * "ordered").
+ * NOTE(review): the producers/consumers of this struct are outside this
+ * excerpt; confirm who owns "buffer".
+ */
+struct async_work {
+ struct list_head list;
+ struct list_head ordered;
+ u64 start;
+ u64 size;
+ u8 *buffer;
+ size_t bufsize;
+ int error;
+};
+
+/*
+ * State for creating a metadata dump.
+ * NOTE(review): most users of these fields are outside this excerpt; only
+ * the use of name_tree is visible here.
+ */
+struct metadump_struct {
+ struct btrfs_root *root;
+ FILE *out;
+
+ struct meta_cluster *cluster;
+
+ pthread_t *threads;
+ size_t num_threads;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ /* struct name nodes: original names and their substitutes */
+ struct rb_root name_tree;
+
+ struct list_head list;
+ struct list_head ordered;
+ size_t num_items;
+ size_t num_ready;
+
+ u64 pending_start;
+ u64 pending_size;
+
+ int compress_level;
+ int done;
+ int data;
+ int sanitize_names;
+
+ int error;
+};
+
+/*
+ * Node of metadump_struct.name_tree: remembers which substitute was
+ * handed out for an original name so the same name always maps to the
+ * same replacement.
+ */
+struct name {
+ struct rb_node n;
+ /* original name, len bytes, not NUL terminated */
+ char *val;
+ /* replacement name, allocated with the same length */
+ char *sub;
+ u32 len;
+};
+
+/*
+ * State for restoring a metadata dump.
+ * NOTE(review): most users of these fields are outside this excerpt; only
+ * the uses of "in" and "chunk_tree" are visible here.
+ */
+struct mdrestore_struct {
+ /* input stream; may be stdin */
+ FILE *in;
+ FILE *out;
+
+ pthread_t *threads;
+ size_t num_threads;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ /* fs_chunk nodes ordered by logical address, see logical_to_physical() */
+ struct rb_root chunk_tree;
+ /* presumably the same chunks ordered by physical address - confirm */
+ struct rb_root physical_tree;
+ struct list_head list;
+ struct list_head overlapping_chunks;
+ size_t num_items;
+ u32 leafsize;
+ u64 devid;
+ u64 alloced_chunks;
+ u64 last_physical_offset;
+ u8 uuid[BTRFS_UUID_SIZE];
+ u8 fsid[BTRFS_FSID_SIZE];
+
+ int compress_method;
+ int done;
+ int error;
+ int old_restore;
+ int fixup_offset;
+ int multi_devices;
+ int clear_space_cache;
+ struct btrfs_fs_info *info;
+};
+
+static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
+ u64 search, u64 cluster_bytenr);
+static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
+
+/*
+ * Recompute the checksum of a block in place: crc32c over everything
+ * after the first BTRFS_CSUM_SIZE bytes, with the finalized result stored
+ * in the first BTRFS_CRC32_SIZE bytes of the block.
+ */
+static void csum_block(u8 *buf, size_t len)
+{
+	char digest[BTRFS_CRC32_SIZE];
+	u32 crc;
+
+	crc = crc32c(~(u32)0, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
+	btrfs_csum_final(crc, digest);
+	memcpy(buf, digest, BTRFS_CRC32_SIZE);
+}
+
+/*
+ * Return 1 when the key's type is one of the item types that embed a
+ * name (dir item, dir index, inode ref, inode extref, xattr item), and
+ * 0 for every other type.
+ */
+static int has_name(struct btrfs_key *key)
+{
+	return key->type == BTRFS_DIR_ITEM_KEY ||
+	       key->type == BTRFS_DIR_INDEX_KEY ||
+	       key->type == BTRFS_INODE_REF_KEY ||
+	       key->type == BTRFS_INODE_EXTREF_KEY ||
+	       key->type == BTRFS_XATTR_ITEM_KEY;
+}
+
+/*
+ * Allocate name_len bytes filled with random characters in the range
+ * 33..126, never '/'.  The buffer is not NUL terminated and has to be
+ * freed by the caller.  Returns NULL if the allocation fails.
+ */
+static char *generate_garbage(u32 name_len)
+{
+	char *garbage = malloc(name_len);
+	char *end;
+	char *p;
+
+	if (garbage == NULL)
+		return NULL;
+
+	end = garbage + name_len;
+	for (p = garbage; p < end; p++) {
+		char c = rand() % 94 + 33;
+
+		/* a slash is not valid inside a name, use the next character */
+		if (c == '/')
+			c++;
+		*p = c;
+	}
+
+	return garbage;
+}
+
+/*
+ * rb-tree comparison callback for struct name nodes.
+ *
+ * @a:    node already in the tree
+ * @b:    node being inserted or searched for
+ * @fuzz: unused
+ *
+ * Orders by the bytes of the common prefix first.  When those are equal
+ * the shorter name sorts first, so two names only compare equal when both
+ * length and contents match.  Comparing the prefix alone treated a name
+ * and any extension of it (e.g. "a" and "ab") as the same entry, handing
+ * out a substitute of the wrong length.
+ */
+static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
+{
+	struct name *entry = rb_entry(a, struct name, n);
+	struct name *ins = rb_entry(b, struct name, n);
+	u32 len;
+	int ret;
+
+	len = min(ins->len, entry->len);
+	ret = memcmp(ins->val, entry->val, len);
+	if (ret)
+		return ret;
+
+	if (ins->len < entry->len)
+		return -1;
+	if (ins->len > entry->len)
+		return 1;
+	return 0;
+}
+
+/*
+ * rb-tree comparison callback for fs_chunk nodes linked through "l".
+ *
+ * @a is the node in the tree, @b the one being inserted or looked up.
+ * With @fuzz set, a logical address of @b that falls anywhere inside the
+ * range covered by @a counts as equal; otherwise only the start addresses
+ * are compared.  Only the logical field of @b is read.
+ */
+static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
+{
+	struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
+	struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
+	u64 start = entry->logical;
+	u64 wanted = ins->logical;
+
+	if (fuzz && wanted >= start && wanted < start + entry->bytes)
+		return 0;
+
+	if (wanted < start)
+		return -1;
+	return wanted > start ? 1 : 0;
+}
+
+/*
+ * rb-tree comparison callback for fs_chunk nodes linked through "p".
+ *
+ * @a is the node in the tree, @b the one being inserted or looked up.
+ * With @fuzz set the two chunks compare equal as soon as the start of
+ * either one lies inside the physical range of the other; otherwise the
+ * physical start addresses are compared.
+ */
+static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
+{
+	struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
+	struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
+
+	if (fuzz) {
+		/* start of the new chunk inside the existing one */
+		if (ins->physical >= entry->physical &&
+		    ins->physical < entry->physical + entry->bytes)
+			return 0;
+
+		/* start of the existing chunk inside the new one */
+		if (entry->physical >= ins->physical &&
+		    entry->physical < ins->physical + ins->bytes)
+			return 0;
+	}
+
+	if (ins->physical < entry->physical)
+		return -1;
+	return ins->physical > entry->physical ? 1 : 0;
+}
+
+/*
+ * Insert @ins into the rb-tree @root, ordered by @cmp.
+ *
+ * @cmp is called as cmp(existing node, @ins, 1), i.e. always in fuzzy
+ * mode.  A negative result descends to the left, a positive one to the
+ * right; a result of 0 means an equal entry already exists and triggers
+ * BUG().
+ */
+static void tree_insert(struct rb_root *root, struct rb_node *ins,
+			int (*cmp)(struct rb_node *a, struct rb_node *b,
+				   int fuzz))
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		int order;
+
+		parent = *link;
+		order = cmp(parent, ins, 1);
+		if (order < 0)
+			link = &parent->rb_left;
+		else if (order > 0)
+			link = &parent->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(ins, parent, link);
+	rb_insert_color(ins, root);
+}
+
+/*
+ * Look up @search in the rb-tree @root using @cmp.
+ *
+ * @cmp is called as cmp(tree node, @search, @fuzz); a negative result
+ * descends to the left, a positive one to the right.  Returns the first
+ * node for which @cmp returns 0, or NULL when there is none.
+ */
+static struct rb_node *tree_search(struct rb_root *root,
+				   struct rb_node *search,
+				   int (*cmp)(struct rb_node *a,
+					      struct rb_node *b, int fuzz),
+				   int fuzz)
+{
+	struct rb_node *cur;
+
+	for (cur = root->rb_node; cur; ) {
+		int order = cmp(cur, search, fuzz);
+
+		if (order == 0)
+			return cur;
+		cur = order < 0 ? cur->rb_left : cur->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Translate a logical address into a physical one using the chunk tree.
+ *
+ * @mdres:   restore state holding the chunk tree
+ * @logical: logical address to translate
+ * @size:    in/out; on a successful lookup it is reduced so that the
+ *           range does not run past the end of the chunk found
+ *
+ * The superblock offset and any address not covered by a known chunk are
+ * returned unchanged, with *size left untouched; in the latter case a
+ * message is printed unless the input is stdin.
+ */
+static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size)
+{
+ struct fs_chunk *fs_chunk;
+ struct rb_node *entry;
+ struct fs_chunk search;
+ u64 offset;
+
+ if (logical == BTRFS_SUPER_INFO_OFFSET)
+ return logical;
+
+ /* only search.logical is set: chunk_cmp reads nothing else from it */
+ search.logical = logical;
+ entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
+ if (!entry) {
+ if (mdres->in != stdin)
+ printf("Couldn't find a chunk, using logical\n");
+ return logical;
+ }
+ fs_chunk = rb_entry(entry, struct fs_chunk, l);
+ /* sanity check of the fuzzy match: the chunk must cover the address */
+ if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
+ BUG();
+ offset = search.logical - fs_chunk->logical;
+
+ /* clamp to the bytes remaining in this chunk */
+ *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
+ return fs_chunk->physical + offset;
+}
+
+
+/*
+ * Produce a substitute for 'name' (name_len bytes, not NUL terminated) whose
+ * crc32c with seed ~1 equals that of the original but whose bytes differ.
+ *
+ * Results are cached in md->name_tree, so the same input name always maps to
+ * the same substitute.  This function takes ownership of 'name': it is freed
+ * on a cache hit and on allocation failure, otherwise it is kept in the new
+ * tree node.  The returned buffer belongs to the tree node (it is released
+ * in metadump_destroy()), is name_len bytes long and is not NUL terminated.
+ * Returns NULL if memory could not be allocated.
+ */
+static char *find_collision(struct metadump_struct *md, char *name,
+ u32 name_len)
+{
+ struct name *val;
+ struct rb_node *entry;
+ struct name tmp;
+ unsigned long checksum;
+ int found = 0;
+ int i;
+
+ /* look for a substitute computed earlier for this exact name */
+ tmp.val = name;
+ tmp.len = name_len;
+ entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
+ if (entry) {
+ val = rb_entry(entry, struct name, n);
+ free(name);
+ return val->sub;
+ }
+
+ val = malloc(sizeof(struct name));
+ if (!val) {
+ fprintf(stderr, "Couldn't sanitize name, enomem\n");
+ free(name);
+ return NULL;
+ }
+
+ memset(val, 0, sizeof(*val));
+
+ val->val = name;
+ val->len = name_len;
+ val->sub = malloc(name_len);
+ if (!val->sub) {
+ fprintf(stderr, "Couldn't sanitize name, enomem\n");
+ free(val);
+ free(name);
+ return NULL;
+ }
+
+ checksum = crc32c(~1, val->val, name_len);
+ /*
+ * Brute force search: the candidate starts out as all spaces and is
+ * then counted upwards like an odometer whose lowest digit is index 0.
+ * Each position runs from ' ' up to 127 and skips '/'.
+ * NOTE(review): with name_len == 0 the val->sub[i] reads below fall
+ * outside the allocation - confirm callers never pass an empty name.
+ */
+ memset(val->sub, ' ', name_len);
+ i = 0;
+ while (1) {
+ /* accept a candidate with the same crc that is not the name itself */
+ if (crc32c(~1, val->sub, name_len) == checksum &&
+ memcmp(val->sub, val->val, val->len)) {
+ found = 1;
+ break;
+ }
+
+ if (val->sub[i] == 127) {
+ /* carry: find the first position that can still be bumped */
+ do {
+ i++;
+ if (i >= name_len)
+ break;
+ } while (val->sub[i] == 127);
+
+ /* every position is at 127, the search space is exhausted */
+ if (i >= name_len)
+ break;
+ val->sub[i]++;
+ if (val->sub[i] == '/')
+ val->sub[i]++;
+ /* reset all lower positions and start counting from 0 again */
+ memset(val->sub, ' ', i);
+ i = 0;
+ continue;
+ } else {
+ val->sub[i]++;
+ if (val->sub[i] == '/')
+ val->sub[i]++;
+ }
+ }
+
+ if (!found) {
+ fprintf(stderr, "Couldn't find a collision for '%.*s', "
+ "generating normal garbage, it won't match indexes\n",
+ val->len, val->val);
+ /* fall back to random bytes in the range 33..126, never '/' */
+ for (i = 0; i < name_len; i++) {
+ char c = rand() % 94 + 33;
+
+ if (c == '/')
+ c++;
+ val->sub[i] = c;
+ }
+ }
+
+ /* remember the mapping; the tree node now owns both val->val and val->sub */
+ tree_insert(&md->name_tree, &val->n, name_cmp);
+ return val->sub;
+}
+
+/*
+ * Overwrite every name stored in the dir item at 'slot' of 'eb' with
+ * replacement bytes of the same length.
+ *
+ * With md->sanitize_names > 1 the replacement comes from find_collision(),
+ * which keeps ownership of the returned buffer; otherwise it comes from
+ * generate_garbage() and is freed here when sanitize_names is exactly 1.
+ * Processing stops silently (after a message) when memory runs out.
+ */
+static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
+			      int slot)
+{
+	struct btrfs_dir_item *di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
+	const u32 item_len = btrfs_item_size_nr(eb, slot);
+	const int owns_garbage = (md->sanitize_names == 1);
+	u32 done;
+	u32 entry_len;
+
+	for (done = 0; done < item_len; done += entry_len) {
+		u32 name_len = btrfs_dir_name_len(eb, di);
+		unsigned long name_ptr = (unsigned long)(di + 1);
+		char *garbage;
+
+		/* header, then the name, then the data payload */
+		entry_len = sizeof(*di) + name_len + btrfs_dir_data_len(eb, di);
+
+		if (md->sanitize_names > 1) {
+			char *orig_name = malloc(name_len);
+
+			if (orig_name == NULL) {
+				fprintf(stderr, "Couldn't sanitize name, "
+					"enomem\n");
+				return;
+			}
+			read_extent_buffer(eb, orig_name, name_ptr, name_len);
+			garbage = find_collision(md, orig_name, name_len);
+		} else {
+			garbage = generate_garbage(name_len);
+		}
+
+		if (garbage == NULL) {
+			fprintf(stderr, "Couldn't sanitize name, enomem\n");
+			return;
+		}
+
+		write_extent_buffer(eb, garbage, name_ptr, name_len);
+		if (owns_garbage)
+			free(garbage);
+
+		di = (struct btrfs_dir_item *)((char *)di + entry_len);
+	}
+}
+
+/*
+ * Overwrite every name stored in the inode ref item at 'slot' of 'eb' with
+ * replacement bytes of the same length.  'ext' selects the layout: non-zero
+ * for btrfs_inode_extref entries, zero for btrfs_inode_ref entries.
+ *
+ * The replacement comes from find_collision() when md->sanitize_names > 1
+ * (that buffer is not freed here) and from generate_garbage() otherwise
+ * (freed here when sanitize_names is exactly 1).
+ */
+static void sanitize_inode_ref(struct metadump_struct *md,
+			       struct extent_buffer *eb, int slot, int ext)
+{
+	const u32 item_len = btrfs_item_size_nr(eb, slot);
+	const unsigned long item_start = btrfs_item_ptr_offset(eb, slot);
+	const int owns_garbage = (md->sanitize_names == 1);
+	u32 pos = 0;
+
+	while (pos < item_len) {
+		unsigned long name_ptr;
+		char *garbage;
+		int len;
+
+		if (ext) {
+			struct btrfs_inode_extref *extref;
+
+			extref = (struct btrfs_inode_extref *)(item_start + pos);
+			name_ptr = (unsigned long)(&extref->name);
+			len = btrfs_inode_extref_name_len(eb, extref);
+			pos += sizeof(*extref);
+		} else {
+			struct btrfs_inode_ref *ref;
+
+			ref = (struct btrfs_inode_ref *)(item_start + pos);
+			len = btrfs_inode_ref_name_len(eb, ref);
+			name_ptr = (unsigned long)(ref + 1);
+			pos += sizeof(*ref);
+		}
+		/* step over the name so 'pos' points at the next entry */
+		pos += len;
+
+		if (md->sanitize_names > 1) {
+			char *orig_name = malloc(len);
+
+			if (orig_name == NULL) {
+				fprintf(stderr, "Couldn't sanitize name, "
+					"enomem\n");
+				return;
+			}
+			read_extent_buffer(eb, orig_name, name_ptr, len);
+			garbage = find_collision(md, orig_name, len);
+		} else {
+			garbage = generate_garbage(len);
+		}
+
+		if (garbage == NULL) {
+			fprintf(stderr, "Couldn't sanitize name, enomem\n");
+			return;
+		}
+
+		write_extent_buffer(eb, garbage, name_ptr, len);
+		if (owns_garbage)
+			free(garbage);
+	}
+}
+
+/*
+ * Zero the data payload of the dir item found at 'slot' of 'eb'.  The
+ * payload starts right behind the name, which itself is left untouched.
+ */
+static void sanitize_xattr(struct metadump_struct *md,
+			   struct extent_buffer *eb, int slot)
+{
+	struct btrfs_dir_item *di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
+	u32 value_len = btrfs_dir_data_len(eb, di);
+	char *name_start = (char *)(di + 1);
+	unsigned long value_ptr;
+
+	value_ptr = (unsigned long)(name_start + btrfs_dir_name_len(eb, di));
+	memset_extent_buffer(eb, 0, value_ptr, value_len);
+}
+
+/*
+ * Sanitize the item at 'slot' inside the raw block image 'dst'.
+ *
+ * 'dst' is copied into a scratch extent buffer sized and positioned like
+ * 'src', the helper matching key->type rewrites the item there, and the
+ * result is copied back over 'dst'.  Key types without a helper leave the
+ * block unchanged.  Nothing is done if the scratch buffer cannot be allocated.
+ */
+static void sanitize_name(struct metadump_struct *md, u8 *dst,
+			  struct extent_buffer *src, struct btrfs_key *key,
+			  int slot)
+{
+	struct extent_buffer *scratch = alloc_dummy_eb(src->start, src->len);
+
+	if (scratch == NULL) {
+		fprintf(stderr, "Couldn't sanitize name, no memory\n");
+		return;
+	}
+
+	memcpy(scratch->data, dst, scratch->len);
+
+	if (key->type == BTRFS_DIR_ITEM_KEY ||
+	    key->type == BTRFS_DIR_INDEX_KEY)
+		sanitize_dir_item(md, scratch, slot);
+	else if (key->type == BTRFS_INODE_REF_KEY)
+		sanitize_inode_ref(md, scratch, slot, 0);
+	else if (key->type == BTRFS_INODE_EXTREF_KEY)
+		sanitize_inode_ref(md, scratch, slot, 1);
+	else if (key->type == BTRFS_XATTR_ITEM_KEY)
+		sanitize_xattr(md, scratch, slot);
+
+	memcpy(dst, scratch->data, scratch->len);
+	free(scratch);
+}
+
+/*
+ * Scrub the leaf image in 'dst' (a copy of 'src'): csum items and the
+ * payload of inline file extents are zeroed, and items carrying names are
+ * sanitized when md->sanitize_names is set.  Item layout is read from 'src'.
+ */
+static void zero_items(struct metadump_struct *md, u8 *dst,
+		       struct extent_buffer *src)
+{
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	u32 nritems = btrfs_header_nritems(src);
+	int i;
+
+	for (i = 0; i < nritems; i++) {
+		btrfs_item_key_to_cpu(src, &key, i);
+
+		if (key.type == BTRFS_CSUM_ITEM_KEY) {
+			size_t len = btrfs_item_size_nr(src, i);
+
+			memset(dst + btrfs_leaf_data(src) +
+			       btrfs_item_offset_nr(src, i), 0, len);
+		} else if (md->sanitize_names && has_name(&key)) {
+			sanitize_name(md, dst, src, &key, i);
+		} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
+			unsigned long start;
+			size_t len;
+
+			fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(src, fi) !=
+			    BTRFS_FILE_EXTENT_INLINE)
+				continue;
+
+			start = btrfs_file_extent_inline_start(fi);
+			len = btrfs_file_extent_inline_item_len(src,
+							btrfs_item_nr(i));
+			memset(dst + start, 0, len);
+		}
+	}
+}
+
+/*
+ * Copy the block held by 'src' into 'dst', wipe the parts that carry no
+ * metadata and recompute the checksum of the copy.  The super block is
+ * copied verbatim and keeps its checksum.
+ */
+static void copy_buffer(struct metadump_struct *md, u8 *dst,
+			struct extent_buffer *src)
+{
+	u32 nritems;
+	size_t from;
+	size_t len;
+
+	memcpy(dst, src->data, src->len);
+	if (src->start == BTRFS_SUPER_INFO_OFFSET)
+		return;
+
+	nritems = btrfs_header_nritems(src);
+
+	if (nritems == 0) {
+		/* empty block: everything behind the header goes */
+		from = sizeof(struct btrfs_header);
+		len = src->len - from;
+		memset(dst + from, 0, len);
+	} else if (btrfs_header_level(src) == 0) {
+		/* leaf: clear the gap between the item array and the data */
+		from = btrfs_item_nr_offset(nritems);
+		len = btrfs_leaf_data(src) +
+			btrfs_item_offset_nr(src, nritems - 1) -
+			btrfs_item_nr_offset(nritems);
+		memset(dst + from, 0, len);
+		zero_items(md, dst, src);
+	} else {
+		/* node: clear everything behind the last key pointer */
+		from = offsetof(struct btrfs_node, ptrs) +
+			sizeof(struct btrfs_key_ptr) * nritems;
+		len = src->len - from;
+		memset(dst + from, 0, len);
+	}
+
+	csum_block(dst, src->len);
+}
+
+/*
+ * Thread body of a dump worker; 'data' is the shared metadump_struct.
+ *
+ * The worker sleeps on md->cond until work is queued on md->list, takes one
+ * async_work off the list and, when compression is enabled, replaces
+ * async->buffer with a zlib compressed copy (async->bufsize then holds the
+ * compressed length).  Every finished item bumps md->num_ready.  The thread
+ * exits once md->done is set and the list is empty, or when the compression
+ * buffer cannot be allocated, in which case md->error is set to -ENOMEM.
+ */
+static void *dump_worker(void *data)
+{
+	struct metadump_struct *md = (struct metadump_struct *)data;
+	struct async_work *async;
+	int ret;
+
+	while (1) {
+		pthread_mutex_lock(&md->mutex);
+		while (list_empty(&md->list)) {
+			if (md->done) {
+				pthread_mutex_unlock(&md->mutex);
+				goto out;
+			}
+			pthread_cond_wait(&md->cond, &md->mutex);
+		}
+		async = list_entry(md->list.next, struct async_work, list);
+		list_del_init(&async->list);
+		pthread_mutex_unlock(&md->mutex);
+
+		if (md->compress_level > 0) {
+			u8 *orig = async->buffer;
+
+			async->bufsize = compressBound(async->size);
+			async->buffer = malloc(async->bufsize);
+			if (!async->buffer) {
+				fprintf(stderr, "Error allocing buffer\n");
+				/*
+				 * async->buffer no longer points at the raw
+				 * data, so this is the last reference to it;
+				 * release it instead of leaking it.
+				 */
+				free(orig);
+				pthread_mutex_lock(&md->mutex);
+				if (!md->error)
+					md->error = -ENOMEM;
+				pthread_mutex_unlock(&md->mutex);
+				pthread_exit(NULL);
+			}
+
+			ret = compress2(async->buffer,
+					(unsigned long *)&async->bufsize,
+					orig, async->size, md->compress_level);
+
+			/* a failed compression is only flagged on the item */
+			if (ret != Z_OK)
+				async->error = 1;
+
+			free(orig);
+		}
+
+		pthread_mutex_lock(&md->mutex);
+		md->num_ready++;
+		pthread_mutex_unlock(&md->mutex);
+	}
+out:
+	pthread_exit(NULL);
+}
+
+/*
+ * Reset the per-cluster counters and prepare the cluster header for a new
+ * cluster that starts at byte offset 'start': magic, start offset, an item
+ * count of zero and the compression type implied by md->compress_level.
+ */
+static void meta_cluster_init(struct metadump_struct *md, u64 start)
+{
+	struct meta_cluster_header *hdr = &md->cluster->header;
+
+	md->num_ready = 0;
+	md->num_items = 0;
+
+	hdr->magic = cpu_to_le64(HEADER_MAGIC);
+	hdr->bytenr = cpu_to_le64(start);
+	hdr->nritems = cpu_to_le32(0);
+	if (md->compress_level > 0)
+		hdr->compress = COMPRESS_ZLIB;
+	else
+		hdr->compress = COMPRESS_NONE;
+}
+
+/*
+ * Tear down a metadump_struct: tell the workers to finish, join the first
+ * 'num_threads' of them, destroy the condition variable and mutex, drop all
+ * cached name substitutions and free the thread array and cluster buffer.
+ */
+static void metadump_destroy(struct metadump_struct *md, int num_threads)
+{
+	struct rb_node *node;
+	int t;
+
+	/* wake every worker so it notices md->done */
+	pthread_mutex_lock(&md->mutex);
+	md->done = 1;
+	pthread_cond_broadcast(&md->cond);
+	pthread_mutex_unlock(&md->mutex);
+
+	for (t = 0; t < num_threads; t++)
+		pthread_join(md->threads[t], NULL);
+
+	pthread_cond_destroy(&md->cond);
+	pthread_mutex_destroy(&md->mutex);
+
+	for (node = rb_first(&md->name_tree); node != NULL;
+	     node = rb_first(&md->name_tree)) {
+		struct name *entry = rb_entry(node, struct name, n);
+
+		rb_erase(node, &md->name_tree);
+		free(entry->val);
+		free(entry->sub);
+		free(entry);
+	}
+
+	free(md->threads);
+	free(md->cluster);
+}
+
+/*
+ * Initialise 'md' for dumping 'root' to 'out' and start 'num_threads' worker
+ * threads running dump_worker().
+ *
+ * Returns 0 on success, -ENOMEM when the cluster buffer or the thread array
+ * cannot be allocated, or the value returned by pthread_create() when a
+ * worker cannot be started.  In the last case everything set up so far has
+ * already been torn down through metadump_destroy().
+ */
+static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
+			 FILE *out, int num_threads, int compress_level,
+			 int sanitize_names)
+{
+	int i, ret = 0;
+
+	memset(md, 0, sizeof(*md));
+	md->cluster = calloc(1, BLOCK_SIZE);
+	if (!md->cluster)
+		return -ENOMEM;
+	md->threads = calloc(num_threads, sizeof(pthread_t));
+	if (!md->threads) {
+		free(md->cluster);
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&md->list);
+	INIT_LIST_HEAD(&md->ordered);
+	md->root = root;
+	md->out = out;
+	md->pending_start = (u64)-1;
+	md->compress_level = compress_level;
+	md->sanitize_names = sanitize_names;
+	/* crc32c is only needed when collisions have to be computed */
+	if (sanitize_names > 1)
+		crc32c_optimization_init();
+
+	md->name_tree.rb_node = NULL;
+	md->num_threads = num_threads;
+	pthread_cond_init(&md->cond, NULL);
+	pthread_mutex_init(&md->mutex, NULL);
+	meta_cluster_init(md, 0);
+
+	if (!num_threads)
+		return 0;
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(md->threads + i, NULL, dump_worker, md);
+		if (ret)
+			break;
+	}
+
+	/*
+	 * When creation fails at index i only threads 0..i-1 exist, so join
+	 * exactly i of them; joining i + 1 would call pthread_join() on a
+	 * thread id that was never filled in.
+	 */
+	if (ret)
+		metadump_destroy(md, i);
+
+	return ret;
+}
+
+/*
+ * Write 'size' zero bytes to 'out' as a single fwrite() item and return
+ * fwrite()'s result, i.e. 1 on success.  The zeroes come from a static
+ * BLOCK_SIZE buffer.
+ */
+static int write_zero(FILE *out, size_t size)
+{
+	static char zeroes[BLOCK_SIZE];
+	size_t written = fwrite(zeroes, size, 1, out);
+
+	return written;
+}
+
+/*
+ * Write the current cluster to md->out: first the index block describing
+ * every queued buffer, then the buffers themselves in queue order, then
+ * zero padding up to the next BLOCK_SIZE boundary.
+ *
+ * Called with md->mutex held; the mutex is dropped while waiting for the
+ * workers to finish the queued items.  Buffers taken off md->ordered are
+ * freed here whether or not they could be written.
+ *
+ * On return *next holds the byte offset following the written data (0 when
+ * nothing was queued or a worker reported an error); it is not set when
+ * writing the index block fails.  Returns 0 on success, the error recorded
+ * by a worker, or -EIO on a write failure.
+ */
+static int write_buffers(struct metadump_struct *md, u64 *next)
+{
+	struct meta_cluster_header *header = &md->cluster->header;
+	struct meta_cluster_item *item;
+	struct async_work *async;
+	u64 bytenr = 0;
+	u32 nritems = 0;
+	int ret;
+	int err = 0;
+
+	if (list_empty(&md->ordered))
+		goto out;
+
+	/* wait until all buffers are compressed */
+	while (!err && md->num_items > md->num_ready) {
+		struct timespec ts = {
+			.tv_sec = 0,
+			.tv_nsec = 10000000,
+		};
+		pthread_mutex_unlock(&md->mutex);
+		nanosleep(&ts, NULL);
+		pthread_mutex_lock(&md->mutex);
+		err = md->error;
+	}
+
+	if (err) {
+		/* workers store a negative errno, strerror() needs it positive */
+		fprintf(stderr, "One of the threads errored out %s\n",
+				strerror(-err));
+		goto out;
+	}
+
+	/* setup and write index block */
+	list_for_each_entry(async, &md->ordered, ordered) {
+		item = md->cluster->items + nritems;
+		item->bytenr = cpu_to_le64(async->start);
+		item->size = cpu_to_le32(async->bufsize);
+		nritems++;
+	}
+	header->nritems = cpu_to_le32(nritems);
+
+	ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
+	if (ret != 1) {
+		fprintf(stderr, "Error writing out cluster: %d\n", errno);
+		return -EIO;
+	}
+
+	/* write buffers */
+	bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
+	while (!list_empty(&md->ordered)) {
+		async = list_entry(md->ordered.next, struct async_work,
+				   ordered);
+		list_del_init(&async->ordered);
+
+		bytenr += async->bufsize;
+		/*
+		 * After the first failure keep draining the list so every
+		 * buffer is freed, but stop writing and report only once.
+		 */
+		if (!err) {
+			ret = fwrite(async->buffer, async->bufsize, 1,
+				     md->out);
+			if (ret != 1) {
+				err = -EIO;
+				fprintf(stderr,
+					"Error writing out cluster: %d\n",
+					errno);
+			}
+		}
+
+		free(async->buffer);
+		free(async);
+	}
+
+	/* zero unused space in the last block */
+	if (!err && bytenr & BLOCK_MASK) {
+		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
+
+		bytenr += size;
+		ret = write_zero(md->out, size);
+		if (ret != 1) {
+			fprintf(stderr, "Error zeroing out buffer: %d\n",
+				errno);
+			err = -EIO;
+		}
+	}
+out:
+	*next = bytenr;
+	return err;
+}
+
+/*
+ * Fill async->buffer with the async->size bytes of data found at logical
+ * address async->start.  Reading resumes where it stopped each time a mirror
+ * fails, moving on to the next mirror number until all bytes are in or the
+ * mirrors are used up.  Returns 0 on success and -EIO if bytes are missing.
+ */
+static int read_data_extent(struct metadump_struct *md,
+			    struct async_work *async)
+{
+	struct btrfs_root *root = md->root;
+	u64 remaining = async->size;
+	u64 logical = async->start;
+	u64 done = 0;
+	int num_copies;
+	int mirror;
+
+	num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, logical,
+				      remaining);
+
+	/* Try our best to read data, just like read_tree_block() */
+	for (mirror = 0; mirror < num_copies; mirror++) {
+		while (remaining) {
+			u64 got = remaining;
+
+			if (read_extent_data(root,
+					     (char *)(async->buffer + done),
+					     logical, &got, mirror) < 0)
+				break;
+			done += got;
+			logical += got;
+			remaining -= got;
+		}
+	}
+
+	return remaining ? -EIO : 0;
+}
+
+/*
+ * Return the file descriptor of the first device on the filesystem's
+ * device list.
+ */
+static int get_dev_fd(struct btrfs_root *root)
+{
+	struct btrfs_device *first;
+
+	first = list_first_entry(&root->fs_info->fs_devices->devices,
+				 struct btrfs_device, dev_list);
+	return first->fd;
+}
+
+/*
+ * Turn the pending extent (md->pending_start / md->pending_size) into an
+ * async_work item, read its contents and queue it for output.
+ *
+ * Data extents are read with read_data_extent(), the super block is read
+ * straight from the first device, and metadata is read block by block and
+ * cleaned through copy_buffer().  The cluster is written out once it holds
+ * ITEMS_PER_CLUSTER items or when 'done' is set.  With nothing pending and
+ * 'done' clear this is a no-op.
+ *
+ * Returns 0 on success, -ENOMEM or -EIO on failure, or the error returned
+ * by read_data_extent() / write_buffers().
+ */
+static int flush_pending(struct metadump_struct *md, int done)
+{
+	struct async_work *async = NULL;
+	struct extent_buffer *eb;
+	u64 blocksize = md->root->nodesize;
+	u64 start;
+	u64 size;
+	size_t offset;
+	int ret = 0;
+
+	if (md->pending_size) {
+		async = calloc(1, sizeof(*async));
+		if (!async)
+			return -ENOMEM;
+
+		async->start = md->pending_start;
+		async->size = md->pending_size;
+		async->bufsize = async->size;
+		async->buffer = malloc(async->bufsize);
+		if (!async->buffer) {
+			free(async);
+			return -ENOMEM;
+		}
+		offset = 0;
+		start = async->start;
+		size = async->size;
+
+		if (md->data) {
+			ret = read_data_extent(md, async);
+			if (ret) {
+				free(async->buffer);
+				free(async);
+				return ret;
+			}
+		}
+
+		/*
+		 * Balance can make the mapping not cover the super block, so
+		 * just copy directly from one of the devices.
+		 */
+		if (start == BTRFS_SUPER_INFO_OFFSET) {
+			int fd = get_dev_fd(md->root);
+
+			ret = pread64(fd, async->buffer, size, start);
+			/*
+			 * Test for failure before comparing against the
+			 * unsigned size: a bare "ret < size" converts -1 to a
+			 * huge u64 and lets a failed read through.
+			 */
+			if (ret < 0 || (u64)ret < size) {
+				free(async->buffer);
+				free(async);
+				fprintf(stderr, "Error reading superblock\n");
+				return -EIO;
+			}
+			/* nothing left for the metadata loop below */
+			size = 0;
+			ret = 0;
+		}
+
+		while (!md->data && size > 0) {
+			u64 this_read = min(blocksize, size);
+			eb = read_tree_block(md->root, start, this_read, 0);
+			if (!extent_buffer_uptodate(eb)) {
+				free(async->buffer);
+				free(async);
+				fprintf(stderr,
+					"Error reading metadata block\n");
+				return -EIO;
+			}
+			copy_buffer(md, async->buffer + offset, eb);
+			free_extent_buffer(eb);
+			start += this_read;
+			offset += this_read;
+			size -= this_read;
+		}
+
+		md->pending_start = (u64)-1;
+		md->pending_size = 0;
+	} else if (!done) {
+		return 0;
+	}
+
+	pthread_mutex_lock(&md->mutex);
+	if (async) {
+		list_add_tail(&async->ordered, &md->ordered);
+		md->num_items++;
+		if (md->compress_level > 0) {
+			/* hand the item to a worker for compression */
+			list_add_tail(&async->list, &md->list);
+			pthread_cond_signal(&md->cond);
+		} else {
+			md->num_ready++;
+		}
+	}
+	if (md->num_items >= ITEMS_PER_CLUSTER || done) {
+		ret = write_buffers(md, &start);
+		if (ret)
+			fprintf(stderr, "Error writing buffers %d\n",
+				errno);
+		else
+			meta_cluster_init(md, start);
+	}
+	pthread_mutex_unlock(&md->mutex);
+	return ret;
+}
+
+/*
+ * Append the extent [start, start + size) to the pending range.  The range
+ * is flushed first when the new extent is of a different kind ('data'),
+ * would push the range past MAX_PENDING_SIZE, or does not start where the
+ * range ends.  Returns 0 on success or the error from flush_pending().
+ */
+static int add_extent(u64 start, u64 size, struct metadump_struct *md,
+		      int data)
+{
+	int kind_changed = md->data != data;
+	int too_big = md->pending_size + size > MAX_PENDING_SIZE;
+	int not_adjacent = md->pending_start + md->pending_size != start;
+
+	if (kind_changed || too_big || not_adjacent) {
+		int ret = flush_pending(md, 0);
+
+		if (ret)
+			return ret;
+		md->pending_start = start;
+	}
+
+	readahead_tree_block(md->root, start, size, 0);
+	md->data = data;
+	md->pending_size += size;
+	return 0;
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+/*
+ * Decide, for a V0 extent tree, whether the extent at 'bytenr' is a tree
+ * block.  Starting with the item after the one 'path' currently points at,
+ * the items whose key objectid equals 'bytenr' are scanned for the first
+ * BTRFS_EXTENT_REF_V0_KEY item.
+ *
+ * Returns 1 if that ref's objectid is below BTRFS_FIRST_FREE_OBJECTID, 0 if
+ * it is not or no such ref is found, and a negative value when
+ * btrfs_next_leaf() fails.  'path' is advanced as a side effect.
+ */
+static int is_tree_block(struct btrfs_root *extent_root,
+ struct btrfs_path *path, u64 bytenr)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u64 ref_objectid;
+ int ret;
+
+ leaf = path->nodes[0];
+ while (1) {
+ struct btrfs_extent_ref_v0 *ref_item;
+ /* step to the next item, moving on to the next leaf when needed */
+ path->slots[0]++;
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(extent_root, path);
+ if (ret < 0)
+ return ret;
+ /* a positive return ends the scan: nothing more to look at */
+ if (ret > 0)
+ break;
+ leaf = path->nodes[0];
+ }
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ /* items for a different extent: no ref was found for 'bytenr' */
+ if (key.objectid != bytenr)
+ break;
+ if (key.type != BTRFS_EXTENT_REF_V0_KEY)
+ continue;
+ ref_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_ref_v0);
+ ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
+ if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
+ return 1;
+ /* only the first ref item is ever inspected */
+ break;
+ }
+ return 0;
+}
+#endif
+
+/*
+ * Queue the tree block 'eb' for dumping and recurse into everything below
+ * it.  For interior nodes every child pointer is followed.  Leaves are only
+ * descended into when 'root_tree' is set, in which case the tree referenced
+ * by each BTRFS_ROOT_ITEM_KEY item is walked as a regular (non-root) tree.
+ *
+ * Returns 0 on success, -EIO when a block cannot be read, or the error
+ * returned by add_extent().
+ */
+static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
+			    struct metadump_struct *metadump, int root_tree)
+{
+	struct extent_buffer *child;
+	struct btrfs_key key;
+	int level;
+	int nritems;
+	int slot;
+	int ret;
+
+	ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0);
+	if (ret) {
+		fprintf(stderr, "Error adding metadata block\n");
+		return ret;
+	}
+
+	level = btrfs_header_level(eb);
+	if (level == 0 && !root_tree)
+		return 0;
+
+	nritems = btrfs_header_nritems(eb);
+	for (slot = 0; slot < nritems; slot++) {
+		u64 child_bytenr;
+		int child_root_tree;
+
+		if (level == 0) {
+			struct btrfs_root_item *ri;
+
+			btrfs_item_key_to_cpu(eb, &key, slot);
+			if (key.type != BTRFS_ROOT_ITEM_KEY)
+				continue;
+			ri = btrfs_item_ptr(eb, slot, struct btrfs_root_item);
+			child_bytenr = btrfs_disk_root_bytenr(eb, ri);
+			child_root_tree = 0;
+		} else {
+			child_bytenr = btrfs_node_blockptr(eb, slot);
+			child_root_tree = root_tree;
+		}
+
+		child = read_tree_block(root, child_bytenr, root->leafsize, 0);
+		if (!extent_buffer_uptodate(child)) {
+			if (level == 0)
+				fprintf(stderr,
+					"Error reading log root block\n");
+			else
+				fprintf(stderr, "Error reading log block\n");
+			return -EIO;
+		}
+
+		ret = copy_tree_blocks(root, child, metadump, child_root_tree);
+		free_extent_buffer(child);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Queue all blocks of the log root tree for dumping.  Nothing is done when
+ * the super block records no log root.  Returns -EIO when a log root is
+ * recorded but the tree was not set up, otherwise the result of
+ * copy_tree_blocks().  'path' is not used.
+ */
+static int copy_log_trees(struct btrfs_root *root,
+			  struct metadump_struct *metadump,
+			  struct btrfs_path *path)
+{
+	struct btrfs_root *log_root = root->fs_info->log_root_tree;
+
+	if (btrfs_super_log_root(root->fs_info->super_copy) == 0)
+		return 0;
+
+	if (log_root == NULL || log_root->node == NULL) {
+		fprintf(stderr, "Error copying tree log, it wasn't setup\n");
+		return -EIO;
+	}
+
+	return copy_tree_blocks(root, log_root->node, metadump, 1);
+}
+
+/*
+ * Walk the tree root and queue, as data, the on-disk extent of every
+ * regular (BTRFS_FILE_EXTENT_REG) file extent item found there.
+ * Returns 0 on success or the first error hit while searching, iterating or
+ * queueing.
+ */
+static int copy_space_cache(struct btrfs_root *root,
+			    struct metadump_struct *metadump,
+			    struct btrfs_path *path)
+{
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = 0;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching for free space inode %d\n",
+			ret);
+		return ret;
+	}
+
+	leaf = path->nodes[0];
+
+	/* each pass handles one item; the slot advances at the end of a pass */
+	for (;; path->slots[0]++) {
+		u64 disk_bytenr;
+		u64 disk_len;
+
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(tree_root, path);
+			if (ret < 0) {
+				fprintf(stderr, "Error going to next leaf "
+					"%d\n", ret);
+				return ret;
+			}
+			if (ret > 0)
+				break;
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) !=
+		    BTRFS_FILE_EXTENT_REG)
+			continue;
+
+		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		disk_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		ret = add_extent(disk_bytenr, disk_len, metadump, 1);
+		if (ret) {
+			fprintf(stderr, "Error adding space cache blocks %d\n",
+				ret);
+			btrfs_release_path(path);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Queue every tree block recorded in the extent tree for dumping, starting
+ * with the first extent behind the primary super block.
+ *
+ * Returns 0 on success or a negative error from the tree walk, from
+ * add_extent(), or -EIO for an undersized extent item when V0 support is
+ * not compiled in.  'path' is released before returning from the loop.
+ */
+static int copy_from_extent_tree(struct metadump_struct *metadump,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *extent_root;
+ struct extent_buffer *leaf;
+ struct btrfs_extent_item *ei;
+ struct btrfs_key key;
+ u64 bytenr;
+ u64 num_bytes;
+ int ret;
+
+ extent_root = metadump->root->fs_info->extent_root;
+ bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Error searching extent root %d\n", ret);
+ return ret;
+ }
+ ret = 0;
+
+ leaf = path->nodes[0];
+
+ while (1) {
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(extent_root, path);
+ if (ret < 0) {
+ fprintf(stderr, "Error going to next leaf %d"
+ "\n", ret);
+ break;
+ }
+ /* a positive return ends the walk without an error */
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ leaf = path->nodes[0];
+ }
+
+ /*
+ * Skip items that start below 'bytenr' and items that are not
+ * extent/metadata items.  This is also the only place (outside
+ * is_tree_block()) where the slot advances: once an item has been
+ * handled, 'bytenr' is moved past it at the bottom of the loop, so
+ * the same item is skipped here on the next pass (as long as
+ * num_bytes is non-zero).
+ */
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid < bytenr ||
+ (key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_METADATA_ITEM_KEY)) {
+ path->slots[0]++;
+ continue;
+ }
+
+ /* metadata items use the leaf size, extent items store the length in key.offset */
+ bytenr = key.objectid;
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ num_bytes = extent_root->leafsize;
+ else
+ num_bytes = key.offset;
+
+ /* an item larger than the bare extent item carries usable flags */
+ if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
+ ei = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_item);
+ if (btrfs_extent_flags(leaf, ei) &
+ BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ ret = add_extent(bytenr, num_bytes, metadump,
+ 0);
+ if (ret) {
+ fprintf(stderr, "Error adding block "
+ "%d\n", ret);
+ break;
+ }
+ }
+ } else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+ /* V0 items carry no flags; look at the following ref items instead */
+ ret = is_tree_block(extent_root, path, bytenr);
+ if (ret < 0) {
+ fprintf(stderr, "Error checking tree block "
+ "%d\n", ret);
+ break;
+ }
+
+ if (ret) {
+ ret = add_extent(bytenr, num_bytes, metadump,
+ 0);
+ if (ret) {
+ fprintf(stderr, "Error adding block "
+ "%d\n", ret);
+ break;
+ }
+ }
+ ret = 0;
+#else
+ fprintf(stderr, "Either extent tree corruption or "
+ "you haven't built with V0 support\n");
+ ret = -EIO;
+ break;
+#endif
+ }
+ /* move the lower bound past the extent that was just handled */
+ bytenr += num_bytes;
+ }
+
+ btrfs_release_path(path);
+
+ return ret;
+}
+
+/*
+ * Dump the metadata of the filesystem at 'input' to 'out'.
+ *
+ * The super block is queued first, then either the chunk and root trees are
+ * walked directly ('walk_trees') or the tree blocks are taken from the
+ * extent tree, followed by the log trees and the space cache extents.
+ * Whatever has been queued is flushed even when a step fails.
+ *
+ * Returns 0 on success, otherwise the first error that occurred, or the
+ * result of close_ctree() when everything else succeeded.
+ */
+static int create_metadump(const char *input, FILE *out, int num_threads,
+			   int compress_level, int sanitize, int walk_trees)
+{
+	struct btrfs_root *root;
+	struct btrfs_path *path = NULL;
+	struct metadump_struct metadump;
+	int ret;
+	int err = 0;
+
+	root = open_ctree(input, 0, 0);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		return -EIO;
+	}
+
+	BUG_ON(root->nodesize != root->leafsize);
+
+	ret = metadump_init(&metadump, root, out, num_threads,
+			    compress_level, sanitize);
+	if (ret) {
+		fprintf(stderr, "Error initing metadump %d\n", ret);
+		close_ctree(root);
+		return ret;
+	}
+
+	ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
+			&metadump, 0);
+	if (ret) {
+		fprintf(stderr, "Error adding metadata %d\n", ret);
+		err = ret;
+		goto out;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Out of memory allocing path\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (walk_trees) {
+		ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
+				       &metadump, 1);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
+
+		ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
+				       &metadump, 1);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
+	} else {
+		ret = copy_from_extent_tree(&metadump, path);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
+	}
+
+	ret = copy_log_trees(root, &metadump, path);
+	if (ret) {
+		err = ret;
+		goto out;
+	}
+
+	ret = copy_space_cache(root, &metadump, path);
+	/* record the failure, 'ret' is reused by the flush below */
+	if (ret)
+		err = ret;
+out:
+	ret = flush_pending(&metadump, 1);
+	if (ret) {
+		if (!err)
+			err = ret;
+		fprintf(stderr, "Error flushing pending %d\n", ret);
+	}
+
+	metadump_destroy(&metadump, num_threads);
+
+	btrfs_free_path(path);
+	ret = close_ctree(root);
+	return err ? err : ret;
+}
+
+/*
+ * Rewrite the super block image in 'buffer' for an old-style restore: the
+ * METADUMP flag is set and the sys chunk array is replaced with a single
+ * one-stripe SYSTEM chunk at logical 0 of length (u64)-1, placed at offset
+ * 0 of the device described by the super block's dev_item.  The checksum is
+ * recomputed at the end.
+ */
+static void update_super_old(u8 *buffer)
+{
+	struct btrfs_super_block *sb = (struct btrfs_super_block *)buffer;
+	struct btrfs_disk_key *disk_key =
+		(struct btrfs_disk_key *)(sb->sys_chunk_array);
+	struct btrfs_chunk *chunk =
+		(struct btrfs_chunk *)(sb->sys_chunk_array +
+				       sizeof(struct btrfs_disk_key));
+	u32 sectorsize = btrfs_super_sectorsize(sb);
+
+	btrfs_set_super_flags(sb,
+			btrfs_super_flags(sb) | BTRFS_SUPER_FLAG_METADUMP);
+
+	/* key of the single chunk */
+	btrfs_set_disk_key_objectid(disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+	btrfs_set_disk_key_type(disk_key, BTRFS_CHUNK_ITEM_KEY);
+	btrfs_set_disk_key_offset(disk_key, 0);
+
+	/* chunk geometry */
+	btrfs_set_stack_chunk_length(chunk, (u64)-1);
+	btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
+	btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
+	btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
+	btrfs_set_stack_chunk_io_align(chunk, sectorsize);
+	btrfs_set_stack_chunk_io_width(chunk, sectorsize);
+	btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
+	btrfs_set_stack_chunk_num_stripes(chunk, 1);
+	btrfs_set_stack_chunk_sub_stripes(chunk, 0);
+
+	/* the one stripe lives at offset 0 of the super block's own device */
+	chunk->stripe.devid = sb->dev_item.devid;
+	btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
+	memcpy(chunk->stripe.dev_uuid, sb->dev_item.uuid, BTRFS_UUID_SIZE);
+
+	btrfs_set_super_sys_array_size(sb, sizeof(*disk_key) + sizeof(*chunk));
+	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
+}
+
+/*
+ * Rewrite the super block image in 'buffer' for a single-device restore.
+ *
+ * Every chunk in the sys chunk array is reduced, in place, to one stripe on
+ * the device described by the super block's dev_item, so the array shrinks
+ * while it is rewritten.  Afterwards the METADUMP_V2 flag is set, the cache
+ * generation is cleared if requested, and the checksum is recomputed.
+ *
+ * Returns 0 on success or -EIO if the array holds a non chunk item key.
+ */
+static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
+{
+ struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
+ struct btrfs_chunk *chunk;
+ struct btrfs_disk_key *disk_key;
+ struct btrfs_key key;
+ u64 flags = btrfs_super_flags(super);
+ u32 new_array_size = 0;
+ u32 array_size;
+ u32 cur = 0;
+ u8 *ptr, *write_ptr;
+ int old_num_stripes;
+
+ /* 'ptr' reads the old array, 'write_ptr' trails it writing the compacted one */
+ write_ptr = ptr = super->sys_chunk_array;
+ array_size = btrfs_super_sys_array_size(super);
+
+ while (cur < array_size) {
+ disk_key = (struct btrfs_disk_key *)ptr;
+ btrfs_disk_key_to_cpu(&key, disk_key);
+
+ /* source and destination may overlap, hence memmove */
+ new_array_size += sizeof(*disk_key);
+ memmove(write_ptr, ptr, sizeof(*disk_key));
+
+ write_ptr += sizeof(*disk_key);
+ ptr += sizeof(*disk_key);
+ cur += sizeof(*disk_key);
+
+ if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+ u64 physical, size = 0;
+
+ /* remember the stripe count of the source before it is overwritten */
+ chunk = (struct btrfs_chunk *)ptr;
+ old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+ chunk = (struct btrfs_chunk *)write_ptr;
+
+ memmove(write_ptr, ptr, sizeof(*chunk));
+ btrfs_set_stack_chunk_num_stripes(chunk, 1);
+ btrfs_set_stack_chunk_sub_stripes(chunk, 0);
+ btrfs_set_stack_chunk_type(chunk,
+ BTRFS_BLOCK_GROUP_SYSTEM);
+ chunk->stripe.devid = super->dev_item.devid;
+ physical = logical_to_physical(mdres, key.offset,
+ &size);
+ /*
+ * NOTE(review): 'size' starts at 0 and logical_to_physical()
+ * only ever lowers it, so this test is always true here.
+ */
+ if (size != (u64)-1)
+ btrfs_set_stack_stripe_offset(&chunk->stripe,
+ physical);
+ memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
+ BTRFS_UUID_SIZE);
+ new_array_size += sizeof(*chunk);
+ } else {
+ fprintf(stderr, "Bogus key in the sys chunk array "
+ "%d\n", key.type);
+ return -EIO;
+ }
+ /* output advances by a one-stripe chunk, input by the original item size */
+ write_ptr += sizeof(*chunk);
+ ptr += btrfs_chunk_item_size(old_num_stripes);
+ cur += btrfs_chunk_item_size(old_num_stripes);
+ }
+
+ if (mdres->clear_space_cache)
+ btrfs_set_super_cache_generation(super, 0);
+
+ flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
+ btrfs_set_super_flags(super, flags);
+ btrfs_set_super_sys_array_size(super, new_array_size);
+ csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
+
+ return 0;
+}
+
+/*
+ * Allocate a zeroed, stand-alone extent buffer with room for 'size' bytes
+ * of data behind the structure and record 'bytenr' and 'size' in it.
+ * Returns NULL when out of memory; the caller releases it with free().
+ */
+static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
+{
+	struct extent_buffer *eb = calloc(1, sizeof(struct extent_buffer) + size);
+
+	if (eb != NULL) {
+		eb->start = bytenr;
+		eb->len = size;
+	}
+	return eb;
+}
+
+/*
+ * Change the size of the item at 'slot' in leaf 'eb' to 'new_size', keeping
+ * the first 'new_size' bytes of its data.  The data of this item and of all
+ * items after it is moved and their offsets are adjusted to match.
+ * NOTE(review): size_diff is unsigned, so this presumably expects
+ * new_size <= old_size - confirm against callers.
+ */
+static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
+{
+ struct btrfs_item *item;
+ u32 nritems;
+ u32 old_size;
+ u32 old_data_start;
+ u32 size_diff;
+ u32 data_end;
+ int i;
+
+ old_size = btrfs_item_size_nr(eb, slot);
+ if (old_size == new_size)
+ return;
+
+ nritems = btrfs_header_nritems(eb);
+ /* offset of the last item's data, i.e. the lowest offset in use */
+ data_end = btrfs_item_offset_nr(eb, nritems - 1);
+
+ old_data_start = btrfs_item_offset_nr(eb, slot);
+ size_diff = old_size - new_size;
+
+ /* this item and every later one has its data offset raised by size_diff */
+ for (i = slot; i < nritems; i++) {
+ u32 ioff;
+ item = btrfs_item_nr(i);
+ ioff = btrfs_item_offset(eb, item);
+ btrfs_set_item_offset(eb, item, ioff + size_diff);
+ }
+
+ /* move the bytes from data_end up to the kept part of this item by size_diff */
+ memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
+ btrfs_leaf_data(eb) + data_end,
+ old_data_start + new_size - data_end);
+ item = btrfs_item_nr(slot);
+ btrfs_set_item_size(eb, item, new_size);
+}
+
+/*
+ * Rewrite the chunk items of any chunk tree leaves contained in 'buffer'
+ * ('size' bytes, logically starting at async->start) so that each chunk has
+ * a single stripe on the restore target device.
+ *
+ * The buffer is examined in mdres->leafsize steps and is left untouched when
+ * 'size' is not a multiple of the leaf size.  Scanning stops at the first
+ * block whose header bytenr or fsid does not match.  Modified blocks get a
+ * fresh checksum.
+ *
+ * Returns 0, or -ENOMEM when the scratch extent buffer cannot be allocated.
+ */
+static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
+ struct async_work *async, u8 *buffer,
+ size_t size)
+{
+ struct extent_buffer *eb;
+ size_t size_left = size;
+ u64 bytenr = async->start;
+ int i;
+
+ if (size_left % mdres->leafsize)
+ return 0;
+
+ eb = alloc_dummy_eb(bytenr, mdres->leafsize);
+ if (!eb)
+ return -ENOMEM;
+
+ while (size_left) {
+ /* work on a private copy of the current block */
+ eb->start = bytenr;
+ memcpy(eb->data, buffer, mdres->leafsize);
+
+ /* a header that does not match this position or filesystem ends the scan */
+ if (btrfs_header_bytenr(eb) != bytenr)
+ break;
+ if (memcmp(mdres->fsid,
+ eb->data + offsetof(struct btrfs_header, fsid),
+ BTRFS_FSID_SIZE))
+ break;
+
+ /* only leaves owned by the chunk tree are rewritten */
+ if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
+ goto next;
+
+ if (btrfs_header_level(eb) != 0)
+ goto next;
+
+ for (i = 0; i < btrfs_header_nritems(eb); i++) {
+ struct btrfs_chunk chunk;
+ struct btrfs_key key;
+ /* note: this 'size' shadows the function parameter */
+ u64 type, physical, size = (u64)-1;
+
+ btrfs_item_key_to_cpu(eb, &key, i);
+ if (key.type != BTRFS_CHUNK_ITEM_KEY)
+ continue;
+ /* shrink the item to a chunk with exactly one stripe */
+ truncate_item(eb, i, sizeof(chunk));
+ read_extent_buffer(eb, &chunk,
+ btrfs_item_ptr_offset(eb, i),
+ sizeof(chunk));
+
+ size = 0;
+ physical = logical_to_physical(mdres, key.offset,
+ &size);
+
+ /* Zero out the RAID profile */
+ type = btrfs_stack_chunk_type(&chunk);
+ type &= (BTRFS_BLOCK_GROUP_DATA |
+ BTRFS_BLOCK_GROUP_SYSTEM |
+ BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_DUP);
+ btrfs_set_stack_chunk_type(&chunk, type);
+
+ btrfs_set_stack_chunk_num_stripes(&chunk, 1);
+ btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
+ btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid);
+ /*
+ * NOTE(review): 'size' was reset to 0 above and is only ever
+ * lowered by logical_to_physical(), so this is always true.
+ */
+ if (size != (u64)-1)
+ btrfs_set_stack_stripe_offset(&chunk.stripe,
+ physical);
+ memcpy(chunk.stripe.dev_uuid, mdres->uuid,
+ BTRFS_UUID_SIZE);
+ write_extent_buffer(eb, &chunk,
+ btrfs_item_ptr_offset(eb, i),
+ sizeof(chunk));
+ }
+ /* copy the rewritten leaf back and recompute its checksum */
+ memcpy(buffer, eb->data, eb->len);
+ csum_block(buffer, eb->len);
+next:
+ size_left -= mdres->leafsize;
+ buffer += mdres->leafsize;
+ bytenr += mdres->leafsize;
+ }
+
+ free(eb);
+ return 0;
+}
+
+/*
+ * Write the super block image in 'buf' to every backup super block location
+ * that fits on the device behind 'fd'.  Each copy gets its own bytenr and a
+ * matching checksum, so 'buf' is modified along the way.  Failures are
+ * reported on stderr and end the loop; nothing is returned.
+ */
+static void write_backup_supers(int fd, u8 *buf)
+{
+	struct btrfs_super_block *sb = (struct btrfs_super_block *)buf;
+	struct stat st;
+	u64 dev_size;
+	int mirror;
+
+	if (fstat(fd, &st) != 0) {
+		fprintf(stderr, "Couldn't stat restore point, won't be able "
+			"to write backup supers: %d\n", errno);
+		return;
+	}
+
+	dev_size = btrfs_device_size(fd, &st);
+
+	for (mirror = 1; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+		u64 bytenr = btrfs_sb_offset(mirror);
+		int ret;
+
+		/* this copy would not fit on the device */
+		if (bytenr + BTRFS_SUPER_INFO_SIZE > dev_size)
+			break;
+
+		btrfs_set_super_bytenr(sb, bytenr);
+		csum_block(buf, BTRFS_SUPER_INFO_SIZE);
+		ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
+		if (ret >= BTRFS_SUPER_INFO_SIZE)
+			continue;
+
+		if (ret < 0)
+			fprintf(stderr, "Problem writing out backup "
+				"super block %d, err %d\n", mirror, errno);
+		else
+			fprintf(stderr, "Short write writing out "
+				"backup super block\n");
+		break;
+	}
+}
+
+/*
+ * Worker thread body for restoring a metadump image.
+ *
+ * @data is the shared struct mdrestore_struct.  The thread repeatedly takes
+ * one async_work item off mdres->list (waiting on mdres->cond until the
+ * leafsize is known and an item is queued), decompresses it if the image is
+ * zlib-compressed, applies the super block / chunk tree fixups for
+ * single-device restores and writes the result to the target.  The thread
+ * leaves once mdres->done is set and there is nothing left to take.
+ *
+ * The first error seen is stored (negative errno) in mdres->error under the
+ * mutex; mdres->num_items is decremented for every item taken, whether it
+ * succeeded or not, so that wait_for_worker() can make progress.
+ */
+static void *restore_worker(void *data)
+{
+	struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
+	struct async_work *async;
+	size_t size;
+	u8 *buffer;
+	u8 *outbuf;
+	int outfd;
+	int ret;
+	int compress_size = MAX_PENDING_SIZE * 4;
+
+	outfd = fileno(mdres->out);
+	buffer = malloc(compress_size);
+	if (!buffer) {
+		fprintf(stderr, "Error allocing buffer\n");
+		pthread_mutex_lock(&mdres->mutex);
+		if (!mdres->error)
+			mdres->error = -ENOMEM;
+		pthread_mutex_unlock(&mdres->mutex);
+		pthread_exit(NULL);
+	}
+
+	while (1) {
+		u64 bytenr;
+		off_t offset = 0;
+		int err = 0;
+
+		pthread_mutex_lock(&mdres->mutex);
+		while (!mdres->leafsize || list_empty(&mdres->list)) {
+			if (mdres->done) {
+				pthread_mutex_unlock(&mdres->mutex);
+				goto out;
+			}
+			pthread_cond_wait(&mdres->cond, &mdres->mutex);
+		}
+		async = list_entry(mdres->list.next, struct async_work, list);
+		list_del_init(&async->list);
+		pthread_mutex_unlock(&mdres->mutex);
+
+		if (mdres->compress_method == COMPRESS_ZLIB) {
+			size = compress_size;
+			ret = uncompress(buffer, (unsigned long *)&size,
+					 async->buffer, async->bufsize);
+			if (ret != Z_OK) {
+				fprintf(stderr, "Error decompressing %d\n",
+					ret);
+				err = -EIO;
+				/*
+				 * The scratch buffer does not hold valid data
+				 * for this item; do not fix it up or write it
+				 * to the target.
+				 */
+				goto finish_item;
+			}
+			outbuf = buffer;
+		} else {
+			outbuf = async->buffer;
+			size = async->bufsize;
+		}
+
+		if (!mdres->multi_devices) {
+			if (async->start == BTRFS_SUPER_INFO_OFFSET) {
+				if (mdres->old_restore) {
+					update_super_old(outbuf);
+				} else {
+					ret = update_super(mdres, outbuf);
+					if (ret)
+						err = ret;
+				}
+			} else if (!mdres->old_restore) {
+				ret = fixup_chunk_tree_block(mdres, async,
+							     outbuf, size);
+				if (ret)
+					err = ret;
+			}
+		}
+
+		if (!mdres->fixup_offset) {
+			while (size) {
+				u64 chunk_size = size;
+
+				if (!mdres->multi_devices && !mdres->old_restore)
+					bytenr = logical_to_physical(mdres,
+							async->start + offset,
+							&chunk_size);
+				else
+					bytenr = async->start + offset;
+
+				ret = pwrite64(outfd, outbuf + offset,
+					       chunk_size, bytenr);
+				if (ret != chunk_size) {
+					if (ret < 0) {
+						/*
+						 * Keep the sign convention of
+						 * the other error codes, and
+						 * grab errno before fprintf()
+						 * can change it.
+						 */
+						err = -errno;
+						fprintf(stderr, "Error writing to "
+							"device %d\n", -err);
+						break;
+					} else {
+						fprintf(stderr, "Short write\n");
+						err = -EIO;
+						break;
+					}
+				}
+				size -= chunk_size;
+				offset += chunk_size;
+			}
+		} else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
+			ret = write_data_to_disk(mdres->info, outbuf,
+						 async->start, size, 0);
+			if (ret) {
+				printk("Error write data\n");
+				exit(1);
+			}
+		}
+
+
+		/* backup super blocks are already there at fixup_offset stage */
+		if (!mdres->multi_devices &&
+		    async->start == BTRFS_SUPER_INFO_OFFSET)
+			write_backup_supers(outfd, outbuf);
+
+finish_item:
+		pthread_mutex_lock(&mdres->mutex);
+		if (err && !mdres->error)
+			mdres->error = err;
+		mdres->num_items--;
+		pthread_mutex_unlock(&mdres->mutex);
+
+		free(async->buffer);
+		free(async);
+	}
+out:
+	free(buffer);
+	pthread_exit(NULL);
+}
+
+/*
+ * Tear down a restore context set up by mdrestore_init().
+ *
+ * @num_threads is the number of worker threads that were actually started
+ * and therefore have to be joined.
+ *
+ * The workers are told to finish and are joined *before* the chunk trees are
+ * freed: a worker that is still draining mdres->list translates addresses
+ * through those trees, so freeing them first would leave it working on freed
+ * memory.
+ */
+static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
+{
+	struct rb_node *n;
+	int i;
+
+	pthread_mutex_lock(&mdres->mutex);
+	mdres->done = 1;
+	pthread_cond_broadcast(&mdres->cond);
+	pthread_mutex_unlock(&mdres->mutex);
+
+	for (i = 0; i < num_threads; i++)
+		pthread_join(mdres->threads[i], NULL);
+
+	/* No worker is left; the logical and physical trees can go now. */
+	while ((n = rb_first(&mdres->chunk_tree))) {
+		struct fs_chunk *entry;
+
+		entry = rb_entry(n, struct fs_chunk, l);
+		rb_erase(n, &mdres->chunk_tree);
+		rb_erase(&entry->p, &mdres->physical_tree);
+		free(entry);
+	}
+
+	pthread_cond_destroy(&mdres->cond);
+	pthread_mutex_destroy(&mdres->mutex);
+	free(mdres->threads);
+}
+
+/*
+ * Initialise a restore context and start its worker threads.
+ *
+ * @mdres is zeroed and filled from the remaining arguments; @num_threads
+ * restore_worker() threads are then created (none when @num_threads is 0).
+ *
+ * Returns 0 on success or a negative errno.  If a thread cannot be created,
+ * the threads that were already started are stopped and joined and the
+ * context is destroyed before returning.
+ */
+static int mdrestore_init(struct mdrestore_struct *mdres,
+			  FILE *in, FILE *out, int old_restore,
+			  int num_threads, int fixup_offset,
+			  struct btrfs_fs_info *info, int multi_devices)
+{
+	int i, ret = 0;
+
+	memset(mdres, 0, sizeof(*mdres));
+	pthread_cond_init(&mdres->cond, NULL);
+	pthread_mutex_init(&mdres->mutex, NULL);
+	INIT_LIST_HEAD(&mdres->list);
+	INIT_LIST_HEAD(&mdres->overlapping_chunks);
+	mdres->in = in;
+	mdres->out = out;
+	mdres->old_restore = old_restore;
+	mdres->chunk_tree.rb_node = NULL;
+	mdres->fixup_offset = fixup_offset;
+	mdres->info = info;
+	mdres->multi_devices = multi_devices;
+	mdres->clear_space_cache = 0;
+	mdres->last_physical_offset = 0;
+	mdres->alloced_chunks = 0;
+
+	if (!num_threads)
+		return 0;
+
+	mdres->num_threads = num_threads;
+	mdres->threads = calloc(num_threads, sizeof(pthread_t));
+	if (!mdres->threads)
+		return -ENOMEM;
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(mdres->threads + i, NULL, restore_worker,
+				     mdres);
+		if (ret) {
+			/* pthread_create() returns a positive errno value */
+			ret = -ret;
+			break;
+		}
+	}
+	/*
+	 * On failure at index i only threads 0..i-1 exist; joining slot i
+	 * would hand pthread_join() a thread id that was never created.
+	 */
+	if (ret)
+		mdrestore_destroy(mdres, i);
+	return ret;
+}
+
+/*
+ * Record the basic filesystem parameters found in the super block carried by
+ * @async: leafsize, fsid, device uuid and devid are copied into @mdres.
+ *
+ * Does nothing when mdres->leafsize is already set.  When the image is
+ * zlib-compressed the item is decompressed into a temporary buffer first.
+ *
+ * Returns 0 on success, -ENOMEM or -EIO on failure.
+ */
+static int fill_mdres_info(struct mdrestore_struct *mdres,
+			   struct async_work *async)
+{
+	struct btrfs_super_block *sb;
+	u8 *scratch = NULL;
+	u8 *raw = async->buffer;
+
+	/* We've already been initialized */
+	if (mdres->leafsize)
+		return 0;
+
+	if (mdres->compress_method == COMPRESS_ZLIB) {
+		size_t out_len = MAX_PENDING_SIZE * 2;
+		int zret;
+
+		scratch = malloc(MAX_PENDING_SIZE * 2);
+		if (!scratch)
+			return -ENOMEM;
+
+		zret = uncompress(scratch, (unsigned long *)&out_len,
+				  async->buffer, async->bufsize);
+		if (zret != Z_OK) {
+			fprintf(stderr, "Error decompressing %d\n", zret);
+			free(scratch);
+			return -EIO;
+		}
+		raw = scratch;
+	}
+
+	sb = (struct btrfs_super_block *)raw;
+	mdres->leafsize = btrfs_super_leafsize(sb);
+	memcpy(mdres->fsid, sb->fsid, BTRFS_FSID_SIZE);
+	memcpy(mdres->uuid, sb->dev_item.uuid, BTRFS_UUID_SIZE);
+	mdres->devid = le64_to_cpu(sb->dev_item.devid);
+
+	/* scratch is NULL for uncompressed images; free(NULL) is a no-op */
+	free(scratch);
+	return 0;
+}
+
+/*
+ * Queue every item of @cluster for the restore workers.
+ *
+ * The payload of each item is read from mdres->in into a freshly allocated
+ * async_work, which is appended to mdres->list (ownership passes to the
+ * worker that takes it).  An item located at BTRFS_SUPER_INFO_OFFSET is also
+ * handed to fill_mdres_info().  After the last item the padding up to the
+ * next BLOCK_SIZE boundary is consumed and the resulting image offset is
+ * stored in *@next.
+ *
+ * Returns 0 on success or a negative errno; *@next is only written on
+ * success.
+ */
+static int add_cluster(struct meta_cluster *cluster,
+		       struct mdrestore_struct *mdres, u64 *next)
+{
+	struct meta_cluster_header *hdr = &cluster->header;
+	u64 pos;
+	u32 count;
+	u32 idx;
+
+	mdres->compress_method = hdr->compress;
+
+	pos = le64_to_cpu(hdr->bytenr) + BLOCK_SIZE;
+	count = le32_to_cpu(hdr->nritems);
+
+	for (idx = 0; idx < count; idx++) {
+		struct meta_cluster_item *entry = &cluster->items[idx];
+		struct async_work *work;
+		int err = 0;
+
+		work = calloc(1, sizeof(*work));
+		if (!work) {
+			fprintf(stderr, "Error allocating async\n");
+			return -ENOMEM;
+		}
+		work->start = le64_to_cpu(entry->bytenr);
+		work->bufsize = le32_to_cpu(entry->size);
+		work->buffer = malloc(work->bufsize);
+		if (!work->buffer) {
+			fprintf(stderr, "Error allocing async buffer\n");
+			free(work);
+			return -ENOMEM;
+		}
+
+		if (fread(work->buffer, work->bufsize, 1, mdres->in) != 1) {
+			fprintf(stderr, "Error reading buffer %d\n", errno);
+			free(work->buffer);
+			free(work);
+			return -EIO;
+		}
+		pos += work->bufsize;
+
+		pthread_mutex_lock(&mdres->mutex);
+		if (work->start == BTRFS_SUPER_INFO_OFFSET)
+			err = fill_mdres_info(mdres, work);
+		if (err) {
+			fprintf(stderr, "Error setting up restore\n");
+			pthread_mutex_unlock(&mdres->mutex);
+			free(work->buffer);
+			free(work);
+			return err;
+		}
+		list_add_tail(&work->list, &mdres->list);
+		mdres->num_items++;
+		pthread_cond_signal(&mdres->cond);
+		pthread_mutex_unlock(&mdres->mutex);
+	}
+
+	/* Skip the padding that aligns the next cluster to BLOCK_SIZE. */
+	if (pos & BLOCK_MASK) {
+		char pad[BLOCK_MASK];
+		size_t pad_len = BLOCK_SIZE - (pos & BLOCK_MASK);
+
+		pos += pad_len;
+		if (fread(pad, pad_len, 1, mdres->in) != 1) {
+			fprintf(stderr, "Error reading in buffer %d\n", errno);
+			return -EIO;
+		}
+	}
+
+	*next = pos;
+	return 0;
+}
+
+/*
+ * Poll until the workers have consumed every queued item or one of them has
+ * reported an error.
+ *
+ * mdres->error and mdres->num_items are sampled under the mutex; between
+ * samples the mutex is dropped and the caller sleeps for 10ms.
+ *
+ * Returns the value of mdres->error (0 when all items completed cleanly).
+ */
+static int wait_for_worker(struct mdrestore_struct *mdres)
+{
+	int status;
+
+	pthread_mutex_lock(&mdres->mutex);
+	for (;;) {
+		struct timespec pause = {
+			.tv_sec = 0,
+			.tv_nsec = 10000000,
+		};
+
+		status = mdres->error;
+		if (status || !(mdres->num_items > 0))
+			break;
+
+		pthread_mutex_unlock(&mdres->mutex);
+		nanosleep(&pause, NULL);
+		pthread_mutex_lock(&mdres->mutex);
+	}
+	pthread_mutex_unlock(&mdres->mutex);
+
+	return status;
+}
+
+/*
+ * Parse one chunk tree block out of a decompressed image item.
+ *
+ * @buffer holds @bufsize bytes of metadata whose first block lives at logical
+ * address @item_bytenr; @bytenr is the logical address of the tree block that
+ * is wanted.  For a node, every child pointer is followed through
+ * search_for_chunk_blocks() (starting at @cluster_bytenr).  For a leaf, every
+ * CHUNK_ITEM is recorded as an fs_chunk in mdres->chunk_tree and either in
+ * mdres->physical_tree or, when its physical range collides with an existing
+ * entry, on mdres->overlapping_chunks.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
+			    u64 bytenr, u64 item_bytenr, u32 bufsize,
+			    u64 cluster_bytenr)
+{
+	struct extent_buffer *eb;
+	u64 block_offset;
+	int ret = 0;
+	int i;
+
+	/*
+	 * The wanted block must start on a leafsize boundary inside the item
+	 * and fit completely in the buffer.  Stepping blindly in leafsize
+	 * increments would otherwise run past the buffer (or never terminate)
+	 * on a damaged image.
+	 */
+	if (!mdres->leafsize || bytenr < item_bytenr) {
+		fprintf(stderr, "Chunk block is not inside the image item\n");
+		return -EIO;
+	}
+	block_offset = bytenr - item_bytenr;
+	if (block_offset % mdres->leafsize || block_offset > bufsize ||
+	    bufsize - block_offset < mdres->leafsize) {
+		fprintf(stderr, "Chunk block is not inside the image item\n");
+		return -EIO;
+	}
+	buffer += block_offset;
+
+	eb = alloc_dummy_eb(bytenr, mdres->leafsize);
+	if (!eb) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(eb->data, buffer, mdres->leafsize);
+	if (btrfs_header_bytenr(eb) != bytenr) {
+		fprintf(stderr, "Eb bytenr doesn't match found bytenr\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
+		   BTRFS_FSID_SIZE)) {
+		fprintf(stderr, "Fsid doesn't match\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
+		fprintf(stderr, "Does not belong to the chunk tree\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	for (i = 0; i < btrfs_header_nritems(eb); i++) {
+		struct btrfs_chunk chunk;
+		struct fs_chunk *fs_chunk;
+		struct btrfs_key key;
+
+		/* Interior node: descend into each child block. */
+		if (btrfs_header_level(eb)) {
+			u64 blockptr = btrfs_node_blockptr(eb, i);
+
+			ret = search_for_chunk_blocks(mdres, blockptr,
+						      cluster_bytenr);
+			if (ret)
+				break;
+			continue;
+		}
+
+		/* Yay a leaf!  We loves leafs! */
+		btrfs_item_key_to_cpu(eb, &key, i);
+		if (key.type != BTRFS_CHUNK_ITEM_KEY)
+			continue;
+
+		fs_chunk = malloc(sizeof(struct fs_chunk));
+		if (!fs_chunk) {
+			fprintf(stderr, "Erorr allocating chunk\n");
+			ret = -ENOMEM;
+			break;
+		}
+		memset(fs_chunk, 0, sizeof(*fs_chunk));
+		read_extent_buffer(eb, &chunk, btrfs_item_ptr_offset(eb, i),
+				   sizeof(chunk));
+
+		fs_chunk->logical = key.offset;
+		fs_chunk->physical = btrfs_stack_stripe_offset(&chunk.stripe);
+		fs_chunk->bytes = btrfs_stack_chunk_length(&chunk);
+		INIT_LIST_HEAD(&fs_chunk->list);
+		/*
+		 * A chunk whose physical range is already taken is parked on
+		 * the overlapping list instead of the physical tree.
+		 */
+		if (tree_search(&mdres->physical_tree, &fs_chunk->p,
+				physical_cmp, 1) != NULL)
+			list_add(&fs_chunk->list, &mdres->overlapping_chunks);
+		else
+			tree_insert(&mdres->physical_tree, &fs_chunk->p,
+				    physical_cmp);
+		if (fs_chunk->physical + fs_chunk->bytes >
+		    mdres->last_physical_offset)
+			mdres->last_physical_offset = fs_chunk->physical +
+				fs_chunk->bytes;
+		mdres->alloced_chunks += fs_chunk->bytes;
+		tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
+	}
+out:
+	free(eb);
+	return ret;
+}
+
+/*
+ * Locate the image item that contains logical address @search and hand it to
+ * read_chunk_block().
+ *
+ * The image is scanned cluster by cluster, starting at file offset
+ * @cluster_bytenr.  If the end of the image is reached and the scan did not
+ * start at offset 0, it is restarted once from the beginning.  Each item is
+ * read (and decompressed for zlib images) into a scratch buffer; the first
+ * item whose range covers @search ends the scan.
+ *
+ * Returns 0 when the block was found and parsed, a negative errno otherwise.
+ * The position of mdres->in is changed by this function.
+ */
+static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
+ u64 search, u64 cluster_bytenr)
+{
+ struct meta_cluster *cluster;
+ struct meta_cluster_header *header;
+ struct meta_cluster_item *item;
+ u64 current_cluster = cluster_bytenr, bytenr;
+ u64 item_bytenr;
+ u32 bufsize, nritems, i;
+ u32 max_size = MAX_PENDING_SIZE * 2;
+ u8 *buffer, *tmp = NULL;
+ int ret = 0;
+
+ cluster = malloc(BLOCK_SIZE);
+ if (!cluster) {
+ fprintf(stderr, "Error allocating cluster\n");
+ return -ENOMEM;
+ }
+
+ buffer = malloc(max_size);
+ if (!buffer) {
+ fprintf(stderr, "Error allocing buffer\n");
+ free(cluster);
+ return -ENOMEM;
+ }
+
+ /* tmp receives the raw compressed bytes; it stays NULL otherwise */
+ if (mdres->compress_method == COMPRESS_ZLIB) {
+ tmp = malloc(max_size);
+ if (!tmp) {
+ fprintf(stderr, "Error allocing tmp buffer\n");
+ free(cluster);
+ free(buffer);
+ return -ENOMEM;
+ }
+ }
+
+ bytenr = current_cluster;
+ while (1) {
+ if (fseek(mdres->in, current_cluster, SEEK_SET)) {
+ fprintf(stderr, "Error seeking: %d\n", errno);
+ ret = -EIO;
+ break;
+ }
+
+ ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
+ if (ret == 0) {
+ /* Nothing more to read: wrap around to offset 0 once */
+ if (cluster_bytenr != 0) {
+ cluster_bytenr = 0;
+ current_cluster = 0;
+ bytenr = 0;
+ continue;
+ }
+ printf("ok this is where we screwed up?\n");
+ ret = -EIO;
+ break;
+ } else if (ret < 0) {
+ /* fread() is asked for one element, so it yields 0 or 1 here */
+ fprintf(stderr, "Error reading image\n");
+ break;
+ }
+ ret = 0;
+
+ header = &cluster->header;
+ if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
+ le64_to_cpu(header->bytenr) != current_cluster) {
+ fprintf(stderr, "bad header in metadump image\n");
+ ret = -EIO;
+ break;
+ }
+
+ /* bytenr tracks the file offset: header block, then the payloads */
+ bytenr += BLOCK_SIZE;
+ nritems = le32_to_cpu(header->nritems);
+ for (i = 0; i < nritems; i++) {
+ size_t size;
+
+ item = &cluster->items[i];
+ bufsize = le32_to_cpu(item->size);
+ item_bytenr = le64_to_cpu(item->bytenr);
+
+ /* both scratch buffers are max_size bytes long */
+ if (bufsize > max_size) {
+ fprintf(stderr, "item %u size %u too big\n",
+ i, bufsize);
+ ret = -EIO;
+ break;
+ }
+
+ if (mdres->compress_method == COMPRESS_ZLIB) {
+ ret = fread(tmp, bufsize, 1, mdres->in);
+ if (ret != 1) {
+ fprintf(stderr, "Error reading: %d\n",
+ errno);
+ ret = -EIO;
+ break;
+ }
+
+ size = max_size;
+ ret = uncompress(buffer,
+ (unsigned long *)&size, tmp,
+ bufsize);
+ if (ret != Z_OK) {
+ fprintf(stderr, "Error decompressing "
+ "%d\n", ret);
+ ret = -EIO;
+ break;
+ }
+ } else {
+ ret = fread(buffer, bufsize, 1, mdres->in);
+ if (ret != 1) {
+ fprintf(stderr, "Error reading: %d\n",
+ errno);
+ ret = -EIO;
+ break;
+ }
+ size = bufsize;
+ }
+ ret = 0;
+
+ /*
+ * Found the item covering the wanted block.  A successful
+ * parse is flagged with ret = 1 so that the outer loop stops
+ * as well; it is turned back into 0 below.
+ */
+ if (item_bytenr <= search &&
+ item_bytenr + size > search) {
+ ret = read_chunk_block(mdres, buffer, search,
+ item_bytenr, size,
+ current_cluster);
+ if (!ret)
+ ret = 1;
+ break;
+ }
+ bytenr += bufsize;
+ }
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+ /* the next cluster header starts on a BLOCK_SIZE boundary */
+ if (bytenr & BLOCK_MASK)
+ bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
+ current_cluster = bytenr;
+ }
+
+ free(tmp);
+ free(buffer);
+ free(cluster);
+ return ret;
+}
+
+/*
+ * Read the super block out of the first cluster of the image and build the
+ * logical to physical chunk maps from the chunk tree it points to.
+ *
+ * @cluster is caller-provided scratch space of BLOCK_SIZE bytes that receives
+ * the first cluster header.  leafsize, fsid, device uuid and devid in @mdres
+ * are filled in from the super block.
+ *
+ * Returns 0 on success (and immediately when the image comes from stdin,
+ * which cannot be rewound), a negative errno otherwise.
+ */
+static int build_chunk_tree(struct mdrestore_struct *mdres,
+ struct meta_cluster *cluster)
+{
+ struct btrfs_super_block *super;
+ struct meta_cluster_header *header;
+ struct meta_cluster_item *item = NULL;
+ u64 chunk_root_bytenr = 0;
+ u32 i, nritems;
+ u64 bytenr = 0;
+ u8 *buffer;
+ int ret;
+
+ /* We can't seek with stdin so don't bother doing this */
+ if (mdres->in == stdin)
+ return 0;
+
+ ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
+ if (ret <= 0) {
+ fprintf(stderr, "Error reading in cluster: %d\n", errno);
+ return -EIO;
+ }
+ ret = 0;
+
+ header = &cluster->header;
+ if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
+ le64_to_cpu(header->bytenr) != 0) {
+ fprintf(stderr, "bad header in metadump image\n");
+ return -EIO;
+ }
+
+ bytenr += BLOCK_SIZE;
+ mdres->compress_method = header->compress;
+ nritems = le32_to_cpu(header->nritems);
+ /* Skip over the payload of every item that precedes the super block */
+ for (i = 0; i < nritems; i++) {
+ item = &cluster->items[i];
+
+ if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
+ break;
+ bytenr += le32_to_cpu(item->size);
+ if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
+ fprintf(stderr, "Error seeking: %d\n", errno);
+ return -EIO;
+ }
+ }
+
+ /* item is NULL when the cluster has no items at all */
+ if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
+ fprintf(stderr, "Huh, didn't find the super?\n");
+ return -EINVAL;
+ }
+
+ buffer = malloc(le32_to_cpu(item->size));
+ if (!buffer) {
+ fprintf(stderr, "Error allocing buffer\n");
+ return -ENOMEM;
+ }
+
+ ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
+ if (ret != 1) {
+ fprintf(stderr, "Error reading buffer: %d\n", errno);
+ free(buffer);
+ return -EIO;
+ }
+
+ /* For zlib images swap the raw bytes for their decompressed form */
+ if (mdres->compress_method == COMPRESS_ZLIB) {
+ size_t size = MAX_PENDING_SIZE * 2;
+ u8 *tmp;
+
+ tmp = malloc(MAX_PENDING_SIZE * 2);
+ if (!tmp) {
+ free(buffer);
+ return -ENOMEM;
+ }
+ ret = uncompress(tmp, (unsigned long *)&size,
+ buffer, le32_to_cpu(item->size));
+ if (ret != Z_OK) {
+ fprintf(stderr, "Error decompressing %d\n", ret);
+ free(buffer);
+ free(tmp);
+ return -EIO;
+ }
+ free(buffer);
+ buffer = tmp;
+ }
+
+ /* The fields below are also read by the workers, hence the mutex */
+ pthread_mutex_lock(&mdres->mutex);
+ super = (struct btrfs_super_block *)buffer;
+ chunk_root_bytenr = btrfs_super_chunk_root(super);
+ mdres->leafsize = btrfs_super_leafsize(super);
+ memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
+ memcpy(mdres->uuid, super->dev_item.uuid,
+ BTRFS_UUID_SIZE);
+ mdres->devid = le64_to_cpu(super->dev_item.devid);
+ free(buffer);
+ pthread_mutex_unlock(&mdres->mutex);
+
+ /* Walk the chunk tree, scanning the image from its first cluster */
+ return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
+}
+
+/*
+ * Tell whether the physical range [@physical, @physical + @bytes) contains
+ * the start offset of any super block copy (mirror 0 up to
+ * BTRFS_SUPER_MIRROR_MAX - 1).
+ *
+ * Returns 1 if it does, 0 otherwise.
+ */
+static int range_contains_super(u64 physical, u64 bytes)
+{
+	int mirror = 0;
+
+	while (mirror < BTRFS_SUPER_MIRROR_MAX) {
+		u64 sb_start = btrfs_sb_offset(mirror);
+
+		mirror++;
+		if (sb_start < physical)
+			continue;
+		if (sb_start < physical + bytes)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Give every chunk on mdres->overlapping_chunks a new physical location.
+ *
+ * Each chunk is taken off the list, moved to mdres->last_physical_offset,
+ * inserted into the physical tree, and last_physical_offset is advanced by
+ * the chunk length.  If the range a chunk is moved away from held a super
+ * block mirror, mdres->clear_space_cache is set.
+ */
+static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
+{
+	struct list_head *pending = &mdres->overlapping_chunks;
+
+	while (!list_empty(pending)) {
+		struct fs_chunk *moved;
+
+		moved = list_first_entry(pending, struct fs_chunk, list);
+		list_del_init(&moved->list);
+
+		if (range_contains_super(moved->physical, moved->bytes)) {
+			fprintf(stderr, "Remapping a chunk that had a super "
+				"mirror inside of it, clearing space cache "
+				"so we don't end up with corruption\n");
+			mdres->clear_space_cache = 1;
+		}
+
+		moved->physical = mdres->last_physical_offset;
+		tree_insert(&mdres->physical_tree, &moved->p, physical_cmp);
+		mdres->last_physical_offset += moved->bytes;
+	}
+}
+
+/*
+ * Make the device items of a restored filesystem describe a single device.
+ *
+ * The dev_item embedded in the super block copy and the matching DEV_ITEM in
+ * the chunk tree get @dev_size as total_bytes and mdres->alloced_chunks as
+ * bytes_used.  Every DEV_ITEM whose devid differs from the one in the super
+ * block is deleted.  All of this happens in one transaction that is committed
+ * before returning.
+ *
+ * Returns 0 on success or a negative errno; a failed tree search, leaf walk
+ * or item deletion terminates the program with exit(1).
+ */
+static int fixup_devices(struct btrfs_fs_info *fs_info,
+ struct mdrestore_struct *mdres, off_t dev_size)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_dev_item *dev_item;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_root *root = fs_info->chunk_root;
+ struct btrfs_key key;
+ u64 devid, cur_devid;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ fprintf(stderr, "Error alloc'ing path\n");
+ return -ENOMEM;
+ }
+
+ trans = btrfs_start_transaction(fs_info->tree_root, 1);
+ if (IS_ERR(trans)) {
+ fprintf(stderr, "Error starting transaction %ld\n",
+ PTR_ERR(trans));
+ btrfs_free_path(path);
+ return PTR_ERR(trans);
+ }
+
+ /* First the copy of the device item kept inside the super block */
+ dev_item = &fs_info->super_copy->dev_item;
+
+ devid = btrfs_stack_device_id(dev_item);
+
+ btrfs_set_stack_device_total_bytes(dev_item, dev_size);
+ btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
+
+ key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+ key.type = BTRFS_DEV_ITEM_KEY;
+ key.offset = 0;
+
+ /* The search is restarted from here after every deleted item */
+again:
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0) {
+ fprintf(stderr, "search failed %d\n", ret);
+ exit(1);
+ }
+
+ while (1) {
+ leaf = path->nodes[0];
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ fprintf(stderr, "Error going to next leaf "
+ "%d\n", ret);
+ exit(1);
+ }
+ /* a positive return means there is no further leaf */
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ leaf = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.type > BTRFS_DEV_ITEM_KEY)
+ break;
+ if (key.type != BTRFS_DEV_ITEM_KEY) {
+ path->slots[0]++;
+ continue;
+ }
+
+ dev_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_dev_item);
+ cur_devid = btrfs_device_id(leaf, dev_item);
+ /* Not the device named in the super block: drop the item */
+ if (devid != cur_devid) {
+ ret = btrfs_del_item(trans, root, path);
+ if (ret) {
+ fprintf(stderr, "Error deleting item %d\n",
+ ret);
+ exit(1);
+ }
+ btrfs_release_path(path);
+ goto again;
+ }
+
+ btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
+ btrfs_set_device_bytes_used(leaf, dev_item,
+ mdres->alloced_chunks);
+ btrfs_mark_buffer_dirty(leaf);
+ path->slots[0]++;
+ }
+
+ btrfs_free_path(path);
+ ret = btrfs_commit_transaction(trans, fs_info->tree_root);
+ if (ret) {
+ fprintf(stderr, "Commit failed %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Restore the metadump image @input onto @out.
+ *
+ * @input:         path of the image, or "-" for stdin
+ * @out:           stream opened on the restore target
+ * @old_restore:   write blocks at their logical address without touching the
+ *                 super block or chunk tree
+ * @num_threads:   number of worker threads to start
+ * @fixup_offset:  second pass of a multi-device restore; blocks are written
+ *                 through the filesystem opened on @target
+ * @target:        path of the restore target
+ * @multi_devices: restore for multiple devices
+ *
+ * Returns 0 on success and non-zero on failure (1 if the image cannot be
+ * opened, a negative errno otherwise).  Every failure after the image has
+ * been opened goes through the common cleanup, so the worker threads are
+ * joined and the image stream is closed on all of those paths.
+ */
+static int restore_metadump(const char *input, FILE *out, int old_restore,
+			    int num_threads, int fixup_offset,
+			    const char *target, int multi_devices)
+{
+	struct meta_cluster *cluster = NULL;
+	struct meta_cluster_header *header;
+	struct mdrestore_struct mdrestore;
+	struct btrfs_fs_info *info = NULL;
+	u64 bytenr = 0;
+	FILE *in = NULL;
+	int ret = 0;
+	int worker_ret;
+
+	if (!strcmp(input, "-")) {
+		in = stdin;
+	} else {
+		in = fopen(input, "r");
+		if (!in) {
+			perror("unable to open metadump image");
+			return 1;
+		}
+	}
+
+	/* NOTE: open with write mode */
+	if (fixup_offset) {
+		BUG_ON(!target);
+		info = open_ctree_fs_info(target, 0, 0,
+					  OPEN_CTREE_WRITES |
+					  OPEN_CTREE_RESTORE |
+					  OPEN_CTREE_PARTIAL);
+		if (!info) {
+			fprintf(stderr, "%s: open ctree failed\n", __func__);
+			ret = -EIO;
+			goto failed_open;
+		}
+	}
+
+	cluster = malloc(BLOCK_SIZE);
+	if (!cluster) {
+		fprintf(stderr, "Error allocating cluster\n");
+		ret = -ENOMEM;
+		goto failed_info;
+	}
+
+	ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
+			     fixup_offset, info, multi_devices);
+	if (ret) {
+		fprintf(stderr, "Error initing mdrestore %d\n", ret);
+		goto failed_cluster;
+	}
+
+	if (!multi_devices && !old_restore) {
+		ret = build_chunk_tree(&mdrestore, cluster);
+		if (ret)
+			goto out;
+		if (!list_empty(&mdrestore.overlapping_chunks))
+			remap_overlapping_chunks(&mdrestore);
+	}
+
+	if (in != stdin && fseek(in, 0, SEEK_SET)) {
+		fprintf(stderr, "Error seeking %d\n", errno);
+		/* ret is still 0 here; do not report a failed rewind as success */
+		ret = -EIO;
+		goto out;
+	}
+
+	while (!mdrestore.error) {
+		ret = fread(cluster, BLOCK_SIZE, 1, in);
+		if (!ret)
+			break;
+
+		header = &cluster->header;
+		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
+		    le64_to_cpu(header->bytenr) != bytenr) {
+			fprintf(stderr, "bad header in metadump image\n");
+			ret = -EIO;
+			break;
+		}
+		ret = add_cluster(cluster, &mdrestore, &bytenr);
+		if (ret) {
+			fprintf(stderr, "Error adding cluster\n");
+			break;
+		}
+	}
+	/*
+	 * Let the workers finish what is queued, but keep an error raised
+	 * while reading the image: it must not be replaced by a clean worker
+	 * status.
+	 */
+	worker_ret = wait_for_worker(&mdrestore);
+	if (!ret)
+		ret = worker_ret;
+
+	if (!ret && !multi_devices && !old_restore) {
+		/*
+		 * Kept separate from 'info' so that the cleanup below never
+		 * closes this filesystem a second time.
+		 */
+		struct btrfs_fs_info *fixup_info;
+		struct btrfs_root *root;
+		struct stat st;
+
+		root = open_ctree_fd(fileno(out), target, 0,
+				     OPEN_CTREE_PARTIAL |
+				     OPEN_CTREE_WRITES |
+				     OPEN_CTREE_NO_DEVICES);
+		if (!root) {
+			fprintf(stderr, "unable to open %s\n", target);
+			ret = -EIO;
+			goto out;
+		}
+		fixup_info = root->fs_info;
+
+		if (stat(target, &st)) {
+			ret = -errno;
+			fprintf(stderr, "statting %s failed\n", target);
+			close_ctree(fixup_info->chunk_root);
+			goto out;
+		}
+
+		ret = fixup_devices(fixup_info, &mdrestore, st.st_size);
+		close_ctree(fixup_info->chunk_root);
+	}
+out:
+	mdrestore_destroy(&mdrestore, num_threads);
+failed_cluster:
+	free(cluster);
+failed_info:
+	if (fixup_offset && info)
+		close_ctree(info->chunk_root);
+failed_open:
+	if (in != stdin)
+		fclose(in);
+	return ret;
+}
+
+/*
+ * Write a super block for device @cur_devid onto @other_dev.
+ *
+ * The DEV_ITEM with offset @cur_devid is looked up in the chunk tree of
+ * @info; its fields replace the dev_item of a copy of info->super_copy, which
+ * is then checksummed and written to the primary super block location and to
+ * the backup locations of @other_dev.
+ *
+ * Returns 0 on success and -EIO on failure.
+ */
+static int update_disk_super_on_device(struct btrfs_fs_info *info,
+				       const char *other_dev, u64 cur_devid)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_path path;
+	struct btrfs_dev_item *dev_item;
+	struct btrfs_super_block *disk_super;
+	char dev_uuid[BTRFS_UUID_SIZE];
+	char fs_uuid[BTRFS_UUID_SIZE];
+	u64 devid, type, io_align, io_width;
+	u64 sector_size, total_bytes, bytes_used;
+	char buf[BTRFS_SUPER_INFO_SIZE];
+	int fp = -1;
+	int ret;
+
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = cur_devid;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0);
+	if (ret) {
+		fprintf(stderr, "ERROR: search key failed\n");
+		/* a "not found" result still leaves buffers pinned in the path */
+		btrfs_release_path(&path);
+		ret = -EIO;
+		goto out;
+	}
+
+	leaf = path.nodes[0];
+	dev_item = btrfs_item_ptr(leaf, path.slots[0],
+				  struct btrfs_dev_item);
+
+	devid = btrfs_device_id(leaf, dev_item);
+	if (devid != cur_devid) {
+		printk("ERROR: devid %llu mismatch with %llu\n", devid, cur_devid);
+		btrfs_release_path(&path);
+		ret = -EIO;
+		goto out;
+	}
+
+	type = btrfs_device_type(leaf, dev_item);
+	io_align = btrfs_device_io_align(leaf, dev_item);
+	io_width = btrfs_device_io_width(leaf, dev_item);
+	sector_size = btrfs_device_sector_size(leaf, dev_item);
+	total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+	bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+	read_extent_buffer(leaf, dev_uuid,
+			   (unsigned long)btrfs_device_uuid(dev_item),
+			   BTRFS_UUID_SIZE);
+	read_extent_buffer(leaf, fs_uuid,
+			   (unsigned long)btrfs_device_fsid(dev_item),
+			   BTRFS_UUID_SIZE);
+
+	btrfs_release_path(&path);
+
+	printk("update disk super on %s devid=%llu\n", other_dev, devid);
+
+	/* update other devices' super block */
+	fp = open(other_dev, O_CREAT | O_RDWR, 0600);
+	if (fp < 0) {
+		fprintf(stderr, "ERROR: could not open %s\n", other_dev);
+		ret = -EIO;
+		goto out;
+	}
+
+	memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
+
+	disk_super = (struct btrfs_super_block *)buf;
+	dev_item = &disk_super->dev_item;
+
+	btrfs_set_stack_device_type(dev_item, type);
+	btrfs_set_stack_device_id(dev_item, devid);
+	btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
+	btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
+	btrfs_set_stack_device_io_align(dev_item, io_align);
+	btrfs_set_stack_device_io_width(dev_item, io_width);
+	btrfs_set_stack_device_sector_size(dev_item, sector_size);
+	memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
+	memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
+	csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
+
+	ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
+	if (ret != BTRFS_SUPER_INFO_SIZE) {
+		/* pwrite64() returns -1 on error; the reason is in errno */
+		if (ret < 0)
+			fprintf(stderr, "ERROR: cannot write superblock: %s\n",
+				strerror(errno));
+		else
+			fprintf(stderr, "ERROR: cannot write superblock\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	write_backup_supers(fp, (u8 *)buf);
+
+	/*
+	 * ret still holds the byte count of the write above; the caller
+	 * treats any non-zero value as a failure.
+	 */
+	ret = 0;
+
+out:
+	if (fp != -1)
+		close(fp);
+	return ret;
+}
+
+/*
+ * Print the command line help on stderr and terminate the program with exit
+ * status @ret.  This function does not return.
+ */
+static void print_usage(int ret)
+{
+	static const char * const help_lines[] = {
+		"usage: btrfs-image [options] source target\n",
+		"\t-r \trestore metadump image\n",
+		"\t-c value\tcompression level (0 ~ 9)\n",
+		"\t-t value\tnumber of threads (1 ~ 32)\n",
+		"\t-o \tdon't mess with the chunk tree when restoring\n",
+		"\t-s \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n",
+		"\t-w \twalk all trees instead of using extent tree, do this if your extent tree is broken\n",
+		"\t-m \trestore for multiple devices\n",
+		"\n",
+		"\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n",
+		"\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n",
+	};
+	size_t line;
+
+	for (line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++)
+		fputs(help_lines[line], stderr);
+
+	exit(ret);
+}
+
+/*
+ * btrfs-image entry point.
+ *
+ * Without -r/-m a metadump of the source device is created in the target
+ * file ('-' selects stdout).  With -r the image is restored onto one target;
+ * with -m it is restored onto several devices: the image is written to the
+ * first target, the super blocks of the remaining devices are generated from
+ * it, and a second pass rewrites the metadata through the opened filesystem.
+ *
+ * Exit status is 0 on success and 1 on failure.  A partially written dump
+ * file is removed when creating the image fails.
+ */
+int main(int argc, char *argv[])
+{
+	char *source;
+	char *target;
+	u64 num_threads = 0;
+	u64 compress_level = 0;
+	int create = 1;
+	int old_restore = 0;
+	int walk_trees = 0;
+	int multi_devices = 0;
+	int ret;
+	int sanitize = 0;
+	int dev_cnt = 0;
+	int usage_error = 0;
+	FILE *out;
+
+	while (1) {
+		static const struct option long_options[] = {
+			{ "help", no_argument, NULL, GETOPT_VAL_HELP},
+			{ NULL, 0, NULL, 0 }
+		};
+		int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
+		if (c < 0)
+			break;
+		switch (c) {
+		case 'r':
+			create = 0;
+			break;
+		case 't':
+			num_threads = arg_strtou64(optarg);
+			if (num_threads > 32)
+				print_usage(1);
+			break;
+		case 'c':
+			compress_level = arg_strtou64(optarg);
+			if (compress_level > 9)
+				print_usage(1);
+			break;
+		case 'o':
+			old_restore = 1;
+			break;
+		case 's':
+			sanitize++;
+			break;
+		case 'w':
+			walk_trees = 1;
+			break;
+		case 'm':
+			create = 0;
+			multi_devices = 1;
+			break;
+		case GETOPT_VAL_HELP:
+		default:
+			/* print_usage() does not return */
+			print_usage(c != GETOPT_VAL_HELP);
+		}
+	}
+
+	argc = argc - optind;
+	set_argv0(argv);
+	if (check_argc_min(argc, 2))
+		print_usage(1);
+
+	/* everything after the source argument is a target device */
+	dev_cnt = argc - 1;
+
+	if (create) {
+		if (old_restore) {
+			fprintf(stderr, "Usage error: create and restore cannot be used at the same time\n");
+			usage_error++;
+		}
+	} else {
+		if (walk_trees || sanitize || compress_level) {
+			fprintf(stderr, "Usage error: use -w, -s, -c options for restore makes no sense\n");
+			usage_error++;
+		}
+		if (multi_devices && dev_cnt < 2) {
+			fprintf(stderr, "Usage error: not enough devices specified for -m option\n");
+			usage_error++;
+		}
+		if (!multi_devices && dev_cnt != 1) {
+			fprintf(stderr, "Usage error: accepts only 1 device without -m option\n");
+			usage_error++;
+		}
+	}
+
+	if (usage_error)
+		print_usage(1);
+
+	source = argv[optind];
+	target = argv[optind + 1];
+
+	if (create && !strcmp(target, "-")) {
+		out = stdout;
+	} else {
+		out = fopen(target, "w+");
+		if (!out) {
+			perror("unable to create target file");
+			exit(1);
+		}
+	}
+
+	/* Threads are only used when compressing or restoring */
+	if (compress_level > 0 || create == 0) {
+		if (num_threads == 0) {
+			long tmp = sysconf(_SC_NPROCESSORS_ONLN);
+
+			if (tmp <= 0)
+				tmp = 1;
+			num_threads = tmp;
+		}
+	} else {
+		num_threads = 0;
+	}
+
+	if (create) {
+		ret = check_mounted(source);
+		if (ret < 0) {
+			fprintf(stderr, "Could not check mount status: %s\n",
+				strerror(-ret));
+			exit(1);
+		} else if (ret)
+			fprintf(stderr,
+		"WARNING: The device is mounted. Make sure the filesystem is quiescent.\n");
+
+		ret = create_metadump(source, out, num_threads,
+				      compress_level, sanitize, walk_trees);
+	} else {
+		ret = restore_metadump(source, out, old_restore, num_threads,
+				       0, target, multi_devices);
+	}
+	if (ret) {
+		printk("%s failed (%s)\n", (create) ? "create" : "restore",
+		       strerror(errno));
+		goto out;
+	}
+
+	/* extended support for multiple devices */
+	if (!create && multi_devices) {
+		struct btrfs_fs_info *info;
+		u64 total_devs;
+		int i;
+
+		info = open_ctree_fs_info(target, 0, 0,
+					  OPEN_CTREE_PARTIAL |
+					  OPEN_CTREE_RESTORE);
+		if (!info) {
+			fprintf(stderr, "unable to open %s error = %s\n",
+				target, strerror(errno));
+			/* leave through the common exit so 'out' is closed */
+			ret = 1;
+			goto out;
+		}
+
+		total_devs = btrfs_super_num_devices(info->super_copy);
+		if (total_devs != dev_cnt) {
+			printk("it needs %llu devices but has only %d\n",
+			       total_devs, dev_cnt);
+			close_ctree(info->chunk_root);
+			/* ret is 0 at this point; this must count as a failure */
+			ret = 1;
+			goto out;
+		}
+
+		/* update super block on other disks */
+		for (i = 2; i <= dev_cnt; i++) {
+			ret = update_disk_super_on_device(info,
+					argv[optind + i], (u64)i);
+			if (ret) {
+				printk("update disk super failed devid=%d (error=%d)\n",
+				       i, ret);
+				close_ctree(info->chunk_root);
+				exit(1);
+			}
+		}
+
+		close_ctree(info->chunk_root);
+
+		/* fix metadata block to map correct chunk */
+		ret = restore_metadump(source, out, 0, num_threads, 1,
+				       target, 1);
+		if (ret) {
+			fprintf(stderr, "fix metadump failed (error=%d)\n",
+				ret);
+			exit(1);
+		}
+	}
+out:
+	if (out == stdout) {
+		fflush(out);
+	} else {
+		fclose(out);
+		/* do not leave a broken dump file behind */
+		if (ret && create) {
+			int unlink_ret;
+
+			unlink_ret = unlink(target);
+			if (unlink_ret)
+				fprintf(stderr,
+					"unlink output file failed : %s\n",
+					strerror(errno));
+		}
+	}
+
+	btrfs_close_all_devices();
+
+	return !!ret;
+}
diff --git a/btrfs-list.c b/btrfs-list.c
new file mode 100644
index 00000000..2da54bf7
--- /dev/null
+++ b/btrfs-list.c
@@ -0,0 +1,1914 @@
+/*
+ * Copyright (C) 2010 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <libgen.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "utils.h"
+#include "ioctl.h"
+#include <uuid/uuid.h>
+#include "btrfs-list.h"
+#include "rbtree-utils.h"
+
+#define BTRFS_LIST_NFILTERS_INCREASE (2 * BTRFS_LIST_FILTER_MAX)
+#define BTRFS_LIST_NCOMPS_INCREASE (2 * BTRFS_LIST_COMP_MAX)
+
+/*
+ * Container for the rbtree in which we store all the roots we find, so
+ * that we can search for them later.  root_lookup_init() resets it to
+ * the empty state.
+ */
+struct root_lookup {
+ struct rb_root root; /* tree top; holds struct root_info entries */
+};
+
+/*
+ * Printable columns, one row per column.  The table is terminated by
+ * an all-NULL row.  Every column starts with need_print cleared;
+ * btrfs_list_setup_print_column() switches columns on.
+ */
+static struct {
+	char *name;		/* label used by the default and table layouts */
+	char *column_name;	/* capitalised variant of the label */
+	int need_print;		/* non-zero once the column was requested */
+} btrfs_list_columns[] = {
+	{ .name = "ID",            .column_name = "ID",            .need_print = 0 },
+	{ .name = "gen",           .column_name = "Gen",           .need_print = 0 },
+	{ .name = "cgen",          .column_name = "CGen",          .need_print = 0 },
+	{ .name = "parent",        .column_name = "Parent",        .need_print = 0 },
+	{ .name = "top level",     .column_name = "Top Level",     .need_print = 0 },
+	{ .name = "otime",         .column_name = "OTime",         .need_print = 0 },
+	{ .name = "parent_uuid",   .column_name = "Parent UUID",   .need_print = 0 },
+	{ .name = "received_uuid", .column_name = "Received UUID", .need_print = 0 },
+	{ .name = "uuid",          .column_name = "UUID",          .need_print = 0 },
+	{ .name = "path",          .column_name = "Path",          .need_print = 0 },
+	/* terminator */
+	{ .name = NULL,            .column_name = NULL,            .need_print = 0 },
+};
+
+static btrfs_list_filter_func all_filter_funcs[];
+static btrfs_list_comp_func all_comp_funcs[];
+
+/*
+ * Request that @column be printed.  Passing BTRFS_LIST_ALL requests
+ * every column at once.
+ */
+void btrfs_list_setup_print_column(enum btrfs_list_column_enum column)
+{
+	int idx;
+
+	BUG_ON(column < 0 || column > BTRFS_LIST_ALL);
+
+	if (column >= BTRFS_LIST_ALL) {
+		for (idx = 0; idx < BTRFS_LIST_ALL; idx++)
+			btrfs_list_columns[idx].need_print = 1;
+	} else {
+		btrfs_list_columns[column].need_print = 1;
+	}
+}
+
+/* Put @tree into the empty state by clearing its top node pointer. */
+static void root_lookup_init(struct root_lookup *tree)
+{
+	struct rb_root *top = &tree->root;
+
+	top->rb_node = NULL;
+}
+
+/*
+ * Order two entries by root_id.  Returns -1, 0 or 1; the sign is
+ * flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_rootid(struct root_info *entry1,
+				  struct root_info *entry2,
+				  int is_descending)
+{
+	int order = (entry1->root_id > entry2->root_id) -
+		    (entry1->root_id < entry2->root_id);
+
+	if (is_descending)
+		order = -order;
+	return order;
+}
+
+/*
+ * Order two entries by gen.  Returns -1, 0 or 1; the sign is flipped
+ * when @is_descending is non-zero.
+ */
+static int comp_entry_with_gen(struct root_info *entry1,
+			       struct root_info *entry2,
+			       int is_descending)
+{
+	int order = (entry1->gen > entry2->gen) -
+		    (entry1->gen < entry2->gen);
+
+	if (is_descending)
+		order = -order;
+	return order;
+}
+
+/*
+ * Order two entries by ogen.  Returns -1, 0 or 1; the sign is flipped
+ * when @is_descending is non-zero.
+ */
+static int comp_entry_with_ogen(struct root_info *entry1,
+				struct root_info *entry2,
+				int is_descending)
+{
+	int order = (entry1->ogen > entry2->ogen) -
+		    (entry1->ogen < entry2->ogen);
+
+	if (is_descending)
+		order = -order;
+	return order;
+}
+
+/*
+ * Order two entries by strcmp() of their full_path strings, which must
+ * both be set.  The result is normalised to -1, 0 or 1 and its sign is
+ * flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_path(struct root_info *entry1,
+				struct root_info *entry2,
+				int is_descending)
+{
+	int cmp = strcmp(entry1->full_path, entry2->full_path);
+	int order = (cmp > 0) - (cmp < 0);
+
+	if (is_descending)
+		order = -order;
+	return order;
+}
+
+/* Comparison callback for each sort key, indexed by btrfs_list_comp_enum. */
+static btrfs_list_comp_func all_comp_funcs[] = {
+	[BTRFS_LIST_COMP_ROOTID]	= comp_entry_with_rootid,
+	[BTRFS_LIST_COMP_GEN]		= comp_entry_with_gen,
+	[BTRFS_LIST_COMP_OGEN]		= comp_entry_with_ogen,
+	[BTRFS_LIST_COMP_PATH]		= comp_entry_with_path,
+};
+
+/*
+ * User-visible name of each sort key, indexed by btrfs_list_comp_enum,
+ * with a NULL entry in the BTRFS_LIST_COMP_MAX slot.
+ */
+static char *all_sort_items[] = {
+	[BTRFS_LIST_COMP_ROOTID]	= "rootid",
+	[BTRFS_LIST_COMP_GEN]		= "gen",
+	[BTRFS_LIST_COMP_OGEN]		= "ogen",
+	[BTRFS_LIST_COMP_PATH]		= "path",
+	[BTRFS_LIST_COMP_MAX]		= NULL,
+};
+
+/*
+ * Translate a sort key name into its index in all_sort_items[].
+ * Returns -1 when @sort_name matches none of the known names.
+ */
+static int btrfs_list_get_sort_item(char *sort_name)
+{
+	int idx = 0;
+
+	while (idx < BTRFS_LIST_COMP_MAX) {
+		if (!strcmp(sort_name, all_sort_items[idx]))
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * Allocate a zeroed comparer set with room for
+ * BTRFS_LIST_NCOMPS_INCREASE comparers.  Prints a message and exits
+ * the program when the allocation fails, so the result is never NULL.
+ */
+struct btrfs_list_comparer_set *btrfs_list_alloc_comparer_set(void)
+{
+	struct btrfs_list_comparer_set *fresh;
+	int bytes = sizeof(struct btrfs_list_comparer_set) +
+		    BTRFS_LIST_NCOMPS_INCREASE * sizeof(struct btrfs_list_comparer);
+
+	fresh = calloc(1, bytes);
+	if (fresh) {
+		fresh->total = BTRFS_LIST_NCOMPS_INCREASE;
+		return fresh;
+	}
+
+	fprintf(stderr, "memory allocation failed\n");
+	exit(1);
+}
+
+/* Release a set obtained from btrfs_list_alloc_comparer_set(). */
+void btrfs_list_free_comparer_set(struct btrfs_list_comparer_set *comp_set)
+{
+	free(comp_set);
+}
+
+/*
+ * Append the comparer selected by @comparer to *@comp_set.
+ *
+ * When the set is full it is grown by BTRFS_LIST_NCOMPS_INCREASE slots
+ * with realloc(), so *@comp_set may be replaced by a new pointer.
+ * Allocation failure prints a message and exits.  Always returns 0.
+ */
+static int btrfs_list_setup_comparer(struct btrfs_list_comparer_set **comp_set,
+		enum btrfs_list_comp_enum comparer, int is_descending)
+{
+	struct btrfs_list_comparer_set *cur = *comp_set;
+	struct btrfs_list_comparer *slot;
+
+	BUG_ON(!cur);
+	BUG_ON(comparer >= BTRFS_LIST_COMP_MAX);
+	BUG_ON(cur->ncomps > cur->total);
+
+	if (cur->ncomps == cur->total) {
+		struct btrfs_list_comparer_set *grown;
+		int bytes;
+
+		bytes = cur->total + BTRFS_LIST_NCOMPS_INCREASE;
+		bytes = sizeof(*cur) + bytes * sizeof(struct btrfs_list_comparer);
+
+		grown = realloc(cur, bytes);
+		if (!grown) {
+			fprintf(stderr, "memory allocation failed\n");
+			free(cur);
+			exit(1);
+		}
+
+		/* only the newly added tail needs clearing */
+		memset(&grown->comps[grown->total], 0,
+		       BTRFS_LIST_NCOMPS_INCREASE *
+		       sizeof(struct btrfs_list_comparer));
+		grown->total += BTRFS_LIST_NCOMPS_INCREASE;
+		*comp_set = grown;
+		cur = grown;
+	}
+
+	slot = &cur->comps[cur->ncomps];
+	BUG_ON(slot->comp_func);
+
+	slot->comp_func = all_comp_funcs[comparer];
+	slot->is_descending = is_descending;
+	cur->ncomps++;
+	return 0;
+}
+
+/*
+ * Compare two entries with every comparer in @set, in order; the first
+ * non-zero result wins.  When all comparers tie, or there is no set,
+ * the root id (ascending) is the tie breaker, unless the root id
+ * comparer was already one of the comparers tried.
+ */
+static int sort_comp(struct root_info *entry1, struct root_info *entry2,
+		     struct btrfs_list_comparer_set *set)
+{
+	int saw_rootid = 0;
+	int idx;
+
+	if (set && set->ncomps) {
+		for (idx = 0; idx < set->ncomps; idx++) {
+			btrfs_list_comp_func fn = set->comps[idx].comp_func;
+			int res;
+
+			if (!fn)
+				break;
+
+			res = fn(entry1, entry2, set->comps[idx].is_descending);
+			if (res)
+				return res;
+
+			if (fn == comp_entry_with_rootid)
+				saw_rootid = 1;
+		}
+	}
+
+	/* every comparer tied; root id already compared means a full tie */
+	if (saw_rootid)
+		return 0;
+
+	return comp_entry_with_rootid(entry1, entry2, 0);
+}
+
+/*
+ * Link @ins into @sort_tree through its sort_node, ordered by
+ * sort_comp() with @comp_set.  Returns 0 on success or -EEXIST when an
+ * entry comparing equal is already in the tree.
+ */
+static int sort_tree_insert(struct root_lookup *sort_tree,
+			    struct root_info *ins,
+			    struct btrfs_list_comparer_set *comp_set)
+{
+	struct rb_node **link = &sort_tree->root.rb_node;
+	struct rb_node *above = NULL;
+
+	while (*link) {
+		struct root_info *here;
+		int cmp;
+
+		above = *link;
+		here = rb_entry(above, struct root_info, sort_node);
+
+		cmp = sort_comp(ins, here, comp_set);
+		if (cmp == 0)
+			return -EEXIST;
+
+		link = (cmp < 0) ? &above->rb_left : &above->rb_right;
+	}
+
+	rb_link_node(&ins->sort_node, above, link);
+	rb_insert_color(&ins->sort_node, &sort_tree->root);
+	return 0;
+}
+
+/*
+ * insert a new root into the tree, keyed by root_id only.  Returns 0
+ * on success or -EEXIST if an entry with the same root_id is already
+ * there (the new entry is not linked in that case).
+ */
+static int root_tree_insert(struct root_lookup *root_tree,
+			    struct root_info *ins)
+{
+	struct rb_node **link = &root_tree->root.rb_node;
+	struct rb_node *above = NULL;
+
+	while (*link) {
+		struct root_info *here;
+		int cmp;
+
+		above = *link;
+		here = rb_entry(above, struct root_info, rb_node);
+
+		cmp = comp_entry_with_rootid(ins, here, 0);
+		if (cmp == 0)
+			return -EEXIST;
+
+		link = (cmp < 0) ? &above->rb_left : &above->rb_right;
+	}
+
+	rb_link_node(&ins->rb_node, above, link);
+	rb_insert_color(&ins->rb_node, &root_tree->root);
+	return 0;
+}
+
+/*
+ * find a given root id in the tree.  Returns the matching entry, or
+ * NULL when no entry has that root id.
+ */
+static struct root_info *root_tree_search(struct root_lookup *root_tree,
+					  u64 root_id)
+{
+	struct rb_node *cursor;
+
+	for (cursor = root_tree->root.rb_node; cursor; ) {
+		struct root_info *here;
+
+		here = rb_entry(cursor, struct root_info, rb_node);
+
+		if (root_id < here->root_id)
+			cursor = cursor->rb_left;
+		else if (root_id > here->root_id)
+			cursor = cursor->rb_right;
+		else
+			return here;
+	}
+	return NULL;
+}
+
+/*
+ * update_root - refresh the fields of a root that is already in the tree.
+ *
+ * Only arguments that are non-zero / non-NULL overwrite the stored
+ * value; @name is copied (at most @name_len bytes, then terminated)
+ * and replaces any previous name.  If no original generation is known
+ * after the update, a non-zero @root_offset is used for it.
+ *
+ * Returns 0 on success or -ENOENT when @root_id is not in the tree.
+ * Exits the program if the name copy cannot be allocated.
+ */
+static int update_root(struct root_lookup *root_lookup,
+		       u64 root_id, u64 ref_tree, u64 root_offset, u64 flags,
+		       u64 dir_id, char *name, int name_len, u64 ogen, u64 gen,
+		       time_t ot, void *uuid, void *puuid, void *ruuid)
+{
+	struct root_info *node = root_tree_search(root_lookup, root_id);
+
+	if (!node || node->root_id != root_id)
+		return -ENOENT;
+
+	if (name && name_len > 0) {
+		char *copy;
+
+		free(node->name);
+
+		copy = malloc(name_len + 1);
+		node->name = copy;
+		if (!copy) {
+			fprintf(stderr, "memory allocation failed\n");
+			exit(1);
+		}
+		strncpy(copy, name, name_len);
+		copy[name_len] = 0;
+	}
+
+	if (ref_tree)
+		node->ref_tree = ref_tree;
+	if (dir_id)
+		node->dir_id = dir_id;
+	if (root_offset)
+		node->root_offset = root_offset;
+	if (flags)
+		node->flags = flags;
+	if (gen)
+		node->gen = gen;
+
+	/* the explicit ogen wins; root_offset is only the fallback */
+	if (ogen)
+		node->ogen = ogen;
+	if (!node->ogen && root_offset)
+		node->ogen = root_offset;
+
+	if (ot)
+		node->otime = ot;
+
+	if (uuid)
+		memcpy(&node->uuid, uuid, BTRFS_UUID_SIZE);
+	if (puuid)
+		memcpy(&node->puuid, puuid, BTRFS_UUID_SIZE);
+	if (ruuid)
+		memcpy(&node->ruuid, ruuid, BTRFS_UUID_SIZE);
+
+	return 0;
+}
+
+/*
+ * add_root - update the existing root, or allocate a new root and insert it
+ *            into the lookup tree.
+ * root_id: object id of the root
+ * ref_tree: object id of the referring root.
+ * root_offset: offset value of the root's key
+ * dir_id: inode id of the directory in ref_tree where this root can be found.
+ * name: the name of root_id in that directory
+ * name_len: the length of name
+ * ogen: the original generation of the root
+ * gen: the current generation of the root
+ * ot: the original time (create time) of the root
+ * uuid: uuid of the root
+ * puuid: uuid of the root parent if any
+ * ruuid: uuid of the received subvol, if any
+ *
+ * Zero / NULL arguments leave the corresponding field untouched (zeroed
+ * for a new root).  Returns 0; allocation or insertion failure prints a
+ * message to stderr and exits the program.
+ */
+static int add_root(struct root_lookup *root_lookup,
+		    u64 root_id, u64 ref_tree, u64 root_offset, u64 flags,
+		    u64 dir_id, char *name, int name_len, u64 ogen, u64 gen,
+		    time_t ot, void *uuid, void *puuid, void *ruuid)
+{
+	struct root_info *ri;
+	int ret;
+
+	/* the common case: the root is already known, just refresh it */
+	ret = update_root(root_lookup, root_id, ref_tree, root_offset, flags,
+			  dir_id, name, name_len, ogen, gen, ot,
+			  uuid, puuid, ruuid);
+	if (!ret)
+		return 0;
+
+	ri = calloc(1, sizeof(*ri));
+	if (!ri) {
+		fprintf(stderr, "memory allocation failed\n");
+		exit(1);
+	}
+	ri->root_id = root_id;
+
+	ret = root_tree_insert(root_lookup, ri);
+	if (ret) {
+		fprintf(stderr, "failed to insert tree %llu\n",
+			(unsigned long long)root_id);
+		exit(1);
+	}
+
+	/*
+	 * The entry is zeroed and now in the tree, so update_root() finds
+	 * it and fills the fields with exactly the rules used for an
+	 * existing root; no need to duplicate them here.
+	 */
+	return update_root(root_lookup, root_id, ref_tree, root_offset, flags,
+			   dir_id, name, name_len, ogen, gen, ot,
+			   uuid, puuid, ruuid);
+}
+
+/*
+ * rb_free_nodes() callback: free the root_info that embeds @node
+ * (through its rb_node member) together with its three strings.
+ */
+static void __free_root_info(struct rb_node *node)
+{
+	struct root_info *victim = rb_entry(node, struct root_info, rb_node);
+
+	free(victim->full_path);
+	free(victim->path);
+	free(victim->name);
+	free(victim);
+}
+
+/* Free every root_info stored in @root_tree. */
+static inline void __free_all_subvolumn(struct root_lookup *root_tree)
+{
+	struct rb_root *top = &root_tree->root;
+
+	rb_free_nodes(top, __free_root_info);
+}
+
+/*
+ * for a given root_info, search through the root_lookup tree to construct
+ * the full path name to it.
+ *
+ * This can't be called until all the root_info->path fields are filled
+ * in by lookup_ino_path
+ *
+ * On success ri->full_path owns the newly allocated path and 0 is
+ * returned.  -ENOENT is returned (and nothing is stored) when the root
+ * or one of its ancestors has been deleted.  Running out of memory
+ * prints a message and exits.
+ */
+static int resolve_root(struct root_lookup *rl, struct root_info *ri,
+			u64 top_id)
+{
+	char *full_path = NULL;
+	int len = 0;
+	struct root_info *found;
+
+	/*
+	 * we go backwards from the root_info object and add pathnames
+	 * from parent directories as we go.
+	 */
+	found = ri;
+	while (1) {
+		char *tmp;
+		u64 next;
+		int add_len;
+
+		/*
+		 * ref_tree = 0 indicates the subvolume
+		 * has been deleted.
+		 */
+		if (!found->ref_tree) {
+			free(full_path);
+			return -ENOENT;
+		}
+
+		add_len = strlen(found->path);
+
+		if (full_path) {
+			/* room for / and for null */
+			tmp = malloc(add_len + 2 + len);
+			if (!tmp) {
+				perror("malloc failed");
+				exit(1);
+			}
+			memcpy(tmp, found->path, add_len);
+			tmp[add_len] = '/';
+			memcpy(tmp + add_len + 1, full_path, len);
+			tmp[add_len + len + 1] = '\0';
+			free(full_path);
+			full_path = tmp;
+			len += add_len + 1;
+		} else {
+			full_path = strdup(found->path);
+			/*
+			 * A NULL here would look like "no path yet" on the
+			 * next round and silently drop this component, so
+			 * fail the same way the malloc above does.
+			 */
+			if (!full_path) {
+				perror("strdup failed");
+				exit(1);
+			}
+			len = add_len;
+		}
+		if (!ri->top_id)
+			ri->top_id = found->ref_tree;
+
+		next = found->ref_tree;
+		if (next == top_id)
+			break;
+		/*
+		 * if the ref_tree = BTRFS_FS_TREE_OBJECTID,
+		 * we are at the top
+		 */
+		if (next == BTRFS_FS_TREE_OBJECTID)
+			break;
+		/*
+		 * if the ref_tree wasn't in our tree of roots, the
+		 * subvolume was deleted.
+		 */
+		found = root_tree_search(rl, next);
+		if (!found) {
+			free(full_path);
+			return -ENOENT;
+		}
+	}
+
+	ri->full_path = full_path;
+
+	return 0;
+}
+
+/*
+ * for a single root_info, ask the kernel to give us a path name
+ * inside its ref_root for the dir_id where it lives.
+ *
+ * This fills in root_info->path with the path to the directory and
+ * appends this root's name.
+ *
+ * Returns 0 on success (or when the path was already filled in),
+ * -ENOENT when the root has no ref_tree or the kernel reports ENOENT
+ * (ref_tree is then cleared), and the ioctl's return value for any
+ * other failure.  Allocation failure exits the program.
+ */
+static int lookup_ino_path(int fd, struct root_info *ri)
+{
+	struct btrfs_ioctl_ino_lookup_args args;
+	int ret;
+
+	if (ri->path)
+		return 0;
+
+	if (!ri->ref_tree)
+		return -ENOENT;
+
+	memset(&args, 0, sizeof(args));
+	args.treeid = ri->ref_tree;
+	args.objectid = ri->dir_id;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	if (ret < 0) {
+		if (errno == ENOENT) {
+			ri->ref_tree = 0;
+			return -ENOENT;
+		}
+		fprintf(stderr, "ERROR: Failed to lookup path for root %llu - %s\n",
+			(unsigned long long)ri->ref_tree,
+			strerror(errno));
+		return ret;
+	}
+
+	if (!args.name[0]) {
+		/* we're at the root of ref_tree */
+		ri->path = strdup(ri->name);
+		if (!ri->path) {
+			perror("strdup failed");
+			exit(1);
+		}
+		return 0;
+	}
+
+	/*
+	 * we're in a subdirectory of ref_tree, the kernel ioctl
+	 * puts a / in there for us
+	 */
+	{
+		size_t dir_len = strlen(args.name);
+		size_t name_len = strlen(ri->name);
+
+		ri->path = malloc(name_len + dir_len + 1);
+		if (!ri->path) {
+			perror("malloc failed");
+			exit(1);
+		}
+		memcpy(ri->path, args.name, dir_len);
+		/* the + 1 carries the terminating NUL of ri->name along */
+		memcpy(ri->path + dir_len, ri->name, name_len + 1);
+	}
+	return 0;
+}
+
+/*
+ * finding the generation for the subvolume behind fd is a two step
+ * process.
+ * First we use the inode lookup ioctl to find out the root id
+ *
+ * Then we use the tree search ioctl to scan all the root items for that
+ * root id and return the largest generation we can find
+ *
+ * Returns 0 when either ioctl fails (a message is printed to stderr)
+ * or when no matching root item is seen.
+ */
+static u64 find_root_gen(int fd)
+{
+ struct btrfs_ioctl_ino_lookup_args ino_args;
+ int ret;
+ struct btrfs_ioctl_search_args args;
+ struct btrfs_ioctl_search_key *sk = &args.key;
+ struct btrfs_ioctl_search_header sh;
+ unsigned long off = 0; /* byte offset of the current item in args.buf */
+ u64 max_found = 0; /* largest generation seen so far */
+ int i;
+
+ memset(&ino_args, 0, sizeof(ino_args));
+ ino_args.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+ /* this ioctl fills in ino_args->treeid */
+ ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR: Failed to lookup path for dirid %llu - %s\n",
+ (unsigned long long)BTRFS_FIRST_FREE_OBJECTID,
+ strerror(errno));
+ return 0;
+ }
+
+ memset(&args, 0, sizeof(args));
+
+ sk->tree_id = 1; /* search in the tree of tree roots */
+
+ /*
+ * there may be more than one ROOT_ITEM key if there are
+ * snapshots pending deletion, we have to loop through
+ * them.  The key range is pinned to ROOT_ITEM keys of exactly
+ * the tree id found above.
+ */
+ sk->min_objectid = ino_args.treeid;
+ sk->max_objectid = ino_args.treeid;
+ sk->max_type = BTRFS_ROOT_ITEM_KEY;
+ sk->min_type = BTRFS_ROOT_ITEM_KEY;
+ sk->max_offset = (u64)-1;
+ sk->max_transid = (u64)-1;
+ sk->nr_items = 4096;
+
+ while (1) {
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR: can't perform the search - %s\n",
+ strerror(errno));
+ return 0;
+ }
+ /*
+ * the ioctl returns the number of item it found in nr_items.
+ * NOTE(review): nr_items is not set back to 4096 before the
+ * next call, so later calls ask for at most that many items.
+ */
+ if (sk->nr_items == 0)
+ break;
+
+ off = 0;
+ for (i = 0; i < sk->nr_items; i++) {
+ struct btrfs_root_item *item;
+
+ memcpy(&sh, args.buf + off, sizeof(sh)); /* header copied out of the raw buffer */
+ off += sizeof(sh);
+ item = (struct btrfs_root_item *)(args.buf + off);
+ off += sh.len; /* step over the item payload to the next header */
+
+ sk->min_objectid = sh.objectid; /* remember the last key seen ... */
+ sk->min_type = sh.type;
+ sk->min_offset = sh.offset; /* ... so the next search resumes after it */
+
+ if (sh.objectid > ino_args.treeid)
+ break;
+
+ if (sh.objectid == ino_args.treeid &&
+ sh.type == BTRFS_ROOT_ITEM_KEY) {
+ max_found = max(max_found,
+ btrfs_root_generation(item));
+ }
+ }
+ if (sk->min_offset < (u64)-1)
+ sk->min_offset++; /* resume just past the last key */
+ else
+ break;
+
+ if (sk->min_type != BTRFS_ROOT_ITEM_KEY) /* last key left the wanted type */
+ break;
+ if (sk->min_objectid != ino_args.treeid) /* last key left the wanted root */
+ break;
+ }
+ return max_found;
+}
+
+/*
+ * pass in a directory id and this will return
+ * the full path of the parent directory inside its
+ * subvolume root.
+ *
+ * Returns a newly allocated string the caller must free, NULL if the
+ * directory is the subvolume root, or an ERR_PTR carrying a negative
+ * errno value if the lookup ioctl or the allocation fails.
+ */
+static char *__ino_resolve(int fd, u64 dirid)
+{
+	struct btrfs_ioctl_ino_lookup_args args;
+	int ret;
+	char *full;
+
+	memset(&args, 0, sizeof(args));
+	args.objectid = dirid;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	if (ret < 0) {
+		/*
+		 * ioctl() itself only returns -1; the real reason is in
+		 * errno.  Save it before stdio gets a chance to change it.
+		 */
+		int saved_errno = errno;
+
+		fprintf(stderr, "ERROR: Failed to lookup path for dirid %llu - %s\n",
+			(unsigned long long)dirid, strerror(saved_errno));
+		return ERR_PTR(-saved_errno);
+	}
+
+	if (args.name[0]) {
+		/*
+		 * we're in a subdirectory of ref_tree, the kernel ioctl
+		 * puts a / in there for us
+		 */
+		full = strdup(args.name);
+		if (!full) {
+			perror("malloc failed");
+			return ERR_PTR(-ENOMEM);
+		}
+	} else {
+		/* we're at the root of ref_tree */
+		full = NULL;
+	}
+	return full;
+}
+
+/*
+ * simple string builder, returning a newly allocated string made of
+ * dirid immediately followed by name (no separator is inserted).
+ * A NULL dirid yields a plain copy of name.  Returns NULL when the
+ * allocation fails.
+ */
+static char *build_name(char *dirid, char *name)
+{
+	size_t dir_len;
+	size_t name_len;
+	char *joined;
+
+	if (dirid == NULL)
+		return strdup(name);
+
+	dir_len = strlen(dirid);
+	name_len = strlen(name);
+
+	joined = malloc(dir_len + name_len + 1);
+	if (joined != NULL) {
+		memcpy(joined, dirid, dir_len);
+		/* the + 1 carries the terminating NUL of name along */
+		memcpy(joined + dir_len, name, name_len + 1);
+	}
+	return joined;
+}
+
+/*
+ * given an inode number, this returns the full path name inside the subvolume
+ * to that file/directory.  cache_dirid and cache_name are used to
+ * cache the results so we can avoid tree searches if a later call goes
+ * to the same directory or file name
+ *
+ * Returns a newly allocated string the caller must free, or NULL when
+ * the search fails, finds no inode ref, the parent directory cannot be
+ * resolved, or memory runs out.  On those failures the cache is left
+ * as it was.
+ */
+static char *ino_resolve(int fd, u64 ino, u64 *cache_dirid, char **cache_name)
+
+{
+	u64 dirid;
+	char *dirname;
+	char *name;
+	char *full;
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	struct btrfs_inode_ref *ref;
+	int namelen;
+
+	memset(&args, 0, sizeof(args));
+
+	sk->tree_id = 0;
+
+	/*
+	 * step one, we search for the inode back ref.  We just use the first
+	 * one
+	 */
+	sk->min_objectid = ino;
+	sk->max_objectid = ino;
+	sk->max_type = BTRFS_INODE_REF_KEY;
+	sk->max_offset = (u64)-1;
+	sk->min_type = BTRFS_INODE_REF_KEY;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 1;
+
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: can't perform the search - %s\n",
+			strerror(errno));
+		return NULL;
+	}
+	/* the ioctl returns the number of item it found in nr_items */
+	if (sk->nr_items == 0)
+		return NULL;
+
+	sh = (struct btrfs_ioctl_search_header *)args.buf;
+	if (sh->type != BTRFS_INODE_REF_KEY)
+		return NULL;
+
+	/* the key offset of an inode ref is the parent directory id */
+	dirid = sh->offset;
+
+	ref = (struct btrfs_inode_ref *)(sh + 1);
+	namelen = btrfs_stack_inode_ref_name_len(ref);
+
+	/* the name bytes follow the ref and are not NUL terminated */
+	name = strndup((char *)(ref + 1), namelen);
+	if (!name) {
+		perror("strndup failed");
+		return NULL;
+	}
+
+	if (dirid == *cache_dirid && *cache_name) {
+		/* use our cached value */
+		dirname = *cache_name;
+	} else {
+		/*
+		 * the inode backref gives us the file name and the parent
+		 * directory id.  From here we use __ino_resolve to get the
+		 * path to the parent
+		 */
+		dirname = __ino_resolve(fd, dirid);
+		if (IS_ERR(dirname)) {
+			/*
+			 * An error pointer is not a string: it must reach
+			 * neither build_name() nor the cache, where a
+			 * later call would try to free it.
+			 */
+			free(name);
+			return NULL;
+		}
+	}
+
+	full = build_name(dirname, name);
+	if (*cache_name && dirname != *cache_name)
+		free(*cache_name);
+
+	*cache_name = dirname;
+	*cache_dirid = dirid;
+	free(name);
+
+	return full;
+}
+
+/*
+ * Look up the id of the default subvolume of the filesystem behind @fd.
+ *
+ * On success 0 is returned and *@default_id holds the object id the
+ * "default" directory item points to, or 0 if no such item was found.
+ * If the search ioctl fails its return value is passed back and
+ * *@default_id is left untouched.
+ */
+int btrfs_list_get_default_subvolume(int fd, u64 *default_id)
+{
+	static const char default_name[] = "default";
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	u64 found = 0;
+	int ret;
+
+	memset(&args, 0, sizeof(args));
+
+	/*
+	 * search for a dir item with a name 'default' in the tree of
+	 * tree roots, it should point us to a default root
+	 */
+	sk->tree_id = 1;
+
+	/* don't worry about ancient format and request only one item */
+	sk->nr_items = 1;
+
+	sk->max_objectid = BTRFS_ROOT_TREE_DIR_OBJECTID;
+	sk->min_objectid = BTRFS_ROOT_TREE_DIR_OBJECTID;
+	sk->max_type = BTRFS_DIR_ITEM_KEY;
+	sk->min_type = BTRFS_DIR_ITEM_KEY;
+	sk->max_offset = (u64)-1;
+	sk->max_transid = (u64)-1;
+
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+	if (ret < 0)
+		return ret;
+
+	/* the ioctl returns the number of items it found in nr_items */
+	if (sk->nr_items == 0)
+		goto out;
+
+	sh = (struct btrfs_ioctl_search_header *)args.buf;
+
+	if (sh->type == BTRFS_DIR_ITEM_KEY) {
+		struct btrfs_dir_item *di;
+		int name_len;
+		char *name;
+
+		di = (struct btrfs_dir_item *)(sh + 1);
+		name_len = btrfs_stack_dir_name_len(di);
+		name = (char *)(di + 1);
+
+		/*
+		 * The on-disk name is not NUL terminated, so the length
+		 * has to match too; comparing only name_len bytes would
+		 * also accept any prefix of "default", even an empty name.
+		 */
+		if (name_len == (int)(sizeof(default_name) - 1) &&
+		    !memcmp(default_name, name, name_len))
+			found = btrfs_disk_key_objectid(&di->location);
+	}
+
+out:
+	*default_id = found;
+	return 0;
+}
+
+/*
+ * Reset @root_lookup and fill it with every root found in the tree of
+ * tree roots: each ROOT_BACKREF item contributes the referring tree,
+ * directory id and name of a root, each ROOT_ITEM contributes its
+ * generation, flags and (for items larger than the v0 layout) otime,
+ * otransid and uuids.
+ *
+ * Returns 0 on success or the search ioctl's return value on failure.
+ */
+static int __list_subvol_search(int fd, struct root_lookup *root_lookup)
+{
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header sh;
+	unsigned long off;
+	int ret;
+	int i;
+
+	root_lookup_init(root_lookup);
+	memset(&args, 0, sizeof(args));
+
+	/* search in the tree of tree roots */
+	sk->tree_id = 1;
+
+	/*
+	 * the type range runs from root items up to root backrefs; items
+	 * of any other type that come back are ignored below.
+	 */
+	sk->min_type = BTRFS_ROOT_ITEM_KEY;
+	sk->max_type = BTRFS_ROOT_BACKREF_KEY;
+
+	/* any objectid in the free range, any offset, any transaction */
+	sk->min_objectid = BTRFS_FIRST_FREE_OBJECTID;
+	sk->max_objectid = BTRFS_LAST_FREE_OBJECTID;
+	sk->max_offset = (u64)-1;
+	sk->max_transid = (u64)-1;
+
+	/* just a big number, doesn't matter much */
+	sk->nr_items = 4096;
+
+	while (1) {
+		ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+		if (ret < 0)
+			return ret;
+		/* the ioctl returns the number of item it found in nr_items */
+		if (sk->nr_items == 0)
+			break;
+
+		/*
+		 * for each item, pull the key out of the header and then
+		 * read the payload that follows it
+		 */
+		off = 0;
+		for (i = 0; i < sk->nr_items; i++) {
+			memcpy(&sh, args.buf + off, sizeof(sh));
+			off += sizeof(sh);
+
+			if (sh.type == BTRFS_ROOT_BACKREF_KEY) {
+				struct btrfs_root_ref *ref;
+				int name_len;
+				u64 dir_id;
+
+				ref = (struct btrfs_root_ref *)(args.buf + off);
+				name_len = btrfs_stack_root_ref_name_len(ref);
+				dir_id = btrfs_stack_root_ref_dirid(ref);
+
+				/* the name is stored right behind the ref */
+				add_root(root_lookup, sh.objectid, sh.offset,
+					 0, 0, dir_id, (char *)(ref + 1),
+					 name_len, 0, 0, 0,
+					 NULL, NULL, NULL);
+			} else if (sh.type == BTRFS_ROOT_ITEM_KEY) {
+				struct btrfs_root_item *item;
+				u8 uuid[BTRFS_UUID_SIZE];
+				u8 puuid[BTRFS_UUID_SIZE];
+				u8 ruuid[BTRFS_UUID_SIZE];
+				time_t otime = 0;
+				u64 ogen = 0;
+				u64 gen;
+				u64 flags;
+
+				item = (struct btrfs_root_item *)(args.buf + off);
+				gen = btrfs_root_generation(item);
+				flags = btrfs_root_flags(item);
+
+				if (sh.len > sizeof(struct btrfs_root_item_v0)) {
+					otime = btrfs_stack_timespec_sec(&item->otime);
+					ogen = btrfs_root_otransid(item);
+					memcpy(uuid, item->uuid, BTRFS_UUID_SIZE);
+					memcpy(puuid, item->parent_uuid, BTRFS_UUID_SIZE);
+					memcpy(ruuid, item->received_uuid, BTRFS_UUID_SIZE);
+				} else {
+					/* v0 sized item: none of these exist */
+					memset(uuid, 0, BTRFS_UUID_SIZE);
+					memset(puuid, 0, BTRFS_UUID_SIZE);
+					memset(ruuid, 0, BTRFS_UUID_SIZE);
+				}
+
+				add_root(root_lookup, sh.objectid, 0,
+					 sh.offset, flags, 0, NULL, 0, ogen,
+					 gen, otime, uuid, puuid, ruuid);
+			}
+
+			off += sh.len;
+
+			/*
+			 * record the mins in sk so we can make sure the
+			 * next search doesn't repeat this root
+			 */
+			sk->min_objectid = sh.objectid;
+			sk->min_type = sh.type;
+			sk->min_offset = sh.offset;
+		}
+
+		sk->nr_items = 4096;
+
+		/*
+		 * step to the key right after the last one seen: bump the
+		 * offset, carry into the type when it wraps, and carry
+		 * into the objectid when the type leaves the range.
+		 */
+		if (++sk->min_offset)
+			continue;
+		if (++sk->min_type <= BTRFS_ROOT_BACKREF_KEY)
+			continue;
+
+		sk->min_type = BTRFS_ROOT_ITEM_KEY;
+		if (++sk->min_objectid > sk->max_objectid)
+			break;
+	}
+
+	return 0;
+}
+
+/* Keep @ri only if its root id is @data. */
+static int filter_by_rootid(struct root_info *ri, u64 data)
+{
+	return data == ri->root_id;
+}
+
+/* Keep @ri only if its root_offset is non-zero; @data is unused. */
+static int filter_snapshot(struct root_info *ri, u64 data)
+{
+	return ri->root_offset != 0;
+}
+
+/*
+ * Keep @ri only if at least one of the bits in @flags is set in its
+ * flags.  Returns 1 or 0.
+ */
+static int filter_flags(struct root_info *ri, u64 flags)
+{
+	/*
+	 * Normalise before narrowing to int: returning the raw 64-bit
+	 * mask would lose every flag bit above bit 31 and report such
+	 * a match as 0.
+	 */
+	return (ri->flags & flags) != 0;
+}
+
+/* Keep @ri only if its generation is at least @data. */
+static int filter_gen_more(struct root_info *ri, u64 data)
+{
+	return !(ri->gen < data);
+}
+
+/* Keep @ri only if its generation is at most @data. */
+static int filter_gen_less(struct root_info *ri, u64 data)
+{
+	return !(ri->gen > data);
+}
+
+/* Keep @ri only if its generation is exactly @data. */
+static int filter_gen_equal(struct root_info *ri, u64 data)
+{
+	return data == ri->gen;
+}
+
+/* Keep @ri only if its original generation (ogen) is at least @data. */
+static int filter_cgen_more(struct root_info *ri, u64 data)
+{
+	return !(ri->ogen < data);
+}
+
+/* Keep @ri only if its original generation (ogen) is at most @data. */
+static int filter_cgen_less(struct root_info *ri, u64 data)
+{
+	return !(ri->ogen > data);
+}
+
+/* Keep @ri only if its original generation (ogen) is exactly @data. */
+static int filter_cgen_equal(struct root_info *ri, u64 data)
+{
+	return data == ri->ogen;
+}
+
+/* Keep @ri only if its top_id is exactly @data. */
+static int filter_topid_equal(struct root_info *ri, u64 data)
+{
+	return data == ri->top_id;
+}
+
+/*
+ * Not a real filter: it always keeps @ri.  As a side effect, when @ri
+ * has a full_path and its top_id differs from @data, the path is
+ * replaced by a new string with "<FS_TREE>/" put in front of it.
+ * Exits the program if the new string cannot be allocated.
+ */
+static int filter_full_path(struct root_info *ri, u64 data)
+{
+	char prefix[] = "<FS_TREE>";
+	int prefix_len;
+	int path_len;
+	char *joined;
+
+	if (!ri->full_path || ri->top_id == data)
+		return 1;
+
+	prefix_len = strlen(prefix);
+	path_len = strlen(ri->full_path);
+
+	/* room for the prefix, the '/', the old path and the NUL */
+	joined = malloc(path_len + prefix_len + 2);
+	if (!joined) {
+		fprintf(stderr, "memory allocation failed\n");
+		exit(1);
+	}
+
+	memcpy(joined, prefix, prefix_len);
+	joined[prefix_len] = '/';
+	memcpy(joined + prefix_len + 1, ri->full_path, path_len);
+	joined[prefix_len + 1 + path_len] = '\0';
+
+	free(ri->full_path);
+	ri->full_path = joined;
+	return 1;
+}
+
+/*
+ * Keep @ri only if its parent uuid equals the uuid that @data points
+ * to; @data carries a pointer to the uuid bytes, not a number.
+ */
+static int filter_by_parent(struct root_info *ri, u64 data)
+{
+	u8 *wanted = (u8 *)(unsigned long)data;
+
+	return uuid_compare(ri->puuid, wanted) == 0;
+}
+
+/* Keep @ri only if it is marked deleted; @data is unused. */
+static int filter_deleted(struct root_info *ri, u64 data)
+{
+	return ri->deleted;
+}
+
+/* Filter callback for each filter kind, indexed by btrfs_list_filter_enum. */
+static btrfs_list_filter_func all_filter_funcs[] = {
+	/* identity and kind */
+	[BTRFS_LIST_FILTER_ROOTID]		= filter_by_rootid,
+	[BTRFS_LIST_FILTER_TOPID_EQUAL]		= filter_topid_equal,
+	[BTRFS_LIST_FILTER_BY_PARENT]		= filter_by_parent,
+	[BTRFS_LIST_FILTER_SNAPSHOT_ONLY]	= filter_snapshot,
+	[BTRFS_LIST_FILTER_FLAGS]		= filter_flags,
+	[BTRFS_LIST_FILTER_DELETED]		= filter_deleted,
+	/* generation */
+	[BTRFS_LIST_FILTER_GEN_MORE]		= filter_gen_more,
+	[BTRFS_LIST_FILTER_GEN_LESS]		= filter_gen_less,
+	[BTRFS_LIST_FILTER_GEN_EQUAL]		= filter_gen_equal,
+	/* original generation */
+	[BTRFS_LIST_FILTER_CGEN_MORE]		= filter_cgen_more,
+	[BTRFS_LIST_FILTER_CGEN_LESS]		= filter_cgen_less,
+	[BTRFS_LIST_FILTER_CGEN_EQUAL]		= filter_cgen_equal,
+	/* path rewriting pseudo filter */
+	[BTRFS_LIST_FILTER_FULL_PATH]		= filter_full_path,
+};
+
+/*
+ * Allocate a zeroed filter set with room for
+ * BTRFS_LIST_NFILTERS_INCREASE filters.  Prints a message and exits
+ * the program when the allocation fails, so the result is never NULL.
+ */
+struct btrfs_list_filter_set *btrfs_list_alloc_filter_set(void)
+{
+	struct btrfs_list_filter_set *fresh;
+	int bytes = sizeof(struct btrfs_list_filter_set) +
+		    BTRFS_LIST_NFILTERS_INCREASE * sizeof(struct btrfs_list_filter);
+
+	fresh = calloc(1, bytes);
+	if (fresh) {
+		fresh->total = BTRFS_LIST_NFILTERS_INCREASE;
+		return fresh;
+	}
+
+	fprintf(stderr, "memory allocation failed\n");
+	exit(1);
+}
+
+/* Release a set obtained from btrfs_list_alloc_filter_set(). */
+void btrfs_list_free_filter_set(struct btrfs_list_filter_set *filter_set)
+{
+	free(filter_set);
+}
+
+/*
+ * Append the filter selected by @filter, with its argument @data, to
+ * *@filter_set.  Adding BTRFS_LIST_FILTER_DELETED also switches the
+ * set into only_deleted mode.
+ *
+ * When the set is full it is grown by BTRFS_LIST_NFILTERS_INCREASE
+ * slots with realloc(), so *@filter_set may be replaced by a new
+ * pointer.  Allocation failure prints a message and exits.
+ * Always returns 0.
+ */
+int btrfs_list_setup_filter(struct btrfs_list_filter_set **filter_set,
+			    enum btrfs_list_filter_enum filter, u64 data)
+{
+	struct btrfs_list_filter_set *cur = *filter_set;
+	struct btrfs_list_filter *slot;
+
+	BUG_ON(!cur);
+	BUG_ON(filter >= BTRFS_LIST_FILTER_MAX);
+	BUG_ON(cur->nfilters > cur->total);
+
+	if (cur->nfilters == cur->total) {
+		struct btrfs_list_filter_set *grown;
+		int bytes;
+
+		bytes = cur->total + BTRFS_LIST_NFILTERS_INCREASE;
+		bytes = sizeof(*cur) + bytes * sizeof(struct btrfs_list_filter);
+
+		grown = realloc(cur, bytes);
+		if (!grown) {
+			fprintf(stderr, "memory allocation failed\n");
+			free(cur);
+			exit(1);
+		}
+
+		/* only the newly added tail needs clearing */
+		memset(&grown->filters[grown->total], 0,
+		       BTRFS_LIST_NFILTERS_INCREASE *
+		       sizeof(struct btrfs_list_filter));
+		grown->total += BTRFS_LIST_NFILTERS_INCREASE;
+		*filter_set = grown;
+		cur = grown;
+	}
+
+	slot = &cur->filters[cur->nfilters];
+	BUG_ON(slot->filter_func);
+
+	if (filter == BTRFS_LIST_FILTER_DELETED)
+		cur->only_deleted = 1;
+
+	slot->filter_func = all_filter_funcs[filter];
+	slot->data = data;
+	cur->nfilters++;
+	return 0;
+}
+
+/*
+ * Decide whether @ri passes @set.  Returns 1 to keep it, 0 to drop it.
+ *
+ * A NULL set keeps everything.  Otherwise the deleted state of the
+ * root must agree with the set's only_deleted mode, and every filter
+ * in the set must accept the root.
+ */
+static int filter_root(struct root_info *ri,
+		       struct btrfs_list_filter_set *set)
+{
+	int idx;
+
+	if (!set)
+		return 1;
+
+	/* deleted roots only in only_deleted mode, live roots only outside */
+	if (!set->only_deleted != !ri->deleted)
+		return 0;
+
+	for (idx = 0; idx < set->nfilters; idx++) {
+		btrfs_list_filter_func fn = set->filters[idx].filter_func;
+
+		if (!fn)
+			break;
+		if (!fn(ri, set->filters[idx].data))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Walk @all_subvols from the last entry to the first, resolve the full
+ * path of each root, and link every root accepted by @filter_set into
+ * @sort_tree (which is reset first) in @comp_set order.
+ *
+ * A root whose path resolves to -ENOENT is marked deleted and gets the
+ * full path "DELETED".
+ */
+static void __filter_and_sort_subvol(struct root_lookup *all_subvols,
+				     struct root_lookup *sort_tree,
+				     struct btrfs_list_filter_set *filter_set,
+				     struct btrfs_list_comparer_set *comp_set,
+				     u64 top_id)
+{
+	struct rb_node *cursor;
+
+	root_lookup_init(sort_tree);
+
+	for (cursor = rb_last(&all_subvols->root); cursor;
+	     cursor = rb_prev(cursor)) {
+		struct root_info *subvol;
+
+		subvol = rb_entry(cursor, struct root_info, rb_node);
+
+		if (resolve_root(all_subvols, subvol, top_id) == -ENOENT) {
+			subvol->full_path = strdup("DELETED");
+			subvol->deleted = 1;
+		}
+
+		if (filter_root(subvol, filter_set))
+			sort_tree_insert(sort_tree, subvol, comp_set);
+	}
+}
+
+/*
+ * Call lookup_ino_path() on every root in @root_lookup, first to last.
+ * -ENOENT from a lookup is tolerated; any other non-zero result stops
+ * the walk and is returned.  Returns 0 otherwise.
+ */
+static int __list_subvol_fill_paths(int fd, struct root_lookup *root_lookup)
+{
+	struct rb_node *cursor;
+
+	for (cursor = rb_first(&root_lookup->root); cursor;
+	     cursor = rb_next(cursor)) {
+		struct root_info *subvol;
+		int err;
+
+		subvol = rb_entry(cursor, struct root_info, rb_node);
+		err = lookup_ino_path(fd, subvol);
+		if (err && err != -ENOENT)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the value of one column of @subv to stdout, without any
+ * separator or newline.  A zero otime and an all-zero uuid are shown
+ * as "-".  The path column requires full_path to be set.
+ */
+static void print_subvolume_column(struct root_info *subv,
+				   enum btrfs_list_column_enum column)
+{
+	char tstr[256];
+	char uuidparse[BTRFS_UUID_UNPARSED_SIZE];
+	const unsigned char *id = NULL;
+
+	BUG_ON(column >= BTRFS_LIST_ALL || column < 0);
+
+	switch (column) {
+	case BTRFS_LIST_OBJECTID:
+		printf("%llu", subv->root_id);
+		return;
+	case BTRFS_LIST_GENERATION:
+		printf("%llu", subv->gen);
+		return;
+	case BTRFS_LIST_OGENERATION:
+		printf("%llu", subv->ogen);
+		return;
+	case BTRFS_LIST_PARENT:
+		printf("%llu", subv->ref_tree);
+		return;
+	case BTRFS_LIST_TOP_LEVEL:
+		printf("%llu", subv->top_id);
+		return;
+	case BTRFS_LIST_OTIME:
+		if (!subv->otime) {
+			strcpy(tstr, "-");
+		} else {
+			struct tm tm;
+
+			localtime_r(&subv->otime, &tm);
+			strftime(tstr, 256, "%Y-%m-%d %X", &tm);
+		}
+		printf("%s", tstr);
+		return;
+	case BTRFS_LIST_PATH:
+		BUG_ON(!subv->full_path);
+		printf("%s", subv->full_path);
+		return;
+	/* the three uuid columns only differ in which uuid they show */
+	case BTRFS_LIST_UUID:
+		id = subv->uuid;
+		break;
+	case BTRFS_LIST_PUUID:
+		id = subv->puuid;
+		break;
+	case BTRFS_LIST_RUUID:
+		id = subv->ruuid;
+		break;
+	default:
+		return;
+	}
+
+	if (uuid_is_null(id))
+		strcpy(uuidparse, "-");
+	else
+		uuid_unparse(id, uuidparse);
+	printf("%s", uuidparse);
+}
+
+/*
+ * Print one line for @subv in the "raw" layout: each column whose need_print
+ * flag is set in btrfs_list_columns[] is written by print_subvolume_column(),
+ * preceded by @raw_prefix when that is not NULL.  The line ends with '\n'.
+ */
+static void print_single_volume_info_raw(struct root_info *subv, char *raw_prefix)
+{
+	int col = 0;
+
+	while (col < BTRFS_LIST_ALL) {
+		if (btrfs_list_columns[col].need_print) {
+			if (raw_prefix != NULL)
+				fputs(raw_prefix, stdout);
+
+			print_subvolume_column(subv, col);
+		}
+		col++;
+	}
+	putchar('\n');
+}
+
+/*
+ * Print one line for @subv in the "table" layout.  Each column whose
+ * need_print flag is set is written by print_subvolume_column() and then
+ * followed by a separator: nothing after the path column, two tabs after
+ * the top-level column, one tab after every other column.  The line ends
+ * with '\n'.
+ */
+static void print_single_volume_info_table(struct root_info *subv)
+{
+	int col;
+
+	for (col = 0; col < BTRFS_LIST_ALL; col++) {
+		if (btrfs_list_columns[col].need_print) {
+			print_subvolume_column(subv, col);
+
+			switch (col) {
+			case BTRFS_LIST_PATH:
+				break;
+			case BTRFS_LIST_TOP_LEVEL:
+				fputs("\t\t", stdout);
+				break;
+			default:
+				putchar('\t');
+				break;
+			}
+		}
+	}
+	putchar('\n');
+}
+
+/*
+ * Print one line for @subv in the default layout: for each column whose
+ * need_print flag is set, write the column name, a space, and the value
+ * (via print_subvolume_column()).  A trailing space follows every column
+ * except the path column.  The line ends with '\n'.
+ */
+static void print_single_volume_info_default(struct root_info *subv)
+{
+	int col = 0;
+
+	while (col < BTRFS_LIST_ALL) {
+		if (btrfs_list_columns[col].need_print) {
+			fputs(btrfs_list_columns[col].name, stdout);
+			putchar(' ');
+			print_subvolume_column(subv, col);
+
+			if (col != BTRFS_LIST_PATH)
+				putchar(' ');
+		}
+		col++;
+	}
+	putchar('\n');
+}
+
+/*
+ * Print the two header lines of the "table" layout to stdout.
+ *
+ * The first line holds the name of every column whose need_print flag is
+ * set, each followed by a tab.  The second line underlines each of those
+ * names with one '-' per character, again followed by a tab.  Both lines
+ * end with '\n'.
+ *
+ * The dashes are written directly instead of being assembled in a
+ * fixed-size scratch buffer, so a long column name cannot overflow
+ * anything.
+ */
+static void print_all_volume_info_tab_head(void)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_LIST_ALL; i++) {
+		if (btrfs_list_columns[i].need_print)
+			printf("%s\t", btrfs_list_columns[i].name);
+	}
+	printf("\n");
+
+	for (i = 0; i < BTRFS_LIST_ALL; i++) {
+		size_t len;
+
+		if (!btrfs_list_columns[i].need_print)
+			continue;
+
+		for (len = strlen(btrfs_list_columns[i].name); len > 0; len--)
+			putchar('-');
+		putchar('\t');
+	}
+	printf("\n");
+}
+
+/*
+ * Print every entry of @sorted_tree, walking it from rb_first() through
+ * rb_next() along the entries' sort_node links.
+ *
+ * @layout selects the per-entry printer: BTRFS_LIST_LAYOUT_DEFAULT,
+ * BTRFS_LIST_LAYOUT_TABLE (which also gets the table header printed first)
+ * or BTRFS_LIST_LAYOUT_RAW (which receives @raw_prefix).  Any other value
+ * prints nothing for the entries.
+ */
+static void print_all_volume_info(struct root_lookup *sorted_tree,
+				  int layout, char *raw_prefix)
+{
+	struct rb_node *node;
+
+	if (layout == BTRFS_LIST_LAYOUT_TABLE)
+		print_all_volume_info_tab_head();
+
+	for (node = rb_first(&sorted_tree->root); node; node = rb_next(node)) {
+		struct root_info *ri;
+
+		ri = rb_entry(node, struct root_info, sort_node);
+
+		if (layout == BTRFS_LIST_LAYOUT_DEFAULT)
+			print_single_volume_info_default(ri);
+		else if (layout == BTRFS_LIST_LAYOUT_TABLE)
+			print_single_volume_info_table(ri);
+		else if (layout == BTRFS_LIST_LAYOUT_RAW)
+			print_single_volume_info_raw(ri, raw_prefix);
+	}
+}
+
+/*
+ * Populate @root_lookup with the subvolumes reachable through @fd.
+ *
+ * This runs __list_subvol_search() and then __list_subvol_fill_paths().
+ * Returns 0 on success.  If the search fails, an error message is written
+ * to stderr and its return value is passed back.  If filling the paths
+ * fails, the entries collected so far are released before the error is
+ * returned, because the callers in this file bail out on a non-zero return
+ * without freeing @root_lookup themselves.
+ */
+static int btrfs_list_subvols(int fd, struct root_lookup *root_lookup)
+{
+	int ret;
+
+	ret = __list_subvol_search(fd, root_lookup);
+	if (ret) {
+		fprintf(stderr, "ERROR: can't perform the search - %s\n",
+				strerror(errno));
+		return ret;
+	}
+
+	/*
+	 * now we have an rbtree full of root_info objects, but we need to fill
+	 * in their path names within the subvol that is referencing each one.
+	 */
+	ret = __list_subvol_fill_paths(fd, root_lookup);
+	if (ret)
+		__free_all_subvolumn(root_lookup);
+
+	return ret;
+}
+
+/*
+ * List the subvolumes reachable through @fd and print them to stdout.
+ *
+ * @filter_set: passed to __filter_and_sort_subvol() to select entries
+ * @comp_set:   passed to __filter_and_sort_subvol() to order entries
+ * @layout:     one of the BTRFS_LIST_LAYOUT_* values, see
+ *              print_all_volume_info()
+ * @full_path:  when non-zero, the top id handed to the filter/sort step is
+ *              obtained from btrfs_list_get_path_rootid(); otherwise it is 0
+ * @raw_prefix: prefix forwarded to the raw layout printer
+ *
+ * Returns 0 on success, or the non-zero result of
+ * btrfs_list_get_path_rootid() / btrfs_list_subvols() when one of them
+ * fails.
+ */
+int btrfs_list_subvols_print(int fd, struct btrfs_list_filter_set *filter_set,
+		       struct btrfs_list_comparer_set *comp_set,
+		       int layout, int full_path, char *raw_prefix)
+{
+	struct root_lookup root_lookup;
+	struct root_lookup root_sort;
+	u64 top_id = 0;
+	int err;
+
+	if (full_path) {
+		err = btrfs_list_get_path_rootid(fd, &top_id);
+		if (err)
+			return err;
+	}
+
+	err = btrfs_list_subvols(fd, &root_lookup);
+	if (err)
+		return err;
+
+	__filter_and_sort_subvol(&root_lookup, &root_sort, filter_set,
+				 comp_set, top_id);
+	print_all_volume_info(&root_sort, layout, raw_prefix);
+
+	__free_all_subvolumn(&root_lookup);
+
+	return 0;
+}
+
+/*
+ * strdup() that accepts NULL: returns NULL for a NULL @s, otherwise
+ * whatever strdup(@s) returns.
+ */
+static char *strdup_or_null(const char *s)
+{
+	return s ? strdup(s) : NULL;
+}
+
+/*
+ * Find the subvolume described by @the_ri among those reachable through
+ * @fd and fill @the_ri with its details.
+ *
+ * @the_ri is used both as the search key (it is compared against every
+ * listed entry with comp_entry_with_rootid()) and as the output.  On a
+ * match, the part of struct root_info that precedes the path member is
+ * copied over it, and path, name and full_path are set to freshly
+ * strdup()ed copies (or NULL when the source string is NULL).
+ *
+ * Returns 0 when a match was found, -ENOENT when no listed entry matched,
+ * or the non-zero result of btrfs_list_get_path_rootid() /
+ * btrfs_list_subvols() when one of them fails.
+ */
+int btrfs_get_subvol(int fd, struct root_info *the_ri)
+{
+	int ret;
+	struct root_lookup rl;
+	struct rb_node *rbn;
+	struct root_info *ri;
+	u64 root_id;
+
+	ret = btrfs_list_get_path_rootid(fd, &root_id);
+	if (ret)
+		return ret;
+
+	ret = btrfs_list_subvols(fd, &rl);
+	if (ret)
+		return ret;
+
+	/*
+	 * Until a matching entry turns up the answer is "not found".  This
+	 * makes the result independent of whether some unrelated entry
+	 * failed to resolve along the way.
+	 */
+	ret = -ENOENT;
+	for (rbn = rb_first(&rl.root); rbn; rbn = rb_next(rbn)) {
+		ri = rb_entry(rbn, struct root_info, rb_node);
+
+		/* entries that cannot be resolved are skipped */
+		if (resolve_root(&rl, ri, root_id) == -ENOENT)
+			continue;
+
+		/* a non-zero comparison result means "not this one" */
+		if (comp_entry_with_rootid(the_ri, ri, 0))
+			continue;
+
+		/*
+		 * Copy everything up to the path member wholesale; the
+		 * strings are duplicated so that @the_ri does not keep
+		 * pointers into rl, which is released below.
+		 */
+		memcpy(the_ri, ri, offsetof(struct root_info, path));
+		the_ri->path = strdup_or_null(ri->path);
+		the_ri->name = strdup_or_null(ri->name);
+		the_ri->full_path = strdup_or_null(ri->full_path);
+		ret = 0;
+		break;
+	}
+	__free_all_subvolumn(&rl);
+	return ret;
+}