summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas D Steeves <nsteeves@gmail.com>2016-04-23 00:41:30 +0100
committerNicholas D Steeves <nsteeves@gmail.com>2016-04-23 00:41:30 +0100
commitcec572daccafa1e912cbed363df6f84687778c6f (patch)
tree7d99ab9f73d25c1ed8eaf6393f6374edf5316b03
btrfs-progs (4.4.1-1.1) unstable; urgency=medium
* Non-maintainer upload.
* New upstream release.
* Rename package to btrfs-progs (Closes: #780081)
* Update standards version to 3.9.7 (no changes needed).
* debian/control: Add "Breaks" per Gianfranco Costamagna's suggestion
* Change lintian override to reflect package rename
* Switch from using postinst and postrm to using triggers per Christian Seiler's recommendation.

# imported from the archive
-rw-r--r--.gitignore75
-rw-r--r--Android.mk111
-rw-r--r--COPYING341
-rw-r--r--Documentation/Makefile.in133
-rw-r--r--Documentation/asciidoc.conf48
-rw-r--r--Documentation/btrfs-balance.asciidoc217
-rw-r--r--Documentation/btrfs-check.asciidoc60
-rw-r--r--Documentation/btrfs-convert.asciidoc98
-rw-r--r--Documentation/btrfs-debug-tree.asciidoc38
-rw-r--r--Documentation/btrfs-device.asciidoc145
-rw-r--r--Documentation/btrfs-filesystem.asciidoc338
-rw-r--r--Documentation/btrfs-find-root.asciidoc35
-rw-r--r--Documentation/btrfs-image.asciidoc66
-rw-r--r--Documentation/btrfs-inspect-internal.asciidoc84
-rw-r--r--Documentation/btrfs-man5.asciidoc406
-rw-r--r--Documentation/btrfs-map-logical.asciidoc37
-rw-r--r--Documentation/btrfs-property.asciidoc73
-rw-r--r--Documentation/btrfs-qgroup.asciidoc145
-rw-r--r--Documentation/btrfs-quota.asciidoc56
-rw-r--r--Documentation/btrfs-receive.asciidoc67
-rw-r--r--Documentation/btrfs-replace.asciidoc76
-rw-r--r--Documentation/btrfs-rescue.asciidoc90
-rw-r--r--Documentation/btrfs-restore.asciidoc91
-rw-r--r--Documentation/btrfs-scrub.asciidoc94
-rw-r--r--Documentation/btrfs-select-super.asciidoc29
-rw-r--r--Documentation/btrfs-send.asciidoc63
-rw-r--r--Documentation/btrfs-show-super.asciidoc54
-rw-r--r--Documentation/btrfs-subvolume.asciidoc182
-rw-r--r--Documentation/btrfs.asciidoc127
-rw-r--r--Documentation/btrfstune.asciidoc75
-rw-r--r--Documentation/fsck.btrfs.asciidoc51
-rw-r--r--Documentation/manpage-base.xsl35
-rw-r--r--Documentation/manpage-bold-literal.xsl17
-rw-r--r--Documentation/manpage-normal.xsl13
-rw-r--r--Documentation/mkfs.btrfs.asciidoc341
-rw-r--r--INSTALL79
-rw-r--r--Makefile.extrawarn90
-rw-r--r--Makefile.in399
-rw-r--r--README.md41
-rw-r--r--androidcompat.h28
-rwxr-xr-xautogen.sh99
-rw-r--r--backref.c1651
-rw-r--r--backref.h73
-rwxr-xr-xbcp159
-rw-r--r--bitops.h224
-rw-r--r--btrfs-calc-size.c506
-rw-r--r--btrfs-completion152
-rw-r--r--btrfs-convert.c3206
-rw-r--r--btrfs-corrupt-block.c1310
-rw-r--r--btrfs-crc.c90
-rw-r--r--btrfs-debug-tree.c462
-rwxr-xr-xbtrfs-debugfs296
-rw-r--r--btrfs-find-root.c222
-rw-r--r--btrfs-fragments.c452
-rw-r--r--btrfs-image.c2880
-rw-r--r--btrfs-list.c1914
-rw-r--r--btrfs-list.h178
-rw-r--r--btrfs-map-logical.c364
-rw-r--r--btrfs-select-super.c108
-rw-r--r--btrfs-show-super.c542
-rw-r--r--btrfs-zero-log.c76
-rw-r--r--btrfs.c248
-rw-r--r--btrfsck.h203
-rw-r--r--btrfstune.c554
-rw-r--r--chunk-recover.c2407
-rw-r--r--cmds-balance.c857
-rw-r--r--cmds-check.c9844
-rw-r--r--cmds-device.c543
-rw-r--r--cmds-fi-usage.c1036
-rw-r--r--cmds-fi-usage.h57
-rw-r--r--cmds-filesystem.c1295
-rw-r--r--cmds-inspect.c644
-rw-r--r--cmds-property.c478
-rw-r--r--cmds-qgroup.c492
-rw-r--r--cmds-quota.c189
-rw-r--r--cmds-receive.c1306
-rw-r--r--cmds-replace.c561
-rw-r--r--cmds-rescue.c219
-rw-r--r--cmds-restore.c1612
-rw-r--r--cmds-scrub.c1803
-rw-r--r--cmds-send.c741
-rw-r--r--cmds-subvolume.c1347
-rw-r--r--commands.h134
-rw-r--r--config.h.in129
-rwxr-xr-xconfig/config.guess1568
-rwxr-xr-xconfig/config.sub1788
-rwxr-xr-xconfig/install-sh527
-rwxr-xr-xconfigure7246
-rw-r--r--configure.ac174
-rw-r--r--crc32c.c222
-rw-r--r--crc32c.h33
-rw-r--r--ctree.c2940
-rw-r--r--ctree.h2570
-rw-r--r--debian/btrfs-progs-udeb.install2
-rw-r--r--debian/btrfs-progs.install3
-rw-r--r--debian/btrfs-progs.lintian-overrides4
-rw-r--r--debian/btrfs-progs.triggers1
-rw-r--r--debian/changelog672
-rw-r--r--debian/compat1
-rw-r--r--debian/control65
-rw-r--r--debian/copyright41
-rw-r--r--debian/local/btrfs-lvm.udev6
-rw-r--r--debian/local/btrfs.hook29
-rw-r--r--debian/local/btrfs.local-premount23
-rw-r--r--debian/local/btrfs.udev7
-rwxr-xr-xdebian/rules32
-rw-r--r--debian/source/format1
-rw-r--r--debian/source/options2
-rw-r--r--debian/watch2
-rw-r--r--dir-item.c351
-rw-r--r--dir-test.c517
-rw-r--r--disk-io.c1750
-rw-r--r--disk-io.h166
-rw-r--r--extent-cache.c341
-rw-r--r--extent-cache.h121
-rw-r--r--extent-tree.c4122
-rw-r--r--extent_io.c892
-rw-r--r--extent_io.h159
-rw-r--r--file-item.c486
-rw-r--r--file.c162
-rw-r--r--find-root.c150
-rw-r--r--find-root.h87
-rw-r--r--free-space-cache.c878
-rw-r--r--free-space-cache.h62
-rw-r--r--free-space-tree.c273
-rw-r--r--free-space-tree.h25
-rwxr-xr-xfsck.btrfs39
-rw-r--r--hash.h28
-rw-r--r--hasher.c42
-rw-r--r--help.c271
-rw-r--r--inode-item.c460
-rw-r--r--inode-map.c100
-rw-r--r--inode.c536
-rw-r--r--interval_tree_generic.h193
-rw-r--r--ioctl-test.c37
-rw-r--r--ioctl.h716
-rw-r--r--kerncompat.h374
-rw-r--r--library-test.c69
-rw-r--r--list.h486
-rw-r--r--list_sort.c144
-rw-r--r--list_sort.h14
-rw-r--r--mkfs.c1844
-rw-r--r--print-tree.c1137
-rw-r--r--print-tree.h27
-rw-r--r--props.c198
-rw-r--r--props.h43
-rw-r--r--qgroup-verify.c1179
-rw-r--r--qgroup-verify.h30
-rw-r--r--qgroup.c1390
-rw-r--r--qgroup.h101
-rw-r--r--quick-test.c221
-rw-r--r--radix-tree.c849
-rw-r--r--radix-tree.h97
-rw-r--r--raid6.c101
-rw-r--r--random-test.c433
-rw-r--r--rbtree-utils.c82
-rw-r--r--rbtree-utils.h53
-rw-r--r--rbtree.c548
-rw-r--r--rbtree.h118
-rw-r--r--rbtree_augmented.h249
-rw-r--r--repair.c50
-rw-r--r--repair.h34
-rw-r--r--root-tree.c192
-rw-r--r--send-stream.c499
-rw-r--r--send-stream.h80
-rw-r--r--send-test.c447
-rw-r--r--send-utils.c771
-rw-r--r--send-utils.h108
-rw-r--r--send.h147
-rwxr-xr-xshow-blocks325
-rw-r--r--string-table.c151
-rw-r--r--string-table.h34
-rw-r--r--super-recover.c336
-rw-r--r--task-utils.c143
-rw-r--r--task-utils.h47
-rw-r--r--tests/README.md136
-rwxr-xr-xtests/clean-tests.sh24
-rw-r--r--tests/common240
-rw-r--r--tests/convert-tests.sh86
-rwxr-xr-xtests/fsck-tests.sh68
-rw-r--r--tests/fsck-tests/001-bad-file-extent-bytenr/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/002-bad-transid/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/003-shift-offsets/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/004-no-dir-index/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/005-bad-item-offset/default_case.imgbin0 -> 398336 bytes
-rw-r--r--tests/fsck-tests/006-bad-root-items/default_case.tar.xzbin0 -> 24980 bytes
-rw-r--r--tests/fsck-tests/006-bad-root-items/skinny_case.tar.xzbin0 -> 26520 bytes
-rwxr-xr-xtests/fsck-tests/006-bad-root-items/test.sh17
-rw-r--r--tests/fsck-tests/007-bad-offset-snapshots/default_case.imgbin0 -> 249856 bytes
-rw-r--r--tests/fsck-tests/008-bad-dir-index-name/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/009-no-dir-item-or-index/default_case.imgbin0 -> 4953088 bytes
-rw-r--r--tests/fsck-tests/010-no-rootdir-inode-item/default_case.imgbin0 -> 398336 bytes
-rw-r--r--tests/fsck-tests/011-no-inode-item/default_case.imgbin0 -> 398336 bytes
-rw-r--r--tests/fsck-tests/012-leaf-corruption/no_data_extent.tar.xzbin0 -> 130260 bytes
-rwxr-xr-xtests/fsck-tests/012-leaf-corruption/test.sh117
-rwxr-xr-xtests/fsck-tests/013-extent-tree-rebuild/test.sh44
-rw-r--r--tests/fsck-tests/014-no-extent-info/default_case.imgbin0 -> 4096 bytes
-rw-r--r--tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.txt254
-rw-r--r--tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.xzbin0 -> 6748 bytes
-rw-r--r--tests/fsck-tests/016-wrong-inode-nbytes/default_case.img.xzbin0 -> 1996 bytes
-rw-r--r--tests/fsck-tests/017-missing-all-file-extent/default_case.img.xzbin0 -> 1104 bytes
-rw-r--r--tests/fsck-tests/018-leaf-crossing-stripes/default_case.raw.xzbin0 -> 105064 bytes
-rwxr-xr-xtests/fsck-tests/018-leaf-crossing-stripes/test.sh12
-rw-r--r--tests/fsck-tests/019-non-skinny-false-alert/default_case.img.xzbin0 -> 15236 bytes
-rwxr-xr-xtests/fsck-tests/019-non-skinny-false-alert/test.sh23
-rwxr-xr-xtests/fuzz-tests.sh41
-rwxr-xr-xtests/fuzz-tests/001-simple-unmounted/test.sh20
-rw-r--r--tests/fuzz-tests/images/bad-superblock-1.raw.xzbin0 -> 228 bytes
-rw-r--r--tests/fuzz-tests/images/bad-superblock-2.raw.xzbin0 -> 228 bytes
-rw-r--r--tests/fuzz-tests/images/bad-superblock-3.raw.xzbin0 -> 228 bytes
-rw-r--r--tests/fuzz-tests/images/bad-superblock.txt17
-rw-r--r--tests/fuzz-tests/images/bko-104131-fsck-oob-read.raw.xzbin0 -> 192 bytes
-rw-r--r--tests/fuzz-tests/images/bko-104131-fsck-oob-read.txt31
-rw-r--r--tests/fuzz-tests/images/bko-104141-fsck-exception.raw.xzbin0 -> 196 bytes
-rw-r--r--tests/fuzz-tests/images/bko-104141-fsck-exception.txt9
-rw-r--r--tests/fuzz-tests/images/bko-97191-btrfs-image.raw.txt137
-rw-r--r--tests/fuzz-tests/images/bko-97191-btrfs-image.raw.xzbin0 -> 7076 bytes
-rw-r--r--tests/fuzz-tests/images/bko-97271-btrfs-image.raw.txt54
-rw-r--r--tests/fuzz-tests/images/bko-97271-btrfs-image.raw.xzbin0 -> 6580 bytes
-rw-r--r--tests/fuzz-tests/images/sys-array-num-stripes-0.raw.txt30
-rw-r--r--tests/fuzz-tests/images/sys-array-num-stripes-0.raw.xzbin0 -> 8364 bytes
-rwxr-xr-xtests/misc-tests.sh47
-rwxr-xr-xtests/misc-tests/001-btrfstune-features/test.sh46
-rwxr-xr-xtests/misc-tests/002-uuid-rewrite/test.sh66
-rwxr-xr-xtests/misc-tests/003-zero-log/test.sh52
-rwxr-xr-xtests/misc-tests/004-shrink-fs/test.sh72
-rwxr-xr-xtests/misc-tests/005-convert-progress-thread-crash/test.sh15
-rwxr-xr-xtests/misc-tests/006-image-on-missing-device/test.sh78
-rwxr-xr-xtests/misc-tests/007-subvolume-sync/test.sh32
-rwxr-xr-xtests/misc-tests/008-leaf-crossing-stripes/test.sh25
-rwxr-xr-xtests/misc-tests/009-subvolume-sync-must-wait/test.sh52
-rwxr-xr-xtests/misc-tests/010-convert-delete-ext2-subvol/test.sh26
-rwxr-xr-xtests/misc-tests/011-delete-missing-device/test.sh83
-rw-r--r--tests/misc-tests/012-find-root-no-result/first_meta_chunk.btrfs-imagebin0 -> 4096 bytes
-rwxr-xr-xtests/misc-tests/012-find-root-no-result/test.sh24
-rwxr-xr-xtests/mkfs-tests.sh44
-rwxr-xr-xtests/mkfs-tests/001-basic-profiles/test.sh91
-rwxr-xr-xtests/mkfs-tests/002-no-force-mixed-on-small-volume/test.sh13
-rwxr-xr-xtests/mkfs-tests/003-mixed-with-wrong-nodesize/test.sh12
-rwxr-xr-xtests/mkfs-tests/004-rootdir-keeps-size/test.sh31
-rwxr-xr-xtests/mkfs-tests/005-long-device-name-for-ssd/test.sh40
-rw-r--r--tests/mkfs-tests/006-partitioned-loopdev/partition-1g-1gbin0 -> 512 bytes
-rwxr-xr-xtests/mkfs-tests/006-partitioned-loopdev/test.sh27
-rwxr-xr-xtests/mkfs-tests/007-mix-nodesize-sectorsize/test.sh36
-rwxr-xr-xtests/mkfs-tests/008-secorsize-nodesize-combination/test.sh50
-rw-r--r--transaction.h64
-rw-r--r--ulist.c253
-rw-r--r--ulist.h81
-rw-r--r--utils-lib.c40
-rw-r--r--utils.c3150
-rw-r--r--utils.h338
-rw-r--r--uuid-tree.c105
-rw-r--r--version.h.in14
-rwxr-xr-xversion.sh37
-rw-r--r--volumes.c2133
-rw-r--r--volumes.h229
256 files changed, 99051 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..a27cb0d9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,75 @@
+*.o
+*.static.o
+*.o.d
+tags
+.cc-defines.h
+version.h
+version
+man/*.gz
+Documentation/*.gz
+Documentation/*.html
+btrfs
+btrfs.static
+btrfs-debug-tree
+btrfs-map-logical
+btrfs-fragments
+btrfsck
+calc-size
+ioctl-test
+dir-test
+send-test
+quick-test
+find-root
+mkfs.btrfs
+mkfs.btrfs.static
+repair
+restore
+btrfs-convert
+btrfs-find-root
+btrfs-find-root.static
+btrfs-image
+btrfs-show-super
+btrfs-zero-log
+btrfs-corrupt-block
+btrfs-select-super
+btrfs-calc-size
+btrfstune
+libbtrfs.a
+libbtrfs.so
+libbtrfs.so.0
+libbtrfs.so.0.1
+library-test
+library-test-static
+
+/tests/*-tests-results.txt
+/tests/test.img
+
+aclocal.m4
+autom4te.cache
+compile
+config.cache
+config.guess
+config.h
+config.h.in
+config.h.in~
+config.log
+config.rpath
+config.status
+config.sub
+config/ltmain.sh
+config/py-compile
+config/test-driver
+configure
+cscope.out
+depcomp
+libtool
+m4/*.m4
+Makefile
+Documentation/Makefile
+missing
+mkinstalldirs
+stamp-h
+stamp-h.in
+stamp-h1
+config/*
+
diff --git a/Android.mk b/Android.mk
new file mode 100644
index 00000000..fe3209b6
--- /dev/null
+++ b/Android.mk
@@ -0,0 +1,111 @@
+LOCAL_PATH:= $(call my-dir)
+
+#include $(call all-subdir-makefiles)
+# Base compiler flags; STATIC_CFLAGS below is derived from them.
+CFLAGS := -g -O1 -Wall -D_FORTIFY_SOURCE=2 -include config.h \
+ -DBTRFS_FLAT_INCLUDES -D_XOPEN_SOURCE=700 -fno-strict-aliasing -fPIC
+# NOTE(review): LDFLAGS is not referenced by any module in this file.
+LDFLAGS := -static -rdynamic
+# LIBBTRFS_LIBS is only referenced from commented-out LOCAL_LDLIBS lines below.
+LIBS := -luuid -lblkid -lz -llzo2 -L. -lpthread
+LIBBTRFS_LIBS := $(LIBS)
+# STATIC_CFLAGS feeds every module's LOCAL_CFLAGS; STATIC_LDFLAGS appears only in commented-out lines; STATIC_LIBS is unreferenced here.
+STATIC_CFLAGS := $(CFLAGS) -ffunction-sections -fdata-sections
+STATIC_LDFLAGS := -static -Wl,--gc-sections
+STATIC_LIBS := -luuid -lblkid -luuid -lz -llzo2 -L. -pthread
+# Shared libraries assigned to LOCAL_SHARED_LIBRARIES by every executable module below.
+btrfs_shared_libraries := libext2_uuid \
+ libext2_blkid
+# Source lists: despite the *_objects names, these are .c files passed to LOCAL_SRC_FILES.
+objects := ctree.c disk-io.c radix-tree.c extent-tree.c print-tree.c \
+ root-tree.c dir-item.c file-item.c inode-item.c inode-map.c \
+ extent-cache.c extent_io.c volumes.c utils.c repair.c \
+ qgroup.c raid6.c free-space-cache.c list_sort.c props.c \
+ ulist.c qgroup-verify.c backref.c string-table.c task-utils.c \
+ inode.c file.c find-root.c
+cmds_objects := cmds-subvolume.c cmds-filesystem.c cmds-device.c cmds-scrub.c \
+ cmds-inspect.c cmds-balance.c cmds-send.c cmds-receive.c \
+ cmds-quota.c cmds-qgroup.c cmds-replace.c cmds-check.c \
+ cmds-restore.c cmds-rescue.c chunk-recover.c super-recover.c \
+ cmds-property.c cmds-fi-usage.c
+libbtrfs_objects := send-stream.c send-utils.c rbtree.c btrfs-list.c crc32c.c \
+ uuid-tree.c utils-lib.c rbtree-utils.c
+libbtrfs_headers := send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \
+ crc32c.h list.h kerncompat.h radix-tree.h extent-cache.h \
+ extent_io.h ioctl.h ctree.h btrfsck.h version.h
+TESTS := fsck-tests.sh convert-tests.sh
+blkid_objects := partition/ superblocks/ topology/
+# NOTE(review): libbtrfs_headers, TESTS and blkid_objects are defined but not referenced in this file.
+
+# external/e2fsprogs/lib is needed for uuid/uuid.h
+common_C_INCLUDES := $(LOCAL_PATH) external/e2fsprogs/lib/ external/lzo/include/ external/zlib/
+
+#---------- libbtrfs: static library built from $(libbtrfs_objects) ----------
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := $(libbtrfs_objects)
+LOCAL_CFLAGS := $(STATIC_CFLAGS)
+LOCAL_MODULE := libbtrfs
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+include $(BUILD_STATIC_LIBRARY)
+
+#---------- btrfs executable: $(objects) + $(cmds_objects) + btrfs.c + help.c ----------
+include $(CLEAR_VARS)
+LOCAL_MODULE := btrfs
+#LOCAL_FORCE_STATIC_EXECUTABLE := true
+LOCAL_SRC_FILES := \
+ $(objects) \
+ $(cmds_objects) \
+ btrfs.c \
+ help.c \
+
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_CFLAGS := $(STATIC_CFLAGS)
+#LOCAL_LDLIBS := $(LIBBTRFS_LIBS)
+#LOCAL_LDFLAGS := $(STATIC_LDFLAGS)
+LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries)
+LOCAL_STATIC_LIBRARIES := libbtrfs liblzo-static libz
+LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils
+
+LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES)
+#LOCAL_MODULE_TAGS := optional
+include $(BUILD_EXECUTABLE)
+
+#---------- mkfs.btrfs executable: $(objects) + mkfs.c ----------
+include $(CLEAR_VARS)
+LOCAL_MODULE := mkfs.btrfs
+LOCAL_SRC_FILES := \
+ $(objects) \
+ mkfs.c
+
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_CFLAGS := $(STATIC_CFLAGS)
+#LOCAL_LDLIBS := $(LIBBTRFS_LIBS)
+#LOCAL_LDFLAGS := $(STATIC_LDFLAGS)
+LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries)
+LOCAL_STATIC_LIBRARIES := libbtrfs liblzo-static
+LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils
+
+LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES)
+#LOCAL_MODULE_TAGS := optional
+include $(BUILD_EXECUTABLE)
+
+#---------- btrfstune executable: $(objects) + btrfstune.c ----------
+include $(CLEAR_VARS)
+LOCAL_MODULE := btrfstune
+LOCAL_SRC_FILES := \
+ $(objects) \
+ btrfstune.c
+
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_CFLAGS := $(STATIC_CFLAGS)
+# Link settings; LOCAL_SHARED_LIBRARIES is assigned once, below, as in the btrfs and mkfs.btrfs modules.
+#LOCAL_LDLIBS := $(LIBBTRFS_LIBS)
+#LOCAL_LDFLAGS := $(STATIC_LDFLAGS)
+LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries)
+LOCAL_STATIC_LIBRARIES := libbtrfs liblzo-static
+LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils
+
+LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_MODULE_TAGS := optional
+include $(BUILD_EXECUTABLE)
+#--------------------------------------------------------------
diff --git a/COPYING b/COPYING
new file mode 100644
index 00000000..10828e06
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,341 @@
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/Documentation/Makefile.in b/Documentation/Makefile.in
new file mode 100644
index 00000000..f046abd5
--- /dev/null
+++ b/Documentation/Makefile.in
@@ -0,0 +1,133 @@
+# Guard against environment variables
+MAN8_TXT =
+
+# Top level commands
+MAN8_TXT += btrfs.asciidoc
+MAN8_TXT += btrfs-convert.asciidoc
+MAN8_TXT += btrfs-debug-tree.asciidoc
+MAN8_TXT += btrfs-find-root.asciidoc
+MAN8_TXT += btrfs-image.asciidoc
+MAN8_TXT += btrfs-map-logical.asciidoc
+MAN8_TXT += btrfs-show-super.asciidoc
+MAN8_TXT += btrfs-select-super.asciidoc
+MAN8_TXT += btrfstune.asciidoc
+MAN8_TXT += fsck.btrfs.asciidoc
+MAN8_TXT += mkfs.btrfs.asciidoc
+
+# Sub commands for btrfs
+MAN8_TXT += btrfs-subvolume.asciidoc
+MAN8_TXT += btrfs-filesystem.asciidoc
+MAN8_TXT += btrfs-balance.asciidoc
+MAN8_TXT += btrfs-device.asciidoc
+MAN8_TXT += btrfs-scrub.asciidoc
+MAN8_TXT += btrfs-check.asciidoc
+MAN8_TXT += btrfs-rescue.asciidoc
+MAN8_TXT += btrfs-inspect-internal.asciidoc
+MAN8_TXT += btrfs-send.asciidoc
+MAN8_TXT += btrfs-receive.asciidoc
+MAN8_TXT += btrfs-quota.asciidoc
+MAN8_TXT += btrfs-qgroup.asciidoc
+MAN8_TXT += btrfs-replace.asciidoc
+MAN8_TXT += btrfs-restore.asciidoc
+MAN8_TXT += btrfs-property.asciidoc
+
+# Category 5 manual page
+MAN5_TXT += btrfs-man5.asciidoc
+
+MAN_TXT = $(MAN8_TXT) $(MAN5_TXT)
+MAN_XML = $(patsubst %.asciidoc,%.xml,$(MAN_TXT))
+MAN_HTML = $(patsubst %.asciidoc,%.html,$(MAN_TXT))
+
+DOC_MAN5 = $(patsubst %.asciidoc,%.5,$(MAN5_TXT))
+GZ_MAN5 = $(patsubst %.asciidoc,%.5.gz,$(MAN5_TXT))
+
+DOC_MAN8 = $(patsubst %.asciidoc,%.8,$(MAN8_TXT))
+GZ_MAN8 = $(patsubst %.asciidoc,%.8.gz,$(MAN8_TXT))
+
+mandir ?= $(prefix)/share/man
+man8dir = $(mandir)/man8
+man5dir = $(mandir)/man5
+
+ASCIIDOC = @ASCIIDOC@
+ASCIIDOC_EXTRA =
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO = @XMLTO@
+XMLTO_EXTRA =
+XMLTO_EXTRA = -m manpage-bold-literal.xsl
+GZIPCMD = @GZIP@
+INSTALL = @INSTALL@
+RM = @RM@
+RMDIR = @RMDIR@
+LN_S = @LN_S@
+MV = @MV@
+SED = @SED@
+BTRFS_VERSION = $(shell $(SED) -n 's/.*PACKAGE_VERSION "\(.*\)"/\1/p'\
+ ../config.h)
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+ QUIET_RM = @
+ QUIET_ASCIIDOC = @echo " [ASCII] $@";
+ QUIET_XMLTO = @echo " [XMLTO] $@";
+ QUIET_GZIP = @echo " [GZ] $@";
+ QUIET_STDERR = 2> /dev/null
+ QUIET_SUBDIR0 = +@subdir=
+ QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
+ $(MAKE) $(PRINT_DIR) -C $$subdir
+ export V
+endif
+endif
+
+all: man
+man: man5 man8
+man5: $(GZ_MAN5)
+man8: $(GZ_MAN8)
+html: $(MAN_HTML)
+
+install: install-man
+
+install-man: man
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
+ $(INSTALL) -m 644 $(GZ_MAN5) $(DESTDIR)$(man5dir)
+ # the source file name of btrfs.5 clashes with section 8 page, but we
+ # want to keep the code generic
+ $(MV) $(DESTDIR)$(man5dir)/btrfs-man5.5.gz $(DESTDIR)$(man5dir)/btrfs.5.gz
+ $(INSTALL) -m 644 $(GZ_MAN8) $(DESTDIR)$(man8dir)
+ $(LN_S) -f btrfs-check.8.gz $(DESTDIR)$(man8dir)/btrfsck.8.gz
+ $(LN_S) -f btrfs-rescue.8.gz $(DESTDIR)$(man8dir)/btrfs-zero-log.8.gz
+
+uninstall:
+ cd $(DESTDIR)$(man8dir); rm -f btrfs-check.8.gz $(GZ_MAN8)
+ $(RMDIR) -p --ignore-fail-on-non-empty $(DESTDIR)$(man8dir)
+
+clean:
+ $(QUIET_RM)$(RM) -f *.xml *.xml+ *.5 *.5.gz *.8 *.8.gz *.html
+
+%.5.gz : %.5
+ $(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@
+
+%.8.gz : %.8
+ $(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@
+
+%.5 : %.xml
+ $(QUIET_XMLTO)$(RM) -f $@ && \
+ $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+%.8 : %.xml
+ $(QUIET_XMLTO)$(RM) -f $@ && \
+ $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+%.xml : %.asciidoc asciidoc.conf
+ $(QUIET_ASCIIDOC)$(RM) -f $@+ $@ && \
+ $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+ $(ASCIIDOC_EXTRA) -abtrfs_version=$(BTRFS_VERSION) \
+ -o $@+ $< && \
+ $(MV) $@+ $@
+
+%.html : %.asciidoc asciidoc.conf
+ $(QUIET_ASCIIDOC)$(RM) -f $@+ $@ && \
+ $(ASCIIDOC) -b html -d article -f asciidoc.conf \
+ $(ASCIIDOC_EXTRA) -abtrfs_version=$(BTRFS_VERSION) \
+ -o $@+ $< && \
+ $(MV) $@+ $@
diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf
new file mode 100644
index 00000000..1ea74591
--- /dev/null
+++ b/Documentation/asciidoc.conf
@@ -0,0 +1,48 @@
+## linkbtrfs: macro
+#
+# Usage: linkbtrfs:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show Btrfslink as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linkbtrfs):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[tags]
+bracket-emphasis={1?[{1}]}<emphasis>&lt;|&gt;</emphasis>
+
+[quotes]
+&lt;|&gt;=#bracket-emphasis
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+backslash=&#92;
+tilde=&#126;
+apostrophe=&#39;
+backtick=&#96;
+litdd=&#45;&#45;
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">Btrfs</refmiscinfo>
+<refmiscinfo class="version">{btrfs_version}</refmiscinfo>
+<refmiscinfo class="manual">Btrfs Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+ <refname>{manname}</refname>
+ <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
diff --git a/Documentation/btrfs-balance.asciidoc b/Documentation/btrfs-balance.asciidoc
new file mode 100644
index 00000000..c8407419
--- /dev/null
+++ b/Documentation/btrfs-balance.asciidoc
@@ -0,0 +1,217 @@
+btrfs-balance(8)
+================
+
+NAME
+----
+btrfs-balance - balance block groups on a btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs balance* <subcommand> <args>
+
+DESCRIPTION
+-----------
+The primary purpose of the balance feature is to spread block groups across
+all devices so they match constraints defined by the respective profiles. See
+`mkfs.btrfs`(8) section 'PROFILES' for more details.
+The scope of the balancing process can be further tuned by use of filters that
+can select the block groups to process. Balance works only on a mounted
+filesystem.
+
+The balance operation is cancellable by the user. The on-disk state of the
+filesystem is always consistent so an unexpected interruption (eg. system crash,
+reboot) does not corrupt the filesystem. The progress of the balance operation
+is temporarily stored and will be resumed upon mount, unless the mount option
+'skip_balance' is specified.
+
+WARNING: running balance without filters will take a lot of time as it basically
+rewrites the entire filesystem and needs to update all block pointers.
+
+The filters can be used to perform following actions:
+
+- convert block group profiles (filter 'convert')
+- make block group usage more compact (filter 'usage')
+- perform actions only on a given device (filters 'devid', 'drange')
+
+The filters can be applied to a combination of block group types (data,
+metadata, system). Note that changing 'system' needs the force option.
+
+NOTE: the balance operation needs enough work space, ie. space that is
+completely unused in the filesystem, otherwise this may lead to ENOSPC reports.
+See the section 'ENOSPC' for more details.
+
+COMPATIBILITY
+-------------
+
+NOTE: The balance subcommand also exists under the *btrfs filesystem*
+namespace. This still works for backward compatibility but is deprecated and
+should not be used anymore.
+
+NOTE: A short syntax *btrfs balance <path>* works due to backward compatibility
+but is deprecated and should not be used anymore. Use *btrfs balance start*
+command instead.
+
+SUBCOMMAND
+----------
+*cancel* <path>::
+cancel running or paused balance
+
+*pause* <path>::
+pause running balance operation, this will store the state of the balance
+progress and used filters to the filesystem
+
+*resume* <path>::
+resume interrupted balance
+
+*start* [options] <path>::
+start the balance operation according to the specified filters, no filters
+will rewrite the entire filesystem. The process runs in the foreground.
++
+`Options`
++
+-d[<filters>]::::
+act on data block groups, see `FILTERS` section for details about 'filters'
+-m[<filters>]::::
+act on metadata chunks, see `FILTERS` section for details about 'filters'
+-s[<filters>]::::
+act on system chunks (requires '-f'), see `FILTERS` section for details about 'filters'.
+-v::::
+be verbose and print balance filter arguments
+-f::::
+force reducing of metadata integrity, eg. when going from 'raid1' to 'single'
+
+*status* [-v] <path>::
+Show status of running or paused balance.
++
+If '-v' option is given, output will be verbose.
+
+FILTERS
+-------
+From kernel 3.3 onwards, btrfs balance can limit its action to a subset of the
+full filesystem, and can be used to change the replication configuration (e.g.
+moving data from single to RAID1). This functionality is accessed through the
+'-d', '-m' or '-s' options to btrfs balance start, which filter on data,
+metadata and system blocks respectively.
+
+A filter has the following structure: 'type'[='params'][,'type'=...]
+
+The available types are:
+
+*profiles=<profiles>*::
+Balances only block groups with the given profiles. Parameters
+are a list of profile names separated by "'|'" (pipe).
+
+*usage=<percent>*::
+*usage=<range>*::
+Balances only block groups with usage under the given percentage. The
+value of 0 is allowed and will clean up completely unused block groups, this
+should not require any new work space allocated. You may want to use 'usage=0'
+in case balance is returning ENOSPC and your filesystem is not too full.
++
+The argument may be a single value or a range. The single value 'N' means 'at
+most N percent used', equivalent to '..N' range syntax. Kernels prior to 4.4
+accept only the single value format.
+The minimum range boundary is inclusive, maximum is exclusive.
+
+*devid=<id>*::
+Balances only block groups which have at least one chunk on the given
+device. To list devices with ids use *btrfs fi show*.
+
+*drange=<range>*::
+Balance only block groups which overlap with the given byte range on any
+device. Use in conjunction with 'devid' to filter on a specific device. The
+parameter is a range specified as 'start..end'.
+
+*vrange=<range>*::
+Balance only block groups which overlap with the given byte range in the
+filesystem's internal virtual address space. This is the address space that
+most reports from btrfs in the kernel log use. The parameter is a range
+specified as 'start..end'.
+
+*convert=<profile>*::
+Convert each selected block group to the given profile name identified by
+parameters.
++
+NOTE: starting with kernel 4.5, the 'data' chunks can be converted to/from the
+'DUP' profile on a single device.
+
+*limit=<number>*::
+*limit=<range>*::
+Process only given number of chunks, after all filters are applied. This can be
+used to specifically target a chunk in connection with other filters ('drange',
+'vrange') or just simply limit the amount of work done by a single balance run.
++
+The argument may be a single value or a range. The single value 'N' means 'at
+most N chunks', equivalent to '..N' range syntax. Kernels prior to 4.4 accept
+only the single value format. The range minimum and maximum are inclusive.
+
+*stripes=<range>*::
+Balance only block groups which have the given number of stripes. The parameter
+is a range specified as 'start..end'. Makes sense for block group profiles that
+utilize striping, ie. RAID0/10/5/6. The range minimum and maximum are
+inclusive.
+
+*soft*::
+Takes no parameters. Only has meaning when converting between profiles.
+When doing convert from one profile to another and soft mode is on,
+chunks that already have the target profile are left untouched.
+This is useful e.g. when half of the filesystem was converted earlier but got
+cancelled.
++
+The soft mode switch is (like every other filter) per-type.
+For example, this means that we can convert metadata chunks the "hard" way
+while converting data chunks selectively with soft switch.
+
+Profile names, used in 'profiles' and 'convert' are one of: 'raid0', 'raid1',
+'raid10', 'raid5', 'raid6', 'dup', 'single'. The mixed data/metadata profiles
+can be converted in the same way, but conversion between mixed and non-mixed
+is not implemented. For the constraints of the profiles please refer to `mkfs.btrfs`(8),
+section 'PROFILES'.
+
+ENOSPC
+------
+
+The way balance operates, it usually needs to temporarily create a new block
+group and move the old data there. For that it needs work space, otherwise
+it fails for ENOSPC reasons.
+This is not the same ENOSPC as if the free space is exhausted. This refers to
+the space on the level of block groups.
+
+The free work space can be calculated from the output of the *btrfs filesystem show*
+command:
+
+------------------------------
+ Label: 'BTRFS' uuid: 8a9d72cd-ead3-469d-b371-9c7203276265
+ Total devices 2 FS bytes used 77.03GiB
+ devid 1 size 53.90GiB used 51.90GiB path /dev/sdc2
+ devid 2 size 53.90GiB used 51.90GiB path /dev/sde1
+------------------------------
+
+'size' - 'used' = 'free work space' +
+'53.90GiB' - '51.90GiB' = '2.00GiB'
+
+An example of a filter that does not require workspace is 'usage=0'. This will
+scan through all unused block groups of a given type and will reclaim the
+space. After that it might be possible to run other filters.
+
+**CONVERSIONS ON MULTIPLE DEVICES**
+
+Conversion to profiles based on striping (RAID0, RAID5/6) require the work
+space on each device. An interrupted balance may leave partially filled block
+groups that might consume the work space.
+
+EXIT STATUS
+-----------
+*btrfs balance* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-device`(8)
diff --git a/Documentation/btrfs-check.asciidoc b/Documentation/btrfs-check.asciidoc
new file mode 100644
index 00000000..327a45d6
--- /dev/null
+++ b/Documentation/btrfs-check.asciidoc
@@ -0,0 +1,60 @@
+btrfs-check(8)
+==============
+
+NAME
+----
+btrfs-check - check or repair an unmounted btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs check* [options] <device>
+
+DESCRIPTION
+-----------
+*btrfs check* is used to check or repair an unmounted btrfs filesystem.
+
+NOTE: Since btrfs is under development, the *btrfs check* capabilities are
+continuously enhanced. It's highly recommended to read the following btrfs
+wiki before executing *btrfs check* with '--repair' option: +
+https://btrfs.wiki.kernel.org/index.php/Btrfsck
+
+*btrfsck* is an alias of *btrfs check* command and is now deprecated.
+
+OPTIONS
+-------
+-s|--super <superblock>::
+use <superblock>th superblock copy, valid values are 0 up to 2 if the
+respective superblock offset is within the filesystem
+--repair::
+try to repair the filesystem
+--init-csum-tree::
+create a new CRC tree and recalculate all checksums
+--init-extent-tree::
+create a new extent tree
+--check-data-csum::
+verify checksums of data blocks
+-p|--progress::
+indicate progress at various checking phases
+--qgroup-report::
+verify qgroup accounting and compare against filesystem accounting
+--subvol-extents <subvolid>::
+show extent state for a subvolume
+--tree-root <bytenr>::
+use the given bytenr for the tree root
+
+EXIT STATUS
+-----------
+*btrfs check* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-scrub`(8),
+`btrfs-rescue`(8)
diff --git a/Documentation/btrfs-convert.asciidoc b/Documentation/btrfs-convert.asciidoc
new file mode 100644
index 00000000..ca3417f4
--- /dev/null
+++ b/Documentation/btrfs-convert.asciidoc
@@ -0,0 +1,98 @@
+btrfs-convert(8)
+================
+
+NAME
+----
+btrfs-convert - convert from ext2/3/4 filesystem to btrfs
+
+SYNOPSIS
+--------
+*btrfs-convert* [options] <device>
+
+DESCRIPTION
+-----------
+*btrfs-convert* is used to convert existing ext2/3/4 filesystem image to a
+btrfs filesystem in-place. The original filesystem image is accessible in the
+subvolume named 'ext2_saved' as file 'image'.
+
+WARNING: If you are going to perform rollback to ext2/3/4, you should not
+execute *btrfs balance* command on the converted filesystem. This will change
+the extent layout and make *btrfs-convert* unable to rollback.
+
+The conversion utilizes free space of the original filesystem. The exact
+estimate of the required space cannot be foretold. The final btrfs metadata
+might occupy several gigabytes on a hundreds-gigabyte filesystem.
+
+If you decide not to rollback anymore, it is recommended to perform a few more
+steps to transform the btrfs filesystem to a more compact layout. The
+conversion inherits the original data block fragmentation and the metadata
+blocks are bound to the original free space layout.
+
+**REMOVE THE ORIGINAL FILESYSTEM METADATA**
+
+By removing the 'ext2_saved' subvolume, all metadata of the original filesystem
+will be removed:
+
+ # btrfs subvolume delete /mnt/ext2_saved
+
+At this point it's not possible to do rollback. The filesystem is usable but may
+be impacted by the fragmentation.
+
+**MAKE FILE DATA MORE CONTIGUOUS**
+
+An optional but recommended step is to run defragmentation on the entire
+filesystem. This will attempt to make file extents more contiguous.
+
+ # btrfs filesystem defrag -v -r -f -t 32M /mnt/btrfs
+
+Verbose recursive defragmentation ('-v', '-r'), flush data per-file ('-f') with target
+extent size 32M ('-t').
+
+**ATTEMPT TO MAKE BTRFS METADATA MORE COMPACT**
+
+Optional but recommended step.
+
+The metadata block groups after conversion may be smaller than the default size
+(256MiB or 1GiB). Running a balance will attempt to merge the block groups.
+This depends on the free space layout (and fragmentation) and may fail. This is
+a soft error leaving the filesystem usable but the block group layout may
+remain unchanged.
+
+Note that balance operation takes a lot of time.
+
+ # btrfs balance start -m /mnt/btrfs
+
+OPTIONS
+-------
+-d|--no-datasum::
+disable data checksum calculations and set NODATASUM file flag, this can speed
+up the conversion
+-i|--no-xattr::
+ignore xattrs and ACLs of files
+-n|--no-inline::
+disable inlining of small files to metadata blocks, this will decrease the metadata
+consumption and may help to convert a filesystem with low free space
+-N|--nodesize <SIZE>::
+set filesystem nodesize, the tree block size in which btrfs stores its metadata.
+The default value is 16KB (16384) or the page size, whichever is bigger.
+Must be a multiple of the sectorsize, but not larger than 65536. See
+`mkfs.btrfs`(8) for more details.
+-r|--rollback::
+rollback to the original ext2/3/4 filesystem if possible
+-l|--label <LABEL>::
+set filesystem label during conversion
+-L|--copy-label::
+use label from the converted filesystem
+-p|--progress::
+show progress of conversion, on by default
+--no-progress::
+disable detailed progress and show only the main phases of conversion
+
+EXIT STATUS
+-----------
+*btrfs-convert* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-debug-tree.asciidoc b/Documentation/btrfs-debug-tree.asciidoc
new file mode 100644
index 00000000..23fc1156
--- /dev/null
+++ b/Documentation/btrfs-debug-tree.asciidoc
@@ -0,0 +1,38 @@
+btrfs-debug-tree(8)
+===================
+
+NAME
+----
+btrfs-debug-tree - dump btrfs filesystem metadata into stdout
+
+SYNOPSIS
+--------
+*btrfs-debug-tree* [options] <device>
+
+DESCRIPTION
+-----------
+*btrfs-debug-tree* is used to dump the whole tree of the given device.
+
+This may be useful for analyzing filesystem state or inconsistencies and has
+a positive educational effect on understanding the internal structure.
+<device> is the device file where the filesystem is stored.
+
+OPTIONS
+-------
+-e::
+Print detailed extents info.
+-d::
+Print info of btrfs device and root tree dirs only.
+-r::
+Print info of roots only.
+-b <block_num>::
+Print info of the specified block only.
+
+EXIT STATUS
+-----------
+*btrfs-debug-tree* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-device.asciidoc b/Documentation/btrfs-device.asciidoc
new file mode 100644
index 00000000..2827598a
--- /dev/null
+++ b/Documentation/btrfs-device.asciidoc
@@ -0,0 +1,145 @@
+btrfs-device(8)
+===============
+
+NAME
+----
+btrfs-device - control btrfs devices
+
+SYNOPSIS
+--------
+*btrfs device* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs device* is used to control the btrfs devices, since btrfs can be used
+across several devices, *btrfs device* is used for multiple device management.
+
+DEVICE MANAGEMENT
+-----------------
+A btrfs filesystem is capable of managing multiple devices.
+
+Btrfs filesystem uses different profiles to manage different RAID levels, and
+uses balance to rebuild chunks. Devices can also be added/removed/replaced
+online.
+
+Profile::
+Btrfs filesystem uses data/metadata profiles to manage allocation/duplication
+mechanism. +
+Profiles like RAID level can be assigned to data and metadata separately.
++
+See `mkfs.btrfs`(8) for more details.
+
+RAID level::
+Btrfs filesystem supports most of the standard RAID levels: 0/1/5/6/10. +
+RAID levels can be assigned at mkfs time or online.
++
+See `mkfs.btrfs`(8) for assigning RAID levels at mkfs time and `btrfs-balance`(8) for
+assigning them online.
++
+NOTE: Since btrfs is under heavy development especially the RAID5/6 support,
+it is *highly* recommended to read the following btrfs wiki page to get more
+updated details on RAID5/6: +
+https://btrfs.wiki.kernel.org/index.php/RAID56
+
+Balance::
+`btrfs-balance`(8) subcommand can be used to balance or rebuild chunks to the
+desired profile.
++
+Balance can rebuild/recover chunks from their RAID redundancy where possible,
+so when using RAID1/5/6/10 with some devices failed and you have just added a
+new device to btrfs using `btrfs-device`(8), you should run `btrfs-balance`(8)
+to rebuild the chunks.
++
+See `btrfs-balance`(8) for more details.
+
+Device add/remove/replace::
+Device can be added/removed using `btrfs-device`(8) subcommand and replaced
+using `btrfs-replace`(8).
++
+When device is removed or replaced, btrfs will do the chunk rebuild if needed.
++
+See `btrfs-replace`(8) man page for more details on device replace.
+
+SUBCOMMAND
+----------
+*add* [-Kf] <dev> [<dev>...] <path>::
+Add device(s) to the filesystem identified by <path>.
++
+If applicable, a whole device discard (TRIM) operation is performed.
++
+`Options`
++
+-K|--nodiscard::::
+do not perform discard by default
+-f|--force::::
+force overwrite of existing filesystem on the given disk(s)
+
+*remove* <dev> [<dev>...] <path>::
+Remove device(s) from a filesystem identified by <path>.
+
+*delete* <dev> [<dev>...] <path>::
+Alias of remove kept for backwards compatibility
+
+*ready* <device>::
+Check device to see if it has all of its devices in cache for mounting.
+
+*scan* [(--all-devices|-d)|<device> [<device>...]]::
+Scan devices for a btrfs filesystem.
++
+If one or more devices are passed, these are scanned for a btrfs filesystem.
+If no devices are passed, btrfs uses block devices containing btrfs
+filesystem as listed by blkid.
+Finally, if '--all-devices' or '-d' is passed, all the devices under /dev are
+scanned.
+
+*stats* [-z] <path>|<device>::
+Read and print the device IO stats for all mounted devices of the filesystem
+identified by <path> or for a single <device>.
++
+`Options`
++
+-z::::
+Reset stats to zero after reading them.
+
+*usage* [options] <path> [<path>...]::
+Show detailed information about internal allocations in devices.
++
+`Options`
++
+-b|--raw::::
+raw numbers in bytes, without the 'B' suffix
+-h|--human-readable::::
+print human friendly numbers, base 1024, this is the default
+-H::::
+print human friendly numbers, base 1000
+--iec::::
+select the 1024 base for the following options, according to the IEC standard
+--si::::
+select the 1000 base for the following options, according to the SI standard
+-k|--kbytes::::
+show sizes in KiB, or kB with --si
+-m|--mbytes::::
+show sizes in MiB, or MB with --si
+-g|--gbytes::::
+show sizes in GiB, or GB with --si
+-t|--tbytes::::
+show sizes in TiB, or TB with --si
+
+If conflicting options are passed, the last one takes precedence.
+
+EXIT STATUS
+-----------
+*btrfs device* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-replace`(8),
+`btrfs-balance`(8)
diff --git a/Documentation/btrfs-filesystem.asciidoc b/Documentation/btrfs-filesystem.asciidoc
new file mode 100644
index 00000000..26126175
--- /dev/null
+++ b/Documentation/btrfs-filesystem.asciidoc
@@ -0,0 +1,338 @@
+btrfs-filesystem(8)
+===================
+
+NAME
+----
+btrfs-filesystem - command group of btrfs that usually works on the whole filesystem
+
+SYNOPSIS
+--------
+*btrfs filesystem* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs filesystem* is used to do the whole filesystem level tasks, including
+all the regular filesystem operations like resizing, space stats, label
+setting/getting, and defragmentation.
+
+SUBCOMMAND
+----------
+*df* [options] <path>::
+Show a terse summary information about allocation of block group types of a given
+mount point. The original purpose of this command was a debugging helper. The
+output needs to be further interpreted and is not suitable for quick overview.
++
+--
+An example with description:
+
+* device size: '1.9TiB', one device, no RAID
+* filesystem size: '1.9TiB'
+* created with: 'mkfs.btrfs -d single -m single'
+--
++
+------------------------------
+$ btrfs filesystem df /path
+Data, single: total=1.15TiB, used=1.13TiB
+System, single: total=32.00MiB, used=144.00KiB
+Metadata, single: total=12.00GiB, used=6.45GiB
+GlobalReserve, single: total=512.00MiB, used=0.00B
+------------------------------
++
+--
+* 'Data', 'System' and 'Metadata' are separate block group types.
+'GlobalReserve' is an artificial and internal emergency space, see below.
+* 'single' -- the allocation profile, defined at mkfs time
+* 'total' -- sum of space reserved for
+all allocation profiles of the given type, ie. all Data/single. Note that it's
+not total size of filesystem.
+* 'used' -- sum of used space of the above, ie. file extents, metadata blocks
+--
++
+'GlobalReserve' is an artificial and internal emergency space. It is used eg.
+when the filesystem is full. Its 'total' size is dynamic based on the
+filesystem size, usually not larger than 512MiB, 'used' may fluctuate.
++
+The global block reserve is accounted within Metadata. In case the filesystem
+metadata are exhausted, 'GlobalReserve/total + Metadata/used = Metadata/total'.
++
+`Options`
++
+-b|--raw::::
+raw numbers in bytes, without the 'B' suffix
+-h|--human-readable::::
+print human friendly numbers, base 1024, this is the default
+-H::::
+print human friendly numbers, base 1000
+--iec::::
+select the 1024 base for the following options, according to the IEC standard
+--si::::
+select the 1000 base for the following options, according to the SI standard
+-k|--kbytes::::
+show sizes in KiB, or kB with --si
+-m|--mbytes::::
+show sizes in MiB, or MB with --si
+-g|--gbytes::::
+show sizes in GiB, or GB with --si
+-t|--tbytes::::
+show sizes in TiB, or TB with --si
++
+If conflicting options are passed, the last one takes precedence.
+
+*defragment* [options] <file>|<dir> [<file>|<dir>...]::
+Defragment file data on a mounted filesystem.
++
+If '-r' is passed, files in dir will be defragmented recursively.
+The start position and the number of bytes to defragment can be specified by
+start and len using '-s' and '-l' options below.
+Extents bigger than value given by '-t' will be skipped, otherwise this value
+is used as a target extent size, but is only advisory and may not be reached
+if the free space is too fragmented.
+Use 0 to take the kernel default, which is 256kB but may change in the future.
+You can also turn on compression in defragment operations.
++
+WARNING: Defragmenting with Linux kernel versions < 3.9 or ≥ 3.14-rc2 as well as
+with Linux stable kernel versions ≥ 3.10.31, ≥ 3.12.12 or ≥ 3.13.4 will break up
+the ref-links of COW data (for example files copied with `cp --reflink`,
+snapshots or de-duplicated data).
+This may cause considerable increase of space usage depending on the broken up
+ref-links.
++
+`Options`
++
+-v::::
+be verbose, print file names as they're submitted for defragmentation
+-c[<algo>]::::
+compress file contents while defragmenting. Optional argument selects the compression
+algorithm, 'zlib' (default) or 'lzo'. Currently it's not possible to select no
+compression. See also section 'EXAMPLES'.
+-r::::
+defragment files recursively in given directories
+-f::::
+flush data for each file before going to the next file. This will limit the amount
+of dirty data to current file, otherwise the amount cumulates from several files
+and may increase system load.
+-s <start>[kKmMgGtTpPeE]::::
+defragmentation will start from the given offset, default is beginning of a file
+-l <len>[kKmMgGtTpPeE]::::
+defragment only up to 'len' bytes, default is the file size
+-t <size>[kKmMgGtTpPeE]::::
+target extent size, do not touch extents bigger than 'size'
++
+For 'start', 'len', 'size' it is possible to append
+units designator: \'K', \'M', \'G', \'T', \'P', or \'E', which represent
+KiB, MiB, GiB, TiB, PiB, or EiB, respectively (case does not matter).
++
+NOTE: Directory arguments without '-r' do not defragment files recursively but will
+defragment certain internal trees (extent tree and the subvolume tree). This has been
+confusing and could be removed in the future.
+
+*label* [<dev>|<mountpoint>] [<newlabel>]::
+Show or update the label of a filesystem. This works on a mounted filesystem or
+a filesystem image.
++
+The 'newlabel' argument is optional. Current label is printed if the argument
+is omitted.
++
+NOTE: the maximum allowable length shall be less than 256 chars and must not contain
+a newline. The trailing newline is stripped automatically.
+
+// Some wording are extracted by the resize2fs man page
+*resize* [<devid>:][+/-]<size>[kKmMgGtTpPeE]|[<devid>:]max <path>::
+Resize a mounted filesystem identified by 'path'. A particular device
+can be resized by specifying a 'devid'.
++
+WARNING: If 'path' is a file containing a BTRFS image then resize does not work
+as expected and does not resize the image. This would resize the underlying
+filesystem instead.
++
+The 'devid' can be found in the output of *btrfs filesystem show* and
+defaults to 1 if not specified.
+The 'size' parameter specifies the new size of the filesystem.
+If the prefix '+' or '-' is present the size is increased or decreased
+by the quantity 'size'.
+If no units are specified, bytes are assumed for 'size'.
+Optionally, the size parameter may be suffixed by one of the following
+units designators: \'K', \'M', \'G', \'T', \'P', or \'E', which represent
+KiB, MiB, GiB, TiB, PiB, or EiB, respectively (case does not matter).
++
+If 'max' is passed, the filesystem will occupy all available space on the
+device respecting 'devid' (remember, devid 1 by default).
++
+The resize command does not manipulate the size of underlying
+partition. If you wish to enlarge/reduce a filesystem, you must make sure you
+can expand the partition before enlarging the filesystem and shrink the
+partition after reducing the size of the filesystem. This can be done using
+`fdisk`(8) or `parted`(8) to delete the existing partition and recreate
+it with the new desired size. When recreating the partition make sure to use
+the same starting partition offset as before.
++
+Growing is usually instant as it only updates the size. However, shrinking could
+take a long time if there are data in the device area that's beyond the new
+end. Relocation of the data takes time.
++
+See also section 'EXAMPLES'.
+
+*show* [options] [<path>|<uuid>|<device>|<label>]::
+Show the btrfs filesystem with some additional info about devices and space
+allocation.
++
+If none of 'path'/'uuid'/'device'/'label' is passed, information
+about all the BTRFS filesystems is shown, both mounted and unmounted.
++
+`Options`
++
+-m|--mounted::::
+probe kernel for mounted BTRFS filesystems
+-d|--all-devices::::
+scan all devices under /dev, otherwise the devices list is extracted from the
+/proc/partitions file. This is a fallback option if there's no device node
+manager (like udev) available in the system.
+--raw::::
+raw numbers in bytes, without the 'B' suffix
+--human-readable::::
+print human friendly numbers, base 1024, this is the default
+--iec::::
+select the 1024 base for the following options, according to the IEC standard
+--si::::
+select the 1000 base for the following options, according to the SI standard
+--kbytes::::
+show sizes in KiB, or kB with --si
+--mbytes::::
+show sizes in MiB, or MB with --si
+--gbytes::::
+show sizes in GiB, or GB with --si
+--tbytes::::
+show sizes in TiB, or TB with --si
+
+*sync* <path>::
+Force a sync of the filesystem at 'path'. This is done via a special ioctl and
+will also trigger cleaning of deleted subvolumes. Besides that it's equivalent
+to the `sync`(1) command.
+
+*usage* [options] <path> [<path>...]::
+Show detailed information about internal filesystem usage. This is supposed to
+replace the *btrfs filesystem df* command in the long run.
++
+The level of detail can differ if the command is run under a regular or the
+root user (due to use of restricted ioctl). For both there's a summary section
+with information about space usage:
++
+-------------------------
+$ btrfs fi usage /path
+WARNING: cannot read detailed chunk info, RAID5/6 numbers will be incorrect, run as root
+Overall:
+ Device size: 1.82TiB
+ Device allocated: 1.17TiB
+ Device unallocated: 669.99GiB
+ Device missing: 0.00B
+ Used: 1.14TiB
+ Free (estimated): 692.57GiB (min: 692.57GiB)
+ Data ratio: 1.00
+ Metadata ratio: 1.00
+ Global reserve: 512.00MiB (used: 0.00B)
+-------------------------
++
+The root user will also see stats broken down by block group types:
++
+-------------------------
+Data,single: Size:1.15TiB, Used:1.13TiB
+ /dev/sdb 1.15TiB
+
+Metadata,single: Size:12.00GiB, Used:6.45GiB
+ /dev/sdb 12.00GiB
+
+System,single: Size:32.00MiB, Used:144.00KiB
+ /dev/sdb 32.00MiB
+
+Unallocated:
+ /dev/sdb 669.99GiB
+-------------------------
++
+`Options`
++
+-b|--raw::::
+raw numbers in bytes, without the 'B' suffix
+-h|--human-readable::::
+print human friendly numbers, base 1024, this is the default
+-H::::
+print human friendly numbers, base 1000
+--iec::::
+select the 1024 base for the following options, according to the IEC standard
+--si::::
+select the 1000 base for the following options, according to the SI standard
+-k|--kbytes::::
+show sizes in KiB, or kB with --si
+-m|--mbytes::::
+show sizes in MiB, or MB with --si
+-g|--gbytes::::
+show sizes in GiB, or GB with --si
+-t|--tbytes::::
+show sizes in TiB, or TB with --si
+-T::::
+show data in tabular format
++
+If conflicting options are passed, the last one takes precedence.
+
+EXAMPLES
+--------
+
+*$ btrfs filesystem defrag -v -r dir/*
+
+Recursively defragment files under 'dir/', print files as they are processed.
+The file names will be printed in batches, similarly the amount of data triggered
+by defragmentation will be proportional to last N printed files. The system dirty
+memory throttling will slow down the defragmentation but there can still be a lot
+of IO load and the system may stall for a moment.
+
+*$ btrfs filesystem defrag -v -r -f dir/*
+
+Recursively defragment files under 'dir/', be verbose and wait until all blocks
+are flushed before processing next file. You can note slower progress of the
+output and lower IO load (proportional to currently defragmented file).
+
+*$ btrfs filesystem defrag -v -r -f -clzo dir/*
+
+Recursively defragment files under 'dir/', be verbose, wait until all blocks are
+flushed and force file compression.
+
+*$ btrfs filesystem defrag -v -r -t 64M dir/*
+
+Recursively defragment files under 'dir/', be verbose and try to merge extents
+to be about 64MiB. As stated above, the success rate depends on actual free
+space fragmentation and the final result is not guaranteed to meet the target
+even if run repeatedly.
+
+*$ btrfs filesystem resize -1G /path*
+
+*$ btrfs filesystem resize 1:-1G /path*
+
+Shrink size of the filesystem's device id 1 by 1GiB. The first syntax expects a
+device with id 1 to exist, otherwise fails. The second is equivalent and more
+explicit. For a single-device filesystem it's typically not necessary to
+specify the devid though.
+
+*$ btrfs filesystem resize max /path*
+
+*$ btrfs filesystem resize 1:max /path*
+
+Let's assume that devid 1 exists, the filesystem does not occupy the whole block
+device, eg. it has been enlarged and we want to grow the filesystem. Simply using
+'max' as size we will achieve that.
+
+NOTE: There are two ways to minimize the filesystem on a given device. The
+*btrfs inspect-internal min-dev-size* command, or iteratively shrink in steps.
+
+EXIT STATUS
+-----------
+*btrfs filesystem* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
diff --git a/Documentation/btrfs-find-root.asciidoc b/Documentation/btrfs-find-root.asciidoc
new file mode 100644
index 00000000..e04cd3e8
--- /dev/null
+++ b/Documentation/btrfs-find-root.asciidoc
@@ -0,0 +1,35 @@
+btrfs-find-root(8)
+==================
+
+NAME
+----
+btrfs-find-root - filter to find btrfs root
+
+SYNOPSIS
+--------
+*btrfs-find-root* [options] <dev>
+
+DESCRIPTION
+-----------
+*btrfs-find-root* is used to find the satisfied root, you can filter by
+root tree's objectid, generation, level.
+
+OPTIONS
+-------
+-a::
+Search through all the metadata extents, even if the root is already found.
+-g <generation>::
+Filter root tree by its original transaction id, tree root's generation by default.
+-o <objectid>::
+Filter root tree by its objectid, tree root's objectid by default.
+-l <level>::
+Filter root tree by B-+ tree's level, level 0 by default.
+
+EXIT STATUS
+-----------
+*btrfs-find-root* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-image.asciidoc b/Documentation/btrfs-image.asciidoc
new file mode 100644
index 00000000..38265171
--- /dev/null
+++ b/Documentation/btrfs-image.asciidoc
@@ -0,0 +1,66 @@
+btrfs-image(8)
+==============
+
+NAME
+----
+btrfs-image - create/restore an image of the filesystem
+
+SYNOPSIS
+--------
+*btrfs-image* [options] <source> <target>
+
+DESCRIPTION
+-----------
+*btrfs-image* is used to create an image of a btrfs filesystem.
+All data will be zeroed, but metadata and the like is preserved.
+Mainly used for debugging purposes.
+
+In the dump mode, source is the btrfs device/file and target is the output
+file (use '-' for stdout).
+
+In the restore mode (option -r), source is the dumped image and target is the btrfs device/file.
+
+
+OPTIONS
+-------
+-r::
+Restore metadump image. By default, this fixes super's chunk tree, by
+using 1 stripe pointing to primary device, so that file system can be
+restored by running tree log replay if possible. To restore without
+changing number of stripes in chunk tree check -o option.
+
+-c <value>::
+Compression level (0 ~ 9).
+
+-t <value>::
+Number of threads (1 ~ 32) to be used to process the image dump or restore.
+
+-o::
+Use the old restore method, this does not fixup the chunk tree so the restored
+file system will not be able to be mounted.
+
+-s::
+Sanitize the file names when generating the image. One -s means just
+generate random garbage, which means that the directory indexes won't match up
+since the hashes won't match with the garbage filenames. Using -ss will
+calculate a collision for the filename so that the hashes match, and if it
+can't calculate a collision then it will just generate garbage. The collision
+calculator is very time and CPU intensive so only use it if you are having
+problems with your file system tree and need to have it mostly working.
+
+-w::
+Walk all the trees manually and copy any blocks that are referenced. Use this
+option if your extent tree is corrupted to make sure that all of the metadata is
+captured.
+
+-m::
+Restore for multiple devices, more than 1 device should be provided.
+
+EXIT STATUS
+-----------
+*btrfs-image* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-inspect-internal.asciidoc b/Documentation/btrfs-inspect-internal.asciidoc
new file mode 100644
index 00000000..1c7c3611
--- /dev/null
+++ b/Documentation/btrfs-inspect-internal.asciidoc
@@ -0,0 +1,84 @@
+btrfs-inspect-internal(8)
+=========================
+
+NAME
+----
+btrfs-inspect-internal - query various internal information
+
+SYNOPSIS
+--------
+*btrfs inspect-internal* <subcommand> <args>
+
+DESCRIPTION
+-----------
+
+This command group provides an interface to query internal information. The
+functionality ranges from a simple UI to an ioctl or a more complex query that
+assembles the result from several internal structures. The latter usually
+requires calls to privileged ioctls.
+
+SUBCOMMAND
+----------
+*inode-resolve* [-v] <ino> <path>::
+(needs root privileges)
++
+resolve paths to all files with given inode number 'ino' in a given subvolume
+at 'path', ie. all hardlinks
++
+`Options`
++
+-v::::
+verbose mode, print count of returned paths and ioctl() return value
+
+*logical-resolve* [-Pv] [-s <bufsize>] <logical> <path>::
+(needs root privileges)
++
+resolve paths to all files at given 'logical' address in the linear filesystem space
++
+`Options`
++
+-P::::
+skip the path resolving and print the inodes instead
+-v::::
+verbose mode, print count of returned paths and all ioctl() return values
+-s <bufsize>::::
+set internal buffer for storing the file names to 'bufsize', default is 4096, maximum 64k
+
+*min-dev-size* [options] <path>::
+(needs root privileges)
++
+return the minimum size the device can be shrunk to, without performing any
+resize operation, this may be useful before executing the actual resize operation
++
+`Options`
++
+--id <id>::::
+specify the device 'id' to query, default is 1 if this option is not used
+
+*rootid* <path>::
+for a given file or directory, return the containing tree root id, for a
+subvolume itself return its own tree id (ie. subvol id)
++
+NOTE: The result is undefined for the so-called empty subvolumes (identified by
+inode number 2), but such subvolume does not contain any files anyway
+
+*subvolid-resolve* <subvolid> <path>::
+(needs root privileges)
++
+resolve the absolute path of the subvolume id 'subvolid'
+
+EXIT STATUS
+-----------
+*btrfs inspect-internal* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-debug-tree`(8)
diff --git a/Documentation/btrfs-man5.asciidoc b/Documentation/btrfs-man5.asciidoc
new file mode 100644
index 00000000..d4323917
--- /dev/null
+++ b/Documentation/btrfs-man5.asciidoc
@@ -0,0 +1,406 @@
+btrfs-man5(5)
+==============
+
+NAME
+----
+btrfs-man5 - topics about the BTRFS filesystem (mount options, supported file attributes and other)
+
+DESCRIPTION
+-----------
+This document describes topics related to BTRFS that are not specific to the
+tools. Currently covers:
+
+1. mount options
+
+2. file attributes
+
+MOUNT OPTIONS
+-------------
+
+This section describes mount options specific to BTRFS. For the generic mount
+options please refer to `mount`(8) manpage. The options are sorted alphabetically
+(discarding the 'no' prefix).
+
+*acl*::
+*noacl*::
+(default: on)
++
+Enable/disable support for Posix Access Control Lists (ACLs). See the
+`acl`(5) manual page for more information about ACLs.
+
+*alloc_start='bytes'*::
+(default: 1M, minimum: 1M)
++
+Debugging option to force all block allocations above a certain
+byte threshold on each block device. The value is specified in
+bytes, optionally with a K, M, or G suffix (case insensitive).
++
+This option was used for testing and has no practical use, it's slated to be
+removed in the future.
+
+*autodefrag*::
+*noautodefrag*::
+(since: 3.0, default: off)
++
+Enable automatic file defragmentation.
+When enabled, small random writes into files (in a range of tens of kilobytes,
+currently it's 64K) are detected and queued up for the defragmentation process.
+Not well suited for large database workloads.
++
+The read latency may increase due to reading the adjacent blocks that make up the
+range for defragmentation, successive write will merge the blocks in the new
+location.
++
+WARNING: Defragmenting with Linux kernel versions < 3.9 or ≥ 3.14-rc2 as
+well as with Linux stable kernel versions ≥ 3.10.31, ≥ 3.12.12 or
+≥ 3.13.4 will break up the ref-links of CoW data (for example files
+copied with `cp --reflink`, snapshots or de-duplicated data).
+This may cause considerable increase of space usage depending on the
+broken up ref-links.
+
+*barrier*::
+*nobarrier*::
+(default: on)
++
+Ensure that all IO write operations make it through the device cache and are stored
+permanently when the filesystem is at its consistency checkpoint. This
+typically means that a flush command is sent to the device that will
+synchronize all pending data and ordinary metadata blocks, then writes the
+superblock and issues another flush.
++
+The write flushes incur a slight hit and also prevent the IO block
+scheduler from reordering requests in a more effective way. Disabling barriers gets
+rid of that penalty but will most certainly lead to a corrupted filesystem in
+case of a crash or power loss. The ordinary metadata blocks could be yet
+unwritten at the time the new superblock is stored permanently, expecting that
+the block pointers to metadata were stored permanently before.
++
+On a device with a volatile battery-backed write-back cache, the 'nobarrier'
+option will not lead to filesystem corruption as the pending blocks are
+supposed to make it to the permanent storage.
+
+*check_int*::
+*check_int_data*::
+*check_int_print_mask='value'*::
+(since: 3.0, default: off)
++
+These debugging options control the behavior of the integrity checking
+module (the BTRFS_FS_CHECK_INTEGRITY config option required). +
++
+`check_int` enables the integrity checker module, which examines all
+block write requests to ensure on-disk consistency, at a large
+memory and CPU cost. +
++
+`check_int_data` includes extent data in the integrity checks, and
+implies the check_int option. +
++
+`check_int_print_mask` takes a bitmask of BTRFSIC_PRINT_MASK_* values
+as defined in 'fs/btrfs/check-integrity.c', to control the integrity
+checker module behavior. +
++
+See comments at the top of 'fs/btrfs/check-integrity.c'
+for more info.
+
+*clear_cache*::
+Force clearing and rebuilding of the disk space cache if something
+has gone wrong. See also: 'space_cache'.
+
+*commit='seconds'*::
+(since: 3.12, default: 30)
++
+Set the interval of periodic commit. Higher
+values defer data being synced to permanent storage with obvious
+consequences when the system crashes. The upper bound is not forced,
+but a warning is printed if it's more than 300 seconds (5 minutes).
+
+*compress*::
+*compress='type'*::
+*compress-force*::
+*compress-force='type'*::
+(default: off)
++
+Control BTRFS file data compression. Type may be specified as 'zlib',
+'lzo' or 'no' (for no compression, used for remounting). If no type
+is specified, 'zlib' is used. If compress-force is specified,
+all files will be compressed, whether or not they compress well.
++
+NOTE: If compression is enabled, 'nodatacow' and 'nodatasum' are disabled.
+
+*datacow*::
+*nodatacow*::
+(default: on)
++
+Enable data copy-on-write for newly created files.
+'Nodatacow' implies 'nodatasum', and disables 'compression'. All files created
+under 'nodatacow' are also set the NOCOW file attribute (see `chattr`(1)).
+
+*datasum*::
+*nodatasum*::
+(default: on)
++
+Enable data checksumming for newly created files.
+'Datasum' implies 'datacow', ie. the normal mode of operation. All files created
+under 'nodatasum' inherit the "no checksums" property, however there's no
+corresponding file attribute (see `chattr`(1)).
+
+*degraded*::
+(default: off)
++
+Allow mounts with fewer devices than the raid profile constraints
+require. A read-write mount (or remount) may fail with too many devices
+missing, for example if a stripe member is completely missing from RAID0.
+
+*device='devicepath'*::
+Specify a path to a device that will be scanned for BTRFS filesystem during
+mount. This is usually done automatically by a device manager (like udev) or
+using the *btrfs device scan* command (eg. run from the initial ramdisk). In
+cases where this is not possible the 'device' mount option can help.
++
+NOTE: booting eg. a RAID1 system may fail even if all filesystem's 'device'
+paths are provided as the actual device nodes may not be discovered by the
+system at that point.
+
+*discard*::
+*nodiscard*::
+(default: off)
++
+Enable discarding of freed file blocks using TRIM operation. This is useful
+for SSD devices, thinly provisioned LUNs or virtual machine images where the
+backing device understands the operation. Depending on support of the
+underlying device, the operation may severely hurt performance in case the TRIM
+operation is synchronous (eg. with SATA devices up to revision 3.0).
++
+If discarding is not necessary to be done at the block freeing time, there's
+*fstrim* tool that lets the filesystem discard all free blocks in a batch,
+possibly not much interfering with other operations.
+
+*enospc_debug*::
+*noenospc_debug*::
+(default: off)
++
+Enable verbose output for some ENOSPC conditions. It's safe to use but can
+be noisy if the system reaches a near-full state.
+
+*fatal_errors='action'*::
+(since: 3.4, default: bug)
++
+Action to take when encountering a fatal error.
++
+*bug*::::
+'BUG()' on a fatal error, the system will stay in the crashed state and may be
+still partially usable, but reboot is required for full operation
++
+*panic*::::
+'panic()' on a fatal error, depending on other system configuration, this may
+be followed by a reboot. Please refer to the documentation of kernel boot
+parameters, eg. 'panic', 'oops' or 'crashkernel'.
+
+*flushoncommit*::
+*noflushoncommit*::
+(default: off)
++
+This option forces any data dirtied by a write in a prior transaction to commit
+as part of the current commit. This makes the committed state a fully
+consistent view of the file system from the application's perspective (i.e., it
+includes all completed file system operations). This was previously the
+behavior only when a snapshot was created.
++
+Disabling flushing may improve performance but is not crash-safe.
+
+*fragment='type'*::
+(depends on compile-time option BTRFS_DEBUG, since: 4.4, default: off)
++
+A debugging helper to intentionally fragment given 'type' of block groups. The
+type can be 'data', 'metadata' or 'all'. This mount option should not be used
+outside of debugging environments and is not recognized if the kernel config
+option 'BTRFS_DEBUG' is not enabled.
+
+*inode_cache*::
+*noinode_cache*::
+(since: 3.0, default: off)
++
+Enable free inode number caching. Not recommended to use unless files on your
+filesystem get assigned inode numbers that are approaching 2^64^. Normally, new
+files in each subvolume get assigned incrementally (plus one from the last
+time) and are not reused. The mount option turns on caching of the existing
+inode numbers and reuse of inode numbers of deleted files.
++
+This option may slow down your system at first run, or after mounting without
+the option.
++
+NOTE: Defaults to off due to a potential overflow problem when the free space
+checksums don't fit inside a single page.
+
+*max_inline='bytes'*::
+(default: min(8192, page size) )
++
+Specify the maximum amount of space, in bytes, that can be inlined in
+a metadata B-tree leaf. The value is specified in bytes, optionally
+with a K suffix (case insensitive). In practice, this value
+is limited by the filesystem block size (named 'sectorsize' at mkfs time),
+and memory page size of the system. In case of sectorsize limit, there's
+some space unavailable due to leaf headers. For example, a 4k sectorsize, max
+inline data is ~3900 bytes.
++
+Inlining can be completely turned off specifying 0. This will increase data
+block slack if file sizes are much smaller than block size but will reduce
+metadata consumption in return.
+
+*metadata_ratio='value'*::
+(default: 0, internal logic)
++
+Specifies that 1 metadata chunk should be allocated after every 'value' data
+chunks. Default behaviour depends on internal logic, some percent of unused
+metadata space is attempted to be maintained but is not always possible if
+there's no space left for chunk allocation. The option could be useful to
+override the internal logic in favor of the metadata allocation if the expected
+workload is supposed to be metadata intense (snapshots, reflinks, xattrs,
+inlined files).
+
+*recovery*::
+(since: 3.2, default: off)
++
+Enable autorecovery attempts if a bad tree root is found at mount time.
+Currently this scans a backup list of several previous tree roots and tries to
+use the first readable. This can be used with read-only mounts as well.
+
+*rescan_uuid_tree*::
+(since: 3.12, default: off)
++
+Force check and rebuild procedure of the UUID tree. This should not
+normally be needed.
+
+*skip_balance*::
+(since: 3.3, default: off)
++
+Skip automatic resume of interrupted balance operation after mount.
+May be resumed with *btrfs balance resume* or the paused state can be removed
+by *btrfs balance cancel*.
+
+*space_cache*::
+*nospace_cache*::
+('nospace_cache' since: 3.2, default: on)
++
+Disable freespace cache loading without clearing the cache and the free space
+cache will not be used during the mount. This affects performance as searching
+for new free blocks could take longer. On the other hand, managing the space
+cache consumes some resources.
+
+*ssd*::
+*nossd*::
+*ssd_spread*::
+(default: SSD autodetected)
++
+Options to control SSD allocation schemes. By default, BTRFS will
+enable or disable SSD allocation heuristics depending on whether a
+rotational or nonrotational disk is in use. The 'ssd' and 'nossd' options
+can override this autodetection.
++
+The 'ssd_spread' mount option attempts to allocate into bigger and aligned
+chunks of unused space, and may perform better on low-end SSDs. 'ssd_spread'
+implies 'ssd', enabling all other SSD heuristics as well.
+
+*subvol='path'*::
+Mount subvolume from 'path' rather than the toplevel subvolume. The
+'path' is absolute (ie. starts at the toplevel subvolume).
+This mount option overrides the default subvolume set for the given filesystem.
+
+*subvolid='subvolid'*::
+Mount subvolume specified by a 'subvolid' number rather than the toplevel
+subvolume. You can use *btrfs subvolume list* to see subvolume ID numbers.
+This mount option overrides the default subvolume set for the given filesystem.
+
+*subvolrootid='objectid'*::
+(irrelevant since: 3.2, formally deprecated since: 3.10)
++
+A workaround option from times (pre 3.2) when it was not possible to mount a
+subvolume that did not reside directly under the toplevel subvolume.
+
+*thread_pool='number'*::
+(default: min(NRCPUS + 2, 8) )
++
+The number of worker threads to allocate. NRCPUS is number of on-line CPUs
+detected at the time of mount. Small number leads to less parallelism in
+processing data and metadata, higher numbers could lead to a performance hit due to
+increased locking contention, cache-line bouncing or costly data transfers
+between local CPU memories.
+
+*treelog*::
+*notreelog*::
+(default: on)
++
+Enable the tree logging used for 'fsync' and 'O_SYNC' writes. The tree log
+stores changes without the need of a full filesystem sync. The log operations
+are flushed at sync and transaction commit. If the system crashes between two
+such syncs, the pending tree log operations are replayed during mount.
++
+WARNING: currently, the tree log is replayed even with a read-only mount!
++
+The tree log could contain new files/directories, these would not exist on
+a mounted filesystem if the log is not replayed.
+
+*user_subvol_rm_allowed*::
+(default: off)
++
+Allow subvolumes to be deleted by their respective owner. Otherwise, only the
+root user can do that.
+
+FILE ATTRIBUTES
+---------------
+The btrfs filesystem supports setting the following file attributes using the
+`chattr`(1) utility:
+
+*a*::
+'append only', new writes are always written at the end of the file
+
+*A*::
+'no atime updates'
+
+*c*::
+'compress data', all data written after this attribute is set will be compressed.
+Please note that compression is also affected by the mount options or the parent
+directory attributes.
++
+When set on a directory, all newly created files will inherit this attribute.
+
+*C*::
+'no copy-on-write', file modifications are done in-place
++
+When set on a directory, all newly created files will inherit this attribute.
++
+NOTE: due to implementation limitations, this flag can be set/unset only on
+empty files.
+
+*d*::
+'no dump', makes sense with 3rd party tools like `dump`(8), on BTRFS the
+attribute can be set/unset but no other special handling is done
+
+*D*::
+'synchronous directory updates', for more details search `open`(2) for 'O_SYNC'
+and 'O_DSYNC'
+
+*i*::
+'immutable', no file data and metadata changes allowed even to the root user as
+long as this attribute is set (obviously the exception is unsetting the attribute)
+
+*S*::
+'synchronous updates', for more details search `open`(2) for 'O_SYNC' and
+'O_DSYNC'
+
+*X*::
+'no compression', permanently turn off compression on the given file, other
+compression mount options will not affect that
++
+When set on a directory, all newly created files will inherit this attribute.
+
+No other attributes are supported. For the complete list please refer to the
+`chattr`(1) manual page.
+
+SEE ALSO
+--------
+`acl`(5),
+`btrfs`(8),
+`chattr`(1),
+`fstrim`(8),
+`mkfs.btrfs`(8),
+`mount`(8)
diff --git a/Documentation/btrfs-map-logical.asciidoc b/Documentation/btrfs-map-logical.asciidoc
new file mode 100644
index 00000000..a3d110cb
--- /dev/null
+++ b/Documentation/btrfs-map-logical.asciidoc
@@ -0,0 +1,37 @@
+btrfs-map-logical(8)
+====================
+
+NAME
+----
+btrfs-map-logical - map btrfs logical extent to physical extent
+
+SYNOPSIS
+--------
+*btrfs-map-logical* <options> <device>
+
+DESCRIPTION
+-----------
+*btrfs-map-logical* can be used to find out what the physical offsets are
+on the mirrors, the result is dumped into stdout by default.
+
+Mainly used for debugging purposes.
+
+OPTIONS
+-------
+-l|--logical <logical_num>::
+Logical extent to map.
+-c|--copy <copy>::
+Copy of the extent to read(usually 1 or 2).
+-o|--output <filename>::
+Output file to hold the extent.
+-b|--bytes <bytes>::
+Number of bytes to read.
+
+EXIT STATUS
+-----------
+*btrfs-map-logical* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-property.asciidoc b/Documentation/btrfs-property.asciidoc
new file mode 100644
index 00000000..8b9b7f03
--- /dev/null
+++ b/Documentation/btrfs-property.asciidoc
@@ -0,0 +1,73 @@
+btrfs-property(8)
+=================
+
+NAME
+----
+btrfs-property - get/set/list properties for given btrfs object.
+
+SYNOPSIS
+--------
+*btrfs property* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs property* is used to get/set/list property for given btrfs object.
+See the description of *get* subcommand for more information about
+both btrfs object and property.
+
+*btrfs property* provides a unified and user-friendly method to tune different
+btrfs properties instead of using the traditional method like `chattr`(1) or
+`lsattr`(1).
+
+SUBCOMMAND
+----------
+*get* [-t <type>] <object> [<name>]::
+Gets a property from a btrfs object.
++
+A btrfs object, which is set by <object>, can be a btrfs filesystem
+itself, a btrfs subvolume, an inode(file or directory) inside btrfs,
+or a device on which a btrfs exists.
++
+The '-t <type>' option can be used to explicitly
+specify what type of object you meant. This is only needed when a
+property could be set for more than one object type.
++
+Possible types are 's[ubvol]', 'f[ilesystem]', 'i[node]' and 'd[evice]'.
++
+Set the name of property by '<name>'. If no '<name>' is specified,
+all properties for the given object are printed. '<name>' is one of
+the following:
+
+ro::::
+read-only flag of subvolume: true or false
+label::::
+label of device
+compression::::
+compression setting for an inode: lzo, zlib, or "" (empty string)
+
+*list* [-t <type>] <object>::
+Lists available properties with their descriptions for the given object.
++
+See the description of *get* subcommand for the meaning of each option.
+
+*set* [-t <type>] <object> <name> <value>::
+Sets a property on a btrfs object.
++
+See the description of *get* subcommand for the meaning of each option.
+
+EXIT STATUS
+-----------
+*btrfs property* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`lsattr`(1),
+`chattr`(1)
diff --git a/Documentation/btrfs-qgroup.asciidoc b/Documentation/btrfs-qgroup.asciidoc
new file mode 100644
index 00000000..57cf012d
--- /dev/null
+++ b/Documentation/btrfs-qgroup.asciidoc
@@ -0,0 +1,145 @@
+btrfs-qgroup(8)
+===============
+
+NAME
+----
+btrfs-qgroup - control the quota group of a btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs qgroup* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs qgroup* is used to control quota group (qgroup) of a btrfs filesystem.
+
+NOTE: To use qgroup you need to enable quota first using *btrfs quota enable*
+command.
+
+WARNING: Qgroup is not stable yet and will impact performance in current mainline
+kernel (v3.14 so far).
+
+QGROUP
+------
+Quota groups or qgroup in btrfs make a tree hierarchy, the leaf qgroups are
+attached to subvolumes. The size limits are set per qgroup and apply when any
+limit is reached in tree that contains a given subvolume.
+
+The limits are separated between shared and exclusive and reflect the extent
+ownership. For example a fresh snapshot shares almost all the blocks with the
+original subvolume, new writes to either subvolume will raise towards the
+exclusive limit.
+
+The qgroup identifiers conform to 'level/id' where level 0 is reserved to the
+qgroups associated with subvolumes. Such qgroups are created automatically.
+
+The qgroup hierarchy is built by commands *create* and *assign*.
+
+NOTE: If the qgroup of a subvolume is destroyed, quota about the subvolume
+will not be functional until qgroup '0/<subvolume id>' is created again.
+
+SUBCOMMAND
+----------
+*assign* [options] <src> <dst> <path>::
+Assign qgroup <src> as the child qgroup of <dst> in the btrfs filesystem
+identified by <path>.
++
+`Options`
++
+--rescan::::
+Automatically schedule quota rescan if the new qgroup assignment leads to
+quota inconsistency.
+--no-rescan::::
+Explicitly ask not to do a rescan.
+
+*create* <qgroupid> <path>::
+Create a subvolume quota group.
++
+For the '0/<subvolume id>' qgroup, a qgroup can be created even before the
+subvolume is created.
+
+*destroy* <qgroupid> <path>::
+Destroy a qgroup.
++
+If a qgroup is not isolated, which means it is a parent or child qgroup, it
+can't be destroyed.
+
+*limit* [options] <size>|none [<qgroupid>] <path>::
+Limit the size of a qgroup to <size> or no limit in the btrfs filesystem
+identified by <path>.
++
+If <qgroupid> is not given, qgroup of the subvolume identified by <path>
+is used if possible.
++
+`Options`
++
+-c::::
+limit amount of data after compression. This is the default, it is currently not
+possible to turn off this option.
++
+-e::::
+limit space exclusively assigned to this qgroup.
+
+*remove* <src> <dst> <path>::
+Remove the relationship between child qgroup <src> and parent qgroup <dst> in
+the btrfs filesystem identified by <path>.
+
+*show* [options] <path>::
+Show all qgroups in the btrfs filesystem identified by <path>.
++
+`Options`
++
+-p::::
+print parent qgroup id.
+-c::::
+print child qgroup id.
+-r::::
+print limit of referenced size of qgroup.
+-e::::
+print limit of exclusive size of qgroup.
+-F::::
+list all qgroups which impact the given path(include ancestral qgroups)
+-f::::
+list all qgroups which impact the given path(exclude ancestral qgroups)
+--raw::::
+raw numbers in bytes, without the 'B' suffix.
+--human-readable::::
+print human friendly numbers, base 1024, this is the default
+--iec::::
+select the 1024 base for the following options, according to the IEC standard.
+--si::::
+select the 1000 base for the following options, according to the SI standard.
+--kbytes::::
+show sizes in KiB, or kB with --si.
+--mbytes::::
+show sizes in MiB, or MB with --si.
+--gbytes::::
+show sizes in GiB, or GB with --si.
+--tbytes::::
+show sizes in TiB, or TB with --si.
+--sort=[\+/-]<attr>[,[+/-]<attr>]...::::
+list qgroups in order of <attr>.
++
+<attr> can be one or more of qgroupid,rfer,excl,max_rfer,max_excl.
++
+Prefix \'+' means ascending order and \'-' means descending order of <attr>.
+If no prefix is given, use ascending order by default.
++
+If multiple <attr>s are given, use commas to separate them.
+
+EXIT STATUS
+-----------
+*btrfs qgroup* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-subvolume`(8),
+`btrfs-quota`(8)
diff --git a/Documentation/btrfs-quota.asciidoc b/Documentation/btrfs-quota.asciidoc
new file mode 100644
index 00000000..00e09c61
--- /dev/null
+++ b/Documentation/btrfs-quota.asciidoc
@@ -0,0 +1,56 @@
+btrfs-quota(8)
+==============
+
+NAME
+----
+btrfs-quota - control the quota of a btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs quota* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs quota* is used to enable/disable or rescan subvolume quota of a btrfs
+filesystem.
+
+For setting quota or other quota operations on a btrfs filesystem, please see
+`btrfs-qgroup`(8) for details.
+
+WARNING: Quota and qgroup in btrfs filesystem are not yet stable and impact
+performance in the mainline kernel (v3.14 so far).
+
+SUBCOMMAND
+----------
+*disable* <path>::
+Disable subvolume quota support for a filesystem.
+
+*enable* <path>::
+Enable subvolume quota support for a filesystem.
+
+*rescan* [-sw] <path>::
+Trash all qgroup numbers and scan the metadata again with the current config.
++
+`Options`
++
+-s::::
+show status of a running rescan operation.
+-w::::
+wait for rescan operation to finish(can be already in progress).
+
+EXIT STATUS
+-----------
+*btrfs quota* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-subvolume`(8),
+`btrfs-qgroup`(8)
diff --git a/Documentation/btrfs-receive.asciidoc b/Documentation/btrfs-receive.asciidoc
new file mode 100644
index 00000000..84b85c1c
--- /dev/null
+++ b/Documentation/btrfs-receive.asciidoc
@@ -0,0 +1,67 @@
+btrfs-receive(8)
+================
+
+NAME
+----
+btrfs-receive - receive subvolumes from stdin/file.
+
+SYNOPSIS
+--------
+*btrfs receive* [options] <mount>
+
+DESCRIPTION
+-----------
+Receives one or more subvolumes that were previously
+sent with *btrfs send*. The received subvolumes are stored
+into <mount>.
+
+*btrfs receive* will fail in the following cases:
+
+1. a receiving subvolume already exists.
+
+2. a previously received subvolume was changed after it was received.
+
+3. default subvolume is changed or you don't mount btrfs filesystem with
+fs tree.
+
+After receiving a subvolume, it is immediately set to read only.
+
+`Options`
+
+-v::
+Enable verbose debug output. Each occurrence of this option increases the
+verbose level more.
+-f <infile>::
+By default, btrfs receive uses stdin to receive the subvolumes.
+Use this option to specify a file to use instead.
+-C|--chroot::
+Confine the process to <mount> using chroot.
+-e::
+Terminate after receiving an <end cmd> in the data stream.
+Without this option, the receiver terminates only if an error is recognized
+or on EOF.
+--max-errors <N>::
+Terminate as soon as N errors happened while processing commands from the send
+stream. Default value is 1. A value of 0 means no limit.
+-m::
+The root mount point of the destination fs.
++
+By default the mountpoint is searched in /proc/self/mounts.
+If you do not have /proc, eg. in a chroot environment, use this option to tell
+us where this filesystem is mounted.
+
+EXIT STATUS
+-----------
+*btrfs receive* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-send`(8)
diff --git a/Documentation/btrfs-replace.asciidoc b/Documentation/btrfs-replace.asciidoc
new file mode 100644
index 00000000..5a14a40a
--- /dev/null
+++ b/Documentation/btrfs-replace.asciidoc
@@ -0,0 +1,76 @@
+btrfs-replace(8)
+================
+
+NAME
+----
+btrfs-replace - replace devices managed by btrfs with other device.
+
+SYNOPSIS
+--------
+*btrfs replace* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs replace* is used to replace btrfs managed devices with other device.
+
+SUBCOMMAND
+----------
+*cancel* <mount_point>::
+Cancel a running device replace operation.
+
+*start* [-Bfr] <srcdev>|<devid> <targetdev> <path>::
+Replace device of a btrfs filesystem.
++
+On a live filesystem, duplicate the data to the target device which
+is currently stored on the source device.
+If the source device is not available anymore, or if the -r option is set,
+the data is built only using the RAID redundancy mechanisms.
+After completion of the operation, the source device is removed from the
+filesystem.
+If the <srcdev> is a numerical value, it is assumed to be the device id
+of the filesystem which is mounted at <path>, otherwise it is
+the path to the source device. If the source device is disconnected
+from the system, you have to use the devid parameter format.
+The <targetdev> needs to be same size or larger than the <srcdev>.
++
+`Options`
++
+-r::::
+only read from <srcdev> if no other zero-defect mirror exists.
+(enable this if your drive has lots of read errors, the access would be very
+slow)
+-f::::
+force using and overwriting <targetdev> even if it looks like
+containing a valid btrfs filesystem.
++
+A valid filesystem is assumed if a btrfs superblock is found which contains a
+correct checksum. Devices which are currently mounted are
+never allowed to be used as the <targetdev>.
++
+-B::::
+no background replace.
+
+*status* [-1] <mount_point>::
+Print status and progress information of a running device replace operation.
++
+`Options`
++
+-1::::
+print once instead of print continuously until the replace
+operation finishes (or is canceled)
+
+EXIT STATUS
+-----------
+*btrfs replace* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-device`(8)
diff --git a/Documentation/btrfs-rescue.asciidoc b/Documentation/btrfs-rescue.asciidoc
new file mode 100644
index 00000000..42aca645
--- /dev/null
+++ b/Documentation/btrfs-rescue.asciidoc
@@ -0,0 +1,90 @@
+btrfs-rescue(8)
+===============
+
+NAME
+----
+btrfs-rescue - Recover a damaged btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs rescue* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs rescue* is used to try to recover a damaged btrfs filesystem.
+
+SUBCOMMAND
+----------
+*chunk-recover* [options] <device>::
+Recover the chunk tree by scanning the devices
++
+`Options`
++
+-y::::
+assume an answer of 'yes' to all questions.
+-v::::
+verbose mode.
+-h::::
+help.
+
+NOTE: Since *chunk-recover* will scan the whole device, it will be *VERY* slow
+especially when executed on a large device.
+
+*super-recover* [options] <device>::
+Recover bad superblocks from good copies.
++
+`Options`
++
+-y::::
+assume an answer of 'yes' to all questions.
+-v::::
+verbose mode.
+
+*zero-log* <device>::
+clear the filesystem log tree
+
+This command will clear the filesystem log tree. This may fix a specific
+set of problems when the filesystem mount fails due to the log replay. See below
+for sample stacktraces that may show up in system log.
+
+The common case where this happens has been fixed a long time ago,
+so it is unlikely that you will see this particular problem, but the utility is
+kept around.
+
+NOTE: clearing the log may lead to loss of changes that were made since the
+last transaction commit. This may be up to 30 seconds (default commit period)
+or less if the commit was implied by other filesystem activity.
+
+One can determine whether *zero-log* is needed according to the kernel
+backtrace:
+----
+? replay_one_dir_item+0xb5/0xb5 [btrfs]
+? walk_log_tree+0x9c/0x19d [btrfs]
+? btrfs_read_fs_root_no_radix+0x169/0x1a1 [btrfs]
+? btrfs_recover_log_trees+0x195/0x29c [btrfs]
+? replay_one_dir_item+0xb5/0xb5 [btrfs]
+? btree_read_extent_buffer_pages+0x76/0xbc [btrfs]
+? open_ctree+0xff6/0x132c [btrfs]
+----
+
+If the errors are like above, then *zero-log* should be used to clear
+the log and the filesystem may be mounted normally again. The keywords to look
+for are 'open_ctree' which says that it's during mount and function names
+that contain 'replay', 'recover' or 'log_tree'.
+
+EXIT STATUS
+-----------
+*btrfs rescue* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-scrub`(8),
+`btrfs-check`(8)
diff --git a/Documentation/btrfs-restore.asciidoc b/Documentation/btrfs-restore.asciidoc
new file mode 100644
index 00000000..ec3a08bc
--- /dev/null
+++ b/Documentation/btrfs-restore.asciidoc
@@ -0,0 +1,91 @@
+btrfs-restore(8)
+================
+
+NAME
+----
+btrfs-restore - try to restore files from a damaged btrfs filesystem image
+
+SYNOPSIS
+--------
+*btrfs restore* [options] <device> <path> | -l <device>
+
+DESCRIPTION
+-----------
+*btrfs restore* is used to try to salvage files from a damaged filesystem and
+restore them into <path> or just list the tree roots.
+
+Since current `btrfs-check`(8) or `btrfs-rescue`(8) only have very limited usage,
+*btrfs restore* is normally a better choice.
+
+NOTE: It is recommended to read the following btrfs wiki page if your data is
+not salvaged with default option: +
+https://btrfs.wiki.kernel.org/index.php/Restore
+
+OPTIONS
+-------
+-s|--snapshots::
+get snapshots, btrfs restore skips snapshots by default.
+
+-x|--xattr::
+get extended attributes.
+
+-m|--metadata::
+restore owner, mode and times.
+
+-S|--symlinks::
+restore symbolic links as well as normal files.
+
+-v|--verbose::
+verbose.
+
+-i|--ignore-errors::
+ignore errors.
+
+-o|--overwrite::
+overwrite directories/files in <path>.
+
+-t <bytenr>::
+use <bytenr> to read root tree.
+
+-f <bytenr>::
+only restore files that are under specified root whose root bytenr is <bytenr>.
+
+-u|--super <mirror>::
+use given superblock mirror identified by <mirror>, it can be 0,1,2.
+
+-r|--root <rootid>::
+only restore files that are under specified root whose objectid is <rootid>.
+
+-d::
+find dir.
+
+-l|--list-roots::
+list tree roots.
+
+-D|--dry-run::
+dry run (only list files that would be recovered).
+
+--path-regex <regex>::
+restore only filenames matching regex, you have to use following syntax (possibly quoted):
++
++^/(|home(|/username(|/Desktop(|/.*))))$+
+
+-c::
+ignore case (--path-regex only).
+
+EXIT STATUS
+-----------
+*btrfs restore* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-rescue`(8),
+`btrfs-check`(8)
diff --git a/Documentation/btrfs-scrub.asciidoc b/Documentation/btrfs-scrub.asciidoc
new file mode 100644
index 00000000..7750868d
--- /dev/null
+++ b/Documentation/btrfs-scrub.asciidoc
@@ -0,0 +1,94 @@
+btrfs-scrub(8)
+==============
+
+NAME
+----
+btrfs-scrub - scrub btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs scrub* <subcommand> <args>
+
+DESCRIPTION
+-----------
+*btrfs scrub* is used to scrub a btrfs filesystem, which will read all data
+from all disks and verify checksums.
+
+SUBCOMMAND
+----------
+*cancel* <path>|<device>::
+If a scrub is running on the filesystem identified by <path>, cancel it.
++
+Progress is saved in the scrub progress file and scrubbing can be resumed later
+using the scrub resume command.
+If a <device> is given, the corresponding filesystem is found and
+scrub cancel behaves as if it was called on that filesystem.
+
+*resume* [-BdqrR] [-c <ioprio_class> -n <ioprio_classdata>] <path>|<device>::
+Resume a canceled or interrupted scrub cycle on the filesystem identified by
+<path> or on a given <device>.
++
+Does not start a new scrub if the last scrub finished successfully.
++
+`Options`
++
+see *scrub start*.
+
+*start* [-BdqrRf] [-c <ioprio_class> -n <ioprio_classdata>] <path>|<device>::
+Start a scrub on all devices of the filesystem identified by <path> or on
+a single <device>. If a scrub is already running, the new one fails.
++
+Without options, scrub is started as a background process.
+Progress can be obtained with the *scrub status* command. Scrubbing
+involves reading all data from all disks and verifying checksums. Errors are
+corrected along the way if possible.
++
+The default IO priority of scrub is the idle class. The priority can be
+configured similar to the `ionice`(1) syntax using '-c' and '-n' options.
++
+`Options`
++
+-B::::
+Do not background and print scrub statistics when finished.
+-d::::
+Print separate statistics for each device of the filesystem (-B only).
+-q::::
+Quiet. Omit error messages and statistics.
+-r::::
+Read only mode. Do not attempt to correct anything.
+-R::::
+Raw print mode. Print full data instead of summary.
+-c <ioprio_class>::::
+Set IO priority class (see `ionice`(1) manpage).
+-n <ioprio_classdata>::::
+Set IO priority classdata (see `ionice`(1) manpage).
+-f::::
+Force starting new scrub even if a scrub is already running.
+This is useful when scrub stat record file is damaged.
+
+*status* [-d] <path>|<device>::
+Show status of a running scrub for the filesystem identified by <path> or
+for the specified <device>.
++
+If no scrub is running, show statistics of the last finished or canceled scrub
+for that filesystem or device.
++
+`Options`
++
+-d::::
+Print separate statistics for each device of the filesystem.
+
+EXIT STATUS
+-----------
+*btrfs scrub* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-select-super.asciidoc b/Documentation/btrfs-select-super.asciidoc
new file mode 100644
index 00000000..a8d7ef00
--- /dev/null
+++ b/Documentation/btrfs-select-super.asciidoc
@@ -0,0 +1,29 @@
+btrfs-select-super(8)
+=====================
+
+NAME
+----
+btrfs-select-super - overwrite superblock with a backup
+
+SYNOPSIS
+--------
+*btrfs-select-super* -s number dev
+
+DESCRIPTION
+-----------
+*btrfs-select-super* destructively overwrites all copies of the superblock
+with a specified copy. This helps with certain cases of damage, especially
+when barriers were disabled during a power failure. You can find a valid
+copy of the superblock with *btrfs check -s*.
+
+The filesystem specified by `dev` must not be mounted.
+
+OPTIONS
+-------
+-s|--super <superblock>::
+use <superblock>th superblock copy, valid values are 0 up to 2 if the
+respective superblock offset is within the filesystem
+
+SEE ALSO
+--------
+`btrfs-check`(8)
diff --git a/Documentation/btrfs-send.asciidoc b/Documentation/btrfs-send.asciidoc
new file mode 100644
index 00000000..e05342ff
--- /dev/null
+++ b/Documentation/btrfs-send.asciidoc
@@ -0,0 +1,63 @@
+btrfs-send(8)
+=============
+
+NAME
+----
+btrfs-send - send data of subvolume(s) to stdout/file.
+
+SYNOPSIS
+--------
+*btrfs send* [-ve] [-p <parent>] [-c <clone-src>] [-f <outfile>] <subvol> [<subvol>...]
+
+DESCRIPTION
+-----------
+Sends the subvolume(s) specified by <subvol> to stdout.
+<subvol> should be read-only here.
+
+By default, this will send the whole subvolume. To do an incremental
+send, use '-p <parent>'.
+
+If you want to allow btrfs to clone from any additional local snapshots,
+use '-c <clone-src>' (multiple times where applicable).
+
+You must not specify clone sources unless you guarantee that these snapshots
+are exactly in the same state on both sides, the sender and the receiver.
+
+It is allowed to omit the '-p <parent>' option when '-c <clone-src>' options
+are given, in which case *btrfs send* will determine a suitable parent among the
+clone sources itself.
+
+`Options`
+
+-v::
+Enable verbose debug output. Each occurrence of this option increases the
+verbose level more.
+-e::
+If sending multiple subvols at once, use the new format and omit the <end cmd> between the subvols.
+-p <parent>::
+Send an incremental stream from <parent> to <subvol>.
+-c <clone-src>::
+Use this snapshot as a clone source for an incremental send (multiple allowed).
+-f <outfile>::
+Output is normally written to stdout. To write to a file, use this option.
+An alternative would be to use pipes.
+--no-data::
+Send in NO_FILE_DATA mode. The output stream does not contain any file
+data and thus cannot be used to transfer changes. This mode is faster and
+useful to show the differences in metadata.
+
+EXIT STATUS
+-----------
+*btrfs send* returns a zero exit status if it succeeds. Non zero is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-receive`(8)
diff --git a/Documentation/btrfs-show-super.asciidoc b/Documentation/btrfs-show-super.asciidoc
new file mode 100644
index 00000000..8866c940
--- /dev/null
+++ b/Documentation/btrfs-show-super.asciidoc
@@ -0,0 +1,54 @@
+btrfs-show-super(8)
+===================
+
+NAME
+----
+btrfs-show-super - show btrfs superblock information stored in devices
+
+SYNOPSIS
+--------
+*btrfs-show-super* [options] <dev> [<dev>...]
+
+DESCRIPTION
+-----------
+*btrfs-show-super* is used to print the information of superblock,
+you can specify which mirror to print out.
+
+By default, every device's first superblock will be printed out.
+
+Mainly used for debugging purposes.
+
+OPTIONS
+-------
+-f::
+Print full superblock information.
++
+Including the system chunk array and backup roots.
+
+-a::
+Print information of all superblocks.
++
+If this option is given, '-i' option will be ignored.
+
+-i <super_mirror>::
+Specify which mirror to print out.
++
+<super_mirror> is between 0 and 2.
+If several '-i <super_mirror>' are given, only the last one is valid.
+
+-F::
+Attempt to print the superblock even if no superblock magic is found. May end
+badly.
+
+-s <bytenr>::
+specify offset to a superblock in a non-standard location at 'bytenr', useful
+for debugging (disables the '-f' option)
+
+EXIT STATUS
+-----------
+*btrfs-show-super* will return 0 if no error happened.
+If any problems happened, 1 will be returned.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/btrfs-subvolume.asciidoc b/Documentation/btrfs-subvolume.asciidoc
new file mode 100644
index 00000000..96cfe4ac
--- /dev/null
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -0,0 +1,182 @@
+btrfs-subvolume(8)
+==================
+
+NAME
+----
+btrfs-subvolume - control btrfs subvolume(s)
+
+SYNOPSIS
+--------
+*btrfs subvolume* <subcommand> [<args>]
+
+DESCRIPTION
+-----------
+*btrfs subvolume* is used to control the filesystem to create/delete/list/show
+subvolumes and snapshots.
+
+SUBVOLUME AND SNAPSHOT
+----------------------
+A subvolume in btrfs is not like an LVM logical volume, which is quite
+independent from each other, a btrfs subvolume has its hierarchy and relations
+between other subvolumes.
+
+A subvolume in btrfs can be accessed in two ways.
+
+1. From the parent subvolume +
+When accessing from the parent subvolume, the subvolume can be used just
+like a directory. It can have child subvolumes and its own files/directories.
+
+2. Separate mounted filesystem +
+When `mount`(8) using 'subvol' or 'subvolid' mount option, one can access
+files/directories/subvolumes inside it, but nothing in parent subvolumes.
+
+Also every btrfs filesystem has a default subvolume as its initial top-level
+subvolume, whose subvolume id is 5. (0 is also acceptable as an alias.)
+
+A btrfs snapshot is much like a subvolume, but shares its data(and metadata)
+with other subvolume/snapshot. Due to the capabilities of COW, modifications
+inside a snapshot will only show in a snapshot but not in its source subvolume.
+
+Although in btrfs, subvolumes/snapshots are treated as directories, only
+subvolume/snapshot can be the source of a snapshot, snapshot can not be made
+from normal directories.
+
+SUBCOMMAND
+----------
+*create* [-i <qgroupid>] [<dest>]<name>::
+Create a subvolume <name> in <dest>.
++
+If <dest> is not given, subvolume <name> will be created in the current
+directory.
++
+`Options`
++
+-i <qgroupid>::::
+Add the newly created subvolume to a qgroup. This option can be given multiple
+times.
+
+*delete* [options] <subvolume> [<subvolume>...]::
+Delete the subvolume(s) from the filesystem.
++
+If <subvolume> is not a subvolume, btrfs returns an error but continues if
+there are more arguments to process.
++
+The corresponding directory is removed instantly but the data blocks are
+removed later. The deletion does not involve full commit by default due to
+performance reasons (as a consequence, the subvolume may appear again after a
+crash). Use one of the '--commit' options to wait until the operation is safely
+stored on the media.
++
+`Options`
++
+-c|--commit-after::::
+wait for transaction commit at the end of the operation
++
+-C|--commit-each::::
+wait for transaction commit after deleting each subvolume
+
+*find-new* <subvolume> <last_gen>::
+List the recently modified files in a subvolume, after <last_gen> ID.
+
+*get-default* <path>::
+Get the default subvolume of the filesystem <path>.
++
+The output format is similar to *subvolume list* command.
+
+*list* [options] [-G [\+|-]<value>] [-C [+|-]<value>] [--sort=rootid,gen,ogen,path] <path>::
+List the subvolumes present in the filesystem <path>.
++
+For every subvolume the following information is shown by default. +
+ID <ID> top level <ID> path <path> +
+where path is the relative path of the subvolume to the top level subvolume.
+The subvolume's ID may be used by the subvolume set-default command,
+or at mount time via the subvolid= option.
+If `-p` is given, then parent <ID> is added to the output between ID
+and top level. The parent's ID may be used at mount time via the
+`subvolrootid=` option.
++
+`Options`
++
+-p::::
+print parent ID.
+-a::::
+print all the subvolumes in the filesystem and distinguish between
+absolute and relative path with respect to the given <path>.
+-c::::
+print the ogeneration of the subvolume, aliases: ogen or origin generation.
+-g::::
+print the generation of the subvolume.
+-o::::
+print only subvolumes below specified <path>.
+-u::::
+print the UUID of the subvolume.
+-q::::
+print the parent uuid of subvolumes (and snapshots).
+-R::::
+print the UUID of the sent subvolume, where the subvolume is the result of a receive operation
+-t::::
+print the result as a table.
+-s::::
+only snapshot subvolumes in the filesystem will be listed.
+-r::::
+only readonly subvolumes in the filesystem will be listed.
+-G [+|-]<value>::::
+list subvolumes in the filesystem that its generation is
+>=, \<= or = value. \'\+' means >= value, \'-' means \<= value, If there is
+neither \'+' nor \'-', it means = value.
+-C [+|-]<value>::::
+list subvolumes in the filesystem that its ogeneration is
+>=, \<= or = value. The usage is the same as for the '-G' option.
+--sort=rootid,gen,ogen,path::::
+list subvolumes in order by specified items.
+you can add \'\+' or \'-' in front of each items, \'+' means ascending,
+\'-' means descending. The default is ascending.
++
+for --sort you can combine some items together by \',', just like
+--sort=+ogen,-gen,path,rootid.
+
+*set-default* <id> <path>::
+Set the subvolume of the filesystem <path> which is mounted as
+default.
++
+The subvolume is identified by <id>, which is returned by the *subvolume list*
+command.
+
+*show* <path>::
+Show information of a given subvolume in the <path>.
+
+*snapshot* [-r] <source> <dest>|[<dest>/]<name>::
+Create a writable/readonly snapshot of the subvolume <source> with the
+name <name> in the <dest> directory.
++
+If only <dest> is given, the subvolume will be named the basename of <source>.
+If <source> is not a subvolume, btrfs returns an error.
+If '-r' is given, the snapshot will be readonly.
+
+*sync* <path> [subvolid...]::
+Wait until given subvolume(s) are completely removed from the filesystem
+after deletion. If no subvolume id is given, wait until all current deletion
+requests are completed, but do not wait for subvolumes deleted meanwhile.
+The status of subvolume ids is checked periodically.
++
+`Options`
++
+-s <N>::::
+sleep N seconds between checks (default: 1)
+
+EXIT STATUS
+-----------
+*btrfs subvolume* returns a zero exit status if it succeeds. A non-zero value is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8),
+`btrfs-quota`(8),
+`btrfs-qgroup`(8),
diff --git a/Documentation/btrfs.asciidoc b/Documentation/btrfs.asciidoc
new file mode 100644
index 00000000..abf1ff89
--- /dev/null
+++ b/Documentation/btrfs.asciidoc
@@ -0,0 +1,127 @@
+btrfs(8)
+========
+
+NAME
+----
+btrfs - control a btrfs filesystem
+
+SYNOPSIS
+--------
+*btrfs* <command> [<args>]
+
+DESCRIPTION
+-----------
+The *btrfs* utility is a toolbox for managing btrfs filesystems. There are
+command groups to work with subvolumes, devices, for whole filesystem or other
+specific actions. See section *COMMANDS*.
+
+COMMAND SYNTAX
+--------------
+
+Any command name can be shortened as far as it stays unambiguous,
+however it is recommended to use full command names in scripts.
+All command groups have their manual page named *btrfs-<group>*.
+
+For example: it is possible to run *btrfs sub snaps* instead of
+*btrfs subvolume snapshot*.
+But *btrfs file s* is not allowed, because *file s* may be interpreted
+both as *filesystem show* and as *filesystem sync*.
+
+If the command name is ambiguous, the list of conflicting options is
+printed.
+
+For an overview of a given command use 'btrfs command --help'
+or 'btrfs [command...] --help --full' to print all available options.
+
+COMMANDS
+--------
+*balance*::
+ Balance btrfs filesystem chunks across single or several devices. +
+ See `btrfs-balance`(8) for details.
+
+*check*::
+ Do off-line check on a btrfs filesystem. +
+ See `btrfs-check`(8) for details.
+
+*device*::
+ Manage devices managed by btrfs, including add/delete/scan and so
+ on. +
+ See `btrfs-device`(8) for details.
+
+*filesystem*::
+ Manage a btrfs filesystem, including label setting/sync and so on. +
+ See `btrfs-filesystem`(8) for details.
+
+*inspect-internal*::
+ Debug tools for developers/hackers. +
+ See `btrfs-inspect-internal`(8) for details.
+
+*property*::
+ Get/set a property from/to a btrfs object. +
+ See `btrfs-property`(8) for details.
+
+*qgroup*::
+ Manage quota group(qgroup) for btrfs filesystem. +
+ See `btrfs-qgroup`(8) for details.
+
+*quota*::
+ Manage quota on a btrfs filesystem, like enabling/rescan etc. +
+ See `btrfs-quota`(8) and `btrfs-qgroup`(8) for details.
+
+*receive*::
+ Receive subvolume data from stdin/file for restore etc. +
+ See `btrfs-receive`(8) for details.
+
+*replace*::
+ Replace btrfs devices. +
+ See `btrfs-replace`(8) for details.
+
+*rescue*::
+ Try to rescue damaged btrfs filesystem. +
+ See `btrfs-rescue`(8) for details.
+
+*restore*::
+ Try to restore files from a damaged btrfs filesystem. +
+ See `btrfs-restore`(8) for details.
+
+*scrub*::
+ Scrub a btrfs filesystem. +
+ See `btrfs-scrub`(8) for details.
+
+*send*::
+ Send subvolume data to stdout/file for backup etc. +
+ See `btrfs-send`(8) for details.
+
+*subvolume*::
+ Create/delete/list/manage btrfs subvolume. +
+ See `btrfs-subvolume`(8) for details.
+
+EXIT STATUS
+-----------
+*btrfs* returns a zero exit status if it succeeds. A non-zero value is
+returned in case of failure.
+
+AVAILABILITY
+------------
+*btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8), `ionice`(1),
+`btrfs-balance`(8),
+`btrfs-check`(8),
+`btrfs-device`(8),
+`btrfs-filesystem`(8),
+`btrfs-inspect-internal`(8),
+`btrfs-property`(8),
+`btrfs-qgroup`(8),
+`btrfs-quota`(8),
+`btrfs-receive`(8),
+`btrfs-replace`(8),
+`btrfs-rescue`(8),
+`btrfs-restore`(8),
+`btrfs-scrub`(8),
+`btrfs-send`(8),
+`btrfs-subvolume`(8),
diff --git a/Documentation/btrfstune.asciidoc b/Documentation/btrfstune.asciidoc
new file mode 100644
index 00000000..f5cf15e7
--- /dev/null
+++ b/Documentation/btrfstune.asciidoc
@@ -0,0 +1,75 @@
+btrfstune(8)
+============
+
+NAME
+----
+btrfstune - tune various filesystem parameters
+
+SYNOPSIS
+--------
+*btrfstune* [options] <dev> [<dev>...]
+
+DESCRIPTION
+-----------
+*btrfstune* can be used to enable, disable or set various filesystem
+parameters. The filesystem must be unmounted.
+
+The common usecase is to enable features that were not enabled at mkfs time.
+Please make sure that you have kernel support for the features. You can find a
+complete list of features and kernel version of their introduction at
+https://btrfs.wiki.kernel.org/index.php/Changelog#By_feature .
+
+OPTIONS
+-------
+-S <0|1>::
+Enable seeding on a given device. Value 1 will enable seeding, 0 will disable it. +
+A seeding filesystem is forced to be mounted read-only. A new device can be added
+to the filesystem and will capture all writes keeping the seeding device intact.
+-r::
+Enable extended inode refs (hardlink limit per file in a directory is 65536),
+enabled by mkfs feature 'extref'. Since kernel 3.7.
+-x::
+Enable skinny metadata extent refs (more efficient representation of extents),
+enabled by mkfs feature 'skinny-metadata'. Since kernel 3.10.
+-n::
+Enable no-holes feature (more efficient representation of file holes), enabled
+by mkfs feature 'no-holes'. Since kernel 3.14.
+-f::
+Allow dangerous changes, e.g. clear the seeding flag or change fsid. Make sure
+that you are aware of the dangers.
+-u::
+Change fsid to a randomly generated UUID or continue previous fsid change
+operation in case it was interrupted.
+-U <UUID>::
+Change fsid to 'UUID'.
++
+The 'UUID' should be a 36-byte string in `printf`(3) format
+'"%08x-%04x-%04x-%04x-%012x"'.
+If there is a previous unfinished fsid change, it will continue only if the
+'UUID' matches the unfinished one or if you use the option '-u'.
+
+WARNING: Cancelling or interrupting a UUID change operation will make the
+filesystem temporarily unmountable. To fix it, rerun 'btrfstune -u' to restore
+the UUID and let it complete.
+
+WARNING: Clearing the seeding flag on a device may be dangerous.
+If a previously-seeding device is changed, all filesystems that used that
+device will become unmountable. Setting the seeding flag back will not fix
+that. +
+A valid usecase is 'seeding device as a base image'. Clear the seeding
+flag, update the filesystem and make it seeding again, provided that it's ok
+to throw away all filesystems built on top of the previous base.
+
+EXIT STATUS
+-----------
+*btrfstune* returns 0 if no error happened, 1 otherwise.
+
+COMPATIBILITY NOTE
+------------------
+This tool exists for historical reasons but is still in use today. The
+functionality is about to be merged to the main tool someday and *btrfstune*
+will become deprecated and removed afterwards.
+
+SEE ALSO
+--------
+`mkfs.btrfs`(8)
diff --git a/Documentation/fsck.btrfs.asciidoc b/Documentation/fsck.btrfs.asciidoc
new file mode 100644
index 00000000..0bad075b
--- /dev/null
+++ b/Documentation/fsck.btrfs.asciidoc
@@ -0,0 +1,51 @@
+fsck.btrfs(8)
+=============
+
+NAME
+----
+fsck.btrfs - do nothing, successfully
+
+SYNOPSIS
+--------
+*fsck.btrfs* [-aApy] [<device>...]
+
+DESCRIPTION
+-----------
+*fsck.btrfs* is a type of utility that should exist for any filesystem and is
+called during system setup when the corresponding `/etc/fstab` entries
+contain non-zero value for `fs_passno` , see `fstab`(5) for more.
+
+Traditional filesystems need to run their respective fsck utility in case the
+filesystem was not unmounted cleanly and the log needs to be replayed before
+mount. This is not needed for BTRFS. You should set fs_passno to 0.
+
+If you wish to check the consistency of a BTRFS filesystem or repair a damaged
+filesystem, see `btrfs-check`(8). By default the filesystem
+consistency is checked, the repair mode is enabled via '--repair' option (use
+with care!).
+
+OPTIONS
+-------
+The options are all the same and detect if *fsck.btrfs* is executed in
+non-interactive mode and exits with success,
+otherwise prints a message about btrfs check.
+
+EXIT STATUS
+-----------
+There are two possible exit codes returned:
+
+0::
+No error
+
+8::
+Operational error, e.g. device does not exist
+
+FILES
+-----
+`/etc/fstab`
+
+SEE ALSO
+--------
+`btrfs`(8),
+`fsck`(8),
+`fstab`(5),
diff --git a/Documentation/manpage-base.xsl b/Documentation/manpage-base.xsl
new file mode 100644
index 00000000..a264fa61
--- /dev/null
+++ b/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+ git.docbook.backslash and git.docbook.dot params
+ must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB(',
+ substring-after(@id,'-'),')',
+ $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>sp&#10;</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB',
+ substring-after(@arearefs,'-'),
+ '. ',$git.docbook.backslash,'fR')"/>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-bold-literal.xsl b/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 00000000..608eb5df
--- /dev/null
+++ b/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+ this makes literal text easier to distinguish in manpages
+ viewed on a tty -->
+<xsl:template match="literal">
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fB</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-normal.xsl b/Documentation/manpage-normal.xsl
new file mode 100644
index 00000000..a48f5b11
--- /dev/null
+++ b/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot" >.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/Documentation/mkfs.btrfs.asciidoc b/Documentation/mkfs.btrfs.asciidoc
new file mode 100644
index 00000000..6a492658
--- /dev/null
+++ b/Documentation/mkfs.btrfs.asciidoc
@@ -0,0 +1,341 @@
+mkfs.btrfs(8)
+=============
+
+NAME
+----
+mkfs.btrfs - create a btrfs filesystem
+
+SYNOPSIS
+--------
+*mkfs.btrfs*
+$$[-A|--alloc-start <alloc-start>]$$
+$$[-b|--byte-count <byte-count>]$$
+$$[-d|--data <data-profile>]$$
+$$[-m|--metadata <metadata profile>]$$
+$$[-M|--mixed]$$
+$$[-l|--leafsize <leafsize>]$$
+$$[-n|--nodesize <nodesize>]$$
+$$[-s|--sectorsize <sectorsize>]$$
+$$[-L|--label <label>]$$
+$$[-K|--nodiscard]$$
+$$[-r|--rootdir <rootdir>]$$
+$$[-O|--features <feature1>[,<feature2>...]]$$
+$$[-U|--uuid <UUID>]$$
+$$[-f|--force]$$
+$$[-q|--quiet]$$
+$$[--help]$$
+$$[-V|--version]$$
+$$<device> [<device>...]$$
+
+DESCRIPTION
+-----------
+*mkfs.btrfs* is used to create the btrfs filesystem on a single or multiple
+devices. <device> is typically a block device but can be a file-backed image
+as well. Multiple devices are grouped by UUID of the filesystem.
+
+Before mounting such filesystem, the kernel module must know all the devices
+either via preceding execution of *btrfs device scan* or using the *device*
+mount option. See section *MULTIPLE DEVICES* for more details.
+
+OPTIONS
+-------
+*-A|--alloc-start <offset>*::
+(An option to help debugging chunk allocator.)
+Specify the (physical) offset from the start of the device at which allocations
+start. The default value is zero.
+
+*-b|--byte-count <size>*::
+Specify the size of the filesystem. If this option is not used,
+mkfs.btrfs uses the entire device space for the filesystem.
+
+*-d|--data <profile>*::
+Specify the profile for the data block groups. Valid values are 'raid0',
+'raid1', 'raid5', 'raid6', 'raid10', 'single' or 'dup' (case does not matter).
++
+See 'DUP PROFILES ON A SINGLE DEVICE' for more.
+
+*-m|--metadata <profile>*::
+Specify the profile for the metadata block groups.
+Valid values are 'raid0', 'raid1', 'raid5', 'raid6', 'raid10', 'single' or
+'dup' (case does not matter).
++
+A single device filesystem will default to 'DUP', unless an SSD is detected. Then
+it will default to 'single'. The detection is based on the value of
+`/sys/block/DEV/queue/rotational`, where 'DEV' is the short name of the device.
++
+Note that the rotational status can be arbitrarily set by the underlying block
+device driver and may not reflect the true status (network block device, memory-backed
+SCSI devices etc). Use the options '--data/--metadata' to avoid confusion.
++
+See 'DUP PROFILES ON A SINGLE DEVICE' for more details.
+
+*-M|--mixed*::
+Normally the data and metadata block groups are isolated. The 'mixed' mode
+will remove the isolation and store both types in the same block group type.
+This helps to utilize the free space regardless of the purpose and is suitable
+for small devices. The separate allocation of block groups leads to a situation
+where the space is reserved for the other block group type, is not available for
+allocation and can lead to ENOSPC state.
++
+The recommended size for the mixed mode is for filesystems less than 1GiB. The
+soft recommendation is to use it for filesystems smaller than 5GiB. The mixed
+mode may lead to degraded performance on larger filesystems, but is otherwise
+usable, even on multiple devices.
++
+The 'nodesize' and 'sectorsize' must be equal, and the block group types must
+match.
++
+NOTE: versions up to 4.2.x forced the mixed mode for devices smaller than 1GiB.
+This has been removed in 4.3+ as it caused some usability issues.
+
+*-l|--leafsize <size>*::
+Alias for --nodesize. Deprecated.
+
+*-n|--nodesize <size>*::
+Specify the nodesize, the tree block size in which btrfs stores metadata. The
+default value is 16KiB (16384) or the page size, whichever is bigger. Must be a
+multiple of the sectorsize, but not larger than 64KiB (65536). Leafsize always
+equals nodesize and the options are aliases.
++
+Smaller node size increases fragmentation but leads to higher b-trees which in
+turn leads to lower locking contention. Higher node sizes give better packing
+and less fragmentation at the cost of more expensive memory operations while
+updating the metadata blocks.
++
+NOTE: versions up to 3.11 set the nodesize to 4k.
+
+*-s|--sectorsize <size>*::
+Specify the sectorsize, the minimum data block allocation unit.
++
+The default value is the page size and is autodetected. If the sectorsize
+differs from the page size, the created filesystem may not be mountable by the
+kernel. Therefore it is not recommended to use this option unless you are going
+to mount it on a system with the appropriate page size.
+
+*-L|--label <string>*::
+Specify a label for the filesystem. The 'string' should be less than 256
+bytes and must not contain newline characters.
+
+*-K|--nodiscard*::
+Do not perform whole device TRIM operation on devices that are capable of that.
+
+*-r|--rootdir <rootdir>*::
+Populate the toplevel subvolume with files from 'rootdir'. This does not
+require root permissions and does not mount the filesystem.
+
+*-O|--features <feature1>[,<feature2>...]*::
+A list of filesystem features turned on at mkfs time. Not all features are
+supported by old kernels. To disable a feature, prefix it with '^'.
++
+See section *FILESYSTEM FEATURES* for more details. To see all available
+features that mkfs.btrfs supports run:
++
++mkfs.btrfs -O list-all+
+
+*-f|--force*::
+Forcibly overwrite the block devices when an existing filesystem is detected.
+By default, mkfs.btrfs will utilize 'libblkid' to check for any known
+filesystem on the devices. Alternatively you can use the `wipefs` utility
+to clear the devices.
+
+*-q|--quiet*::
+Print only error or warning messages. Options --features or --help are unaffected.
+
+*-U|--uuid <UUID>*::
+Create the filesystem with the given 'UUID'. The UUID must not exist on any
+filesystem currently present.
+
+*-V|--version*::
+Print the *mkfs.btrfs* version and exit.
+
+*--help*::
+Print help.
+
+SIZE UNITS
+----------
+The default unit is 'byte'. All size parameters accept suffixes in the 1024
+base. The recognized suffixes are: 'k', 'm', 'g', 't', 'p', 'e', both uppercase
+and lowercase.
+
+MULTIPLE DEVICES
+----------------
+
+Before mounting a multiple device filesystem, the kernel module must know the
+association of the block devices that are attached to the filesystem UUID.
+
+There is typically no action needed from the user. On a system that utilizes a
+udev-like daemon, any new block device is automatically registered. The rules
+call *btrfs device scan*.
+
+The same command can be used to trigger the device scanning if the btrfs kernel
+module is reloaded (naturally all previous information about the device
+registration is lost).
+
+Another possibility is to use the mount options *device* to specify the list of
+devices to scan at the time of mount.
+
+ # mount -o device=/dev/sdb,device=/dev/sdc /dev/sda /mnt
+
+NOTE: this means only scanning; if the devices do not exist in the system,
+mount will fail anyway. This can happen on systems without initramfs/initrd and
+root partition created with RAID1/10/5/6 profiles. The mount action can happen
+before all block devices are discovered. The waiting is usually done on the
+initramfs/initrd systems.
+
+FILESYSTEM FEATURES
+-------------------
+
+*mixed-bg*::
+mixed data and metadata block groups, also set by option '--mixed'
+
+*extref*::
+(default since btrfs-progs 3.12, kernel support since 3.7)
++
+increased hardlink limit per file in a directory to 65536, older kernels
+supported a varying number of hardlinks depending on the sum of all file name
+sizes that can be stored into one metadata block
+
+*raid56*::
+extended format for RAID5/6, also enabled if raid5 or raid6 block groups
+are selected
+
+*skinny-metadata*::
+(default since btrfs-progs 3.18, kernel support since 3.10)
++
+reduced-size metadata for extent references, saves a few percent of metadata
+
+*no-holes*::
+improved representation of file extents where holes are not explicitly
+stored as an extent, saves a few percent of metadata if sparse files are used
+
+BLOCK GROUPS, CHUNKS, RAID
+--------------------------
+
+The highlevel organizational units of a filesystem are block groups of three types:
+data, metadata and system.
+
+*DATA*::
+store data blocks and nothing else
+
+*METADATA*::
+store internal metadata in b-trees, can store file data if they fit into the
+inline limit
+
+*SYSTEM*::
+store structures that describe the mapping between the physical devices and the
+linear logical space representing the filesystem
+
+Other terms commonly used:
+
+*block group*::
+*chunk*::
+a logical range of space of a given profile, stores data, metadata or both;
+sometimes the terms are used interchangeably
++
+A typical size of metadata block group is 256MiB (filesystem smaller than
+50GiB) and 1GiB (larger than 50GiB), for data it's 1GiB. The system block group
+size is a few megabytes.
+
+*RAID*::
+a block group profile type that utilizes RAID-like features on multiple
+devices: striping, mirroring, parity
+
+*profile*::
+when used in connection with block groups refers to the allocation strategy
+and constraints, see the section 'PROFILES' for more details
+
+PROFILES
+--------
+
+There are the following block group types available:
+
+[ cols="^,^,^,^,^",width="60%" ]
+|=============================================================
+.2+^.<h| Profile 3+^.^h| Redundancy .2+^.<h| Min/max devices
+ ^.^h| Copies ^.^h| Parity ^.<h| Striping
+| single | 1 | | | 1/any
+| DUP | 2 / 1 device | | | 1/1 ^(see note)^
+| RAID0 | | | 1 to N | 2/any
+| RAID1 | 2 | | | 2/any
+| RAID10 | 2 | | 1 to N | 4/any
+| RAID5 | 1 | 1 | 2 to N - 1 | 2/any
+| RAID6 | 1 | 2 | 3 to N - 2 | 3/any
+|=============================================================
+
+'Note:' DUP may exist on more than 1 device if it starts on a single device and
+another one is added, but *mkfs.btrfs* will not let you create DUP on multiple
+devices.
+
+DUP PROFILES ON A SINGLE DEVICE
+-------------------------------
+
+The mkfs utility will let the user create a filesystem with profiles that write
+the logical blocks to 2 physical locations. Whether there are really 2
+physical copies highly depends on the underlying device type.
+
+For example, an SSD drive can remap the blocks internally to a single copy, thus
+deduplicating them. This negates the purpose of increased redundancy and just
+wastes space.
+
+The duplicated data/metadata may still be useful to statistically improve the
+chances on a device that might perform some internal optimizations. The actual
+details are not usually disclosed by vendors. As another example, the widely
+used USB flash or SD cards use a translation layer. The data lifetime may
+be affected by frequent plugging. The memory cells could get damaged, hopefully
+not destroying both copies of particular data.
+
+The traditional rotational hard drives usually fail at the sector level.
+
+In any case, a device that starts to misbehave and repairs from the DUP copy
+should be replaced! *DUP is not backup*.
+
+KNOWN ISSUES
+------------
+
+**SMALL FILESYSTEMS AND LARGE NODESIZE**
+
+The combination of small filesystem size and large nodesize is not recommended
+in general and can lead to various ENOSPC-related issues during mount time or runtime.
+
+Since mixed block group creation is optional, we allow small
+filesystem instances with differing values for 'sectorsize' and 'nodesize'
+to be created and could end up in the following situation:
+
+ # mkfs.btrfs -f -n 65536 /dev/loop0
+ btrfs-progs v3.19-rc2-405-g976307c
+ See http://btrfs.wiki.kernel.org for more information.
+
+ Performing full device TRIM (512.00MiB) ...
+ Label: (null)
+ UUID: 49fab72e-0c8b-466b-a3ca-d1bfe56475f0
+ Node size: 65536
+ Sector size: 4096
+ Filesystem size: 512.00MiB
+ Block group profiles:
+ Data: single 8.00MiB
+ Metadata: DUP 40.00MiB
+ System: DUP 12.00MiB
+ SSD detected: no
+ Incompat features: extref, skinny-metadata
+ Number of devices: 1
+ Devices:
+ ID SIZE PATH
+ 1 512.00MiB /dev/loop0
+
+ # mount /dev/loop0 /mnt/
+ mount: mount /dev/loop0 on /mnt failed: No space left on device
+
+The ENOSPC occurs during the creation of the UUID tree. This is caused
+by large metadata blocks and space reservation strategy that allocates more
+than can fit into the filesystem.
+
+
+AVAILABILITY
+------------
+*mkfs.btrfs* is part of btrfs-progs.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+
+SEE ALSO
+--------
+`btrfs`(8), `wipefs`(8)
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 00000000..85a839f1
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,79 @@
+Installation instructions
+=========================
+
+The Btrfs utility programs require the following libraries/tools to build:
+
+- libuuid - provided by util-linux, e2fsprogs/e2fslibs or libuuid
+- libblkid - block device id library
+- liblzo2 - LZO data compression library
+- zlib - ZLIB data compression library
+
+For the btrfs-convert utility:
+
+- e2fsprogs - ext2/ext3/ext4 file system libraries, also called e2fslibs
+
+Generating documentation:
+
+- asciidoc - text document format tool
+- xmlto - text document format tool
+
+XATTR library should be provided by the standard C library or by
+
+- libattr - extended attribute library
+
+Please note that the package names may differ according to the distribution.
+See https://btrfs.wiki.kernel.org/index.php/Btrfs_source_repositories#Dependencies .
+
+
+Building from sources
+---------------------
+
+To build from git sources you need to generate the configure script using the
+autotools:
+
+ $ ./autogen.sh
+
+To build from the released tarballs:
+
+ $ ./configure
+ $ make
+ $ make install
+
+You may disable building some parts like documentation, btrfs-convert or
+backtrace support. See ./configure --help for more.
+
+Specific CFLAGS or LDFLAGS should be set like
+
+ $ CFLAGS=... LDFLAGS=... ./configure --prefix=/usr
+
+and not as arguments to make. You can specify additional flags to build via
+variables EXTRA_CFLAGS and EXTRA_LDFLAGS that get appended to the predefined
+values of the respective variables.
+
+ $ make EXTRA_CFLAGS=-ggdb3
+
+The build utilizes autotools, dependencies for generating the configure
+scripts are:
+
+* autoconf, autoheader
+* automake, aclocal
+* pkg-config
+
+
+Statically built binaries
+-------------------------
+
+The makefiles are ready to let you build static binaries of the utilities. This
+may be handy in rescue environments. Your system has to provide static version
+of the libraries.
+
+$ make static
+$ make btrfs.static
+$ make btrfs-convert.static
+
+The resulting static binaries have the '.static' suffix, the intermediate object
+files do not conflict with the normal (dynamic) build.
+
+
+References:
+* https://btrfs.wiki.kernel.org
diff --git a/Makefile.extrawarn b/Makefile.extrawarn
new file mode 100644
index 00000000..1f4bda94
--- /dev/null
+++ b/Makefile.extrawarn
@@ -0,0 +1,90 @@
+# From linux.git/scripts/Kbuild.include
+#
+# try-run
+# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
+# Exit code chooses option. "$$TMP" can be used as a temporary file and
+# is automatically cleaned up.
+try-run = $(shell set -e; \
+ TMP="$(TMPOUT).$$$$.tmp"; \
+ TMPO="$(TMPOUT).$$$$.o"; \
+ if ($(1)) >/dev/null 2>&1; \
+ then echo "$(2)"; \
+ else echo "$(3)"; \
+ fi; \
+ rm -f "$$TMP" "$$TMPO")
+
+ # cc-option
+ # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
+
+ cc-option = $(call try-run,\
+ $(CC) $(CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+
+# From linux.git/scripts/Makefile.extrawarn
+# ==========================================================================
+#
+# make W=... settings
+#
+# W=1 - warnings that may be relevant and do not occur too often
+# W=2 - warnings that occur quite often but may still be relevant
+# W=3 - the more obscure warnings, can most likely be ignored
+#
+# $(call cc-option, -W...) handles gcc -W.. options which
+# are not supported by all versions of the compiler
+# ==========================================================================
+
+ifeq ("$(origin W)", "command line")
+ export BUILD_ENABLE_EXTRA_GCC_CHECKS := $(W)
+endif
+
+ifdef BUILD_ENABLE_EXTRA_GCC_CHECKS
+warning- := $(empty)
+
+warning-1 := -Wextra -Wunused -Wno-unused-parameter
+warning-1 += -Wmissing-declarations
+warning-1 += -Wmissing-format-attribute
+warning-1 += $(call cc-option, -Wmissing-prototypes)
+warning-1 += -Wold-style-definition
+warning-1 += $(call cc-option, -Wmissing-include-dirs)
+warning-1 += $(call cc-option, -Wunused-but-set-variable)
+warning-1 += $(call cc-disable-warning, missing-field-initializers)
+
+warning-2 := -Waggregate-return
+warning-2 += -Wcast-align
+warning-2 += -Wdisabled-optimization
+warning-2 += -Wnested-externs
+warning-2 += -Wshadow
+warning-2 += $(call cc-option, -Wlogical-op)
+warning-2 += $(call cc-option, -Wmissing-field-initializers)
+
+warning-3 := -Wbad-function-cast
+warning-3 += -Wcast-qual
+warning-3 += -Wconversion
+warning-3 += -Wpacked
+warning-3 += -Wpadded
+warning-3 += -Wpointer-arith
+warning-3 += -Wredundant-decls
+warning-3 += -Wswitch-default
+warning-3 += $(call cc-option, -Wpacked-bitfield-compat)
+warning-3 += $(call cc-option, -Wvla)
+
+warning := $(warning-$(findstring 1, $(BUILD_ENABLE_EXTRA_GCC_CHECKS)))
+warning += $(warning-$(findstring 2, $(BUILD_ENABLE_EXTRA_GCC_CHECKS)))
+warning += $(warning-$(findstring 3, $(BUILD_ENABLE_EXTRA_GCC_CHECKS)))
+
+ifeq ("$(strip $(warning))","")
+ $(error W=$(BUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown)
+endif
+
+EXTRAWARN_CFLAGS += $(warning)
+else
+
+ifeq ($(COMPILER),clang)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, initializer-overrides)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, unused-value)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, format)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, unknown-warning-option)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, sign-compare)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, format-zero-length)
+EXTRAWARN_CFLAGS += $(call cc-disable-warning, uninitialized)
+endif
+endif
diff --git a/Makefile.in b/Makefile.in
new file mode 100644
index 00000000..91847896
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,399 @@
+# btrfs-progs
+#
+# Basic build targets:
+# all all main tools
+# static build static binaries, requires static versions of the libraries
+# test run the full testsuite
+# install install to default location (/usr/local)
+# clean clean built binaries (not the documentation)
+#
+# Tuning by variables (environment or make arguments):
+# V=1 verbose, print command lines (default: quiet)
+# C=1 run checker before compilation (default checker: sparse)
+# W=123 build with warnings (default: off)
+# EXTRA_CFLAGS additional compiler flags
+# EXTRA_LDFLAGS additional linker flags
+#
+# Static checkers:
+# CHECKER static checker binary to be called (default: sparse)
+# CHECKER_FLAGS flags to pass to CHECKER, can override CFLAGS
+#
+
+# Export all variables to sub-makes by default
+export
+
+include Makefile.extrawarn
+
+CC = @CC@
+LN_S = @LN_S@
+AR = @AR@
+RM = @RM@
+RMDIR = @RMDIR@
+INSTALL = @INSTALL@
+DISABLE_DOCUMENTATION = @DISABLE_DOCUMENTATION@
+DISABLE_BTRFSCONVERT = @DISABLE_BTRFSCONVERT@
+
+EXTRA_CFLAGS :=
+EXTRA_LDFLAGS :=
+
+# Common build flags
+CFLAGS = @CFLAGS@ \
+ -include config.h \
+ -DBTRFS_FLAT_INCLUDES \
+ -D_XOPEN_SOURCE=700 \
+ -fno-strict-aliasing \
+ -fPIC $(EXTRAWARN_CFLAGS) $(EXTRA_CFLAGS)
+
+LDFLAGS = @LDFLAGS@ \
+ -rdynamic $(EXTRA_LDFLAGS)
+
+LIBS = @UUID_LIBS@ @BLKID_LIBS@ @ZLIB_LIBS@ @LZO2_LIBS@ -L. -pthread
+LIBBTRFS_LIBS = $(LIBS)
+
+# Static compilation flags
+STATIC_CFLAGS = $(CFLAGS) -ffunction-sections -fdata-sections
+STATIC_LDFLAGS = -static -Wl,--gc-sections
+STATIC_LIBS = @UUID_LIBS_STATIC@ @BLKID_LIBS_STATIC@ \
+ @ZLIB_LIBS_STATIC@ @LZO2_LIBS_STATIC@ -L. -pthread
+
+# don't use FORTIFY with sparse because glibc with FORTIFY can
+# generate so many sparse errors that sparse stops parsing,
+# which masks real errors that we want to see.
+CHECKER := sparse
+check_defs := .cc-defines.h
+CHECKER_FLAGS := -include $(check_defs) -D__CHECKER__ \
+ -D__CHECK_ENDIAN__ -Wbitwise -Wuninitialized -Wshadow -Wundef \
+ -U_FORTIFY_SOURCE
+
+objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
+ root-tree.o dir-item.o file-item.o inode-item.o inode-map.o \
+ extent-cache.o extent_io.o volumes.o utils.o repair.o \
+ qgroup.o raid6.o free-space-cache.o list_sort.o props.o \
+ ulist.o qgroup-verify.o backref.o string-table.o task-utils.o \
+ inode.o file.o find-root.o free-space-tree.o help.o
+cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \
+ cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \
+ cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o \
+ cmds-restore.o cmds-rescue.o chunk-recover.o super-recover.o \
+ cmds-property.o cmds-fi-usage.o
+libbtrfs_objects = send-stream.o send-utils.o rbtree.o btrfs-list.o crc32c.o \
+ uuid-tree.o utils-lib.o rbtree-utils.o
+libbtrfs_headers = send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \
+ crc32c.h list.h kerncompat.h radix-tree.h extent-cache.h \
+ extent_io.h ioctl.h ctree.h btrfsck.h version.h
+TESTS = fsck-tests.sh convert-tests.sh
+
+prefix ?= @prefix@
+exec_prefix = @exec_prefix@
+bindir = @bindir@
+libdir ?= @libdir@
+incdir = @includedir@/btrfs
+
+ifeq ("$(origin V)", "command line")
+ BUILD_VERBOSE = $(V)
+endif
+ifndef BUILD_VERBOSE
+ BUILD_VERBOSE = 0
+endif
+
+ifeq ($(BUILD_VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+MAKEOPTS = --no-print-directory Q=$(Q)
+
+# build all by default
+progs = $(progs_install) btrfsck btrfs-corrupt-block btrfs-calc-size
+
+# install only selected
+progs_install = btrfs mkfs.btrfs btrfs-debug-tree \
+ btrfs-map-logical btrfs-image btrfs-zero-log \
+ btrfs-find-root btrfstune btrfs-show-super \
+ btrfs-select-super
+
+progs_extra = btrfs-fragments
+
+progs_static = $(foreach p,$(progs),$(p).static)
+
+ifneq ($(DISABLE_BTRFSCONVERT),1)
+progs_install += btrfs-convert
+endif
+
+# external libs required by various binaries; for btrfs-foo,
+# specify btrfs_foo_libs = <list of libs>; see $($(subst...)) rules below
+btrfs_convert_libs = @EXT2FS_LIBS@ @COM_ERR_LIBS@
+btrfs_fragments_libs = -lgd -lpng -ljpeg -lfreetype
+
+SUBDIRS =
+BUILDDIRS = $(patsubst %,build-%,$(SUBDIRS))
+INSTALLDIRS = $(patsubst %,install-%,$(SUBDIRS))
+CLEANDIRS = $(patsubst %,clean-%,$(SUBDIRS))
+
+ifneq ($(DISABLE_DOCUMENTATION),1)
+BUILDDIRS += build-Documentation
+INSTALLDIRS += install-Documentation
+endif
+
+.PHONY: $(SUBDIRS)
+.PHONY: $(BUILDDIRS)
+.PHONY: $(INSTALLDIRS)
+.PHONY: $(TESTDIRS)
+.PHONY: $(CLEANDIRS)
+.PHONY: all install clean
+
+# Create all the static targets
+static_objects = $(patsubst %.o, %.static.o, $(objects))
+static_cmds_objects = $(patsubst %.o, %.static.o, $(cmds_objects))
+static_libbtrfs_objects = $(patsubst %.o, %.static.o, $(libbtrfs_objects))
+
+libs_shared = libbtrfs.so.0.1
+libs_static = libbtrfs.a
+libs = $(libs_shared) $(libs_static)
+lib_links = libbtrfs.so.0 libbtrfs.so
+headers = $(libbtrfs_headers)
+
+# make C=1 to enable sparse
+ifdef C
+ # We're trying to use sparse against glibc headers which go wild
+ # trying to use internal compiler macros to test features. We
+ # copy gcc's and give them to sparse. But not __SIZE_TYPE__
+ # 'cause sparse defines that one.
+ #
+ dummy := $(shell $(CC) -dM -E -x c - < /dev/null | \
+ grep -v __SIZE_TYPE__ > $(check_defs))
+ check = $(CHECKER)
+ check_echo = echo
+else
+ check = true
+ check_echo = true
+endif
+
+%.o.d: %.c
+ $(Q)$(CC) -MM -MG -MF $@ -MT $(@:.o.d=.o) -MT $(@:.o.d=.static.o) -MT $@ $(CFLAGS) $<
+
+.c.o:
+ @$(check_echo) " [SP] $<"
+ $(Q)$(check) $(CFLAGS) $(CHECKER_FLAGS) $<
+ @echo " [CC] $@"
+ $(Q)$(CC) $(CFLAGS) -c $<
+
+%.static.o: %.c
+ @echo " [CC] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -c $< -o $@
+
+all: $(progs) $(BUILDDIRS)
+$(SUBDIRS): $(BUILDDIRS)
+$(BUILDDIRS):
+ @echo "Making all in $(patsubst build-%,%,$@)"
+ $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst build-%,%,$@)
+
+test-convert: btrfs btrfs-convert
+ @echo " [TEST] convert-tests.sh"
+ $(Q)bash tests/convert-tests.sh
+
+test-fsck: btrfs btrfs-image btrfs-corrupt-block btrfs-debug-tree mkfs.btrfs
+ @echo " [TEST] fsck-tests.sh"
+ $(Q)bash tests/fsck-tests.sh
+
+test-misc: btrfs btrfs-image btrfs-corrupt-block btrfs-debug-tree mkfs.btrfs btrfstune
+ @echo " [TEST] misc-tests.sh"
+ $(Q)bash tests/misc-tests.sh
+
+test-mkfs: btrfs mkfs.btrfs
+ @echo " [TEST] mkfs-tests.sh"
+ $(Q)bash tests/mkfs-tests.sh
+
+test-fuzz: btrfs
+ @echo " [TEST] fuzz-tests.sh"
+ $(Q)bash tests/fuzz-tests.sh
+
+test-clean:
+ @echo "Cleaning tests"
+ $(Q)bash tests/clean-tests.sh
+
+test: test-fsck test-mkfs test-convert test-misc test-fuzz
+
+#
+# NOTE: For static compiles, you need to have all the required libs
+# static equivalent available
+#
+static: $(progs_static)
+
+version.h: version.sh version.h.in configure.ac
+ @echo " [SH] $@"
+ $(Q)bash ./config.status --silent $@
+
+$(libs_shared): $(libbtrfs_objects) $(lib_links) send.h
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) $(libbtrfs_objects) $(LDFLAGS) $(LIBBTRFS_LIBS) \
+ -shared -Wl,-soname,libbtrfs.so.0 -o libbtrfs.so.0.1
+
+$(libs_static): $(libbtrfs_objects)
+ @echo " [AR] $@"
+ $(Q)$(AR) cr libbtrfs.a $(libbtrfs_objects)
+
+$(lib_links):
+ @echo " [LN] $@"
+ $(Q)$(LN_S) -f libbtrfs.so.0.1 $@
+
+# keep intermediate files from the below implicit rules around
+.PRECIOUS: $(addsuffix .o,$(progs))
+
+# Make any btrfs-foo out of btrfs-foo.o, with appropriate libs.
+# The $($(subst...)) bits below takes the btrfs_*_libs definitions above and
+# turns them into a list of libraries to link against if they exist
+#
+# For static variants, use an extra $(subst) to get rid of the ".static"
+# from the target name before translating to list of libs
+
+btrfs-%.static: $(static_objects) btrfs-%.static.o $(static_libbtrfs_objects)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -o $@ $@.o $(static_objects) \
+ $(static_libbtrfs_objects) $(STATIC_LDFLAGS) \
+ $($(subst -,_,$(subst .static,,$@)-libs)) $(STATIC_LIBS)
+
+btrfs-%: $(objects) $(libs_static) btrfs-%.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o $@ $(objects) $@.o $(libs_static) \
+ $(LDFLAGS) $(LIBS) $($(subst -,_,$@-libs))
+
+btrfs: $(objects) btrfs.o $(cmds_objects) $(libs_static)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o btrfs btrfs.o $(cmds_objects) \
+ $(objects) $(libs_static) $(LDFLAGS) $(LIBS)
+
+btrfs.static: $(static_objects) btrfs.static.o $(static_cmds_objects) $(static_libbtrfs_objects)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -o btrfs.static btrfs.static.o $(static_cmds_objects) \
+ $(static_objects) $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS)
+
+# For backward compatibility, 'btrfs' changes behaviour to fsck if it's named 'btrfsck'
+btrfsck: btrfs
+ @echo " [LN] $@"
+ $(Q)$(LN_S) -f btrfs btrfsck
+
+btrfsck.static: btrfs.static
+ @echo " [LN] $@"
+ $(Q)$(LN_S) -f $^ $@
+
+mkfs.btrfs: $(objects) $(libs_static) mkfs.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o mkfs.btrfs $(objects) $(libs_static) mkfs.o $(LDFLAGS) $(LIBS)
+
+mkfs.btrfs.static: $(static_objects) mkfs.static.o $(static_libbtrfs_objects)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -o mkfs.btrfs.static mkfs.static.o $(static_objects) \
+ $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS)
+
+btrfstune: $(objects) $(libs_static) btrfstune.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o btrfstune $(objects) btrfstune.o $(libs_static) $(LDFLAGS) $(LIBS)
+
+btrfstune.static: $(static_objects) btrfstune.static.o $(static_libbtrfs_objects)
+ @echo " [LD] $@"
+ $(Q)$(CC) $(STATIC_CFLAGS) -o $@ btrfstune.static.o $(static_objects) \
+ $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS)
+
+dir-test: $(objects) $(libs) dir-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o dir-test $(objects) $(libs) dir-test.o $(LDFLAGS) $(LIBS)
+
+quick-test: $(objects) $(libs) quick-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o quick-test $(objects) $(libs) quick-test.o $(LDFLAGS) $(LIBS)
+
+ioctl-test: $(objects) $(libs) ioctl-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o ioctl-test $(objects) $(libs) ioctl-test.o $(LDFLAGS) $(LIBS)
+
+send-test: $(objects) $(libs) send-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o send-test $(objects) $(libs) send-test.o $(LDFLAGS) $(LIBS)
+
+library-test: $(libs_shared) library-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o library-test library-test.o $(LDFLAGS) -lbtrfs
+
+library-test.static: $(libs_static) library-test.o
+ @echo " [LD] $@"
+ $(Q)$(CC) $(CFLAGS) -o library-test-static library-test.o $(LDFLAGS) $(libs_static)
+
+test-build: test-build-pre test-build-real
+
+test-build-pre:
+ $(MAKE) clean-all
+ ./autogen.sh
+ ./configure
+
+test-build-real:
+ $(MAKE) library-test
+ -$(MAKE) library-test.static
+ $(MAKE) -j 8 all
+ -$(MAKE) -j 8 static
+ $(MAKE) -j 8 $(progs_extra)
+
+manpages:
+ $(Q)$(MAKE) $(MAKEOPTS) -C Documentation
+
+
+clean-all: clean clean-doc clean-gen
+
+clean: $(CLEANDIRS)
+ @echo "Cleaning"
+ $(Q)$(RM) -f $(progs) cscope.out *.o *.o.d \
+ dir-test ioctl-test quick-test send-test library-test library-test-static \
+ btrfs.static mkfs.btrfs.static \
+ $(check_defs) \
+ $(libs) $(lib_links) \
+ $(progs_static) $(progs_extra)
+
+clean-doc:
+ @echo "Cleaning Documentation"
+ $(Q)$(MAKE) $(MAKEOPTS) -C Documentation clean
+
+# Remove everything produced by autogen.sh and configure (generated headers,
+# configure itself, its caches and logs, the generated Makefiles and the
+# config/ helper directory).
+clean-gen:
+	@echo "Cleaning Generated Files"
+	$(Q)$(RM) -rf version.h config.status config.cache config.log \
+		configure.lineno config.status.lineno Makefile \
+		Documentation/Makefile \
+		config.h config.h.in~ aclocal.m4 \
+		configure autom4te.cache/ config/
+
+$(CLEANDIRS):
+ @echo "Cleaning $(patsubst clean-%,%,$@)"
+ $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst clean-%,%,$@) clean
+
+install: $(libs) $(progs_install) $(INSTALLDIRS)
+ $(INSTALL) -m755 -d $(DESTDIR)$(bindir)
+ $(INSTALL) $(progs_install) $(DESTDIR)$(bindir)
+ $(INSTALL) fsck.btrfs $(DESTDIR)$(bindir)
+ # btrfsck is a link to btrfs in the src tree, make it so for installed file as well
+ $(LN_S) -f btrfs $(DESTDIR)$(bindir)/btrfsck
+ $(INSTALL) -m755 -d $(DESTDIR)$(libdir)
+ $(INSTALL) $(libs) $(DESTDIR)$(libdir)
+ cp -a $(lib_links) $(DESTDIR)$(libdir)
+ $(INSTALL) -m755 -d $(DESTDIR)$(incdir)
+ $(INSTALL) -m644 $(headers) $(DESTDIR)$(incdir)
+
+install-static: $(progs_static) $(INSTALLDIRS)
+ $(INSTALL) -m755 -d $(DESTDIR)$(bindir)
+ $(INSTALL) $(progs_static) $(DESTDIR)$(bindir)
+ # btrfsck is a link to btrfs in the src tree, make it so for installed file as well
+ $(LN_S) -f btrfs.static $(DESTDIR)$(bindir)/btrfsck.static
+
+$(INSTALLDIRS):
+ @echo "Making install in $(patsubst install-%,%,$@)"
+ $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst install-%,%,$@) install
+
+# Remove the files put in place by "install".
+#
+# Every "cd" is chained to its "rm" with "&&": if the target directory does
+# not exist the rm is skipped (and the recipe fails) instead of running in
+# the build directory, where it would delete fsck.btrfs, the headers and the
+# freshly built libraries and programs.
+uninstall:
+	$(Q)$(MAKE) $(MAKEOPTS) -C Documentation uninstall
+	cd $(DESTDIR)$(incdir) && $(RM) -f $(headers)
+	$(RMDIR) -p --ignore-fail-on-non-empty $(DESTDIR)$(incdir)
+	cd $(DESTDIR)$(libdir) && $(RM) -f $(lib_links) $(libs)
+	cd $(DESTDIR)$(bindir) && $(RM) -f btrfsck fsck.btrfs $(progs_install)
+
+ifneq ($(MAKECMDGOALS),clean)
+-include $(objects:.o=.o.d) $(cmds_objects:.o=.o.d) $(subst .btrfs,, $(filter-out btrfsck.o.d, $(progs:=.o.d)))
+endif
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..2f9d4e7e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,41 @@
+Btrfs-progs
+===========
+
+Userspace utilities to manage btrfs filesystems.
+License: GPLv2.
+
+Btrfs is a copy on write (COW) filesystem for Linux aimed at implementing
+advanced features while focusing on fault tolerance, repair and easy
+administration.
+
+
+This repository hosts the following utilities:
+
+* **btrfs** &mdash; the main administration tool ([manual page](https://btrfs.wiki.kernel.org/index.php/Manpage/btrfs))
+* **mkfs.btrfs** &mdash; utility to create the filesystem ([manual page](https://btrfs.wiki.kernel.org/index.php/Manpage/mkfs.btrfs))
+
+See INSTALL for build instructions.
+
+Release cycle
+-------------
+
+The major version releases are time-based and follow the cycle of the linux
+kernel releases. The cycle usually takes 2 months. Minor version releases may
+happen in the meantime if there are queued bug fixes or minor useful
+improvements.
+
+Development
+-----------
+
+Patch submissions, development and general discussions take place on the
+*linux-btrfs@vger.kernel.org* mailing list; subscription is not required.
+
+References
+----------
+
+* [Wiki with more information](https://btrfs.wiki.kernel.org)
+* [Btrfs-progs changelogs](https://btrfs.wiki.kernel.org/index.php/Changelog#By_version_.28btrfs-progs.29)
+* [wiki/FAQ](https://btrfs.wiki.kernel.org/index.php/FAQ)
+* [wiki/Getting started](https://btrfs.wiki.kernel.org/index.php/Getting_started)
+* [wiki/TODO](https://btrfs.wiki.kernel.org/index.php/Project_ideas#Userspace_tools_projects)
+* [wiki/Developer's FAQ](https://btrfs.wiki.kernel.org/index.php/Developer's_FAQ)
diff --git a/androidcompat.h b/androidcompat.h
new file mode 100644
index 00000000..eec76dad
--- /dev/null
+++ b/androidcompat.h
@@ -0,0 +1,28 @@
+/*
+ * Compatibility layer for Android.
+ *
+ * Stub calls or alternate functions for pthreads.
+ *
+ * With ANDROID defined this header replaces the pthread cancellation calls
+ * with the macros below, forward-declares the blkid probe handle type and
+ * maps "struct direct" onto "struct dirent". Without ANDROID it only pulls
+ * in <sys/dir.h>.
+ */
+
+#ifndef __ANDROID_H__
+#define __ANDROID_H__
+
+#ifdef ANDROID
+
+/* Changing the cancel type/state becomes a no-op that evaluates to 0. */
+#define pthread_setcanceltype(type, oldtype) (0)
+#define pthread_setcancelstate(state, oldstate) (0)
+
+/*
+ * Cancellation is replaced by sending SIGUSR1 to the thread.
+ * NOTE(review): this relies on pthread_kill() and SIGUSR1 being declared by
+ * whoever includes this header, and presumably on a SIGUSR1 handler being
+ * installed by the caller -- confirm.
+ */
+#define pthread_cancel(ret) pthread_kill((ret), SIGUSR1)
+
+/* Opaque probe handle, declared here so no blkid header is required. */
+typedef struct blkid_struct_probe *blkid_probe;
+
+#include <dirent.h>
+/* Code written against "struct direct" gets "struct dirent" instead. */
+#define direct dirent
+
+#else /* !ANDROID */
+
+#include <sys/dir.h>
+
+#endif /* !ANDROID */
+
+#endif /* __ANDROID_H__ */
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 00000000..96698502
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+#
+# Helps generate autoconf stuff, when code is checked out from SCM.
+#
+# Copyright (C) 2006-2014 - Karel Zak <kzak@redhat.com>
+#
+
+# Locate the source directory from the script path and switch to it; the
+# original working directory is remembered in THEDIR. Paths are quoted so
+# that directories containing whitespace work, and a failing cd aborts the
+# script instead of running the autotools in the wrong directory.
+srcdir=`dirname "$0"`
+test -z "$srcdir" && srcdir=.
+
+THEDIR=`pwd`
+cd "$srcdir" || exit 1
+DIE=0
+
+test -f btrfs.c || {
+ echo
+ echo "You must run this script in the top-level btrfs-progs directory"
+ echo
+ DIE=1
+}
+
+(autoconf --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have autoconf installed to generate btrfs-progs build system."
+ echo
+ DIE=1
+}
+(autoheader --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have autoheader installed to generate btrfs-progs build system."
+ echo "The autoheader command is part of the GNU autoconf package."
+ echo
+ DIE=1
+}
+
+(automake --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have automake installed to generate btrfs-progs build system."
+ echo
+ DIE=1
+}
+
+(pkg-config --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have pkg-config installed to use btrfs-progs build system."
+ echo "The pkg-config utility was not found in the standard location, set"
+ echo "the PKG_CONFIG/PKG_CONFIG_PATH/PKG_CONFIG_LIBDIR variables at the"
+ echo "configure time."
+ echo
+}
+
+if test "$DIE" -eq 1; then
+ exit 1
+fi
+
+echo
+echo "Generate build-system by:"
+echo " aclocal: $(aclocal --version | head -1)"
+echo " autoconf: $(autoconf --version | head -1)"
+echo " autoheader: $(autoheader --version | head -1)"
+echo " automake: $(automake --version | head -1)"
+
+chmod +x version.sh
+rm -rf autom4te.cache
+
+aclocal $AL_OPTS
+autoconf $AC_OPTS
+autoheader $AH_OPTS
+
+# it's better to use helper files from automake installation than
+# maintain copies in git tree
+# find_autofile NAME
+#
+# Make sure the autotools helper file NAME is available. If NAME does not
+# already exist in the current directory, copy it into config/ from the first
+# helper directory that has it. Exits the script with status 1 when the file
+# cannot be found in any helper directory or cannot be copied.
+find_autofile() {
+	if [ -f "$1" ]; then
+		return
+	fi
+	# the unquoted words are intentional: the automake output and the
+	# automake-* pattern are meant to be split and globbed
+	for HELPER_DIR in $(automake --print-libdir 2>/dev/null) \
+			/usr/share/libtool \
+			/usr/share/automake-* ; do
+		f="$HELPER_DIR/$1"
+		if [ -f "$f" ]; then
+			cp "$f" config/ || exit 1
+			return
+		fi
+	done
+	# the inner quotes are escaped so that $1 stays inside the string
+	echo "Cannot find \"$1\" in known locations"
+	exit 1
+}
+
+mkdir -p config/
+find_autofile config.guess
+find_autofile config.sub
+find_autofile install-sh
+
+cd $THEDIR
+
+echo
+echo "Now type '$srcdir/configure' and 'make' to compile."
+echo
diff --git a/backref.c b/backref.c
new file mode 100644
index 00000000..8f41f829
--- /dev/null
+++ b/backref.c
@@ -0,0 +1,1651 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "backref.h"
+#include "ulist.h"
+#include "transaction.h"
+
+#define pr_debug(...) do { } while (0)
+
+/*
+ * One (inode number, file offset) pair at which an extent is referenced.
+ * Elements are chained through ->next into a NULL-terminated singly linked
+ * list.
+ */
+struct extent_inode_elem {
+ u64 inum;
+ u64 offset;
+ struct extent_inode_elem *next;
+};
+
+/*
+ * Record where the file extent item @fi (stored under @key in leaf @eb)
+ * maps the extent position @extent_item_pos.
+ *
+ * For an item without compression, encryption or other encoding the position
+ * must lie inside [offset, offset + num_bytes) of the item; otherwise nothing
+ * is recorded and 1 is returned. For an encoded item no range check is done
+ * and the key offset is recorded unchanged.
+ *
+ * On success a new element holding key->objectid and the resulting file
+ * offset is pushed onto the front of the list *@eie.
+ *
+ * Returns 0 on success, 1 if the position is outside the item and -ENOMEM if
+ * the element cannot be allocated.
+ */
+static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
+				struct btrfs_file_extent_item *fi,
+				u64 extent_item_pos,
+				struct extent_inode_elem **eie)
+{
+	struct extent_inode_elem *elem;
+	u64 delta = 0;
+	int encoded;
+
+	encoded = btrfs_file_extent_compression(eb, fi) ||
+		  btrfs_file_extent_encryption(eb, fi) ||
+		  btrfs_file_extent_other_encoding(eb, fi);
+
+	if (!encoded) {
+		const u64 first = btrfs_file_extent_offset(eb, fi);
+		const u64 len = btrfs_file_extent_num_bytes(eb, fi);
+
+		if (extent_item_pos < first || extent_item_pos >= first + len)
+			return 1;
+		delta = extent_item_pos - first;
+	}
+
+	elem = kmalloc(sizeof(*elem), GFP_NOFS);
+	if (elem == NULL)
+		return -ENOMEM;
+
+	elem->inum = key->objectid;
+	elem->offset = key->offset + delta;
+
+	/* push onto the head of the caller's list */
+	elem->next = *eie;
+	*eie = elem;
+
+	return 0;
+}
+
+/*
+ * Free every element of the list starting at @eie. A NULL list is allowed.
+ */
+static void free_inode_elem_list(struct extent_inode_elem *eie)
+{
+	while (eie) {
+		/* fetch the successor before the element goes away */
+		struct extent_inode_elem *following = eie->next;
+
+		kfree(eie);
+		eie = following;
+	}
+}
+
+/*
+ * Scan every item of leaf @eb for non-inline file extent items that point at
+ * @wanted_disk_byte and record each hit in *@eie via check_extent_in_eb().
+ *
+ * Returns 0 when the whole leaf has been scanned, or the negative error
+ * reported by check_extent_in_eb().
+ */
+static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
+				u64 extent_item_pos,
+				struct extent_inode_elem **eie)
+{
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	int nr;
+	int i;
+	int err;
+
+	/*
+	 * from the shared data ref, we only have the leaf but we need
+	 * the key. thus, we must look into all items and see that we
+	 * find one (some) with a reference to our extent item.
+	 */
+	nr = btrfs_header_nritems(eb);
+	for (i = 0; i < nr; i++) {
+		btrfs_item_key_to_cpu(eb, &key, i);
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
+			continue;
+
+		/* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
+		if (btrfs_file_extent_disk_bytenr(eb, fi) != wanted_disk_byte)
+			continue;
+
+		err = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * this structure records all encountered refs on the way up to the root
+ */
+struct __prelim_ref {
+ /* linkage in the list of preliminary refs */
+ struct list_head list;
+ /* id of the root used to resolve an indirect ref */
+ u64 root_id;
+ /* key searched for when resolving; all zero if none was supplied */
+ struct btrfs_key key_for_search;
+ /* tree level at which the search for the parent stops */
+ int level;
+ /* reference count; refs with a count of 0 are not resolved */
+ int count;
+ /* inode/offset elements attached to this ref, may be NULL */
+ struct extent_inode_elem *inode_list;
+ /* logical address of the parent block; non-zero means already direct */
+ u64 parent;
+ /* logical address of the extent or block the ref points at */
+ u64 wanted_disk_byte;
+};
+
+/*
+ * the rules for all callers of this function are:
+ * - obtaining the parent is the goal
+ * - if you add a key, you must know that it is a correct key
+ * - if you cannot add the parent or a correct key, then we will look into the
+ * block later to set a correct key
+ *
+ * delayed refs
+ * ============
+ * backref type | shared | indirect | shared | indirect
+ * information | tree | tree | data | data
+ * --------------------+--------+----------+--------+----------
+ * parent logical | y | - | - | -
+ * key to resolve | - | y | y | y
+ * tree block logical | - | - | - | -
+ * root for resolving | y | y | y | y
+ *
+ * - column 1: we've the parent -> done
+ * - column 2, 3, 4: we use the key to find the parent
+ *
+ * on disk refs (inline or keyed)
+ * ==============================
+ * backref type | shared | indirect | shared | indirect
+ * information | tree | tree | data | data
+ * --------------------+--------+----------+--------+----------
+ * parent logical | y | - | y | -
+ * key to resolve | - | - | - | y
+ * tree block logical | y | y | y | y
+ * root for resolving | - | y | y | y
+ *
+ * - column 1, 3: we've the parent -> done
+ * - column 2: we take the first key from the block to find the parent
+ * (see __add_missing_keys)
+ * - column 4: we use the key to find the parent
+ *
+ * additional information that's available but not required to find the parent
+ * block might help in merging entries to gain some speed.
+ */
+
+/*
+ * Allocate a preliminary ref from the given values and append it to the tail
+ * of @head. Refs that belong to the data relocation tree are silently
+ * ignored. @key may be NULL, in which case the search key is zeroed.
+ *
+ * Returns 0 on success (or when the ref was ignored), -ENOMEM on allocation
+ * failure.
+ */
+static int __add_prelim_ref(struct list_head *head, u64 root_id,
+			    struct btrfs_key *key, int level,
+			    u64 parent, u64 wanted_disk_byte, int count,
+			    gfp_t gfp_mask)
+{
+	struct __prelim_ref *entry;
+
+	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
+		return 0;
+
+	entry = kmalloc(sizeof(*entry), gfp_mask);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	if (!key)
+		memset(&entry->key_for_search, 0,
+		       sizeof(entry->key_for_search));
+	else
+		entry->key_for_search = *key;
+
+	entry->root_id = root_id;
+	entry->level = level;
+	entry->count = count;
+	entry->parent = parent;
+	entry->wanted_disk_byte = wanted_disk_byte;
+	entry->inode_list = NULL;
+
+	list_add_tail(&entry->list, head);
+	return 0;
+}
+
+/*
+ * Add to @parents the logical start of every tree block on @path that holds
+ * the reference described by @ref.
+ *
+ * @level != 0: the node at path->nodes[level] is added and the function
+ * returns.
+ * @level == 0: starting at the current leaf slot of @path, items are walked
+ * for as long as the key's objectid equals that of ref->key_for_search and
+ * its type is BTRFS_EXTENT_DATA_KEY, and until @total_refs matching items
+ * have been counted. An item matches when its disk bytenr equals
+ * ref->wanted_disk_byte; its leaf is then added to @parents. When
+ * @extent_item_pos is non-NULL an extent_inode_elem is built for the item
+ * and attached to the leaf's entry; items that do not cover that position
+ * are counted but not added.
+ *
+ * @time_seq is not used by this function.
+ *
+ * Returns 0 on success or a negative value on error.
+ */
+static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
+ struct ulist *parents, struct __prelim_ref *ref,
+ int level, u64 time_seq, const u64 *extent_item_pos,
+ u64 total_refs)
+{
+ int ret = 0;
+ int slot;
+ struct extent_buffer *eb;
+ struct btrfs_key key;
+ struct btrfs_key *key_for_search = &ref->key_for_search;
+ struct btrfs_file_extent_item *fi;
+ struct extent_inode_elem *eie = NULL, *old = NULL;
+ u64 disk_byte;
+ u64 wanted_disk_byte = ref->wanted_disk_byte;
+ u64 count = 0;
+
+ /* for a tree node the block on the path is itself the parent */
+ if (level != 0) {
+ eb = path->nodes[level];
+ ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
+ if (ret < 0)
+ return ret;
+ return 0;
+ }
+
+ /*
+ * We normally enter this function with the path already pointing to
+ * the first item to check. But sometimes, we may enter it with
+ * slot==nritems. In that case, go to the next leaf before we continue.
+ */
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
+ ret = btrfs_next_leaf(root, path);
+
+ while (!ret && count < total_refs) {
+ eb = path->nodes[0];
+ slot = path->slots[0];
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+
+ /* stop at the first item that is not a file extent of the inode */
+ if (key.objectid != key_for_search->objectid ||
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+
+ if (disk_byte == wanted_disk_byte) {
+ eie = NULL;
+ old = NULL;
+ count++;
+ if (extent_item_pos) {
+ ret = check_extent_in_eb(&key, eb, fi,
+ *extent_item_pos,
+ &eie);
+ if (ret < 0)
+ break;
+ }
+ /* ret > 0: the item does not cover *extent_item_pos */
+ if (ret > 0)
+ goto next;
+ ret = ulist_add_merge_ptr(parents, eb->start,
+ eie, (void **)&old, GFP_NOFS);
+ if (ret < 0)
+ break;
+ /*
+ * NOTE(review): a zero return is treated as "the leaf was
+ * already in the list and old now holds its element list",
+ * so the new element is appended to that list -- confirm
+ * against the ulist implementation.
+ */
+ if (!ret && extent_item_pos) {
+ while (old->next)
+ old = old->next;
+ old->next = eie;
+ }
+ /* eie is now reachable through the ulist; do not free it below */
+ eie = NULL;
+ }
+next:
+ ret = btrfs_next_item(root, path);
+ }
+
+ /* a positive value only ended the iteration; it is not an error */
+ if (ret > 0)
+ ret = 0;
+ else if (ret < 0)
+ free_inode_elem_list(eie);
+ return ret;
+}
+
+/*
+ * resolve an indirect backref in the form (root_id, key, level)
+ * to a logical address
+ *
+ * The root named by ref->root_id is read, ref->key_for_search is looked up
+ * in it down to ref->level, and the blocks found are added to @parents by
+ * add_all_parents(). @path is released and its lowest_level reset to 0
+ * before returning, on every exit path.
+ *
+ * Returns the result of add_all_parents(), a negative value if reading the
+ * root or the search fails, 0 without adding anything when ref->level is
+ * exactly one above the root's level, and 1 if the search left no node on
+ * the path at all.
+ */
+static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 time_seq,
+ struct __prelim_ref *ref,
+ struct ulist *parents,
+ const u64 *extent_item_pos, u64 total_refs)
+{
+ struct btrfs_root *root;
+ struct btrfs_key root_key;
+ struct extent_buffer *eb;
+ int ret = 0;
+ int root_level;
+ int level = ref->level;
+
+ root_key.objectid = ref->root_id;
+ root_key.type = BTRFS_ROOT_ITEM_KEY;
+ root_key.offset = (u64)-1;
+
+ root = btrfs_read_fs_root(fs_info, &root_key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+
+ root_level = btrfs_root_level(&root->root_item);
+
+ /* the wanted level lies just above the root node: nothing to search */
+ if (root_level + 1 == level)
+ goto out;
+
+ /* make the search stop at the wanted level instead of at a leaf */
+ path->lowest_level = level;
+ ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, 0, 0);
+
+ pr_debug("search slot in root %llu (level %d, ref count %d) returned "
+ "%d for key (%llu %u %llu)\n",
+ ref->root_id, level, ref->count, ret,
+ ref->key_for_search.objectid, ref->key_for_search.type,
+ ref->key_for_search.offset);
+ if (ret < 0)
+ goto out;
+
+ /* fall back to the nearest lower level that has a node on the path */
+ eb = path->nodes[level];
+ while (!eb) {
+ if (!level) {
+ ret = 1;
+ WARN_ON(1);
+ goto out;
+ }
+ level--;
+ eb = path->nodes[level];
+ }
+
+ ret = add_all_parents(root, path, parents, ref, level, time_seq,
+ extent_item_pos, total_refs);
+out:
+ path->lowest_level = 0;
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
+ * resolve all indirect backrefs from the list
+ *
+ * Every ref on @head that has no parent yet and a non-zero count is resolved
+ * with __resolve_indirect_ref(). The first parent found is stored in the ref
+ * itself; for each further parent a copy of the ref is inserted directly
+ * after it. A ref for which no parent is found keeps parent == 0.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the first error
+ * other than -ENOENT reported while resolving a ref.
+ */
+static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 time_seq,
+ struct list_head *head,
+ const u64 *extent_item_pos, u64 total_refs)
+{
+ int err;
+ int ret = 0;
+ struct __prelim_ref *ref;
+ struct __prelim_ref *ref_safe;
+ struct __prelim_ref *new_ref;
+ struct ulist *parents;
+ struct ulist_node *node;
+ struct ulist_iterator uiter;
+
+ parents = ulist_alloc(GFP_NOFS);
+ if (!parents)
+ return -ENOMEM;
+
+ /*
+ * _safe allows us to insert directly after the current item without
+ * iterating over the newly inserted items.
+ * we're also allowed to re-assign ref during iteration.
+ */
+ list_for_each_entry_safe(ref, ref_safe, head, list) {
+ if (ref->parent) /* already direct */
+ continue;
+ if (ref->count == 0)
+ continue;
+ err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
+ parents, extent_item_pos,
+ total_refs);
+ /*
+ * Only -ENOENT can be tolerated; any other error is passed
+ * straight back to the caller.
+ */
+ if (err == -ENOENT) {
+ continue;
+ } else if (err) {
+ ret = err;
+ goto out;
+ }
+
+ /* we put the first parent into the ref at hand */
+ ULIST_ITER_INIT(&uiter);
+ node = ulist_next(parents, &uiter);
+ ref->parent = node ? node->val : 0;
+ ref->inode_list = node ?
+ (struct extent_inode_elem *)(uintptr_t)node->aux : NULL;
+
+ /* additional parents require new refs being added here */
+ while ((node = ulist_next(parents, &uiter))) {
+ new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
+ if (!new_ref) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* start from a copy of ref, then override parent and inode list */
+ memcpy(new_ref, ref, sizeof(*ref));
+ new_ref->parent = node->val;
+ new_ref->inode_list = (struct extent_inode_elem *)
+ (uintptr_t)node->aux;
+ list_add(&new_ref->list, &ref->list);
+ }
+ /* empty the scratch list before the next ref is resolved */
+ ulist_reinit(parents);
+ }
+out:
+ ulist_free(parents);
+ return ret;
+}
+
+/*
+ * Return 1 if @ref1 and @ref2 agree in level, root id, search key (type,
+ * objectid and offset) and parent, 0 otherwise.
+ */
+static inline int ref_for_same_block(struct __prelim_ref *ref1,
+				     struct __prelim_ref *ref2)
+{
+	const struct btrfs_key *k1 = &ref1->key_for_search;
+	const struct btrfs_key *k2 = &ref2->key_for_search;
+
+	return ref1->level == ref2->level &&
+	       ref1->root_id == ref2->root_id &&
+	       k1->type == k2->type &&
+	       k1->objectid == k2->objectid &&
+	       k1->offset == k2->offset &&
+	       ref1->parent == ref2->parent;
+}
+
+/*
+ * read tree blocks and add keys where required.
+ *
+ * For every ref on @head that has neither a parent nor a search key, the
+ * block at ref->wanted_disk_byte is read and its first key is stored as the
+ * ref's search key.
+ *
+ * Returns 0 on success, -EIO if a block cannot be read.
+ */
+static int __add_missing_keys(struct btrfs_fs_info *fs_info,
+			      struct list_head *head)
+{
+	struct list_head *cur;
+
+	list_for_each(cur, head) {
+		struct __prelim_ref *ref =
+			list_entry(cur, struct __prelim_ref, list);
+		struct btrfs_key *search_key = &ref->key_for_search;
+		struct extent_buffer *eb;
+
+		/* direct refs and refs that already carry a key need no lookup */
+		if (ref->parent || search_key->type)
+			continue;
+
+		BUG_ON(!ref->wanted_disk_byte);
+		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
+				     fs_info->tree_root->leafsize, 0);
+		if (!extent_buffer_uptodate(eb)) {
+			free_extent_buffer(eb);
+			return -EIO;
+		}
+
+		/* nodes and leaves store their keys differently */
+		if (btrfs_header_level(eb) != 0)
+			btrfs_node_key_to_cpu(eb, search_key, 0);
+		else
+			btrfs_item_key_to_cpu(eb, search_key, 0);
+		free_extent_buffer(eb);
+	}
+	return 0;
+}
+
+/*
+ * merge two lists of backrefs and adjust counts accordingly
+ *
+ * mode = 1: merge identical keys, if key is set
+ *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
+ *           additionally, we could even add a key range for the blocks we
+ *           looked into to merge even more (-> replace unresolved refs by those
+ *           having a parent).
+ * mode = 2: merge identical parents
+ *
+ * Every ref on @head is compared with all refs that follow it. A later ref
+ * that matches is folded into the earlier one: its inode list is appended,
+ * its count is added, and it is unlinked from the list and freed.
+ */
+static void __merge_refs(struct list_head *head, int mode)
+{
+	struct list_head *pos1;
+
+	list_for_each(pos1, head) {
+		struct list_head *n2;
+		struct list_head *pos2;
+		struct __prelim_ref *ref1;
+
+		ref1 = list_entry(pos1, struct __prelim_ref, list);
+
+		/*
+		 * n2 caches the successor so that pos2 can be unlinked and
+		 * freed inside the loop body.
+		 */
+		for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
+		     pos2 = n2, n2 = pos2->next) {
+			struct __prelim_ref *ref2;
+			struct extent_inode_elem *eie;
+
+			ref2 = list_entry(pos2, struct __prelim_ref, list);
+
+			/*
+			 * ref_for_same_block() already requires equal parents,
+			 * so two refs that pass it can never differ in whether
+			 * a parent is set. A branch swapping ref1 and ref2 when
+			 * only ref2 has a parent would be unreachable, and
+			 * would free the entry the outer loop still stands on
+			 * if it ever ran, so none is kept here.
+			 */
+			if (mode == 1) {
+				if (!ref_for_same_block(ref1, ref2))
+					continue;
+			} else if (ref1->parent != ref2->parent) {
+				continue;
+			}
+
+			/* append ref2's inode list to the tail of ref1's */
+			eie = ref1->inode_list;
+			while (eie && eie->next)
+				eie = eie->next;
+			if (eie)
+				eie->next = ref2->inode_list;
+			else
+				ref1->inode_list = ref2->inode_list;
+			ref1->count += ref2->count;
+
+			list_del(&ref2->list);
+			kfree(ref2);
+		}
+
+	}
+}
+
+/*
+ * add all inline backrefs for bytenr to the list
+ *
+ * The extent item is read from the leaf and slot that path currently points
+ * to. Its reference count is added to *total_refs and, for tree blocks, the
+ * level is stored in *info_level. Every inline ref found behind the extent
+ * item is queued on prefs through __add_prelim_ref(). fs_info is not used
+ * in this function.
+ *
+ * returns 0 on success or the non-zero value returned by __add_prelim_ref()
+ */
+static int __add_inline_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ int *info_level, struct list_head *prefs,
+ u64 *total_refs)
+{
+ int ret = 0;
+ int slot;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ unsigned long ptr;
+ unsigned long end;
+ struct btrfs_extent_item *ei;
+ u64 flags;
+ u64 item_size;
+
+ /*
+ * enumerate all inline refs
+ */
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ item_size = btrfs_item_size_nr(leaf, slot);
+ BUG_ON(item_size < sizeof(*ei));
+
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+ flags = btrfs_extent_flags(leaf, ei);
+ *total_refs += btrfs_extent_refs(leaf, ei);
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+ ptr = (unsigned long)(ei + 1); /* first byte behind the extent item */
+ end = (unsigned long)ei + item_size; /* end of the whole item */
+
+ if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
+ flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ struct btrfs_tree_block_info *info; /* sits between the extent item and the inline refs */
+
+ info = (struct btrfs_tree_block_info *)ptr;
+ *info_level = btrfs_tree_block_level(leaf, info);
+ ptr += sizeof(struct btrfs_tree_block_info);
+ BUG_ON(ptr > end);
+ } else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
+ *info_level = found_key.offset; /* METADATA_ITEM: the level is taken from the key offset */
+ } else {
+ BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
+ }
+
+ while (ptr < end) {
+ struct btrfs_extent_inline_ref *iref;
+ u64 offset;
+ int type;
+
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ type = btrfs_extent_inline_ref_type(leaf, iref);
+ offset = btrfs_extent_inline_ref_offset(leaf, iref);
+
+ switch (type) {
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, 0, NULL,
+ *info_level + 1, offset,
+ bytenr, 1, GFP_NOFS);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_shared_data_ref *sdref;
+ int count;
+
+ sdref = (struct btrfs_shared_data_ref *)(iref + 1); /* count is stored right behind the inline ref */
+ count = btrfs_shared_data_ref_count(leaf, sdref);
+ ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
+ bytenr, count, GFP_NOFS);
+ break;
+ }
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, offset, NULL,
+ *info_level + 1, 0,
+ bytenr, 1, GFP_NOFS);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_extent_data_ref *dref;
+ int count;
+ u64 root;
+
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset); /* the data ref starts at the inline ref's offset field */
+ count = btrfs_extent_data_ref_count(leaf, dref);
+ key.objectid = btrfs_extent_data_ref_objectid(leaf,
+ dref);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+ bytenr, count, GFP_NOFS);
+ break;
+ }
+ default:
+ WARN_ON(1); /* unknown inline ref type: warn only, ret is still 0 here */
+ }
+ if (ret)
+ return ret;
+ ptr += btrfs_extent_inline_ref_size(type); /* advance to the next inline ref */
+ }
+
+ return 0;
+}
+
+/*
+ * add all non-inline backrefs for bytenr to the list
+ *
+ * Starting from the item path points to, the following items of the extent
+ * tree are visited with btrfs_next_item(). The walk ends at the first item
+ * whose objectid differs from bytenr or whose type is greater than
+ * BTRFS_SHARED_DATA_REF_KEY; items with a type below
+ * BTRFS_TREE_BLOCK_REF_KEY are skipped. info_level + 1 is passed as level
+ * for the two tree block ref types.
+ *
+ * returns 0 when the walk ends normally, otherwise the non-zero value
+ * returned by btrfs_next_item() (<0) or __add_prelim_ref()
+ */
+static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ int info_level, struct list_head *prefs)
+{
+ struct btrfs_root *extent_root = fs_info->extent_root;
+ int ret;
+ int slot;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+
+ while (1) {
+ ret = btrfs_next_item(extent_root, path);
+ if (ret < 0)
+ break;
+ if (ret) { /* a positive return ends the walk without being an error */
+ ret = 0;
+ break;
+ }
+
+ slot = path->slots[0];
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.objectid != bytenr)
+ break;
+ if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
+ continue;
+ if (key.type > BTRFS_SHARED_DATA_REF_KEY)
+ break;
+
+ switch (key.type) {
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, 0, NULL,
+ info_level + 1, key.offset,
+ bytenr, 1, GFP_NOFS);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_shared_data_ref *sdref;
+ int count;
+
+ sdref = btrfs_item_ptr(leaf, slot,
+ struct btrfs_shared_data_ref);
+ count = btrfs_shared_data_ref_count(leaf, sdref);
+ ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
+ bytenr, count, GFP_NOFS);
+ break;
+ }
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, key.offset, NULL,
+ info_level + 1, 0,
+ bytenr, 1, GFP_NOFS);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_extent_data_ref *dref;
+ int count;
+ u64 root;
+
+ dref = btrfs_item_ptr(leaf, slot,
+ struct btrfs_extent_data_ref);
+ count = btrfs_extent_data_ref_count(leaf, dref);
+ key.objectid = btrfs_extent_data_ref_objectid(leaf,
+ dref); /* key is reused for the prelim ref; it is re-read from the leaf on the next pass */
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+ bytenr, count, GFP_NOFS);
+ break;
+ }
+ default:
+ WARN_ON(1); /* warn only, ret is still 0 here */
+ }
+ if (ret)
+ return ret;
+
+ }
+
+ return ret;
+}
+
+/*
+ * this adds all existing backrefs (inline backrefs, backrefs and delayed
+ * refs) for the given bytenr to the refs list, merges duplicates and resolves
+ * indirect refs to their parent bytenr.
+ * When roots are found, they're added to the roots list
+ *
+ * NOTE(review): only inline and keyed backrefs from the extent tree are
+ * collected in this function; nothing here reads delayed refs.
+ *
+ * refs receives the parent bytenr of every ref that has a parent, with the
+ * ref's inode list attached as aux pointer. roots may be NULL, in which case
+ * no root ids are recorded. extent_item_pos may be NULL; when it is set,
+ * level 0 parents without an inode list are read so the extent can be looked
+ * up at that position.
+ *
+ * returns <0 on error; the callers in this file only test for negative
+ * values, other values come from the last helper that was called.
+ *
+ * FIXME some caching might speed things up
+ */
+static int find_parent_nodes(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist *refs,
+ struct ulist *roots, const u64 *extent_item_pos)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ int info_level = 0;
+ int ret;
+ struct list_head prefs;
+ struct __prelim_ref *ref;
+ struct extent_inode_elem *eie = NULL;
+ u64 total_refs = 0;
+
+ INIT_LIST_HEAD(&prefs);
+
+ key.objectid = bytenr;
+ key.offset = (u64)-1; /* search behind any item for bytenr, then step back one slot */
+ if (btrfs_fs_incompat(fs_info,
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ else
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret == 0); /* an exact match for offset (u64)-1 is not expected */
+
+ if (path->slots[0]) {
+ struct extent_buffer *leaf;
+ int slot;
+
+ path->slots[0]--; /* item right before the search position */
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid == bytenr &&
+ (key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY)) {
+ ret = __add_inline_refs(fs_info, path, bytenr,
+ &info_level, &prefs,
+ &total_refs);
+ if (ret)
+ goto out;
+ ret = __add_keyed_refs(fs_info, path, bytenr,
+ info_level, &prefs);
+ if (ret)
+ goto out;
+ }
+ }
+ btrfs_release_path(path);
+
+ ret = __add_missing_keys(fs_info, &prefs);
+ if (ret)
+ goto out;
+
+ __merge_refs(&prefs, 1); /* mode 1: merge refs for the same block */
+
+ ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
+ extent_item_pos, total_refs);
+ if (ret)
+ goto out;
+
+ __merge_refs(&prefs, 2); /* mode 2: merge refs that ended up with the same parent */
+
+ while (!list_empty(&prefs)) {
+ ref = list_first_entry(&prefs, struct __prelim_ref, list);
+ WARN_ON(ref->count < 0);
+ if (roots && ref->count && ref->root_id && ref->parent == 0) {
+ /* no parent == root of tree */
+ ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
+ if (ret < 0)
+ goto out;
+ }
+ if (ref->count && ref->parent) {
+ if (extent_item_pos && !ref->inode_list &&
+ ref->level == 0) {
+ u32 bsz;
+ struct extent_buffer *eb;
+ bsz = btrfs_level_size(fs_info->extent_root,
+ ref->level);
+ eb = read_tree_block(fs_info->extent_root,
+ ref->parent, bsz, 0);
+ if (!extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ ret = -EIO;
+ goto out;
+ }
+ ret = find_extent_in_eb(eb, bytenr,
+ *extent_item_pos, &eie);
+ free_extent_buffer(eb);
+ if (ret < 0)
+ goto out;
+ ref->inode_list = eie;
+ }
+ ret = ulist_add_merge_ptr(refs, ref->parent,
+ ref->inode_list,
+ (void **)&eie, GFP_NOFS);
+ if (ret < 0)
+ goto out;
+ if (!ret && extent_item_pos) {
+ /*
+ * we've recorded that parent, so we must extend
+ * its inode list here
+ */
+ BUG_ON(!eie);
+ while (eie->next)
+ eie = eie->next;
+ eie->next = ref->inode_list;
+ }
+ eie = NULL; /* keeps the error path below from freeing a list that is referenced elsewhere */
+ }
+ list_del(&ref->list);
+ kfree(ref);
+ }
+
+out:
+ btrfs_free_path(path);
+ while (!list_empty(&prefs)) {
+ ref = list_first_entry(&prefs, struct __prelim_ref, list);
+ list_del(&ref->list);
+ kfree(ref); /* NOTE(review): ref->inode_list is not released here - confirm this cannot leak */
+ }
+ if (ret < 0)
+ free_inode_elem_list(eie);
+ return ret;
+}
+
+/*
+ * release a ulist of leafs: the aux value of every node is taken as the head
+ * of an extent_inode_elem list and freed before the ulist itself goes away.
+ */
+static void free_leaf_list(struct ulist *blocks)
+{
+	struct ulist_iterator it;
+	struct ulist_node *cur;
+
+	ULIST_ITER_INIT(&it);
+	for (cur = ulist_next(blocks, &it); cur;
+	     cur = ulist_next(blocks, &it)) {
+		if (cur->aux) {
+			free_inode_elem_list((struct extent_inode_elem *)
+					     (uintptr_t)cur->aux);
+			cur->aux = 0;
+		}
+	}
+
+	ulist_free(blocks);
+}
+
+/*
+ * Finds all leafs with a reference to the specified combination of bytenr and
+ * offset. The leafs are stored in a newly allocated ulist that is returned
+ * through *leafs. The aux value of its nodes may point to an inode list, so
+ * the ulist must be released with free_leaf_list() instead of ulist_free().
+ *
+ * -ENOENT from find_parent_nodes() is not treated as an error.
+ *
+ * returns 0 on success, <0 on error. On error the ulist has already been
+ * freed and *leafs is NULL, so the caller is never left with a stale pointer.
+ */
+static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *fs_info, u64 bytenr,
+				u64 time_seq, struct ulist **leafs,
+				const u64 *extent_item_pos)
+{
+	int ret;
+
+	*leafs = ulist_alloc(GFP_NOFS);
+	if (!*leafs)
+		return -ENOMEM;
+
+	ret = find_parent_nodes(trans, fs_info, bytenr,
+				time_seq, *leafs, NULL, extent_item_pos);
+	if (ret < 0 && ret != -ENOENT) {
+		free_leaf_list(*leafs);
+		/* do not hand a freed ulist back to the caller */
+		*leafs = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * walk all backrefs for a given extent to find all roots that reference this
+ * extent. Walking a backref means finding all extents that reference this
+ * extent and in turn walk the backrefs of those, too. Naturally this is a
+ * recursive process, but here it is implemented in an iterative fashion: We
+ * find all referencing extents for the extent in question and put them on a
+ * list. In turn, we find all referencing extents for those, further appending
+ * to the list. The way we iterate the list allows adding more elements after
+ * the current while iterating. The process stops when we reach the end of the
+ * list. Found roots are added to the roots list.
+ *
+ * On success *roots points to a newly allocated ulist that the caller must
+ * release with ulist_free(). -ENOENT from find_parent_nodes() is not treated
+ * as an error.
+ *
+ * returns 0 on success, < 0 on error. On error *roots is NULL.
+ */
+static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+				  struct btrfs_fs_info *fs_info, u64 bytenr,
+				  u64 time_seq, struct ulist **roots)
+{
+	struct ulist *tmp;
+	struct ulist_node *node = NULL;
+	struct ulist_iterator uiter;
+	int ret;
+
+	/* tmp collects the parent blocks that still have to be walked */
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		return -ENOMEM;
+	*roots = ulist_alloc(GFP_NOFS);
+	if (!*roots) {
+		ulist_free(tmp);
+		return -ENOMEM;
+	}
+
+	ULIST_ITER_INIT(&uiter);
+	while (1) {
+		ret = find_parent_nodes(trans, fs_info, bytenr,
+					time_seq, tmp, *roots, NULL);
+		if (ret < 0 && ret != -ENOENT) {
+			ulist_free(tmp);
+			ulist_free(*roots);
+			/* do not hand a freed ulist back to the caller */
+			*roots = NULL;
+			return ret;
+		}
+		/* continue with the next parent that was queued on tmp */
+		node = ulist_next(tmp, &uiter);
+		if (!node)
+			break;
+		bytenr = node->val;
+		cond_resched();
+	}
+
+	ulist_free(tmp);
+	return 0;
+}
+
+/*
+ * exported entry point: hands all arguments unchanged to
+ * __btrfs_find_all_roots() and returns its result.
+ */
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+			 struct btrfs_fs_info *fs_info, u64 bytenr,
+			 u64 time_seq, struct ulist **roots)
+{
+	int ret;
+
+	ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots);
+	return ret;
+}
+
+/*
+ * position path on (inum INODE_ITEM ioff) through btrfs_find_item(); the key
+ * that btrfs_find_item() reports is not handed back to the caller.
+ */
+int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+		    struct btrfs_path *path)
+{
+	struct btrfs_key ignored;
+	int ret;
+
+	ret = btrfs_find_item(fs_root, path, inum, ioff,
+			      BTRFS_INODE_ITEM_KEY, &ignored);
+	return ret;
+}
+
+/*
+ * same lookup as inode_item_info() but for INODE_REF items; the key found by
+ * btrfs_find_item() is stored in found_key.
+ */
+static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+			  struct btrfs_path *path,
+			  struct btrfs_key *found_key)
+{
+	int ret;
+
+	ret = btrfs_find_item(fs_root, path, inum, ioff,
+			      BTRFS_INODE_REF_KEY, found_key);
+	return ret;
+}
+
+/*
+ * look up the first INODE_EXTREF item of inode_objectid at or behind
+ * start_off. On success 0 is returned, *ret_extref is set from the item
+ * offset in the leaf that path points to and, if found_off is not NULL,
+ * *found_off receives the key offset of that item. The path is not released.
+ *
+ * returns -ENOENT if no such item exists and <0 on other errors
+ */
+int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
+			  u64 start_off, struct btrfs_path *path,
+			  struct btrfs_inode_extref **ret_extref,
+			  u64 *found_off)
+{
+	struct btrfs_key search_key;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	int slot;
+	int ret;
+
+	search_key.objectid = inode_objectid;
+	btrfs_set_key_type(&search_key, BTRFS_INODE_EXTREF_KEY);
+	search_key.offset = start_off;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * If the item at offset is not found, btrfs_search_slot points us to
+	 * the slot where it should be inserted. That slot can lie behind the
+	 * last item of the leaf, in which case we move on to the next leaf
+	 * until the slot refers to an existing item.
+	 */
+	for (;;) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		if (slot < btrfs_header_nritems(leaf))
+			break;
+
+		ret = btrfs_next_leaf(root, path);
+		if (ret)
+			return ret >= 1 ? -ENOENT : ret;
+	}
+
+	btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+	/*
+	 * Anything but an extended ref key for this objectid means that
+	 * there are no more of them to be found in the tree.
+	 */
+	if (found_key.objectid != inode_objectid ||
+	    btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY)
+		return -ENOENT;
+
+	*ret_extref = (struct btrfs_inode_extref *)
+			btrfs_item_ptr_offset(leaf, path->slots[0]);
+	if (found_off)
+		*found_off = found_key.offset;
+
+	return 0;
+}
+
+/*
+ * this iterates to turn a name (from iref/extref) into a full filesystem path.
+ * Elements of the path are separated by '/' and the path is guaranteed to be
+ * 0-terminated. the path is only given within the current file system.
+ * Therefore, it never starts with a '/'. the caller is responsible to provide
+ * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
+ * the start point of the resulting string is returned. this pointer is within
+ * dest, normally.
+ * in case the path buffer would overflow, the pointer is decremented further
+ * as if output was written to the buffer, though no more output is actually
+ * generated. that way, the caller can determine how much space would be
+ * required for the path to fit into the buffer. in that case, the returned
+ * value will be smaller than dest. callers must check this!
+ * if looking up the inode ref of a parent fails, ERR_PTR(error) is returned
+ * instead. the path is released before returning.
+ */
+char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
+ u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb_in, u64 parent,
+ char *dest, u32 size)
+{
+ int slot;
+ u64 next_inum;
+ int ret;
+ s64 bytes_left = ((s64)size) - 1; /* signed: goes negative once the buffer is exhausted */
+ struct extent_buffer *eb = eb_in;
+ struct btrfs_key found_key;
+ struct btrfs_inode_ref *iref;
+
+ if (bytes_left >= 0)
+ dest[bytes_left] = '\0';
+
+ while (1) {
+ bytes_left -= name_len; /* the name goes in front of what was written so far */
+ if (bytes_left >= 0)
+ read_extent_buffer(eb, dest + bytes_left,
+ name_off, name_len);
+ if (eb != eb_in)
+ free_extent_buffer(eb); /* drops the reference taken further down in the previous pass */
+ ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret)
+ break;
+
+ next_inum = found_key.offset; /* key offset of the inode ref is used as the next parent */
+
+ /* regular exit ahead: the inode is its own parent */
+ if (parent == next_inum)
+ break;
+
+ slot = path->slots[0];
+ eb = path->nodes[0];
+ /* make sure we can use eb after releasing the path */
+ if (eb != eb_in)
+ eb->refs++;
+ btrfs_release_path(path);
+ iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
+
+ name_len = btrfs_inode_ref_name_len(eb, iref);
+ name_off = (unsigned long)(iref + 1); /* the name is stored right behind the inode ref */
+
+ parent = next_inum;
+ --bytes_left; /* room for the '/' separator */
+ if (bytes_left >= 0)
+ dest[bytes_left] = '/';
+ }
+
+ btrfs_release_path(path);
+
+ if (ret)
+ return ERR_PTR(ret);
+
+ return dest + bytes_left;
+}
+
+/*
+ * this makes the path point to (logical EXTENT_ITEM *)
+ * On success 0 is returned, found_key holds the key of the extent item and
+ * *flags_ret is set to BTRFS_EXTENT_FLAG_DATA for data or to
+ * BTRFS_EXTENT_FLAG_TREE_BLOCK for tree blocks.
+ * returns -ENOENT if logical is not within any extent, -EIO (after a warning)
+ * if flags_ret is NULL and <0 on other errors.
+ * The path is not released here, neither on success nor on error.
+ */
+int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
+ struct btrfs_path *path, struct btrfs_key *found_key,
+ u64 *flags_ret)
+{
+ int ret;
+ u64 flags;
+ u64 size = 0;
+ u32 item_size;
+ struct extent_buffer *eb;
+ struct btrfs_extent_item *ei;
+ struct btrfs_key key;
+
+ if (btrfs_fs_incompat(fs_info,
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ else
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.objectid = logical;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
+ if (ret) {
+ if (ret > 0)
+ ret = -ENOENT;
+ return ret;
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
+ if (found_key->type == BTRFS_METADATA_ITEM_KEY)
+ size = fs_info->extent_root->leafsize; /* METADATA_ITEM: leafsize is used as the extent length */
+ else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
+ size = found_key->offset; /* EXTENT_ITEM: the key offset is used as the extent length */
+
+ if (found_key->objectid > logical ||
+ found_key->objectid + size <= logical) {
+ pr_debug("logical %llu is not within any extent\n", logical);
+ return -ENOENT;
+ }
+
+ eb = path->nodes[0];
+ item_size = btrfs_item_size_nr(eb, path->slots[0]);
+ BUG_ON(item_size < sizeof(*ei));
+
+ ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
+ flags = btrfs_extent_flags(eb, ei);
+
+ pr_debug("logical %llu is at position %llu within the extent (%llu "
+ "EXTENT_ITEM %llu) flags %#llx size %u\n",
+ logical, logical - found_key->objectid, found_key->objectid,
+ found_key->offset, flags, item_size);
+
+ if (flags_ret) {
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+ *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK;
+ else if (flags & BTRFS_EXTENT_FLAG_DATA)
+ *flags_ret = BTRFS_EXTENT_FLAG_DATA;
+ else
+ BUG_ON(1); /* an extent item carrying neither flag is not handled */
+ return 0;
+ } else {
+ WARN_ON(1); /* callers are expected to pass flags_ret */
+ return -EIO;
+ }
+}
+
+/*
+ * helper function to iterate extent inline refs. ptr must point to a 0 value
+ * for the first call and may be modified. it is used to track state.
+ * every successful call stores the current ref in *out_eiref and its type in
+ * *out_type. if more refs exist, 0 is returned and the next call to
+ * __get_extent_inline_ref must pass the modified ptr parameter to get the
+ * next ref. when the ref handed out is the last one of the item, 1 is
+ * returned together with it.
+ * returns <0 on error; -ENOENT on the first call means that the item holds
+ * no inline refs at all.
+ */
+static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
+ struct btrfs_key *key,
+ struct btrfs_extent_item *ei, u32 item_size,
+ struct btrfs_extent_inline_ref **out_eiref,
+ int *out_type)
+{
+ unsigned long end;
+ u64 flags;
+ struct btrfs_tree_block_info *info;
+
+ if (!*ptr) {
+ /* first call */
+ flags = btrfs_extent_flags(eb, ei);
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ if (key->type == BTRFS_METADATA_ITEM_KEY) {
+ /* a skinny metadata extent */
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(ei + 1);
+ } else {
+ WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(info + 1); /* refs start behind the tree block info */
+ }
+ } else {
+ *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ }
+ *ptr = (unsigned long)*out_eiref;
+ if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
+ return -ENOENT;
+ }
+
+ end = (unsigned long)ei + item_size;
+ *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
+ *out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
+
+ *ptr += btrfs_extent_inline_ref_size(*out_type); /* ptr now refers to the ref for the next call */
+ WARN_ON(*ptr > end); /* the ref sizes ran past the end of the item */
+ if (*ptr == end)
+ return 1; /* last */
+
+ return 0;
+}
+
+/*
+ * reads the tree block backref for an extent. tree level and root are returned
+ * through out_level and out_root. ptr must point to a 0 value for the first
+ * call and may be modified (see __get_extent_inline_ref comment).
+ * Only BTRFS_TREE_BLOCK_REF_KEY and BTRFS_SHARED_BLOCK_REF_KEY inline refs are
+ * reported, other ref types are skipped.
+ * returns 0 if data was provided, 1 if there was no more data to provide or
+ * <0 on error.
+ */
+int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
+			    struct btrfs_key *key, struct btrfs_extent_item *ei,
+			    u32 item_size, u64 *out_root, u8 *out_level)
+{
+	int ret;
+	int type;
+	struct btrfs_extent_inline_ref *eiref;
+
+	/* set by a previous call that handed out the last ref of the item */
+	if (*ptr == (unsigned long)-1)
+		return 1;
+
+	while (1) {
+		ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
+					      &eiref, &type);
+		if (ret < 0)
+			return ret;
+
+		if (type == BTRFS_TREE_BLOCK_REF_KEY ||
+		    type == BTRFS_SHARED_BLOCK_REF_KEY)
+			break;
+
+		if (ret == 1)
+			return 1;
+	}
+
+	/* we can treat both ref types equally here */
+	*out_root = btrfs_extent_inline_ref_offset(eb, eiref);
+
+	if (key->type == BTRFS_METADATA_ITEM_KEY) {
+		/*
+		 * a skinny metadata item has no btrfs_tree_block_info behind
+		 * the extent item, the level is stored in the key offset
+		 */
+		*out_level = (u8)key->offset;
+	} else {
+		struct btrfs_tree_block_info *info;
+
+		info = (struct btrfs_tree_block_info *)(ei + 1);
+		*out_level = btrfs_tree_block_level(eb, info);
+	}
+
+	/* remember that the ref just returned was the last one */
+	if (ret == 1)
+		*ptr = (unsigned long)-1;
+
+	return 0;
+}
+
+/*
+ * calls iterate() for every element of inode_list, passing the element's
+ * inum and offset along with root and ctx. Stops at the first non-zero
+ * return value of iterate() and hands that value back; returns 0 otherwise.
+ */
+static int iterate_leaf_refs(struct extent_inode_elem *inode_list,
+			     u64 root, u64 extent_item_objectid,
+			     iterate_extent_inodes_t *iterate, void *ctx)
+{
+	struct extent_inode_elem *cur = inode_list;
+	int ret = 0;
+
+	while (cur) {
+		pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), "
+			 "root %llu\n", extent_item_objectid,
+			 cur->inum, cur->offset, root);
+		ret = iterate(cur->inum, cur->offset, root, ctx);
+		if (ret) {
+			pr_debug("stopping iteration for %llu due to ret=%d\n",
+				 extent_item_objectid, ret);
+			return ret;
+		}
+		cur = cur->next;
+	}
+
+	return ret;
+}
+
+/*
+ * calls iterate() for every inode that references the extent identified by
+ * the given parameters.
+ * when the iterator function returns a non-zero value, iteration stops.
+ *
+ * First all leafs referencing the extent are collected, then the roots
+ * referencing each of those leafs are looked up and iterate() is called for
+ * every (inode list entry, root) combination. In this implementation trans
+ * is always NULL and search_commit_root is not used.
+ *
+ * returns 0 on success, <0 on error or the non-zero value iterate() returned
+ */
+int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
+ u64 extent_item_objectid, u64 extent_item_pos,
+ int search_commit_root,
+ iterate_extent_inodes_t *iterate, void *ctx)
+{
+ int ret;
+ struct btrfs_trans_handle *trans = NULL;
+ struct ulist *refs = NULL;
+ struct ulist *roots = NULL;
+ struct ulist_node *ref_node = NULL;
+ struct ulist_node *root_node = NULL;
+ struct ulist_iterator ref_uiter;
+ struct ulist_iterator root_uiter;
+
+ pr_debug("resolving all inodes for extent %llu\n",
+ extent_item_objectid);
+
+ ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
+ 0, &refs, &extent_item_pos);
+ if (ret)
+ goto out; /* refs was already released by btrfs_find_all_leafs() */
+
+ ULIST_ITER_INIT(&ref_uiter);
+ while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
+ ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val,
+ 0, &roots);
+ if (ret)
+ break; /* roots was already released by __btrfs_find_all_roots() */
+ ULIST_ITER_INIT(&root_uiter);
+ while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
+ pr_debug("root %llu references leaf %llu, data list "
+ "%#llx\n", root_node->val, ref_node->val,
+ ref_node->aux);
+ ret = iterate_leaf_refs((struct extent_inode_elem *)
+ (uintptr_t)ref_node->aux,
+ root_node->val,
+ extent_item_objectid,
+ iterate, ctx);
+ }
+ ulist_free(roots);
+ }
+
+ free_leaf_list(refs); /* also frees the inode lists hanging off the leafs */
+out:
+ return ret;
+}
+
+/*
+ * resolves logical to the extent item containing it and calls iterate() for
+ * every inode referencing that extent (see iterate_extent_inodes). The path
+ * is only used for the extent lookup and is released again right after it.
+ *
+ * returns -EINVAL if logical belongs to a tree block, <0 on other errors and
+ * the result of iterate_extent_inodes() otherwise
+ */
+int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+				struct btrfs_path *path,
+				iterate_extent_inodes_t *iterate, void *ctx)
+{
+	struct btrfs_key extent_key;
+	u64 flags = 0;
+	int ret;
+
+	ret = extent_from_logical(fs_info, logical, path, &extent_key, &flags);
+	btrfs_release_path(path);
+	if (ret < 0)
+		return ret;
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+		return -EINVAL;
+
+	/* position of logical inside the extent, commit root search disabled */
+	return iterate_extent_inodes(fs_info, extent_key.objectid,
+				     logical - extent_key.objectid, 0,
+				     iterate, ctx);
+}
+
+/*
+ * callback invoked for every name found by iterate_inode_refs() and
+ * iterate_inode_extrefs(): parent is the inode number the name lives in,
+ * name_off/name_len locate the name inside eb.
+ */
+typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
+			      struct extent_buffer *eb, void *ctx);
+
+/*
+ * calls iterate() for every name stored in the INODE_REF items of inum.
+ * Each leaf is cloned and the path released before iterate() runs. The walk
+ * stops as soon as iterate() returns a non-zero value.
+ *
+ * returns 0 if at least one ref item was found, -ENOENT if none was found,
+ * <0 on other errors or the non-zero value iterate() returned
+ */
+static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
+			      struct btrfs_path *path,
+			      iterate_irefs_t *iterate, void *ctx)
+{
+	struct btrfs_key found_key;
+	struct btrfs_inode_ref *iref;
+	struct btrfs_item *item;
+	struct extent_buffer *eb;
+	u64 parent = 0;
+	u32 name_len;
+	u32 len;
+	u32 cur;
+	int found = 0;
+	int slot;
+	int ret = 0;
+
+	do {
+		/* resume the search right behind the previously found item */
+		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
+				     &found_key);
+		if (ret < 0)
+			break;
+		if (ret > 0) {
+			ret = found ? 0 : -ENOENT;
+			break;
+		}
+		found++;
+
+		parent = found_key.offset;
+		slot = path->slots[0];
+		eb = btrfs_clone_extent_buffer(path->nodes[0]);
+		if (!eb) {
+			ret = -ENOMEM;
+			break;
+		}
+		extent_buffer_get(eb);
+		btrfs_release_path(path);
+
+		item = btrfs_item_nr(slot);
+		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
+
+		/* one item may hold several refs, each followed by its name */
+		cur = 0;
+		while (cur < btrfs_item_size(eb, item)) {
+			name_len = btrfs_inode_ref_name_len(eb, iref);
+			/* path must be released before calling iterate()! */
+			pr_debug("following ref at offset %u for inode %llu in "
+				 "tree %llu\n", cur, found_key.objectid,
+				 fs_root->objectid);
+			ret = iterate(parent, name_len,
+				      (unsigned long)(iref + 1), eb, ctx);
+			if (ret)
+				break;
+			len = sizeof(*iref) + name_len;
+			iref = (struct btrfs_inode_ref *)((char *)iref + len);
+			cur += len;
+		}
+		free_extent_buffer(eb);
+	} while (!ret);
+
+	btrfs_release_path(path);
+
+	return ret;
+}
+
+/*
+ * calls iterate() for every name stored in the INODE_EXTREF items of inum.
+ * Each leaf is cloned and the path released before iterate() runs, so all
+ * item accesses after the release go through the clone. The walk stops as
+ * soon as iterate() returns a non-zero value.
+ *
+ * returns 0 if at least one extref item was found, -ENOENT if none was found,
+ * <0 on other errors or the non-zero value iterate() returned
+ */
+static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
+				 struct btrfs_path *path,
+				 iterate_irefs_t *iterate, void *ctx)
+{
+	int ret;
+	int slot;
+	u64 offset = 0;
+	u64 parent;
+	int found = 0;
+	struct extent_buffer *eb;
+	struct btrfs_inode_extref *extref;
+	u32 item_size;
+	u32 cur_offset;
+	unsigned long ptr;
+
+	while (1) {
+		ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref,
+					    &offset);
+		if (ret < 0)
+			break;
+		if (ret) {
+			ret = found ? 0 : -ENOENT;
+			break;
+		}
+		++found;
+
+		slot = path->slots[0];
+		eb = btrfs_clone_extent_buffer(path->nodes[0]);
+		if (!eb) {
+			ret = -ENOMEM;
+			break;
+		}
+		extent_buffer_get(eb);
+
+		btrfs_release_path(path);
+
+		/*
+		 * the path no longer holds the leaf once it is released, so
+		 * the item must be read from the clone
+		 */
+		item_size = btrfs_item_size_nr(eb, slot);
+		ptr = btrfs_item_ptr_offset(eb, slot);
+		cur_offset = 0;
+
+		/* one item may hold several extrefs, each followed by its name */
+		while (cur_offset < item_size) {
+			u32 name_len;
+
+			extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
+			parent = btrfs_inode_extref_parent(eb, extref);
+			name_len = btrfs_inode_extref_name_len(eb, extref);
+			ret = iterate(parent, name_len,
+				      (unsigned long)&extref->name, eb, ctx);
+			if (ret)
+				break;
+
+			cur_offset += name_len;
+			cur_offset += sizeof(*extref);
+		}
+		free_extent_buffer(eb);
+
+		/*
+		 * stop like iterate_inode_refs() does; otherwise the next
+		 * lookup would overwrite the value iterate() returned
+		 */
+		if (ret)
+			break;
+
+		offset++;
+	}
+
+	btrfs_release_path(path);
+
+	return ret;
+}
+
+/*
+ * runs iterate() over the regular inode refs of inum first and over its
+ * extended refs afterwards. -ENOENT from the regular refs is tolerated, and
+ * -ENOENT from the extended refs is turned into 0 when the regular refs
+ * completed successfully. Any other non-zero result is returned as is.
+ */
+static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
+			 struct btrfs_path *path, iterate_irefs_t *iterate,
+			 void *ctx)
+{
+	int refs_ret;
+	int extrefs_ret;
+
+	refs_ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
+	if (refs_ret && refs_ret != -ENOENT)
+		return refs_ret;
+
+	extrefs_ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
+	if (extrefs_ret == -ENOENT && !refs_ret)
+		return 0;
+
+	return extrefs_ret;
+}
+
+/*
+ * iterate_irefs_t callback that resolves one name to a path and stores it in
+ * the data container of the inode_fs_paths passed as ctx. A path that does
+ * not fit is only accounted in elem_missed and bytes_missing.
+ *
+ * returns 0 if the path could be dumped (probably truncated)
+ * returns <0 in case of an error
+ */
+static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
+			 struct extent_buffer *eb, void *ctx)
+{
+	struct inode_fs_paths *ipath = ctx;
+	struct btrfs_data_container *out = ipath->fspath;
+	const int s_ptr = sizeof(char *);
+	int i = out->elem_cnt;
+	char *fspath_min;
+	char *fspath;
+	u32 bytes_left;
+
+	/* one pointer slot in val[] is set aside for this path */
+	if (out->bytes_left > s_ptr)
+		bytes_left = out->bytes_left - s_ptr;
+	else
+		bytes_left = 0;
+
+	/* string data may not start before the end of the pointer slots */
+	fspath_min = (char *)out->val + (i + 1) * s_ptr;
+	fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
+				   name_off, eb, inum, fspath_min, bytes_left);
+	if (IS_ERR(fspath))
+		return PTR_ERR(fspath);
+
+	if (fspath <= fspath_min) {
+		/* did not fit: only record how much space was missing */
+		out->elem_missed++;
+		out->bytes_missing += fspath_min - fspath;
+		out->bytes_left = 0;
+		return 0;
+	}
+
+	out->val[i] = (u64)(unsigned long)fspath;
+	out->elem_cnt++;
+	out->bytes_left = fspath - fspath_min;
+
+	return 0;
+}
+
+/*
+ * this dumps all file system paths to the inode into the ipath struct,
+ * provided it has been created large enough. each path is zero-terminated and
+ * accessed from ipath->fspath->val[i].
+ * when it returns, there are ipath->fspath->elem_cnt paths available in
+ * ipath->fspath->val[]. when the allocated space wasn't sufficient, the
+ * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise
+ * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
+ * have been needed to return all paths.
+ */
+int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
+{
+	struct btrfs_root *root = ipath->fs_root;
+	struct btrfs_path *path = ipath->btrfs_path;
+
+	return iterate_irefs(inum, root, path, inode_to_path, ipath);
+}
+
+/*
+ * allocates a data container of total_bytes, but never less than the
+ * container header itself. bytes_left is set to the space remaining behind
+ * the header; if total_bytes is smaller than the header, bytes_missing
+ * records the shortfall instead. The element counters start at zero.
+ *
+ * returns the container or ERR_PTR(-ENOMEM)
+ */
+struct btrfs_data_container *init_data_container(u32 total_bytes)
+{
+	struct btrfs_data_container *data;
+	size_t alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
+
+	data = vmalloc(alloc_bytes);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	data->elem_cnt = 0;
+	data->elem_missed = 0;
+
+	if (total_bytes < sizeof(*data)) {
+		data->bytes_left = 0;
+		data->bytes_missing = sizeof(*data) - total_bytes;
+	} else {
+		data->bytes_missing = 0;
+		data->bytes_left = total_bytes - sizeof(*data);
+	}
+
+	return data;
+}
+
+/*
+ * allocates space to return multiple file system paths for an inode.
+ * total_bytes to allocate are passed, note that space usable for actual path
+ * information will be total_bytes - sizeof(struct btrfs_data_container).
+ * fs_root and path are only stored for later use by paths_from_inode().
+ * the returned pointer must be freed with free_ipath() in the end.
+ *
+ * returns the new inode_fs_paths or an ERR_PTR() on allocation failure
+ */
+struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
+				  struct btrfs_path *path)
+{
+	struct inode_fs_paths *ifp;
+	struct btrfs_data_container *fspath;
+
+	fspath = init_data_container(total_bytes);
+	if (IS_ERR(fspath))
+		return (void *)fspath;
+
+	ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
+	if (!ifp) {
+		/* the container comes from vmalloc(), see free_ipath() */
+		vfree(fspath);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ifp->btrfs_path = path;
+	ifp->fspath = fspath;
+	ifp->fs_root = fs_root;
+
+	return ifp;
+}
+
+/*
+ * releases an inode_fs_paths together with its data container. Passing NULL
+ * is allowed and does nothing.
+ */
+void free_ipath(struct inode_fs_paths *ipath)
+{
+	if (ipath) {
+		vfree(ipath->fspath);
+		kfree(ipath);
+	}
+}
diff --git a/backref.h b/backref.h
new file mode 100644
index 00000000..3d2ed458
--- /dev/null
+++ b/backref.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_BACKREF_H__
+#define __BTRFS_BACKREF_H__
+
+#include "ulist.h"
+#include "extent_io.h"
+
+struct inode_fs_paths {
+ struct btrfs_path *btrfs_path; /* path passed to init_ipath(), reused for the inode ref lookups */
+ struct btrfs_root *fs_root; /* root passed to init_ipath() in which the refs are looked up */
+ struct btrfs_data_container *fspath; /* result buffer allocated by init_ipath(), filled by paths_from_inode() */
+};
+
+typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
+ void *ctx);
+
+int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+ struct btrfs_path *path);
+
+int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
+ struct btrfs_path *path, struct btrfs_key *found_key,
+ u64 *flags);
+
+int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level);
+
+int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
+ u64 extent_item_objectid,
+ u64 extent_offset, int search_commit_root,
+ iterate_extent_inodes_t *iterate, void *ctx);
+
+int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+ iterate_extent_inodes_t *iterate, void *ctx);
+
+int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
+
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist **roots);
+char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
+ u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb_in, u64 parent,
+ char *dest, u32 size);
+
+struct btrfs_data_container *init_data_container(u32 total_bytes);
+struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
+ struct btrfs_path *path);
+void free_ipath(struct inode_fs_paths *ipath);
+
+int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
+ u64 start_off, struct btrfs_path *path,
+ struct btrfs_inode_extref **ret_extref,
+ u64 *found_off);
+#endif
diff --git a/bcp b/bcp
new file mode 100755
index 00000000..e7ca641a
--- /dev/null
+++ b/bcp
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+# Copyright (C) 2007 Oracle. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public
+# License v2 as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+#
+import sys, os, stat, fcntl
+from optparse import OptionParser
+
+def copylink(srcname, dst, filename, statinfo, force_name):
+ dstname = os.path.join(dst, force_name or filename)
+ if not os.path.exists(dstname):
+ link_target = os.readlink(srcname)
+ os.symlink(link_target, dstname)
+
+def copydev(srcname, dst, filename, statinfo, force_name):
+ devbits = statinfo.st_mode & (stat.S_IFBLK | stat.S_IFCHR)
+ mode = stat.S_IMODE(statinfo.st_mode) | devbits
+ dstname = os.path.join(dst, force_name or filename)
+ if not os.path.exists(dstname):
+ os.mknod(dstname, mode, statinfo.st_rdev)
+
+def copyfile(srcname, dst, filename, statinfo, force_name):
+ written = 0
+ dstname = os.path.join(dst, force_name or filename)
+
+ st_mode = statinfo.st_mode
+ if stat.S_ISLNK(st_mode):
+ copylink(srcname, dst, part, statinfo, None)
+ return
+ elif stat.S_ISBLK(st_mode) or stat.S_ISCHR(st_mode):
+ copydev(srcname, dst, part, statinfo, None)
+ return
+ elif not stat.S_ISREG(st_mode):
+ return
+
+ try:
+ os.unlink(dstname)
+ except:
+ pass
+
+ if options.link:
+ os.link(srcname, dstname)
+ return
+
+ dstf = file(dstname, 'w')
+ srcf = file(srcname, 'r')
+
+ ret = 1
+
+ try:
+ if not options.copy:
+ ret = fcntl.ioctl(dstf.fileno(), 1074041865, srcf.fileno())
+ except:
+ pass
+
+ if ret != 0:
+ while True:
+ buf = srcf.read(256 * 1024)
+ if not buf:
+ break
+ written += len(buf)
+ dstf.write(buf)
+
+ os.chmod(dstname, stat.S_IMODE(statinfo.st_mode))
+ os.chown(dstname, statinfo.st_uid, statinfo.st_gid)
+
+
+# Command line driver: bcp [options] <source>... <destination>
+# NOTE(review): leading indentation was lost in this rendering of the
+# patch; the comments below describe the statement sequence only and do
+# not assert how the statements nest.
+usage = "usage: %prog [options]"
+parser = OptionParser(usage=usage)
+parser.add_option("-l", "--link", help="Create hard links", default=False,
+ action="store_true")
+parser.add_option("-c", "--copy", help="Copy file bytes (don't cow)",
+ default=False, action="store_true")
+
+(options,args) = parser.parse_args()
+
+# At least one source and one destination are required.
+if len(args) < 2:
+ sys.stderr.write("source or destination not specified\n")
+ sys.exit(1)
+
+# -c wins when both -l and -c are given.
+if options.link and options.copy:
+ sys.stderr.write("Both -l and -c specified, using copy mode\n")
+ options.link = False
+
+
+# The last argument is the destination, everything before it is a source.
+total_args = len(args)
+src_args = total_args - 1
+orig_dst = args[-1]
+
+# With several sources the destination has to be a directory; it is
+# created when missing.
+if src_args > 1:
+ if not os.path.exists(orig_dst):
+ os.makedirs(orig_dst)
+ if not os.path.isdir(orig_dst):
+ sys.stderr.write("Destination %s is not a directory\n" % orig_dst)
+ exit(1)
+
+for srci in xrange(0, src_args):
+ src = args[srci]
+ # Plain-file source: copy it directly.  For a single source whose
+ # destination is not a directory, the destination's basename becomes
+ # the new file name and its dirname the target directory.
+ if os.path.isfile(src):
+ statinfo = os.lstat(src)
+ force_name = None
+ if src_args == 1:
+ if not os.path.isdir(orig_dst):
+ force_name = os.path.basename(orig_dst)
+ orig_dst = os.path.dirname(orig_dst) or '.'
+ copyfile(src, orig_dst, os.path.basename(src), statinfo, force_name)
+ continue
+
+ # Other sources: pick the directory the tree is copied into.
+ if src_args > 1 or os.path.exists(orig_dst):
+ dst = os.path.join(orig_dst, os.path.basename(src))
+ else:
+ dst = orig_dst
+
+ # Create the top-level destination and mirror the source's mode/owner.
+ if not os.path.exists(dst):
+ os.makedirs(dst)
+ statinfo = os.stat(src)
+ os.chmod(dst, stat.S_IMODE(statinfo.st_mode))
+ os.chown(dst, statinfo.st_uid, statinfo.st_gid)
+
+ iter = os.walk(src, topdown=True)
+
+ for (dirpath, dirnames, filenames) in iter:
+ # Directories: symlinked ones are recreated as links, real ones are
+ # created (if needed) and given the source's mode and ownership.
+ for x in dirnames:
+ srcname = os.path.join(dirpath, x)
+ statinfo = os.lstat(srcname)
+
+ # 'part' is the entry's path relative to the source root.
+ part = os.path.relpath(srcname, src)
+
+ if stat.S_ISLNK(statinfo.st_mode):
+ copylink(srcname, dst, part, statinfo, None)
+ continue
+
+ dst_dir = os.path.join(dst, part)
+ if not os.path.exists(dst_dir):
+ os.makedirs(dst_dir)
+
+ os.chmod(dst_dir, stat.S_IMODE(statinfo.st_mode))
+ os.chown(dst_dir, statinfo.st_uid, statinfo.st_gid)
+
+ # Non-directory entries are handled by copyfile() based on their lstat.
+ for f in filenames:
+ srcname = os.path.join(dirpath, f)
+ part = os.path.relpath(srcname, src)
+
+ statinfo = os.lstat(srcname)
+ copyfile(srcname, dst, part, statinfo, None)
+
+
diff --git a/bitops.h b/bitops.h
new file mode 100644
index 00000000..5b35f9fc
--- /dev/null
+++ b/bitops.h
@@ -0,0 +1,224 @@
+#ifndef _PERF_LINUX_BITOPS_H_
+#define _PERF_LINUX_BITOPS_H_
+
+#include <linux/kernel.h>
+
+/* Integer division of n by d, rounded up via (n + d - 1) / d. */
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#endif
+
+#define BITS_PER_BYTE 8
+/* Number of long / u64 / u32 words needed to hold nr bits. */
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
+
+/*
+ * Loop header: bit takes each index returned by find_first_bit() and then
+ * find_next_bit() on addr, stopping once the index reaches size.
+ * The arguments are evaluated more than once.
+ */
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* Set bit nr in the bitmap starting at addr (plain, non-atomic store). */
+static inline void set_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+
+	addr[nr / BITS_PER_LONG] |= mask;
+}
+
+/* Clear bit nr in the bitmap starting at addr (plain, non-atomic store). */
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+
+	addr[nr / BITS_PER_LONG] &= ~mask;
+}
+
+/**
+ * hweight32 - population count of a 32-bit word
+ * @w: the word to weigh
+ *
+ * Returns the number of bits set in @w, computed by summing adjacent bit
+ * fields of doubling width (2, 4, 8, 16 and finally 32 bits).
+ */
+static inline unsigned int hweight32(unsigned int w)
+{
+	unsigned int pairs = w - ((w >> 1) & 0x55555555);
+	unsigned int nibbles = (pairs & 0x33333333) +
+			       ((pairs >> 2) & 0x33333333);
+	unsigned int bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
+	unsigned int halves = bytes + (bytes >> 8);
+
+	return (halves + (halves >> 16)) & 0x000000FF;
+}
+
+/*
+ * Population count of a 64-bit word.  With 32-bit longs the two halves are
+ * weighed separately with hweight32(); with 64-bit longs the same
+ * field-summing scheme is applied to the full word.
+ */
+static inline unsigned long hweight64(__u64 w)
+{
+#if BITS_PER_LONG == 32
+	unsigned int hi = (unsigned int)(w >> 32);
+	unsigned int lo = (unsigned int)w;
+
+	return hweight32(hi) + hweight32(lo);
+#elif BITS_PER_LONG == 64
+	__u64 acc = w - ((w >> 1) & 0x5555555555555555ul);
+
+	acc = (acc & 0x3333333333333333ul) + ((acc >> 2) & 0x3333333333333333ul);
+	acc = (acc + (acc >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
+	acc += acc >> 8;
+	acc += acc >> 16;
+	acc += acc >> 32;
+	return acc & 0x00000000000000FFul;
+#endif
+}
+
+/* Population count of an unsigned long, dispatching on its size. */
+static inline unsigned long hweight_long(unsigned long w)
+{
+	if (sizeof(w) == 4)
+		return hweight32(w);
+	return hweight64(w);
+}
+
+/* Index of the unsigned long word that holds bit number nr. */
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/**
+ * __ffs - index of the lowest set bit in a word
+ * @word: The word to search
+ *
+ * Binary search: whenever the low half of the remaining window is empty,
+ * skip it and account for its width.  The result is meaningless when
+ * @word is 0, so callers must test for that first.
+ */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+	unsigned int width;
+	int num = 0;
+
+#if BITS_PER_LONG == 64
+	if (!(word & 0xffffffff)) {
+		num += 32;
+		word >>= 32;
+	}
+#endif
+	/* Remaining window widths: 16, 8, 4, 2, 1. */
+	for (width = 16; width; width >>= 1) {
+		if (word & ((1UL << width) - 1))
+			continue;
+		num += width;
+		word >>= width;
+	}
+	return num;
+}
+
+/* Index of the lowest zero bit of x: __ffs() applied to the complement. */
+#define ffz(x) __ffs(~(x))
+
+/*
+ * Find the first set bit in a memory region.
+ *
+ * Scans the bitmap at addr, which holds size valid bits, and returns the
+ * index of the lowest set bit.  When no bit is set the return value equals
+ * the size passed in.
+ */
+static inline unsigned long
+find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ /* Whole words first: loop while at least BITS_PER_LONG bits remain. */
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+
+ /* Partial last word: ignore the bits at or above the remaining size. */
+ tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found:
+ return result + __ffs(tmp);
+}
+
+/*
+ * Find the next set bit in a memory region.
+ *
+ * Searches the bitmap at addr (size valid bits) starting at bit offset and
+ * returns the index of the first set bit at or after offset.  Returns size
+ * when offset is out of range or no such bit exists.
+ */
+static inline unsigned long
+find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ /* p: word containing offset; result: bit index of that word's bit 0. */
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ /* From here on size counts the bits left starting at result. */
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ /* Start mid-word: drop the bits below offset. */
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ /* Whole words. */
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ /* Partial word: ignore the bits at or above the remaining size. */
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __ffs(tmp);
+}
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ *
+ * Searches the bitmap at addr (size valid bits) starting at bit offset and
+ * returns the index of the first clear bit at or after offset.  Returns
+ * size when offset is out of range or every remaining bit is set.
+ */
+static inline unsigned long
+find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ /* p: word containing offset; result: bit index of that word's bit 0. */
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ /* From here on size counts the bits left starting at result. */
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ /* Start mid-word: treat the bits below offset as set. */
+ tmp = *(p++);
+ tmp |= ~0UL >> (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ /* Whole words: stop at the first one that is not all ones. */
+ while (size & ~(BITS_PER_LONG-1)) {
+ if (~(tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ /* Partial word: treat the bits at or above the remaining size as set. */
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + ffz(tmp);
+}
+#endif
diff --git a/btrfs-calc-size.c b/btrfs-calc-size.c
new file mode 100644
index 00000000..e3f02d87
--- /dev/null
+++ b/btrfs-calc-size.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (C) 2011 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <zlib.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "volumes.h"
+#include "utils.h"
+
+static int verbose = 0;
+static int no_pretty = 0;
+
+/*
+ * One histogram bucket: a seek distance and how many times it was seen.
+ * Buckets live in an rb-tree ordered by distance (see add_seek()).
+ */
+struct seek {
+ u64 distance;
+ u64 count;
+ struct rb_node n;
+};
+
+/*
+ * Counters collected while walking one tree.
+ *
+ * total_nodes/total_leaves  blocks visited by walk_nodes()/walk_leaf()
+ * total_bytes               sum of nodesize/leafsize of visited blocks
+ * total_inline              summed length of inline file extents
+ * total_seeks               child pointers not adjacent to the previous one
+ * forward/backward_seeks    seeks split by direction
+ * total_seek_len/max_seek_len  summed and largest seek distance
+ * total_clusters and *_cluster_size  runs of adjacent blocks
+ * lowest/highest_bytenr     lowest and highest block address seen
+ * seek_root                 rb-tree of struct seek, keyed by distance
+ * total_levels              not referenced by the code visible in this file
+ */
+struct root_stats {
+ u64 total_nodes;
+ u64 total_leaves;
+ u64 total_bytes;
+ u64 total_inline;
+ u64 total_seeks;
+ u64 forward_seeks;
+ u64 backward_seeks;
+ u64 total_seek_len;
+ u64 max_seek_len;
+ u64 total_clusters;
+ u64 total_cluster_size;
+ u64 min_cluster_size;
+ u64 max_cluster_size;
+ u64 lowest_bytenr;
+ u64 highest_bytenr;
+ struct rb_root seek_root;
+ int total_levels;
+};
+
+/*
+ * Record one seek of length dist in the histogram tree.
+ *
+ * An existing bucket for dist has its count bumped; otherwise a new bucket
+ * with count 1 is allocated and inserted.  Returns 0 on success or -ENOMEM
+ * when the bucket cannot be allocated.
+ */
+static int add_seek(struct rb_root *root, u64 dist)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct seek *entry;
+
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct seek, n);
+
+		if (dist == entry->distance) {
+			entry->count++;
+			return 0;
+		}
+		if (dist < entry->distance)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	entry = malloc(sizeof(struct seek));
+	if (!entry)
+		return -ENOMEM;
+	entry->distance = dist;
+	entry->count = 1;
+	rb_link_node(&entry->n, parent, link);
+	rb_insert_color(&entry->n, root);
+	return 0;
+}
+
+/*
+ * Account for the leaf at path->nodes[0].
+ *
+ * Always adds one leaf and root->leafsize bytes to stat.  When find_inline
+ * is set, every EXTENT_DATA item of inline type also adds its inline
+ * length to stat->total_inline.  Always returns 0.
+ */
+static int walk_leaf(struct btrfs_root *root, struct btrfs_path *path,
+		     struct root_stats *stat, int find_inline)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	int slot;
+
+	stat->total_bytes += root->leafsize;
+	stat->total_leaves++;
+
+	if (find_inline) {
+		for (slot = 0; slot < btrfs_header_nritems(leaf); slot++) {
+			struct btrfs_file_extent_item *fi;
+			struct btrfs_key found_key;
+
+			btrfs_item_key_to_cpu(leaf, &found_key, slot);
+			if (found_key.type != BTRFS_EXTENT_DATA_KEY)
+				continue;
+
+			fi = btrfs_item_ptr(leaf, slot,
+					    struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(leaf, fi) !=
+			    BTRFS_FILE_EXTENT_INLINE)
+				continue;
+
+			stat->total_inline +=
+				btrfs_file_extent_inline_item_len(leaf,
+							btrfs_item_nr(slot));
+		}
+	}
+
+	return 0;
+}
+
+/* Absolute difference between two block addresses. */
+static u64 calc_distance(u64 block1, u64 block2)
+{
+	return block1 < block2 ? block2 - block1 : block1 - block2;
+}
+
+/*
+ * Recursively walk the subtree rooted at path->nodes[level], a non-leaf
+ * node, and accumulate size, seek and cluster statistics in stat.
+ *
+ * Child blocks are read only when they are nodes themselves or when
+ * find_inline asks for leaf contents.  A child that cannot be read is
+ * reported on stderr and skipped.  A child whose address does not directly
+ * follow the previous block counts as a seek and ends the current cluster.
+ *
+ * Returns 0 on success, -ENOMEM when a seek cannot be recorded, or the
+ * error returned by the walk of a child.
+ */
+static int walk_nodes(struct btrfs_root *root, struct btrfs_path *path,
+		      struct root_stats *stat, int level, int find_inline)
+{
+	struct extent_buffer *b = path->nodes[level];
+	u64 last_block;
+	u64 cluster_size = root->leafsize;
+	int i;
+	int ret = 0;
+
+	stat->total_bytes += root->nodesize;
+	stat->total_nodes++;
+
+	last_block = btrfs_header_bytenr(b);
+	for (i = 0; i < btrfs_header_nritems(b); i++) {
+		struct extent_buffer *tmp = NULL;
+		u64 cur_blocknr = btrfs_node_blockptr(b, i);
+
+		path->slots[level] = i;
+		if ((level - 1) > 0 || find_inline) {
+			tmp = read_tree_block(root, cur_blocknr,
+					      btrfs_level_size(root, level - 1),
+					      btrfs_node_ptr_generation(b, i));
+			if (!extent_buffer_uptodate(tmp)) {
+				fprintf(stderr, "Failed to read blocknr %Lu\n",
+					btrfs_node_blockptr(b, i));
+				continue;
+			}
+			path->nodes[level - 1] = tmp;
+		}
+		if (level - 1)
+			ret = walk_nodes(root, path, stat, level - 1,
+					 find_inline);
+		else
+			ret = walk_leaf(root, path, stat, find_inline);
+		if (last_block + root->leafsize != cur_blocknr) {
+			u64 distance = calc_distance(last_block +
+						     root->leafsize,
+						     cur_blocknr);
+			stat->total_seeks++;
+			stat->total_seek_len += distance;
+			if (stat->max_seek_len < distance)
+				stat->max_seek_len = distance;
+			if (add_seek(&stat->seek_root, distance)) {
+				fprintf(stderr, "Error adding new seek\n");
+				/*
+				 * Drop the child buffer before bailing out;
+				 * the regular release at the bottom of the
+				 * loop body is skipped by the break.
+				 */
+				free_extent_buffer(tmp);
+				ret = -ENOMEM;
+				break;
+			}
+
+			if (last_block < cur_blocknr)
+				stat->forward_seeks++;
+			else
+				stat->backward_seeks++;
+			/* A cluster of one block is not counted. */
+			if (cluster_size != root->leafsize) {
+				stat->total_cluster_size += cluster_size;
+				stat->total_clusters++;
+				if (cluster_size < stat->min_cluster_size)
+					stat->min_cluster_size = cluster_size;
+				if (cluster_size > stat->max_cluster_size)
+					stat->max_cluster_size = cluster_size;
+			}
+			cluster_size = root->leafsize;
+		} else {
+			cluster_size += root->leafsize;
+		}
+		last_block = cur_blocknr;
+		if (cur_blocknr < stat->lowest_bytenr)
+			stat->lowest_bytenr = cur_blocknr;
+		if (cur_blocknr > stat->highest_bytenr)
+			stat->highest_bytenr = cur_blocknr;
+		/* tmp is NULL here when no block was read for this slot. */
+		free_extent_buffer(tmp);
+		if (ret) {
+			fprintf(stderr, "Error walking down path\n");
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Print one histogram row: the distance range first..last, the number of
+ * seeks in it and a bar of ticks '#' characters ('|' when ticks is 0).
+ */
+static void print_seek_row(int digits, u64 first, u64 last, u64 count,
+			   u64 ticks)
+{
+	u64 i;
+
+	printf("\t\t%*Lu - %*Lu: %*Lu ", digits, first, digits, last,
+	       digits, count);
+	if (!ticks) {
+		printf("|\n");
+		return;
+	}
+	for (i = 0; i < ticks; i++)
+		printf("#");
+	printf("\n");
+}
+
+/*
+ * Print a text histogram of the seek distances recorded in stat.
+ *
+ * Nothing is printed for fewer than 20 seeks.  One tick stands for 5% of
+ * all seeks.  Consecutive distances whose own bar and whose running group
+ * bar are both at most two ticks are merged into one range row; a distance
+ * with more than two ticks gets a row of its own.
+ */
+static void print_seek_histogram(struct root_stats *stat)
+{
+	struct rb_node *node;
+	u64 tick_interval;
+	u64 group_start = 0;
+	u64 group_end = 0;
+	u64 group_count = 0;
+	u64 max_seek = stat->max_seek_len;
+	int digits = 1;
+
+	if (stat->total_seeks < 20)
+		return;
+
+	/* Column width: number of decimal digits of the longest seek. */
+	while ((max_seek /= 10))
+		digits++;
+
+	/* Make a tick count as 5% of the total seeks */
+	tick_interval = stat->total_seeks / 20;
+	printf("\tSeek histogram\n");
+	for (node = rb_first(&stat->seek_root); node; node = rb_next(node)) {
+		struct seek *seek = rb_entry(node, struct seek, n);
+		u64 ticks = seek->count / tick_interval;
+		u64 gticks = group_count / tick_interval;
+
+		if (ticks <= 2 && gticks <= 2) {
+			if (group_count == 0)
+				group_start = seek->distance;
+			group_end = seek->distance;
+			group_count += seek->count;
+			continue;
+		}
+
+		/* Flush the pending group before handling this entry. */
+		if (group_count) {
+			print_seek_row(digits, group_start, group_end,
+				       group_count, gticks);
+			group_count = 0;
+		}
+
+		if (ticks <= 2)
+			continue;
+
+		print_seek_row(digits, seek->distance, seek->distance,
+			       seek->count, ticks);
+	}
+	if (group_count)
+		print_seek_row(digits, group_start, group_end, group_count,
+			       group_count / tick_interval);
+}
+
+/*
+ * Store x - y in result.
+ *
+ * y is normalised in place first (whole seconds are moved between its
+ * tv_sec and tv_usec) so that the microsecond difference needs no borrow;
+ * the caller's y is therefore modified.
+ */
+static void timeval_subtract(struct timeval *result,struct timeval *x,
+			     struct timeval *y)
+{
+	const int usec_per_sec = 1000000;
+
+	if (x->tv_usec < y->tv_usec) {
+		int carry = (y->tv_usec - x->tv_usec) / usec_per_sec + 1;
+
+		y->tv_sec += carry;
+		y->tv_usec -= usec_per_sec * carry;
+	}
+
+	if (x->tv_usec - y->tv_usec > usec_per_sec) {
+		int carry = (x->tv_usec - y->tv_usec) / usec_per_sec;
+
+		y->tv_sec -= carry;
+		y->tv_usec += usec_per_sec * carry;
+	}
+
+	result->tv_usec = x->tv_usec - y->tv_usec;
+	result->tv_sec = x->tv_sec - y->tv_sec;
+}
+
+/*
+ * Walk the tree identified by key and print its statistics to stdout.
+ *
+ * find_inline is forwarded to the walkers and makes them read leaves to
+ * sum up inline extent data.  Sizes are printed as raw byte counts when
+ * the global no_pretty is set, otherwise through pretty_size().
+ *
+ * Returns 0 on success and non-zero when the root cannot be read, the path
+ * cannot be allocated, the clock cannot be read or the walk fails.
+ */
+static int calc_root_size(struct btrfs_root *tree_root, struct btrfs_key *key,
+			  int find_inline)
+{
+	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct rb_node *n;
+	struct timeval start, end, diff = {0};
+	struct root_stats stat;
+	int level;
+	int ret = 0;
+
+	root = btrfs_read_fs_root(tree_root->fs_info, key);
+	if (IS_ERR(root)) {
+		fprintf(stderr, "Failed to read root %Lu\n", key->objectid);
+		return 1;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Could not allocate path\n");
+		return 1;
+	}
+
+	memset(&stat, 0, sizeof(stat));
+	level = btrfs_header_level(root->node);
+	stat.lowest_bytenr = btrfs_header_bytenr(root->node);
+	stat.highest_bytenr = stat.lowest_bytenr;
+	/* (u64)-1 marks "no cluster seen yet", see out_print. */
+	stat.min_cluster_size = (u64)-1;
+	stat.max_cluster_size = root->leafsize;
+	path->nodes[level] = root->node;
+	if (gettimeofday(&start, NULL)) {
+		fprintf(stderr, "Error getting time: %d\n", errno);
+		/* Report the failure instead of returning success. */
+		ret = 1;
+		goto out;
+	}
+	if (!level) {
+		/* The root is a single leaf; the elapsed time stays zero. */
+		ret = walk_leaf(root, path, &stat, find_inline);
+		if (ret)
+			goto out;
+		goto out_print;
+	}
+
+	ret = walk_nodes(root, path, &stat, level, find_inline);
+	if (ret)
+		goto out;
+	if (gettimeofday(&end, NULL)) {
+		fprintf(stderr, "Error getting time: %d\n", errno);
+		ret = 1;
+		goto out;
+	}
+	timeval_subtract(&diff, &end, &start);
+out_print:
+	if (stat.min_cluster_size == (u64)-1) {
+		stat.min_cluster_size = 0;
+		/* Keeps the average cluster size division below defined. */
+		stat.total_clusters = 1;
+	}
+
+	if (no_pretty) {
+		printf("\tTotal size: %Lu\n", stat.total_bytes);
+		printf("\t\tInline data: %Lu\n", stat.total_inline);
+		printf("\tTotal seeks: %Lu\n", stat.total_seeks);
+		printf("\t\tForward seeks: %Lu\n", stat.forward_seeks);
+		printf("\t\tBackward seeks: %Lu\n", stat.backward_seeks);
+		printf("\t\tAvg seek len: %llu\n", stat.total_seeks ?
+			stat.total_seek_len / stat.total_seeks : 0);
+		print_seek_histogram(&stat);
+		printf("\tTotal clusters: %Lu\n", stat.total_clusters);
+		printf("\t\tAvg cluster size: %Lu\n", stat.total_cluster_size /
+		       stat.total_clusters);
+		printf("\t\tMin cluster size: %Lu\n", stat.min_cluster_size);
+		printf("\t\tMax cluster size: %Lu\n", stat.max_cluster_size);
+		printf("\tTotal disk spread: %Lu\n", stat.highest_bytenr -
+		       stat.lowest_bytenr);
+		printf("\tTotal read time: %d s %d us\n", (int)diff.tv_sec,
+		       (int)diff.tv_usec);
+		printf("\tLevels: %d\n", level + 1);
+	} else {
+		printf("\tTotal size: %s\n", pretty_size(stat.total_bytes));
+		printf("\t\tInline data: %s\n", pretty_size(stat.total_inline));
+		printf("\tTotal seeks: %Lu\n", stat.total_seeks);
+		printf("\t\tForward seeks: %Lu\n", stat.forward_seeks);
+		printf("\t\tBackward seeks: %Lu\n", stat.backward_seeks);
+		printf("\t\tAvg seek len: %s\n", stat.total_seeks ?
+			pretty_size(stat.total_seek_len / stat.total_seeks) :
+			pretty_size(0));
+		print_seek_histogram(&stat);
+		printf("\tTotal clusters: %Lu\n", stat.total_clusters);
+		printf("\t\tAvg cluster size: %s\n",
+				pretty_size((stat.total_cluster_size /
+						stat.total_clusters)));
+		printf("\t\tMin cluster size: %s\n",
+				pretty_size(stat.min_cluster_size));
+		printf("\t\tMax cluster size: %s\n",
+				pretty_size(stat.max_cluster_size));
+		printf("\tTotal disk spread: %s\n",
+				pretty_size(stat.highest_bytenr -
+					stat.lowest_bytenr));
+		printf("\tTotal read time: %d s %d us\n", (int)diff.tv_sec,
+		       (int)diff.tv_usec);
+		printf("\tLevels: %d\n", level + 1);
+	}
+out:
+	/* Tear down the seek histogram. */
+	while ((n = rb_first(&stat.seek_root)) != NULL) {
+		struct seek *seek = rb_entry(n, struct seek, n);
+		rb_erase(n, &stat.seek_root);
+		free(seek);
+	}
+
+	/*
+	 * We only use path to save node data in iterating,
+	 * without holding eb's ref_cnt in path.
+	 * Don't use btrfs_free_path() here, it will free these
+	 * eb again, and cause many problems, as negative ref_cnt
+	 * or invalid memory access.
+	 */
+	free(path);
+	return ret;
+}
+
+/* Print the one-line command synopsis to stderr. */
+static void usage(void)
+{
+	fputs("Usage: calc-size [-v] [-b] <device>\n", stderr);
+}
+
+/*
+ * calc-size [-v] [-b] <device>
+ *
+ * Opens the filesystem on <device> and prints statistics for the root,
+ * extent, csum and fs trees in that order, stopping at the first tree that
+ * fails.  -b prints raw byte counts instead of human readable sizes; -v
+ * only increments the verbose counter.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int main(int argc, char **argv)
+{
+	/* Zeroed so no indeterminate field is handed to calc_root_size(). */
+	struct btrfs_key key = {0};
+	struct btrfs_root *root;
+	int opt;
+	int ret = 0;
+
+	while ((opt = getopt(argc, argv, "vb")) != -1) {
+		switch (opt) {
+			case 'v':
+				verbose++;
+				break;
+			case 'b':
+				no_pretty = 1;
+				break;
+			default:
+				usage();
+				exit(1);
+		}
+	}
+
+	set_argv0(argv);
+	argc = argc - optind;
+	if (check_argc_min(argc, 1)) {
+		usage();
+		exit(1);
+	}
+
+	/*
+	if ((ret = check_mounted(argv[optind])) < 0) {
+		fprintf(stderr, "Could not check mount status: %d\n", ret);
+		if (ret == -EACCES)
+			fprintf(stderr, "Maybe you need to run as root?\n");
+		return ret;
+	} else if (ret) {
+		fprintf(stderr, "%s is currently mounted. Aborting.\n",
+			argv[optind]);
+		return -EBUSY;
+	}
+	*/
+
+	root = open_ctree(argv[optind], 0, 0);
+	if (!root) {
+		fprintf(stderr, "Couldn't open ctree\n");
+		exit(1);
+	}
+
+	printf("Calculating size of root tree\n");
+	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	printf("Calculating size of extent tree\n");
+	key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	printf("Calculating size of csum tree\n");
+	key.objectid = BTRFS_CSUM_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	/* Only the fs tree is scanned for inline extent data. */
+	key.objectid = BTRFS_FS_TREE_OBJECTID;
+	key.offset = (u64)-1;
+	printf("Calculating size of fs tree\n");
+	ret = calc_root_size(root, &key, 1);
+	if (ret)
+		goto out;
+out:
+	close_ctree(root);
+	btrfs_close_all_devices();
+	return ret;
+}
diff --git a/btrfs-completion b/btrfs-completion
new file mode 100644
index 00000000..a34191bd
--- /dev/null
+++ b/btrfs-completion
@@ -0,0 +1,152 @@
+# original by Alfredo Esteban <aedelatorre at xxxxxxxxx>
+# (http://www.spinics.net/lists/linux-btrfs/msg15899.html)
+# edited by Joseph Wang <joequant at gmail.com>
+# (http://lists.alioth.debian.org/pipermail/bash-completion-devel/2013-June/004868.html)
+# edited by John C F <john.ch.fr at gmail.com> on 2015-02-02
+
+_btrfs_devs()
+{
+ local DEVS
+ DEVS=''; while read dev; do DEVS+="$dev "; done < <(lsblk -pnro name)
+ COMPREPLY+=( $( compgen -W "$DEVS" -- "$cur" ) )
+}
+
+_btrfs_mnts()
+{
+ local MNTS
+ MNTS=''
+ while read mnt; do MNTS+="$mnt "
+ done < <(mount | awk '{print $3}')
+ COMPREPLY+=( $( compgen -W "$MNTS" -- "$cur" ) )
+}
+
+_btrfs()
+{
+ local cur prev words cword
+ _init_completion || return
+
+ COMPREPLY=()
+
+ local cmd=${words[1]}
+
+ commands='subvolume filesystem balance device scrub check rescue restore inspect-internal property send receive quota qgroup replace help version'
+ commands_subvolume='create delete list snapshot find-new get-default set-default show sync'
+ commands_filesystem='defragment sync resize show df label usage'
+ commands_balance='start pause cancel resume status'
+ commands_device='scan add delete remove ready stats usage'
+ commands_scrub='start cancel resume status'
+ commands_rescue='chunk-recover super-recover'
+ commands_inspect_internal='inode-resolve logical-resolve subvolid-resolve rootid min-dev-size'
+ commands_property='get set list'
+ commands_quota='enable disable rescan'
+ commands_qgroup='assign remove create destroy show limit'
+ commands_replace='start status cancel'
+
+ if [[ "$cur" == -* && $cword -le 3 && "$cmd" != "help" ]]; then
+ COMPREPLY=( $( compgen -W '--help' -- "$cur" ) )
+ return 0
+ fi
+
+ if [[ $cword -eq 1 ]]; then
+ COMPREPLY=( $( compgen -W "$commands" -- "$cur" ) )
+ return 0
+ elif [[ $cword -eq 2 ]]; then
+ case $cmd in
+ subvolume)
+ opts="$commands_subvolume"
+ ;;
+ filesystem)
+ opts="$commands_filesystem"
+ ;;
+ balance)
+ opts="$commands_balance"
+ ;;
+ device)
+ opts="$commands_device"
+ ;;
+ scrub)
+ opts="$commands_scrub"
+ ;;
+ check)
+ _btrfs_devs
+ return 0
+ ;;
+ rescue)
+ opts="$commands_rescue"
+ ;;
+ restore)
+ _btrfs_devs
+ return 0
+ ;;
+ inspect-internal)
+ opts="$commands_inspect_internal"
+ ;;
+ property)
+ opts="$commands_property"
+ ;;
+ send|receive)
+ _filedir -d
+ return 0
+ ;;
+ quota)
+ opts="$commands_quota"
+ ;;
+ qgroup)
+ opts="$commands_qgroup"
+ ;;
+ replace)
+ opts="$commands_replace"
+ ;;
+ help)
+ opts="--full"
+ ;;
+ version)
+ return 0
+ ;;
+ esac
+ COMPREPLY=( $( compgen -W "$opts" -- "$cur" ) )
+ return 0
+ elif [[ $cword -eq 3 ]]; then
+ case $cmd in
+ filesystem)
+ case $prev in
+ defragment)
+ _filedir
+ return 0
+ ;;
+ label)
+ _btrfs_mnts
+ _btrfs_devs
+ return 0
+ ;;
+ esac
+ ;;
+ device|rescue)
+ _btrfs_devs
+ return 0
+ ;;
+ replace)
+ case $prev in
+ status|cancel)
+ _btrfs_mnts
+ return 0
+ ;;
+ start)
+ _btrfs_devs
+ return 0
+ ;;
+ esac
+ ;;
+ esac
+ fi
+
+ if [[ "$cmd" == "receive" && "$prev" == "-f" ]]; then
+ _filedir
+ return 0
+ fi
+
+ _filedir -d
+ return 0
+}
+
+complete -F _btrfs btrfs
diff --git a/btrfs-convert.c b/btrfs-convert.c
new file mode 100644
index 00000000..4baa68ec
--- /dev/null
+++ b/btrfs-convert.c
@@ -0,0 +1,3206 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "kerncompat.h"
+
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+#include <linux/limits.h>
+#include <getopt.h>
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "transaction.h"
+#include "crc32c.h"
+#include "utils.h"
+#include "task-utils.h"
+#include <ext2fs/ext2_fs.h>
+#include <ext2fs/ext2fs.h>
+#include <ext2fs/ext2_ext_attr.h>
+
+/*
+ * Offset added to an ext2 inode number to obtain the btrfs objectid; it is
+ * chosen so that EXT2_ROOT_INO maps onto BTRFS_FIRST_FREE_OBJECTID.
+ */
+#define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO)
+/* NOTE(review): presumably the objectid of the subvolume holding the saved image - confirm at the use sites. */
+#define CONV_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
+
+/* State shared between the inode copy loop and the progress-printing task. */
+struct task_ctx {
+ /* Denominator shown on the progress line. */
+ uint32_t max_copy_inodes;
+ /* Numerator shown on the progress line; bumped by the copy loop. */
+ uint32_t cur_copy_inodes;
+ /* Handle handed to task_period_start()/task_period_wait(). */
+ struct task_info *info;
+};
+
+/*
+ * Body of the progress task: once per period print a spinner plus the
+ * current/maximum inode counters on a single line that is overwritten
+ * in place (the line ends in '\r', not '\n').
+ *
+ * @p: the struct task_ctx shared with the copy loop.
+ *
+ * The loop has no exit condition of its own; the trailing return only
+ * satisfies the function signature.
+ */
+static void *print_copied_inodes(void *p)
+{
+	struct task_ctx *priv = p;
+	static const char work_indicator[] = { '.', 'o', 'O', 'o' };
+	uint32_t count = 0;
+
+	task_period_start(priv->info, 1000 /* 1s */);
+	while (1) {
+		count++;
+		/* The counters are unsigned 32-bit values: print with %u. */
+		printf("copy inodes [%c] [%10u/%10u]\r",
+		       work_indicator[count % 4],
+		       (unsigned int)priv->cur_copy_inodes,
+		       (unsigned int)priv->max_copy_inodes);
+		fflush(stdout);
+		task_period_wait(priv->info);
+	}
+
+	return NULL;
+}
+
+/*
+ * Completion hook of the progress task: terminate the progress line with
+ * a newline and flush stdout. @p is unused. Always returns 0.
+ */
+static int after_copied_inodes(void *p)
+{
+	fputs("\n", stdout);
+	fflush(stdout);
+	return 0;
+}
+
+/*
+ * Callback table for the source-filesystem side of a conversion. Every
+ * callback receives the conversion context as its first argument; the
+ * inline wrappers in this file dispatch through cctx->convert_ops.
+ */
+struct btrfs_convert_context;
+struct btrfs_convert_operations {
+ const char *name;
+ int (*open_fs)(struct btrfs_convert_context *cctx, const char *devname);
+ /* Block allocation in the source filesystem; result goes to *block_ret. */
+ int (*alloc_block)(struct btrfs_convert_context *cctx, u64 goal,
+ u64 *block_ret);
+ int (*alloc_block_range)(struct btrfs_convert_context *cctx, u64 goal,
+ int num, u64 *block_ret);
+ int (*test_block)(struct btrfs_convert_context *cctx, u64 block);
+ void (*free_block)(struct btrfs_convert_context *cctx, u64 block);
+ void (*free_block_range)(struct btrfs_convert_context *cctx, u64 block,
+ int num);
+ /* Copy the source inodes into root; p carries the progress counters. */
+ int (*copy_inodes)(struct btrfs_convert_context *cctx,
+ struct btrfs_root *root, int datacsum,
+ int packing, int noxattr, struct task_ctx *p);
+ void (*close_fs)(struct btrfs_convert_context *cctx);
+};
+
+/*
+ * Per-conversion state: geometry copied from the source filesystem, the
+ * callback table used to talk to it, and the cache trees that track space.
+ */
+struct btrfs_convert_context {
+ u32 blocksize;
+ u32 first_data_block;
+ u32 block_count;
+ u32 inodes_count;
+ u32 free_inodes_count;
+ u64 total_bytes;
+ /* Heap copy of the source volume label; released by close_fs. */
+ char *volume_name;
+ const struct btrfs_convert_operations *convert_ops;
+
+ /* The accurate used space of old filesystem */
+ struct cache_tree used;
+
+ /* Batched ranges which must be covered by data chunks */
+ struct cache_tree data_chunks;
+
+ /* Free space which is not covered by data_chunks */
+ struct cache_tree free;
+
+ /* Backend-private handle (the ext2 backend stores its ext2_filsys here). */
+ void *fs_data;
+};
+
+/* Initialise the three space-tracking cache trees of @cctx. */
+static void init_convert_context(struct btrfs_convert_context *cctx)
+{
+	struct cache_tree *trees[] = {
+		&cctx->used,
+		&cctx->data_chunks,
+		&cctx->free,
+	};
+	int idx;
+
+	for (idx = 0; idx < 3; idx++)
+		cache_tree_init(trees[idx]);
+}
+
+/* Release every entry held in the three space-tracking cache trees of @cctx. */
+static void clean_convert_context(struct btrfs_convert_context *cctx)
+{
+	struct cache_tree *trees[] = {
+		&cctx->used,
+		&cctx->data_chunks,
+		&cctx->free,
+	};
+	int idx;
+
+	for (idx = 0; idx < 3; idx++)
+		free_extent_cache_tree(trees[idx]);
+}
+
+/* Dispatch to the backend's alloc_block callback; returns whatever it returns. */
+static inline int convert_alloc_block(struct btrfs_convert_context *cctx,
+ u64 goal, u64 *ret)
+{
+ return cctx->convert_ops->alloc_block(cctx, goal, ret);
+}
+
+/* Dispatch to the backend's alloc_block_range callback for a run of num blocks. */
+static inline int convert_alloc_block_range(struct btrfs_convert_context *cctx,
+ u64 goal, int num, u64 *ret)
+{
+ return cctx->convert_ops->alloc_block_range(cctx, goal, num, ret);
+}
+
+/* Dispatch to the backend's test_block callback for the given block number. */
+static inline int convert_test_block(struct btrfs_convert_context *cctx,
+ u64 block)
+{
+ return cctx->convert_ops->test_block(cctx, block);
+}
+
+/* Dispatch to the backend's free_block callback for the given block number. */
+static inline void convert_free_block(struct btrfs_convert_context *cctx,
+ u64 block)
+{
+ cctx->convert_ops->free_block(cctx, block);
+}
+
+/* Dispatch to the backend's free_block_range callback for num blocks from block. */
+static inline void convert_free_block_range(struct btrfs_convert_context *cctx,
+ u64 block, int num)
+{
+ cctx->convert_ops->free_block_range(cctx, block, num);
+}
+
+/*
+ * Dispatch to the backend's copy_inodes callback, forwarding every argument
+ * unchanged; returns the callback's result.
+ */
+static inline int copy_inodes(struct btrfs_convert_context *cctx,
+ struct btrfs_root *root, int datacsum,
+ int packing, int noxattr, struct task_ctx *p)
+{
+ return cctx->convert_ops->copy_inodes(cctx, root, datacsum, packing,
+ noxattr, p);
+}
+
+/* Dispatch to the backend's close_fs callback. */
+static inline void convert_close_fs(struct btrfs_convert_context *cctx)
+{
+ cctx->convert_ops->close_fs(cctx);
+}
+
+/*
+ * Open Ext2fs in readonly mode, read block allocation bitmap and
+ * inode bitmap into memory, and record the filesystem geometry in @cctx.
+ *
+ * @cctx: conversion context to fill in (only written on success).
+ * @name: path of the device or image holding the ext2 filesystem.
+ *
+ * Returns 0 on success and -1 on failure, after printing a message to
+ * stderr. On failure a filesystem that was already opened is closed again,
+ * so nothing is leaked.
+ */
+static int ext2_open_fs(struct btrfs_convert_context *cctx, const char *name)
+{
+	errcode_t ret;
+	ext2_filsys ext2_fs = NULL;
+	ext2_ino_t ino;
+	char *volume_name;
+
+	ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs);
+	if (ret) {
+		fprintf(stderr, "ext2fs_open: %s\n", error_message(ret));
+		/* The open itself failed: there is no handle for us to close. */
+		ext2_fs = NULL;
+		goto fail;
+	}
+	ret = ext2fs_read_inode_bitmap(ext2_fs);
+	if (ret) {
+		fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n",
+			error_message(ret));
+		goto fail;
+	}
+	ret = ext2fs_read_block_bitmap(ext2_fs);
+	if (ret) {
+		fprintf(stderr, "ext2fs_read_block_bitmap: %s\n",
+			error_message(ret));
+		goto fail;
+	}
+	/*
+	 * search each block group for a free inode. this set up
+	 * uninit block/inode bitmaps appropriately. The result of each
+	 * probe is deliberately ignored.
+	 */
+	ino = 1;
+	while (ino <= ext2_fs->super->s_inodes_count) {
+		ext2_ino_t foo;
+		ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo);
+		ino += EXT2_INODES_PER_GROUP(ext2_fs->super);
+	}
+
+	if (!(ext2_fs->super->s_feature_incompat &
+	      EXT2_FEATURE_INCOMPAT_FILETYPE)) {
+		fprintf(stderr, "filetype feature is missing\n");
+		goto fail;
+	}
+
+	volume_name = strndup(ext2_fs->super->s_volume_name, 16);
+	if (!volume_name) {
+		fprintf(stderr, "not enough memory\n");
+		goto fail;
+	}
+
+	cctx->fs_data = ext2_fs;
+	cctx->blocksize = ext2_fs->blocksize;
+	cctx->block_count = ext2_fs->super->s_blocks_count;
+	/* Widen before multiplying: both operands are 32-bit. */
+	cctx->total_bytes = (u64)ext2_fs->blocksize *
+			    ext2_fs->super->s_blocks_count;
+	cctx->volume_name = volume_name;
+	cctx->first_data_block = ext2_fs->super->s_first_data_block;
+	cctx->inodes_count = ext2_fs->super->s_inodes_count;
+	cctx->free_inodes_count = ext2_fs->super->s_free_inodes_count;
+	return 0;
+fail:
+	if (ext2_fs)
+		ext2fs_close(ext2_fs);
+	return -1;
+}
+
+/*
+ * Release the volume label copy held in @cctx and close the ext2
+ * filesystem handle stored in cctx->fs_data.
+ */
+static void ext2_close_fs(struct btrfs_convert_context *cctx)
+{
+	/* free(NULL) is a no-op, so no guard is needed. */
+	free(cctx->volume_name);
+	cctx->volume_name = NULL;
+	ext2fs_close(cctx->fs_data);
+}
+
+/*
+ * Ask libext2fs for a new block near @goal, mark it in the in-memory block
+ * bitmap and hand its number back through @block_ret.
+ *
+ * Returns 0 on success, -ENOSPC when ext2fs_new_block() reports an error.
+ */
+static int ext2_alloc_block(struct btrfs_convert_context *cctx,
+			    u64 goal, u64 *block_ret)
+{
+	ext2_filsys ext2_fs = cctx->fs_data;
+	blk_t found;
+
+	if (ext2fs_new_block(ext2_fs, goal, NULL, &found))
+		return -ENOSPC;
+
+	ext2fs_fast_mark_block_bitmap(ext2_fs->block_map, found);
+	*block_ret = found;
+	return 0;
+}
+
+/*
+ * Find a run of num blocks in the in-memory block bitmap, scanning upward
+ * from max(goal, bitmap start), and mark the run in the bitmap.
+ *
+ * On success the first block of the run is stored in *block_ret and 0 is
+ * returned; -ENOSPC is returned when the scan reaches the bitmap end.
+ */
+static int ext2_alloc_block_range(struct btrfs_convert_context *cctx, u64 goal,
+ int num, u64 *block_ret)
+{
+ ext2_filsys fs = cctx->fs_data;
+ blk_t block;
+ ext2fs_block_bitmap bitmap = fs->block_map;
+ blk_t start = ext2fs_get_block_bitmap_start(bitmap);
+ blk_t end = ext2fs_get_block_bitmap_end(bitmap);
+
+ for (block = max_t(u64, goal, start); block + num < end; block++) {
+ /* NOTE(review): presumably nonzero when the whole range is clear - confirm against libext2fs. */
+ if (ext2fs_fast_test_block_bitmap_range(bitmap, block, num)) {
+ ext2fs_fast_mark_block_bitmap_range(bitmap, block,
+ num);
+ *block_ret = block;
+ return 0;
+ }
+ }
+ return -ENOSPC;
+}
+
+/* Clear a single block in the in-memory ext2 block bitmap. */
+static void ext2_free_block(struct btrfs_convert_context *cctx, u64 block)
+{
+ ext2_filsys fs = cctx->fs_data;
+
+ /* The block number must survive narrowing to blk_t. */
+ BUG_ON(block != (blk_t)block);
+ ext2fs_fast_unmark_block_bitmap(fs->block_map, block);
+}
+
+/* Clear num blocks starting at block in the in-memory ext2 block bitmap. */
+static void ext2_free_block_range(struct btrfs_convert_context *cctx, u64 block, int num)
+{
+ ext2_filsys fs = cctx->fs_data;
+
+ /* The starting block number must survive narrowing to blk_t. */
+ BUG_ON(block != (blk_t)block);
+ ext2fs_fast_unmark_block_bitmap_range(fs->block_map, block, num);
+}
+
+/*
+ * Populate root->fs_info->free_space_cache from the source filesystem's
+ * block allocation state.
+ *
+ * Every block from first_data_block up to block_count for which
+ * convert_test_block() returns zero is marked dirty (one block at a time).
+ * Afterwards the stripe around each superblock mirror that lies inside the
+ * filesystem, and the range below BTRFS_SUPER_INFO_OFFSET, are cleared
+ * again so they are never handed out.
+ *
+ * Always returns 0; a failing set_extent_dirty() trips BUG_ON.
+ */
+static int cache_free_extents(struct btrfs_root *root,
+ struct btrfs_convert_context *cctx)
+
+{
+ int i, ret = 0;
+ blk_t block;
+ u64 bytenr;
+ u64 blocksize = cctx->blocksize;
+
+ block = cctx->first_data_block;
+ for (; block < cctx->block_count; block++) {
+ if (convert_test_block(cctx, block))
+ continue;
+ /* blocksize is u64, so this product is computed in 64 bits. */
+ bytenr = block * blocksize;
+ ret = set_extent_dirty(&root->fs_info->free_space_cache,
+ bytenr, bytenr + blocksize - 1, 0);
+ BUG_ON(ret);
+ }
+
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ /* Round down to the start of the containing stripe. */
+ bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1);
+ if (bytenr >= blocksize * cctx->block_count)
+ break;
+ clear_extent_dirty(&root->fs_info->free_space_cache, bytenr,
+ bytenr + BTRFS_STRIPE_LEN - 1, 0);
+ }
+
+ clear_extent_dirty(&root->fs_info->free_space_cache,
+ 0, BTRFS_SUPER_INFO_OFFSET - 1, 0);
+
+ return 0;
+}
+
+/*
+ * Extent allocator installed through extent_ops: carve num_bytes out of
+ * root->fs_info->free_space_cache.
+ *
+ * The search starts at hint_byte and restarts from 0 exactly once when the
+ * cache has no further dirty range. A candidate is rejected when it is too
+ * short, overlaps pinned_extents, lies in a SYSTEM block group, extends
+ * past the end of its block group, or (metadata only) crosses a stripe
+ * boundary according to check_crossing_stripes().
+ *
+ * On success the range is cleared from the free space cache, ins is filled
+ * with (start, BTRFS_EXTENT_ITEM_KEY, num_bytes) and 0 is returned.
+ * Returns -ENOSPC, after printing a message, when nothing fits.
+ */
+static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
+ u64 hint_byte, struct btrfs_key *ins,
+ int metadata)
+{
+ u64 start;
+ u64 end;
+ u64 last = hint_byte;
+ int ret;
+ int wrapped = 0;
+ struct btrfs_block_group_cache *cache;
+
+ while(1) {
+ ret = find_first_extent_bit(&root->fs_info->free_space_cache,
+ last, &start, &end, EXTENT_DIRTY);
+ if (ret) {
+ /* Nothing at or after last: wrap around once, then give up. */
+ if (wrapped++ == 0) {
+ last = 0;
+ continue;
+ } else {
+ goto fail;
+ }
+ }
+
+ start = max(last, start);
+ last = end + 1;
+ if (last - start < num_bytes)
+ continue;
+
+ last = start + num_bytes;
+ if (test_range_bit(&root->fs_info->pinned_extents,
+ start, last - 1, EXTENT_DIRTY, 0))
+ continue;
+
+ cache = btrfs_lookup_block_group(root->fs_info, start);
+ BUG_ON(!cache);
+ if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM ||
+ last > cache->key.objectid + cache->key.offset) {
+ /* Resume the search at the end of this block group. */
+ last = cache->key.objectid + cache->key.offset;
+ continue;
+ }
+
+ if (metadata) {
+ BUG_ON(num_bytes != root->nodesize);
+ if (check_crossing_stripes(start, num_bytes)) {
+ last = round_down(start + num_bytes,
+ BTRFS_STRIPE_LEN);
+ continue;
+ }
+ }
+ clear_extent_dirty(&root->fs_info->free_space_cache,
+ start, start + num_bytes - 1, 0);
+
+ ins->objectid = start;
+ ins->offset = num_bytes;
+ ins->type = BTRFS_EXTENT_ITEM_KEY;
+ return 0;
+ }
+fail:
+ fprintf(stderr, "not enough free space\n");
+ return -ENOSPC;
+}
+
+/*
+ * Tell whether the byte range [bytenr, bytenr + num_bytes) overlaps the
+ * BTRFS_STRIPE_LEN-aligned stripe that holds any superblock mirror.
+ *
+ * Returns 1 on overlap, 0 otherwise.
+ */
+static int intersect_with_sb(u64 bytenr, u64 num_bytes)
+{
+	int mirror = 0;
+
+	while (mirror < BTRFS_SUPER_MIRROR_MAX) {
+		u64 stripe_start = btrfs_sb_offset(mirror);
+
+		/* Round down to the start of the containing stripe. */
+		stripe_start &= ~((u64)BTRFS_STRIPE_LEN - 1);
+
+		if (bytenr + num_bytes > stripe_start &&
+		    bytenr < stripe_start + BTRFS_STRIPE_LEN)
+			return 1;
+		mirror++;
+	}
+	return 0;
+}
+
+/*
+ * free_extent hook installed through extent_ops: does not release anything
+ * itself, it only reports (1/0) whether the range overlaps a superblock
+ * mirror stripe. root is unused.
+ */
+static int custom_free_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes)
+{
+ return intersect_with_sb(bytenr, num_bytes);
+}
+
+/* Extent allocation hooks backed by the custom_* functions above. */
+static struct btrfs_extent_ops extent_ops = {
+ .alloc_extent = custom_alloc_extent,
+ .free_extent = custom_free_extent,
+};
+
+/*
+ * Insert one directory entry: a dir item in @dir pointing at @objectid plus
+ * the matching inode ref, then grow the size recorded in @inode (the
+ * directory's stack inode item) by twice the name length.
+ *
+ * Returns 0 on success or the error of the first insertion that failed; the
+ * inode size is left untouched on failure.
+ */
+static int convert_insert_dirent(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 const char *name, size_t name_len,
+				 u64 dir, u64 objectid,
+				 u8 file_type, u64 index_cnt,
+				 struct btrfs_inode_item *inode)
+{
+	struct btrfs_key location = {
+		.objectid = objectid,
+		.type = BTRFS_INODE_ITEM_KEY,
+		.offset = 0,
+	};
+	int ret;
+
+	ret = btrfs_insert_dir_item(trans, root, name, name_len,
+				    dir, &location, file_type, index_cnt);
+	if (!ret)
+		ret = btrfs_insert_inode_ref(trans, root, name, name_len,
+					     objectid, dir, index_cnt);
+	if (!ret) {
+		u64 new_size = btrfs_stack_inode_size(inode) + name_len * 2;
+
+		btrfs_set_stack_inode_size(inode, new_size);
+	}
+	return ret;
+}
+
+/* Private state handed to dir_iterate_proc() while walking one directory. */
+struct dir_iterate_data {
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root;
+ /* Stack inode item of the directory being filled. */
+ struct btrfs_inode_item *inode;
+ /* btrfs objectid of the directory being filled. */
+ u64 objectid;
+ /* Index given to the next inserted entry; bumped after each insert. */
+ u64 index_cnt;
+ /* Objectid taken from the ".." entry; 0 until it has been seen. */
+ u64 parent;
+ /* First error returned by convert_insert_dirent(), 0 if none. */
+ int errcode;
+};
+
+/* Maps an ext2 directory-entry file type (EXT2_FT_*) to its BTRFS_FT_* value. */
+static u8 filetype_conversion_table[EXT2_FT_MAX] = {
+ [EXT2_FT_UNKNOWN] = BTRFS_FT_UNKNOWN,
+ [EXT2_FT_REG_FILE] = BTRFS_FT_REG_FILE,
+ [EXT2_FT_DIR] = BTRFS_FT_DIR,
+ [EXT2_FT_CHRDEV] = BTRFS_FT_CHRDEV,
+ [EXT2_FT_BLKDEV] = BTRFS_FT_BLKDEV,
+ [EXT2_FT_FIFO] = BTRFS_FT_FIFO,
+ [EXT2_FT_SOCK] = BTRFS_FT_SOCK,
+ [EXT2_FT_SYMLINK] = BTRFS_FT_SYMLINK,
+};
+
+/*
+ * ext2fs_dir_iterate2() callback: convert one ext2 directory entry into a
+ * btrfs dir item / inode ref via convert_insert_dirent().
+ *
+ * priv_data points at a struct dir_iterate_data. "." and ".." are not
+ * inserted; ".." only records the parent objectid. Entries referring to
+ * inodes below EXT2_GOOD_OLD_FIRST_INO are skipped.
+ *
+ * Returns 0 to continue the iteration, or BLOCK_ABORT after storing the
+ * error in idata->errcode.
+ */
+static int dir_iterate_proc(ext2_ino_t dir, int entry,
+ struct ext2_dir_entry *dirent,
+ int offset, int blocksize,
+ char *buf,void *priv_data)
+{
+ int ret;
+ int file_type;
+ u64 objectid;
+ char dotdot[] = "..";
+ struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data;
+ int name_len;
+
+ /* The low byte of name_len is the length, the high byte the file type. */
+ name_len = dirent->name_len & 0xFF;
+
+ objectid = dirent->inode + INO_OFFSET;
+ /*
+ * Matches "." (length 1) and ".." (length 2).
+ * NOTE(review): a zero-length name also compares equal here and is
+ * skipped as well.
+ */
+ if (!strncmp(dirent->name, dotdot, name_len)) {
+ if (name_len == 2) {
+ BUG_ON(idata->parent != 0);
+ idata->parent = objectid;
+ }
+ return 0;
+ }
+ if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO)
+ return 0;
+
+ file_type = dirent->name_len >> 8;
+ BUG_ON(file_type > EXT2_FT_SYMLINK);
+
+ ret = convert_insert_dirent(idata->trans, idata->root, dirent->name,
+ name_len, idata->objectid, objectid,
+ filetype_conversion_table[file_type],
+ idata->index_cnt, idata->inode);
+ if (ret < 0) {
+ idata->errcode = ret;
+ return BLOCK_ABORT;
+ }
+
+ idata->index_cnt++;
+ return 0;
+}
+
+/*
+ * Walk the ext2 directory @ext2_ino and insert each of its entries into the
+ * btrfs directory @objectid, starting at index 2.
+ *
+ * When the directory is its own parent (its ".." entry points back at
+ * itself) an extra ".." inode ref with index 0 is inserted.
+ *
+ * Returns 0 on success, -1 when the libext2fs iteration itself fails (a
+ * message is printed), or the error stored by the iteration callback.
+ */
+static int create_dir_entries(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 objectid,
+			      struct btrfs_inode_item *btrfs_inode,
+			      ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
+{
+	struct dir_iterate_data data = {
+		.trans		= trans,
+		.root		= root,
+		.inode		= btrfs_inode,
+		.objectid	= objectid,
+		.index_cnt	= 2,
+		.parent		= 0,
+		.errcode	= 0,
+	};
+	errcode_t err;
+
+	err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL,
+				  dir_iterate_proc, &data);
+	if (err) {
+		fprintf(stderr, "ext2fs_dir_iterate2: %s\n",
+			error_message(err));
+		return -1;
+	}
+
+	if (data.errcode != 0 || data.parent != objectid)
+		return data.errcode;
+
+	return btrfs_insert_inode_ref(trans, root, "..", 2,
+				      objectid, objectid, 0);
+}
+
+/*
+ * Read exactly @num_bytes at byte offset @bytenr from the filesystem's
+ * latest_bdev descriptor into @buffer.
+ *
+ * Returns 0 when the whole range was read and -1 otherwise. A read error,
+ * a short read and a premature end-of-file (pread() returning 0 although
+ * bytes were requested) are all treated as failures.
+ */
+static int read_disk_extent(struct btrfs_root *root, u64 bytenr,
+			    u32 num_bytes, char *buffer)
+{
+	struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
+	ssize_t nread;
+
+	nread = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
+	if (nread < 0 || (u64)nread != num_bytes)
+		return -1;
+	return 0;
+}
+
+/*
+ * Checksum the on-disk extent [disk_bytenr, disk_bytenr + num_bytes) one
+ * sector at a time: each sector is read back from disk and passed to
+ * btrfs_csum_file_block() on the csum root.
+ *
+ * Returns 0 on success, -ENOMEM when the sector buffer cannot be allocated,
+ * or the first error from reading or checksumming.
+ */
+static int csum_disk_extent(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    u64 disk_bytenr, u64 num_bytes)
+{
+	u32 sectorsize = root->sectorsize;
+	u64 done = 0;
+	int ret = 0;
+	char *sector = malloc(sectorsize);
+
+	if (!sector)
+		return -ENOMEM;
+
+	while (done < num_bytes) {
+		u64 cur = disk_bytenr + done;
+
+		ret = read_disk_extent(root, cur, sectorsize, sector);
+		if (!ret)
+			ret = btrfs_csum_file_block(trans,
+						    root->fs_info->csum_root,
+						    disk_bytenr + num_bytes,
+						    cur, sector, sectorsize);
+		if (ret)
+			break;
+		done += sectorsize;
+	}
+
+	free(sector);
+	return ret;
+}
+
+/*
+ * State carried across block_iterate_proc() calls while the data blocks of
+ * one file are coalesced into contiguous runs.
+ */
+struct blk_iterate_data {
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root;
+ struct btrfs_inode_item *inode;
+ u64 objectid;
+ /* File block at which the pending run starts. */
+ u64 first_block;
+ /* Disk block at which the pending run starts. */
+ u64 disk_block;
+ /* Length of the pending run, in blocks. */
+ u64 num_blocks;
+ /* Disk block at or beyond which the pending run must be flushed. */
+ u64 boundary;
+ /* Nonzero to checksum data when a run is recorded. */
+ int checksum;
+ /* Error saved by the libext2fs callback wrapper. */
+ int errcode;
+};
+
+/*
+ * Reset data for a fresh file: store the caller's handles, objectid and
+ * checksum flag, start with an empty run and no error, and set boundary to
+ * the maximum u64 so that no barrier applies until one is computed.
+ */
+static void init_blk_iterate_data(struct blk_iterate_data *data,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_inode_item *inode,
+ u64 objectid, int checksum)
+{
+ data->trans = trans;
+ data->root = root;
+ data->inode = inode;
+ data->objectid = objectid;
+ data->first_block = 0;
+ data->disk_block = 0;
+ data->num_blocks = 0;
+ data->boundary = (u64)-1;
+ data->checksum = checksum;
+ data->errcode = 0;
+}
+
+/*
+ * Record a run of @num_blocks file blocks starting at @file_block and backed
+ * by @disk_block as a file extent of the inode tracked in @data. All three
+ * quantities are in units of the root's sector size.
+ *
+ * A @disk_block of 0 is never checksummed. Otherwise the extent is
+ * checksummed when data->checksum is set.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int record_file_blocks(struct blk_iterate_data *data,
+			      u64 file_block, u64 disk_block, u64 num_blocks)
+{
+	const u64 sectorsize = data->root->sectorsize;
+	const u64 disk_bytenr = disk_block * sectorsize;
+	const u64 num_bytes = num_blocks * sectorsize;
+	int ret;
+
+	ret = btrfs_record_file_extent(data->trans, data->root,
+				       data->objectid, data->inode,
+				       file_block * sectorsize,
+				       disk_bytenr, num_bytes);
+	if (ret == 0 && data->checksum && disk_bytenr != 0)
+		ret = csum_disk_extent(data->trans, data->root, disk_bytenr,
+				       num_bytes);
+	return ret;
+}
+
+/*
+ * Feed one (disk_block, file_block) mapping into the run tracked by idata.
+ *
+ * While the new block extends the pending run contiguously on disk, does
+ * not skip ahead in the file, and no barrier applies, it is only counted.
+ * Otherwise the pending run is recorded with record_file_blocks(), any gap
+ * in the file is recorded as a run with disk block 0, and a new run is
+ * started at this block with a freshly computed boundary.
+ *
+ * Returns 0 on success or the error from record_file_blocks().
+ */
+static int block_iterate_proc(u64 disk_block, u64 file_block,
+ struct blk_iterate_data *idata)
+{
+ int ret = 0;
+ int sb_region;
+ int do_barrier;
+ struct btrfs_root *root = idata->root;
+ struct btrfs_block_group_cache *cache;
+ u64 bytenr = disk_block * root->sectorsize;
+
+ sb_region = intersect_with_sb(bytenr, root->sectorsize);
+ /* A barrier forces a flush: superblock stripe or boundary reached. */
+ do_barrier = sb_region || disk_block >= idata->boundary;
+ if ((idata->num_blocks > 0 && do_barrier) ||
+ (file_block > idata->first_block + idata->num_blocks) ||
+ (disk_block != idata->disk_block + idata->num_blocks)) {
+ if (idata->num_blocks > 0) {
+ ret = record_file_blocks(idata, idata->first_block,
+ idata->disk_block,
+ idata->num_blocks);
+ if (ret)
+ goto fail;
+ idata->first_block += idata->num_blocks;
+ idata->num_blocks = 0;
+ }
+ /* Gap between the recorded run and this block: record it with disk block 0. */
+ if (file_block > idata->first_block) {
+ ret = record_file_blocks(idata, idata->first_block,
+ 0, file_block - idata->first_block);
+ if (ret)
+ goto fail;
+ }
+
+ if (sb_region) {
+ /* Next boundary: bytenr rounded up to a stripe multiple. */
+ bytenr += BTRFS_STRIPE_LEN - 1;
+ bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1);
+ } else {
+ /* Next boundary: end of the containing block group. */
+ cache = btrfs_lookup_block_group(root->fs_info, bytenr);
+ BUG_ON(!cache);
+ bytenr = cache->key.objectid + cache->key.offset;
+ }
+
+ idata->first_block = file_block;
+ idata->disk_block = disk_block;
+ idata->boundary = bytenr / root->sectorsize;
+ }
+ idata->num_blocks++;
+fail:
+ return ret;
+}
+
+/*
+ * ext2fs_block_iterate2() callback: forward the block mapping to
+ * block_iterate_proc(). On failure the error is saved in the iterate data
+ * and BLOCK_ABORT is returned to stop the iteration; otherwise 0.
+ */
+static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
+				e2_blkcnt_t blockcnt, blk_t ref_block,
+				int ref_offset, void *priv_data)
+{
+	struct blk_iterate_data *idata = priv_data;
+	int err = block_iterate_proc(*blocknr, blockcnt, idata);
+
+	if (!err)
+		return 0;
+
+	idata->errcode = err;
+	return BLOCK_ABORT;
+}
+
+/*
+ * traverse file's data blocks, record these data blocks as file extents.
+ *
+ * The blocks are walked with ext2fs_block_iterate2() and coalesced into
+ * runs by block_iterate_proc(). The final pending run is then either
+ * stored inline (when packing is set, the run starts at file block 0 and
+ * the inode size fits BTRFS_MAX_INLINE_DATA_SIZE) or recorded as a regular
+ * extent. Any remainder up to the inode size, rounded up to whole sectors,
+ * is recorded as a run with disk block 0.
+ *
+ * Returns 0 on success, -ENOMEM if the inline buffer cannot be allocated,
+ * -1 if the libext2fs iteration fails, or the first error from a helper.
+ */
+static int create_file_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ struct btrfs_inode_item *btrfs_inode,
+ ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
+ int datacsum, int packing)
+{
+ int ret;
+ char *buffer = NULL;
+ errcode_t err;
+ u32 last_block;
+ u32 sectorsize = root->sectorsize;
+ u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
+ struct blk_iterate_data data;
+
+ init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid,
+ datacsum);
+
+ err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
+ NULL, __block_iterate_proc, &data);
+ if (err)
+ goto error;
+ ret = data.errcode;
+ if (ret)
+ goto fail;
+ if (packing && data.first_block == 0 && data.num_blocks > 0 &&
+ inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
+ u64 num_bytes = data.num_blocks * sectorsize;
+ u64 disk_bytenr = data.disk_block * sectorsize;
+ u64 nbytes;
+
+ buffer = malloc(num_bytes);
+ if (!buffer)
+ return -ENOMEM;
+ ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer);
+ if (ret)
+ goto fail;
+ /* Only the bytes up to the inode size are stored inline. */
+ if (num_bytes > inode_size)
+ num_bytes = inode_size;
+ ret = btrfs_insert_inline_extent(trans, root, objectid,
+ 0, buffer, num_bytes);
+ if (ret)
+ goto fail;
+ nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes;
+ btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes);
+ } else if (data.num_blocks > 0) {
+ ret = record_file_blocks(&data, data.first_block,
+ data.disk_block, data.num_blocks);
+ if (ret)
+ goto fail;
+ }
+ data.first_block += data.num_blocks;
+ /* Number of sectors needed to cover the inode size. */
+ last_block = (inode_size + sectorsize - 1) / sectorsize;
+ if (last_block > data.first_block) {
+ ret = record_file_blocks(&data, data.first_block, 0,
+ last_block - data.first_block);
+ }
+fail:
+ free(buffer);
+ return ret;
+error:
+ fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err));
+ return -1;
+}
+
+/*
+ * Convert a symbolic link.
+ *
+ * When the ext2 inode owns data blocks, the target is copied through
+ * create_file_extents() with checksumming and packing enabled; the inode
+ * size is bumped by one for the duration of that call and restored after.
+ * Otherwise the target is taken from the inode's i_block area and stored
+ * as an inline extent including its terminating NUL.
+ *
+ * Returns the result of the underlying insert/copy call.
+ */
+static int create_symbol_link(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 objectid,
+			      struct btrfs_inode_item *btrfs_inode,
+			      ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
+			      struct ext2_inode *ext2_inode)
+{
+	u64 link_len = btrfs_stack_inode_size(btrfs_inode);
+	char *target;
+	int ret;
+
+	if (!ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) {
+		/* The target is stored inside the inode itself. */
+		target = (char *)&(ext2_inode->i_block[0]);
+		BUG_ON(target[link_len] != 0);
+		ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
+						 target, link_len + 1);
+		btrfs_set_stack_inode_nbytes(btrfs_inode, link_len + 1);
+		return ret;
+	}
+
+	btrfs_set_stack_inode_size(btrfs_inode, link_len + 1);
+	ret = create_file_extents(trans, root, objectid, btrfs_inode,
+				  ext2_fs, ext2_ino, 1, 1);
+	btrfs_set_stack_inode_size(btrfs_inode, link_len);
+	return ret;
+}
+
+/*
+ * Following xattr/acl related codes are based on codes in
+ * fs/ext3/xattr.c and fs/ext3/acl.c
+ */
+/* View a buffer as an xattr block header. */
+#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr))
+/* First entry of an xattr block: directly after the block header. */
+#define EXT2_XATTR_BFIRST(ptr) \
+ ((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1))
+/* In-inode xattr header: after the 128-byte base inode plus i_extra_isize. */
+#define EXT2_XATTR_IHDR(inode) \
+ ((struct ext2_ext_attr_header *) ((void *)(inode) + \
+ EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize))
+/* First in-inode entry: directly after the h_magic field of the header. */
+#define EXT2_XATTR_IFIRST(inode) \
+ ((struct ext2_ext_attr_entry *) ((void *)EXT2_XATTR_IHDR(inode) + \
+ sizeof(EXT2_XATTR_IHDR(inode)->h_magic)))
+
+/*
+ * Walk the xattr entry list starting at @entry and verify that every
+ * successor entry begins before @end.
+ *
+ * Returns 0 when the terminating entry is reached, -EIO when a successor
+ * would start at or beyond @end.
+ */
+static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry,
+				  const void *end)
+{
+	for (;;) {
+		if (EXT2_EXT_IS_LAST_ENTRY(entry))
+			return 0;
+		entry = EXT2_EXT_ATTR_NEXT(entry);
+		if ((void *)entry >= end)
+			return -EIO;
+	}
+}
+
+/*
+ * Validate an xattr block of @size bytes held in @buf: the header must
+ * carry the xattr magic and a block count of exactly 1, and the entry list
+ * must stay inside the buffer.
+ *
+ * Returns 0 if the block looks sane, -EIO otherwise.
+ */
+static int ext2_xattr_check_block(const char *buf, size_t size)
+{
+	const struct ext2_ext_attr_header *hdr = EXT2_XATTR_BHDR(buf);
+
+	if (hdr->h_magic == EXT2_EXT_ATTR_MAGIC && hdr->h_blocks == 1)
+		return ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf),
+					      buf + size);
+	return -EIO;
+}
+
+/*
+ * Validate one xattr entry against a value area of @size bytes: the value
+ * must not live in a separate block and must lie entirely inside the area.
+ *
+ * Returns 0 if the entry is acceptable, -EIO otherwise.
+ */
+static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry,
+				  size_t size)
+{
+	size_t value_size = entry->e_value_size;
+
+	if (entry->e_value_block != 0)
+		return -EIO;
+	if (value_size > size)
+		return -EIO;
+	if (entry->e_value_offs + value_size > size)
+		return -EIO;
+	return 0;
+}
+
+/* Version value expected in the header of an ext2 on-disk ACL. */
+#define EXT2_ACL_VERSION 0x0001
+
+/* 23.2.5 acl_tag_t values */
+
+#define ACL_UNDEFINED_TAG (0x00)
+#define ACL_USER_OBJ (0x01)
+#define ACL_USER (0x02)
+#define ACL_GROUP_OBJ (0x04)
+#define ACL_GROUP (0x08)
+#define ACL_MASK (0x10)
+#define ACL_OTHER (0x20)
+
+/* 23.2.7 ACL qualifier constants */
+
+#define ACL_UNDEFINED_ID ((id_t)-1)
+
+/* Full ext2 ACL entry, carrying an id (read for ACL_USER / ACL_GROUP tags). */
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+} ext2_acl_entry;
+
+/* Short ext2 ACL entry without an id (used for the remaining tags). */
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+} ext2_acl_entry_short;
+
+/* Header that precedes the entries of an ext2 on-disk ACL. */
+typedef struct {
+ __le32 a_version;
+} ext2_acl_header;
+
+/*
+ * Derive the number of entries in an ext2 on-disk ACL from its total size
+ * in bytes (header included).
+ *
+ * The layout assumed by the arithmetic is: up to four short entries,
+ * optionally followed by full-size entries.
+ *
+ * Returns the entry count, or -1 when @size is smaller than the header or
+ * does not match the layout.
+ */
+static inline int ext2_acl_count(size_t size)
+{
+	ssize_t s;
+
+	/*
+	 * Reject undersized input first: size is unsigned, so the
+	 * subtraction below would otherwise wrap around.
+	 */
+	if (size < sizeof(ext2_acl_header))
+		return -1;
+	size -= sizeof(ext2_acl_header);
+	s = size - 4 * sizeof(ext2_acl_entry_short);
+	if (s < 0) {
+		/* Fewer than four entries: all of them are short. */
+		if (size % sizeof(ext2_acl_entry_short))
+			return -1;
+		return size / sizeof(ext2_acl_entry_short);
+	} else {
+		/* Four short entries plus s bytes of full-size entries. */
+		if (s % sizeof(ext2_acl_entry))
+			return -1;
+		return s / sizeof(ext2_acl_entry) + 4;
+	}
+}
+
+/* Version value written into the header of the converted ACL xattr. */
+#define ACL_EA_VERSION 0x0002
+
+/* One entry of the converted ACL xattr; every entry carries an id field. */
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+} acl_ea_entry;
+
+/* Header of the converted ACL xattr, followed directly by its entries. */
+typedef struct {
+ __le32 a_version;
+ acl_ea_entry a_entries[0];
+} acl_ea_header;
+
+/*
+ * Size in bytes of a converted ACL xattr holding count entries.
+ * NOTE(review): count is not validated here; a negative value converts to
+ * a huge size_t, so callers must check it first.
+ */
+static inline size_t acl_ea_size(int count)
+{
+ return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry);
+}
+
+/*
+ * Convert an ext2 on-disk ACL (src, src_size bytes) into the ACL xattr
+ * layout (acl_ea_header followed by acl_ea_entry records) written to dst.
+ *
+ * USER_OBJ / GROUP_OBJ / MASK / OTHER entries are read as short entries and
+ * get ACL_UNDEFINED_ID as their id; USER / GROUP entries are read as full
+ * entries and keep their id.
+ *
+ * dst_size must be at least acl_ea_size(count), which is enforced with
+ * BUG_ON. Returns 0 on success, -EINVAL when the source is too short, has
+ * the wrong version, an unknown tag, or a length that does not match its
+ * entries.
+ */
+static int ext2_acl_to_xattr(void *dst, const void *src,
+ size_t dst_size, size_t src_size)
+{
+ int i, count;
+ const void *end = src + src_size;
+ acl_ea_header *ext_acl = (acl_ea_header *)dst;
+ acl_ea_entry *dst_entry = ext_acl->a_entries;
+ ext2_acl_entry *src_entry;
+
+ if (src_size < sizeof(ext2_acl_header))
+ goto fail;
+ if (((ext2_acl_header *)src)->a_version !=
+ cpu_to_le32(EXT2_ACL_VERSION))
+ goto fail;
+ src += sizeof(ext2_acl_header);
+ count = ext2_acl_count(src_size);
+ if (count <= 0)
+ goto fail;
+
+ BUG_ON(dst_size < acl_ea_size(count));
+ ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION);
+ for (i = 0; i < count; i++, dst_entry++) {
+ src_entry = (ext2_acl_entry *)src;
+ /* At least a short entry must be available. */
+ if (src + sizeof(ext2_acl_entry_short) > end)
+ goto fail;
+ /* Tag and permission are copied as-is (both sides are little-endian fields). */
+ dst_entry->e_tag = src_entry->e_tag;
+ dst_entry->e_perm = src_entry->e_perm;
+ switch (le16_to_cpu(src_entry->e_tag)) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ src += sizeof(ext2_acl_entry_short);
+ dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
+ break;
+ case ACL_USER:
+ case ACL_GROUP:
+ /* Advance first, then confirm the full entry was in bounds before reading e_id. */
+ src += sizeof(ext2_acl_entry);
+ if (src > end)
+ goto fail;
+ dst_entry->e_id = src_entry->e_id;
+ break;
+ default:
+ goto fail;
+ }
+ }
+ /* Every source byte must have been consumed. */
+ if (src != end)
+ goto fail;
+ return 0;
+fail:
+ return -EINVAL;
+}
+
+/*
+ * Name prefix for each ext2 xattr name index (e_name_index). Indexes
+ * without an entry are NULL and are rejected by copy_single_xattr().
+ */
+static char *xattr_prefix_table[] = {
+ [1] = "user.",
+ [2] = "system.posix_acl_access",
+ [3] = "system.posix_acl_default",
+ [4] = "trusted.",
+ [6] = "security.",
+};
+
+/*
+ * Copy one ext2 extended attribute to the btrfs inode @objectid.
+ *
+ * The full name is the prefix selected by the entry's name index followed
+ * by the entry's own name. POSIX ACL attributes (name index 2 and 3) are
+ * converted from the ext2 on-disk layout to the ACL xattr layout first.
+ * An attribute too large for a leaf is skipped with a message and counts
+ * as success.
+ *
+ * @entry:   ext2 xattr entry describing the attribute.
+ * @data:    attribute value.
+ * @datalen: length of @data in bytes.
+ *
+ * Returns 0 on success (or skip), -EOPNOTSUPP for an unknown name index,
+ * -ERANGE when the name is too long, -EINVAL for a malformed ACL, -ENOMEM
+ * on allocation failure, or the error from btrfs_insert_xattr_item().
+ */
+static int copy_single_xattr(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 objectid,
+			     struct ext2_ext_attr_entry *entry,
+			     const void *data, u32 datalen)
+{
+	int ret = 0;
+	int name_len;
+	int name_index;
+	void *databuf = NULL;
+	char namebuf[XATTR_NAME_MAX + 1];
+
+	name_index = entry->e_name_index;
+	if (name_index >= ARRAY_SIZE(xattr_prefix_table) ||
+	    xattr_prefix_table[name_index] == NULL)
+		return -EOPNOTSUPP;
+	name_len = strlen(xattr_prefix_table[name_index]) +
+		   entry->e_name_len;
+	if (name_len >= sizeof(namebuf))
+		return -ERANGE;
+
+	if (name_index == 2 || name_index == 3) {
+		size_t bufsize;
+		int count;
+
+		/*
+		 * Validate the ACL before sizing the buffer: a value that
+		 * is shorter than the header or has an inconsistent
+		 * length must not be turned into an allocation size.
+		 */
+		if (datalen < sizeof(ext2_acl_header))
+			return -EINVAL;
+		count = ext2_acl_count(datalen);
+		if (count <= 0)
+			return -EINVAL;
+
+		bufsize = acl_ea_size(count);
+		databuf = malloc(bufsize);
+		if (!databuf)
+			return -ENOMEM;
+		ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen);
+		if (ret)
+			goto out;
+		data = databuf;
+		datalen = bufsize;
+	}
+	/* name_len < sizeof(namebuf) was checked above, so both calls fit. */
+	strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX);
+	strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len);
+	if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
+	    sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
+		fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n",
+			objectid - INO_OFFSET, name_len, namebuf);
+		goto out;
+	}
+	ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len,
+				      data, datalen, objectid);
+out:
+	free(databuf);
+	return ret;
+}
+
+/*
+ * Copy every extended attribute of ext2 inode ext2_ino to the btrfs inode
+ * objectid.
+ *
+ * Two sources are handled: attributes stored inside a large inode (after
+ * the base inode plus i_extra_isize) and attributes stored in the separate
+ * block named by i_file_acl. Each list is validated before its entries are
+ * handed to copy_single_xattr().
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EIO on a corrupt
+ * layout, -1 when a libext2fs read fails (a message is printed), or the
+ * first error from copy_single_xattr().
+ */
+static int copy_extended_attrs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ struct btrfs_inode_item *btrfs_inode,
+ ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
+{
+ int ret = 0;
+ int inline_ea = 0;
+ errcode_t err;
+ u32 datalen;
+ u32 block_size = ext2_fs->blocksize;
+ u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super);
+ struct ext2_inode_large *ext2_inode;
+ struct ext2_ext_attr_entry *entry;
+ void *data;
+ char *buffer = NULL;
+ char inode_buf[EXT2_GOOD_OLD_INODE_SIZE];
+
+ /* Small inodes fit the stack buffer; larger ones need a heap buffer. */
+ if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) {
+ ext2_inode = (struct ext2_inode_large *)inode_buf;
+ } else {
+ ext2_inode = (struct ext2_inode_large *)malloc(inode_size);
+ if (!ext2_inode)
+ return -ENOMEM;
+ }
+ err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode,
+ inode_size);
+ if (err) {
+ fprintf(stderr, "ext2fs_read_inode_full: %s\n",
+ error_message(err));
+ ret = -1;
+ goto out;
+ }
+
+ /* In-inode attributes are only looked for in large, non-reserved inodes. */
+ if (ext2_ino > ext2_fs->super->s_first_ino &&
+ inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
+ if (EXT2_GOOD_OLD_INODE_SIZE +
+ ext2_inode->i_extra_isize > inode_size) {
+ ret = -EIO;
+ goto out;
+ }
+ if (ext2_inode->i_extra_isize != 0 &&
+ EXT2_XATTR_IHDR(ext2_inode)->h_magic ==
+ EXT2_EXT_ATTR_MAGIC) {
+ inline_ea = 1;
+ }
+ }
+ if (inline_ea) {
+ int total;
+ void *end = (void *)ext2_inode + inode_size;
+ entry = EXT2_XATTR_IFIRST(ext2_inode);
+ /* Bytes available from the first entry to the end of the inode. */
+ total = end - (void *)entry;
+ ret = ext2_xattr_check_names(entry, end);
+ if (ret)
+ goto out;
+ while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
+ ret = ext2_xattr_check_entry(entry, total);
+ if (ret)
+ goto out;
+ /* In-inode value offsets are relative to the first entry. */
+ data = (void *)EXT2_XATTR_IFIRST(ext2_inode) +
+ entry->e_value_offs;
+ datalen = entry->e_value_size;
+ ret = copy_single_xattr(trans, root, objectid,
+ entry, data, datalen);
+ if (ret)
+ goto out;
+ entry = EXT2_EXT_ATTR_NEXT(entry);
+ }
+ }
+
+ /* No separate xattr block: done. */
+ if (ext2_inode->i_file_acl == 0)
+ goto out;
+
+ buffer = malloc(block_size);
+ if (!buffer) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer);
+ if (err) {
+ fprintf(stderr, "ext2fs_read_ext_attr: %s\n",
+ error_message(err));
+ ret = -1;
+ goto out;
+ }
+ ret = ext2_xattr_check_block(buffer, block_size);
+ if (ret)
+ goto out;
+
+ entry = EXT2_XATTR_BFIRST(buffer);
+ while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
+ ret = ext2_xattr_check_entry(entry, block_size);
+ if (ret)
+ goto out;
+ /* Block value offsets are relative to the start of the block. */
+ data = buffer + entry->e_value_offs;
+ datalen = entry->e_value_size;
+ ret = copy_single_xattr(trans, root, objectid,
+ entry, data, datalen);
+ if (ret)
+ goto out;
+ entry = EXT2_EXT_ATTR_NEXT(entry);
+ }
+out:
+ free(buffer);
+ /* Only free the inode copy when it was heap-allocated. */
+ if ((void *)ext2_inode != inode_buf)
+ free(ext2_inode);
+ return ret;
+}
+/* Number of low bits that hold the minor number in a value built by MKDEV. */
+#define MINORBITS 20
+/* Combine a major and a minor number: major above MINORBITS, minor below. */
+#define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi))
+
+/*
+ * Decode a 16-bit device number: major in bits 8-15, minor in bits 0-7.
+ */
+static inline dev_t old_decode_dev(u16 val)
+{
+	const unsigned int major = (val >> 8) & 255;
+	const unsigned int minor = val & 255;
+
+	return MKDEV(major, minor);
+}
+
+/*
+ * Decode a 32-bit device number: major in bits 8-19, minor split between
+ * bits 0-7 (low part) and bits 20-31 (high part).
+ */
+static inline dev_t new_decode_dev(u32 dev)
+{
+	const unsigned minor_lo = dev & 0xff;
+	const unsigned minor_hi = (dev >> 12) & 0xfff00;
+	const unsigned major = (dev & 0xfff00) >> 8;
+
+	return MKDEV(major, minor_lo | minor_hi);
+}
+
+/*
+ * Fill the btrfs stack inode item @dst from the ext2 inode @src.
+ *
+ * Generation and transid are set to 1; sequence, nbytes, block group,
+ * flags, otime and all nanosecond fields to 0. Directories start with size
+ * 0 and link count 1, regular files take the full 64-bit size, and inodes
+ * that are neither regular file, directory nor symlink get their device
+ * number decoded from i_block[0] (old encoding) or i_block[1] (new
+ * encoding).
+ *
+ * @blocksize is unused. Always returns 0.
+ */
+static int copy_inode_item(struct btrfs_inode_item *dst,
+			   struct ext2_inode *src, u32 blocksize)
+{
+	btrfs_set_stack_inode_generation(dst, 1);
+	btrfs_set_stack_inode_sequence(dst, 0);
+	btrfs_set_stack_inode_transid(dst, 1);
+	btrfs_set_stack_inode_size(dst, src->i_size);
+	btrfs_set_stack_inode_nbytes(dst, 0);
+	btrfs_set_stack_inode_block_group(dst, 0);
+	btrfs_set_stack_inode_nlink(dst, src->i_links_count);
+	/*
+	 * The high halves are 16-bit fields; widen them to u32 before
+	 * shifting so that the shift cannot overflow a signed int.
+	 */
+	btrfs_set_stack_inode_uid(dst, src->i_uid |
+				  ((u32)src->i_uid_high << 16));
+	btrfs_set_stack_inode_gid(dst, src->i_gid |
+				  ((u32)src->i_gid_high << 16));
+	btrfs_set_stack_inode_mode(dst, src->i_mode);
+	btrfs_set_stack_inode_rdev(dst, 0);
+	btrfs_set_stack_inode_flags(dst, 0);
+	btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime);
+	btrfs_set_stack_timespec_nsec(&dst->atime, 0);
+	btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime);
+	btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
+	btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime);
+	btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
+	btrfs_set_stack_timespec_sec(&dst->otime, 0);
+	btrfs_set_stack_timespec_nsec(&dst->otime, 0);
+
+	if (S_ISDIR(src->i_mode)) {
+		/* The size grows later as directory entries are inserted. */
+		btrfs_set_stack_inode_size(dst, 0);
+		btrfs_set_stack_inode_nlink(dst, 1);
+	}
+	if (S_ISREG(src->i_mode)) {
+		btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 |
+					   (u64)src->i_size);
+	}
+	if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) &&
+	    !S_ISLNK(src->i_mode)) {
+		if (src->i_block[0]) {
+			btrfs_set_stack_inode_rdev(dst,
+				old_decode_dev(src->i_block[0]));
+		} else {
+			btrfs_set_stack_inode_rdev(dst,
+				new_decode_dev(src->i_block[1]));
+		}
+	}
+	memset(&dst->reserved, 0, sizeof(dst->reserved));
+
+	return 0;
+}
+
+/*
+ * copy a single inode. do all the required works, such as cloning
+ * inode item, creating file extents and creating directory entries.
+ */
+/*
+ * Convert one ext2 inode into btrfs items under @objectid.
+ *
+ * Inodes with a zero link count are skipped (0 is returned). Otherwise the
+ * inode item is built, the type-specific payload (file extents, directory
+ * entries or symlink target) is created, extended attributes are copied
+ * unless @noxattr is set, and finally the inode item is inserted.
+ *
+ * Returns 0 on success or the first error from a helper.
+ */
+static int copy_single_inode(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 objectid,
+			     ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
+			     struct ext2_inode *ext2_inode,
+			     int datacsum, int packing, int noxattr)
+{
+	struct btrfs_inode_item btrfs_inode;
+	unsigned int fmt;
+	int ret = 0;
+
+	if (ext2_inode->i_links_count == 0)
+		return 0;
+
+	copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize);
+	if (!datacsum && S_ISREG(ext2_inode->i_mode)) {
+		/* Regular files copied without checksums get NODATASUM. */
+		u32 flags = btrfs_stack_inode_flags(&btrfs_inode) |
+			    BTRFS_INODE_NODATASUM;
+		btrfs_set_stack_inode_flags(&btrfs_inode, flags);
+	}
+
+	fmt = ext2_inode->i_mode & S_IFMT;
+	if (fmt == S_IFREG)
+		ret = create_file_extents(trans, root, objectid, &btrfs_inode,
+					  ext2_fs, ext2_ino, datacsum, packing);
+	else if (fmt == S_IFDIR)
+		ret = create_dir_entries(trans, root, objectid, &btrfs_inode,
+					 ext2_fs, ext2_ino);
+	else if (fmt == S_IFLNK)
+		ret = create_symbol_link(trans, root, objectid, &btrfs_inode,
+					 ext2_fs, ext2_ino, ext2_inode);
+	if (ret)
+		return ret;
+
+	if (!noxattr) {
+		ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode,
+					  ext2_fs, ext2_ino);
+		if (ret)
+			return ret;
+	}
+	return btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
+}
+
+/*
+ * Copy @num_bytes bytes from byte offset @src_bytenr to byte offset
+ * @dst_bytenr on the file descriptor fs_devices->latest_bdev of @root.
+ *
+ * Returns 0 on success, -ENOMEM if the bounce buffer cannot be allocated,
+ * and -1 if the read or the write fails or transfers a different number of
+ * bytes than requested. A transfer of 0 bytes (e.g. a read at end of file)
+ * is a failure too; it must not be mistaken for the success value 0.
+ */
+static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr,
+ u64 src_bytenr, u32 num_bytes)
+{
+ int ret;
+ char *buffer;
+ struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
+
+ buffer = malloc(num_bytes);
+ if (!buffer)
+ return -ENOMEM;
+ ret = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr);
+ if (ret != num_bytes)
+ goto fail;
+ ret = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr);
+ if (ret != num_bytes)
+ goto fail;
+ free(buffer);
+ return 0;
+fail:
+ /* error (-1), short count (> 0) and zero count all end up here */
+ free(buffer);
+ return -1;
+}
+/*
+ * Scan the ext2 inodes and copy all used ones into @root.
+ *
+ * Reserved ext2 inodes (numbers below EXT2_GOOD_OLD_FIRST_INO) are skipped,
+ * except for the root directory inode. Each copied inode gets the btrfs
+ * objectid ext2_ino + INO_OFFSET. The progress counter p->cur_copy_inodes
+ * is incremented for every inode handed to copy_single_inode().
+ *
+ * The transaction is committed and restarted whenever it has used 4096 or
+ * more blocks, and committed once more at the end.
+ *
+ * Returns 0 on success, -ENOMEM if no transaction can be started, -1 on an
+ * ext2fs library error, or the error returned by copy_single_inode(). The
+ * inode scan is closed on every path after it has been opened.
+ */
+static int ext2_copy_inodes(struct btrfs_convert_context *cctx,
+ struct btrfs_root *root,
+ int datacsum, int packing, int noxattr, struct task_ctx *p)
+{
+ ext2_filsys ext2_fs = cctx->fs_data;
+ int ret;
+ errcode_t err;
+ ext2_inode_scan ext2_scan;
+ struct ext2_inode ext2_inode;
+ ext2_ino_t ext2_ino;
+ u64 objectid;
+ struct btrfs_trans_handle *trans;
+
+ /*
+ * Open the scan before starting the transaction so that a failure
+ * here does not leave a started transaction behind.
+ */
+ err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan);
+ if (err) {
+ fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err));
+ return -1;
+ }
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino,
+ &ext2_inode))) {
+ /* no more inodes */
+ if (ext2_ino == 0)
+ break;
+ /* skip special inode in ext2fs */
+ if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO &&
+ ext2_ino != EXT2_ROOT_INO)
+ continue;
+ objectid = ext2_ino + INO_OFFSET;
+ ret = copy_single_inode(trans, root,
+ objectid, ext2_fs, ext2_ino,
+ &ext2_inode, datacsum, packing,
+ noxattr);
+ p->cur_copy_inodes++;
+ if (ret)
+ goto out;
+ /* keep individual transactions bounded in size */
+ if (trans->blocks_used >= 4096) {
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+ }
+ }
+ if (err) {
+ fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err));
+ ret = -1;
+ goto out;
+ }
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+out:
+ ext2fs_close_inode_scan(ext2_scan);
+
+ return ret;
+}
+
+/*
+ * Look up @block in the ext2 block bitmap of the file system attached to
+ * @cctx and return the result of ext2fs_fast_test_block_bitmap().
+ * Block numbers that do not fit into 32 bits trigger BUG_ON().
+ */
+static int ext2_test_block(struct btrfs_convert_context *cctx, u64 block)
+{
+ ext2_filsys fs = cctx->fs_data;
+ u32 narrowed = (u32)block;
+
+ BUG_ON(narrowed != block);
+ return ext2fs_fast_test_block_bitmap(fs->block_map, block);
+}
+
+/*
+ * Build the part of the image file covering bytes [start_byte, end_byte).
+ *
+ * Every block of the range that convert_test_block() reports as used is fed
+ * to block_iterate_proc() with identical file and disk block numbers. The
+ * blocks still pending in the iterator are then recorded, and whatever is
+ * left up to the (rounded up) last block is recorded with disk block 0.
+ *
+ * Note: the caller must make sure that no file extent points to blocks in
+ * this range yet.
+ *
+ * Returns a negative value as soon as block_iterate_proc() fails, otherwise
+ * the result of the last helper that was called (0 if none was).
+ */
+static int create_image_file_range(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ struct btrfs_inode_item *inode,
+ u64 start_byte, u64 end_byte,
+ struct btrfs_convert_context *cctx, int datacsum)
+{
+ struct blk_iterate_data data;
+ u32 blocksize = cctx->blocksize;
+ u32 cur_block = start_byte / blocksize;
+ u32 end_block = (end_byte + blocksize - 1) / blocksize;
+ u64 pos = start_byte;
+ int ret = 0;
+
+ init_blk_iterate_data(&data, trans, root, inode, objectid, datacsum);
+ data.first_block = cur_block;
+
+ while (pos < end_byte) {
+ if (convert_test_block(cctx, cur_block)) {
+ ret = block_iterate_proc(cur_block, cur_block, &data);
+ if (ret < 0)
+ return ret;
+ }
+ cur_block++;
+ pos += blocksize;
+ }
+
+ /* flush the run of blocks the iterator is still holding */
+ if (data.num_blocks > 0) {
+ ret = record_file_blocks(&data, data.first_block,
+ data.disk_block, data.num_blocks);
+ if (ret)
+ return ret;
+ data.first_block += data.num_blocks;
+ }
+
+ /* cover the remainder of the range with disk block 0 */
+ if (end_block > data.first_block)
+ ret = record_file_blocks(&data, data.first_block, 0,
+ end_block - data.first_block);
+
+ return ret;
+}
+/*
+ * Create the fs image file.
+ *
+ * A read-only regular file (mode 0400) named @name is created in the root
+ * directory of @root. Its size is the total_bytes value of the super block
+ * and its file extents are built in three steps:
+ * - the sectors in [0, first_free) are copied to newly allocated extents,
+ * - every data extent found in the extent tree that is referenced from
+ * BTRFS_FS_TREE_OBJECTID is recorded at a file offset equal to its disk
+ * byte number,
+ * - the gaps between those extents, and the tail up to total_bytes, are
+ * filled in by create_image_file_range().
+ *
+ * If @datacsum is zero the inode is flagged BTRFS_INODE_NODATASUM and no
+ * checksums are generated for the copied sectors.
+ *
+ * Returns 0 on success or the non-zero result of the failing helper; on
+ * failure the running transaction is not committed here.
+ */
+static int create_image(struct btrfs_convert_context *cctx,
+ struct btrfs_root *root, const char *name, int datacsum)
+{
+ int ret;
+ struct btrfs_key key;
+ struct btrfs_key location;
+ struct btrfs_path path;
+ struct btrfs_inode_item btrfs_inode;
+ struct btrfs_inode_item *inode_item;
+ struct extent_buffer *leaf;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_extent_item *ei;
+ struct btrfs_extent_inline_ref *iref;
+ struct btrfs_extent_data_ref *dref;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 objectid;
+ u64 last_byte;
+ u64 first_free;
+ u64 total_bytes;
+ u64 flags = BTRFS_INODE_READONLY;
+ u32 sectorsize = root->sectorsize;
+
+ total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+ first_free = BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
+ first_free &= ~((u64)sectorsize - 1); /* round down to a multiple of sectorsize (assumes a power of two) */
+ if (!datacsum)
+ flags |= BTRFS_INODE_NODATASUM;
+
+ memset(&btrfs_inode, 0, sizeof(btrfs_inode)); /* in-memory inode item, inserted once all extents are recorded */
+ btrfs_set_stack_inode_generation(&btrfs_inode, 1);
+ btrfs_set_stack_inode_size(&btrfs_inode, total_bytes);
+ btrfs_set_stack_inode_nlink(&btrfs_inode, 1);
+ btrfs_set_stack_inode_nbytes(&btrfs_inode, 0);
+ btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400);
+ btrfs_set_stack_inode_flags(&btrfs_inode, flags);
+ btrfs_init_path(&path);
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+
+ objectid = btrfs_root_dirid(&root->root_item); /* search for a free objectid, starting at the root dir id */
+ ret = btrfs_find_free_objectid(trans, root, objectid, &objectid);
+ if (ret)
+ goto fail;
+
+ /*
+ * copy blocks covered by extent #0 to new positions. extent #0 is
+ * special, we can't rely on relocate_extents_range to relocate it.
+ */
+ for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) {
+ ret = custom_alloc_extent(root, sectorsize, 0, &key, 0);
+ if (ret)
+ goto fail;
+ ret = copy_disk_extent(root, key.objectid, last_byte,
+ sectorsize); /* key.objectid is the start of the newly allocated extent */
+ if (ret)
+ goto fail;
+ ret = btrfs_record_file_extent(trans, root, objectid,
+ &btrfs_inode, last_byte,
+ key.objectid, sectorsize);
+ if (ret)
+ goto fail;
+ if (datacsum) {
+ ret = csum_disk_extent(trans, root, key.objectid,
+ sectorsize);
+ if (ret)
+ goto fail;
+ }
+ }
+
+ while(1) { /* walk the extent tree, starting at last_byte */
+ key.objectid = last_byte;
+ key.offset = 0;
+ btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+ ret = btrfs_search_slot(trans, fs_info->extent_root,
+ &key, &path, 0, 0);
+ if (ret < 0)
+ goto fail;
+next:
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(extent_root, &path);
+ if (ret < 0)
+ goto fail;
+ if (ret > 0)
+ break; /* no more leaves: the extent tree is exhausted */
+ leaf = path.nodes[0];
+ }
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (last_byte > key.objectid ||
+ key.type != BTRFS_EXTENT_ITEM_KEY) { /* item starts before last_byte or is not an extent item */
+ path.slots[0]++;
+ goto next;
+ }
+
+ bytenr = key.objectid;
+ num_bytes = key.offset;
+ ei = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_extent_item);
+ if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) { /* only data extents are of interest */
+ path.slots[0]++;
+ goto next;
+ }
+
+ BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) +
+ btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)); /* expect exactly one inline data ref */
+
+ iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ key.type = btrfs_extent_inline_ref_type(leaf, iref);
+ BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ if (btrfs_extent_data_ref_root(leaf, dref) !=
+ BTRFS_FS_TREE_OBJECTID) { /* skip extents referenced from any other root */
+ path.slots[0]++;
+ goto next;
+ }
+
+ if (bytenr > last_byte) { /* fill the gap in front of this extent */
+ ret = create_image_file_range(trans, root, objectid,
+ &btrfs_inode, last_byte,
+ bytenr, cctx,
+ datacsum);
+ if (ret)
+ goto fail;
+ }
+ ret = btrfs_record_file_extent(trans, root, objectid,
+ &btrfs_inode, bytenr, bytenr,
+ num_bytes); /* file offset and disk byte number are identical */
+ if (ret)
+ goto fail;
+ last_byte = bytenr + num_bytes;
+ btrfs_release_path(&path);
+
+ if (trans->blocks_used >= 4096) { /* commit and restart to bound the transaction size */
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+ }
+ }
+ btrfs_release_path(&path);
+ if (total_bytes > last_byte) { /* tail behind the last recorded extent */
+ ret = create_image_file_range(trans, root, objectid,
+ &btrfs_inode, last_byte,
+ total_bytes, cctx,
+ datacsum);
+ if (ret)
+ goto fail;
+ }
+
+ ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
+ if (ret)
+ goto fail;
+
+ location.objectid = objectid; /* link the image inode into the root directory under @name */
+ location.offset = 0;
+ btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
+ ret = btrfs_insert_dir_item(trans, root, name, strlen(name),
+ btrfs_root_dirid(&root->root_item),
+ &location, BTRFS_FT_REG_FILE, objectid);
+ if (ret)
+ goto fail;
+ ret = btrfs_insert_inode_ref(trans, root, name, strlen(name),
+ objectid,
+ btrfs_root_dirid(&root->root_item),
+ objectid);
+ if (ret)
+ goto fail;
+ location.objectid = btrfs_root_dirid(&root->root_item); /* now look up the directory inode itself */
+ location.offset = 0;
+ btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
+ ret = btrfs_lookup_inode(trans, root, &path, &location, 1);
+ if (ret)
+ goto fail;
+ leaf = path.nodes[0];
+ inode_item = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_inode_item);
+ btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 +
+ btrfs_inode_size(leaf, inode_item)); /* directory size grows by twice the name length */
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(&path);
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+fail:
+ btrfs_release_path(&path);
+ return ret;
+}
+
+/*
+ * Link the subvolume @root_objectid into the root directory of @root.
+ *
+ * The directory entry is named @base. If that name already exists, a
+ * decimal counter is appended ("<base>0", "<base>1", ...) and the insert is
+ * retried, for at most 1024 attempts in total. The entry uses the directory
+ * index following the highest existing one (2 if none is found).
+ *
+ * After the directory item has been inserted, the directory inode size is
+ * increased by twice the name length, the root backref and forward ref are
+ * added to the tree root and the transaction is committed.
+ *
+ * Returns the subvolume's root as read by btrfs_read_fs_root(), or NULL if
+ * @base is empty or longer than BTRFS_NAME_LEN, if no directory entry
+ * could be inserted, or if the root cannot be read. Several other failures
+ * trigger BUG_ON().
+ */
+static struct btrfs_root * link_subvol(struct btrfs_root *root,
+ const char *base, u64 root_objectid)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *new_root = NULL;
+ struct btrfs_path *path;
+ struct btrfs_inode_item *inode_item;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u64 dirid = btrfs_root_dirid(&root->root_item);
+ u64 index = 2;
+ char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
+ int len;
+ int i;
+ int ret;
+
+ len = strlen(base);
+ if (len == 0 || len > BTRFS_NAME_LEN)
+ return NULL;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+
+ /* find the highest directory index currently in use */
+ key.objectid = dirid;
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ BUG_ON(ret <= 0);
+
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
+ index = key.offset + 1;
+ }
+ btrfs_release_path(path);
+
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+
+ key.objectid = dirid;
+ key.offset = 0;
+ key.type = BTRFS_INODE_ITEM_KEY;
+
+ ret = btrfs_lookup_inode(trans, root, path, &key, 1);
+ BUG_ON(ret);
+ leaf = path->nodes[0];
+ inode_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_inode_item);
+
+ /* from here on, key describes the subvolume root the entry points to */
+ key.objectid = root_objectid;
+ key.offset = (u64)-1;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+
+ /* buf is not NUL terminated here; its length is passed explicitly */
+ memcpy(buf, base, len);
+ for (i = 0; i < 1024; i++) {
+ ret = btrfs_insert_dir_item(trans, root, buf, len,
+ dirid, &key, BTRFS_FT_DIR, index);
+ if (ret != -EEXIST)
+ break;
+ len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
+ if (len < 1 || len > BTRFS_NAME_LEN) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ if (ret)
+ goto fail;
+
+ btrfs_set_inode_size(leaf, inode_item, len * 2 +
+ btrfs_inode_size(leaf, inode_item));
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(path);
+
+ /* add the backref first */
+ ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
+ BTRFS_ROOT_BACKREF_KEY,
+ root->root_key.objectid,
+ dirid, index, buf, len);
+ BUG_ON(ret);
+
+ /* now add the forward ref */
+ ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
+ BTRFS_ROOT_REF_KEY, root_objectid,
+ dirid, index, buf, len);
+ /* check it like the backref; the commit below would overwrite ret */
+ BUG_ON(ret);
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+
+ new_root = btrfs_read_fs_root(fs_info, &key);
+ if (IS_ERR(new_root))
+ new_root = NULL;
+fail:
+ btrfs_free_path(path);
+ return new_root;
+}
+/*
+ * Rebuild the chunk and device extent items so that they match the block
+ * groups of the file system.
+ *
+ * The device extent and the chunk item created by make_btrfs are deleted.
+ * Then, for every block group below the super block's total_bytes, one
+ * device extent and one single-stripe chunk item are inserted; both use the
+ * block group start as device offset, so the mapping is one to one. Finally
+ * the device's bytes_used is set to total_bytes and the device item is
+ * updated.
+ *
+ * Only the first device of the device list is used.
+ *
+ * Returns 0 on success or the non-zero result of the failing tree operation.
+ */
+static int create_chunk_mapping(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_root *chunk_root = info->chunk_root;
+ struct btrfs_root *extent_root = info->extent_root;
+ struct btrfs_device *device;
+ struct btrfs_block_group_cache *cache;
+ struct btrfs_dev_extent *extent;
+ struct extent_buffer *leaf;
+ struct btrfs_chunk chunk;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ u64 cur_start;
+ u64 total_bytes;
+ u64 chunk_objectid;
+ int ret;
+
+ btrfs_init_path(&path);
+
+ total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
+ chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+
+ BUG_ON(list_empty(&info->fs_devices->devices));
+ device = list_entry(info->fs_devices->devices.next,
+ struct btrfs_device, dev_list); /* first device in the list */
+ BUG_ON(device->devid != info->fs_devices->latest_devid);
+
+ /* delete device extent created by make_btrfs */
+ key.objectid = device->devid;
+ key.offset = 0;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1);
+ if (ret < 0)
+ goto err;
+
+ BUG_ON(ret > 0); /* the item must exist */
+ ret = btrfs_del_item(trans, device->dev_root, &path);
+ if (ret)
+ goto err;
+ btrfs_release_path(&path);
+
+ /* delete chunk item created by make_btrfs */
+ key.objectid = chunk_objectid;
+ key.offset = 0;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+ ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1);
+ if (ret < 0)
+ goto err;
+
+ BUG_ON(ret > 0); /* the item must exist */
+ ret = btrfs_del_item(trans, chunk_root, &path);
+ if (ret)
+ goto err;
+ btrfs_release_path(&path);
+
+ /* for each block group, create device extent and chunk item */
+ cur_start = 0;
+ while (cur_start < total_bytes) {
+ cache = btrfs_lookup_block_group(root->fs_info, cur_start);
+ BUG_ON(!cache);
+
+ /* insert device extent */
+ key.objectid = device->devid;
+ key.offset = cache->key.objectid; /* device offset == block group start */
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ ret = btrfs_insert_empty_item(trans, device->dev_root, &path,
+ &key, sizeof(*extent));
+ if (ret)
+ goto err;
+
+ leaf = path.nodes[0];
+ extent = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_dev_extent);
+
+ btrfs_set_dev_extent_chunk_tree(leaf, extent,
+ chunk_root->root_key.objectid);
+ btrfs_set_dev_extent_chunk_objectid(leaf, extent,
+ chunk_objectid);
+ btrfs_set_dev_extent_chunk_offset(leaf, extent,
+ cache->key.objectid);
+ btrfs_set_dev_extent_length(leaf, extent, cache->key.offset);
+ write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+ (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
+ BTRFS_UUID_SIZE);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(&path);
+
+ /* insert chunk item */
+ btrfs_set_stack_chunk_length(&chunk, cache->key.offset);
+ btrfs_set_stack_chunk_owner(&chunk,
+ extent_root->root_key.objectid);
+ btrfs_set_stack_chunk_stripe_len(&chunk, BTRFS_STRIPE_LEN);
+ btrfs_set_stack_chunk_type(&chunk, cache->flags);
+ btrfs_set_stack_chunk_io_align(&chunk, device->io_align);
+ btrfs_set_stack_chunk_io_width(&chunk, device->io_width);
+ btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size);
+ btrfs_set_stack_chunk_num_stripes(&chunk, 1);
+ btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
+ btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid);
+ btrfs_set_stack_stripe_offset(&chunk.stripe,
+ cache->key.objectid); /* stripe offset == chunk offset */
+ memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE);
+
+ key.objectid = chunk_objectid;
+ key.offset = cache->key.objectid;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+
+ ret = btrfs_insert_item(trans, chunk_root, &key, &chunk,
+ btrfs_chunk_item_size(1));
+ if (ret)
+ goto err;
+
+ cur_start = cache->key.objectid + cache->key.offset; /* continue behind this block group */
+ }
+
+ device->bytes_used = total_bytes;
+ ret = btrfs_update_device(trans, device);
+err:
+ btrfs_release_path(&path);
+ return ret;
+}
+
+/*
+ * Create a new subvolume @root_objectid as a copy of @root.
+ *
+ * The root node of @root is copied with btrfs_copy_root(), a root item
+ * derived from @root's root item (with updated bytenr, level and
+ * generation) is inserted into the tree root, and a root directory with
+ * objectid BTRFS_FIRST_FREE_OBJECTID is created in the new subvolume.
+ *
+ * Always returns 0; every failure triggers BUG_ON().
+ */
+static int create_subvol(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 root_objectid)
+{
+ struct extent_buffer *tmp;
+ struct btrfs_root *new_root;
+ struct btrfs_key key;
+ struct btrfs_root_item root_item;
+ int ret;
+
+ ret = btrfs_copy_root(trans, root, root->node, &tmp,
+ root_objectid);
+ BUG_ON(ret);
+
+ memcpy(&root_item, &root->root_item, sizeof(root_item));
+ btrfs_set_root_bytenr(&root_item, tmp->start);
+ btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
+ btrfs_set_root_generation(&root_item, trans->transid);
+ free_extent_buffer(tmp);
+
+ key.objectid = root_objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = trans->transid;
+ ret = btrfs_insert_root(trans, root->fs_info->tree_root,
+ &key, &root_item);
+ /* do not go on with a root item that was never inserted */
+ BUG_ON(ret);
+
+ /* read the root back using the (u64)-1 offset */
+ key.offset = (u64)-1;
+ new_root = btrfs_read_fs_root(root->fs_info, &key);
+ BUG_ON(!new_root || IS_ERR(new_root));
+
+ ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
+ BUG_ON(ret);
+
+ return 0;
+}
+
+/*
+ * Set up the initial btrfs metadata inside one transaction: block groups
+ * and their accounting, the chunk mapping, the root directory of the tree
+ * root with its "default" entry pointing at @root, the subvolumes for the
+ * image file and for data relocation, and a COWed csum tree root.
+ *
+ * Returns 0 on success or the non-zero result of the first failing step;
+ * in that case the transaction is not committed. Failures of the later
+ * steps trigger BUG_ON().
+ */
+static int init_btrfs(struct btrfs_root *root)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_key default_loc;
+ struct extent_buffer *cowed;
+ int ret;
+
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+
+ /* each step only runs if all previous ones succeeded */
+ ret = btrfs_make_block_groups(trans, root);
+ if (!ret)
+ ret = btrfs_fix_block_accounting(trans, root);
+ if (!ret)
+ ret = create_chunk_mapping(trans, root);
+ if (!ret)
+ ret = btrfs_make_root_dir(trans, fs_info->tree_root,
+ BTRFS_ROOT_TREE_DIR_OBJECTID);
+ if (ret)
+ return ret;
+
+ /* "default" entry in the tree root directory, pointing at @root */
+ memcpy(&default_loc, &root->root_key, sizeof(default_loc));
+ default_loc.offset = (u64)-1;
+ ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
+ btrfs_super_root_dir(fs_info->super_copy),
+ &default_loc, BTRFS_FT_DIR, 0);
+ if (!ret)
+ ret = btrfs_insert_inode_ref(trans, fs_info->tree_root,
+ "default", 7, default_loc.objectid,
+ btrfs_super_root_dir(fs_info->super_copy), 0);
+ if (ret)
+ return ret;
+
+ btrfs_set_root_dirid(&fs_info->fs_root->root_item,
+ BTRFS_FIRST_FREE_OBJECTID);
+
+ /* subvol for fs image file */
+ ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
+ BUG_ON(ret);
+ /* subvol for data relocation */
+ ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
+ BUG_ON(ret);
+
+ /* COW the csum tree root node */
+ extent_buffer_get(fs_info->csum_root->node);
+ ret = __btrfs_cow_block(trans, fs_info->csum_root,
+ fs_info->csum_root->node, NULL, 0, &cowed, 0, 0);
+ BUG_ON(ret);
+ free_extent_buffer(cowed);
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ return ret;
+}
+
+/*
+ * Migrate the super block from @old_bytenr to its default position
+ * BTRFS_SUPER_INFO_OFFSET and zero the bytes in front of it, i.e. the range
+ * [0, BTRFS_SUPER_INFO_OFFSET).
+ *
+ * One sector (@sectorsize bytes) is read from @old_bytenr on @fd, its
+ * bytenr field is updated, the checksum is recomputed and the sector is
+ * written to the new position and synced before anything is zeroed.
+ *
+ * Zeroing is best effort: if it fails, a message is printed but the
+ * function still returns 0, because the super block has already been
+ * migrated at that point.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, and -1
+ * if reading, writing or syncing the super block fails. A read or write
+ * that transfers 0 bytes counts as a failure.
+ */
+static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize)
+{
+ int ret;
+ struct extent_buffer *buf;
+ struct btrfs_super_block *super;
+ u32 len;
+ u32 bytenr;
+
+ BUG_ON(sectorsize < sizeof(*super));
+ buf = malloc(sizeof(*buf) + sectorsize);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->len = sectorsize;
+ ret = pread(fd, buf->data, sectorsize, old_bytenr);
+ if (ret != sectorsize) {
+ /* covers errors, short reads and a 0 byte read at EOF */
+ ret = -1;
+ goto fail;
+ }
+
+ super = (struct btrfs_super_block *)buf->data;
+ BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
+ btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
+
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET);
+ if (ret != sectorsize) {
+ ret = -1;
+ goto fail;
+ }
+
+ ret = fsync(fd);
+ if (ret)
+ goto fail;
+
+ /* reuse the buffer as the source of zeroes */
+ memset(buf->data, 0, sectorsize);
+ for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
+ len = BTRFS_SUPER_INFO_OFFSET - bytenr;
+ if (len > sectorsize)
+ len = sectorsize;
+ ret = pwrite(fd, buf->data, len, bytenr);
+ if (ret != len) {
+ fprintf(stderr, "unable to zero fill device\n");
+ break;
+ }
+ bytenr += len;
+ }
+ /* deliberately not an error, see the function comment */
+ ret = 0;
+ fsync(fd);
+fail:
+ free(buf);
+ if (ret > 0)
+ ret = -1;
+ return ret;
+}
+
+/*
+ * Fill the sys_chunk_array of @super with a single entry: a disk key
+ * (BTRFS_FIRST_CHUNK_TREE_OBJECTID, BTRFS_CHUNK_ITEM_KEY, 0) followed by a
+ * one-stripe SYSTEM chunk of length total_bytes at device offset 0, using
+ * the devid and uuid found in the super block's dev_item. The array size
+ * is set to the size of that key plus chunk.
+ *
+ * Always returns 0.
+ */
+static int prepare_system_chunk_sb(struct btrfs_super_block *super)
+{
+ u32 sectorsize = btrfs_super_sectorsize(super);
+ struct btrfs_disk_key *disk_key =
+ (struct btrfs_disk_key *)(super->sys_chunk_array);
+ struct btrfs_chunk *sys_chunk =
+ (struct btrfs_chunk *)(super->sys_chunk_array +
+ sizeof(struct btrfs_disk_key));
+
+ /* key of the entry */
+ btrfs_set_disk_key_objectid(disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_disk_key_type(disk_key, BTRFS_CHUNK_ITEM_KEY);
+ btrfs_set_disk_key_offset(disk_key, 0);
+
+ /* chunk header */
+ btrfs_set_stack_chunk_length(sys_chunk, btrfs_super_total_bytes(super));
+ btrfs_set_stack_chunk_owner(sys_chunk, BTRFS_EXTENT_TREE_OBJECTID);
+ btrfs_set_stack_chunk_stripe_len(sys_chunk, BTRFS_STRIPE_LEN);
+ btrfs_set_stack_chunk_type(sys_chunk, BTRFS_BLOCK_GROUP_SYSTEM);
+ btrfs_set_stack_chunk_io_align(sys_chunk, sectorsize);
+ btrfs_set_stack_chunk_io_width(sys_chunk, sectorsize);
+ btrfs_set_stack_chunk_sector_size(sys_chunk, sectorsize);
+ btrfs_set_stack_chunk_num_stripes(sys_chunk, 1);
+ btrfs_set_stack_chunk_sub_stripes(sys_chunk, 0);
+
+ /* the single stripe; devid is copied verbatim from the dev item */
+ sys_chunk->stripe.devid = super->dev_item.devid;
+ btrfs_set_stack_stripe_offset(&sys_chunk->stripe, 0);
+ memcpy(sys_chunk->stripe.dev_uuid, super->dev_item.uuid,
+ BTRFS_UUID_SIZE);
+
+ btrfs_set_super_sys_array_size(super,
+ sizeof(*disk_key) + sizeof(*sys_chunk));
+ return 0;
+}
+
+/*
+ * Read the super block at @sb_bytenr from @fd, install the system chunk
+ * array via prepare_system_chunk_sb(), recompute the checksum and write the
+ * super block back to the same position.
+ *
+ * The super block must record @sb_bytenr as its own position and exactly
+ * one device, otherwise BUG_ON() triggers.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, -1 if
+ * the read or write fails or transfers a different number of bytes than
+ * BTRFS_SUPER_INFO_SIZE (including 0 bytes), or the error returned by
+ * prepare_system_chunk_sb().
+ */
+static int prepare_system_chunk(int fd, u64 sb_bytenr)
+{
+ int ret;
+ struct extent_buffer *buf;
+ struct btrfs_super_block *super;
+
+ BUG_ON(BTRFS_SUPER_INFO_SIZE < sizeof(*super));
+ buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->len = BTRFS_SUPER_INFO_SIZE;
+ ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
+ if (ret != BTRFS_SUPER_INFO_SIZE) {
+ /* covers errors, short reads and a 0 byte read at EOF */
+ ret = -1;
+ goto fail;
+ }
+
+ super = (struct btrfs_super_block *)buf->data;
+ BUG_ON(btrfs_super_bytenr(super) != sb_bytenr);
+ BUG_ON(btrfs_super_num_devices(super) != 1);
+
+ ret = prepare_system_chunk_sb(super);
+ if (ret)
+ goto fail;
+
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
+ if (ret != BTRFS_SUPER_INFO_SIZE) {
+ ret = -1;
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ free(buf);
+ if (ret > 0)
+ ret = -1;
+ return ret;
+}
+/*
+ * Relocate the data behind one file extent item and point the file at the
+ * new location.
+ *
+ * @extent_key is the key of the file extent item in @root (inode objectid
+ * and file offset); @extent_start / @extent_size describe the disk extent
+ * it is expected to reference. The item is deleted and the extent freed,
+ * then the data is processed sector by sector: a sector already marked
+ * EXTENT_LOCKED in @reloc_tree reuses the new position stored there, any
+ * other sector is copied to a newly allocated sector and remembered in
+ * @reloc_tree. The new blocks are recorded through block_iterate_proc() /
+ * record_file_blocks() and the updated inode item is written back.
+ *
+ * Returns 0 on success and a negative value on error. A positive value is
+ * returned if the file extent item is not found or does not match
+ * @extent_start / @extent_size.
+ */
+static int relocate_one_reference(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 extent_start, u64 extent_size,
+ struct btrfs_key *extent_key,
+ struct extent_io_tree *reloc_tree)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ struct btrfs_inode_item inode;
+ struct blk_iterate_data data;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 cur_offset;
+ u64 new_pos;
+ u64 nbytes;
+ u64 sector_end;
+ u32 sectorsize = root->sectorsize;
+ unsigned long ptr;
+ int datacsum;
+ int fd;
+ int ret;
+
+ btrfs_init_path(&path);
+ ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1);
+ if (ret) /* negative: error, positive: item not found */
+ goto fail;
+
+ leaf = path.nodes[0];
+ fi = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_file_extent_item);
+ BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0);
+ if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) ||
+ extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
+ ret = 1; /* the item does not reference the expected extent */
+ goto fail;
+ }
+
+ bytenr = extent_start + btrfs_file_extent_offset(leaf, fi);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+
+ ret = btrfs_del_item(trans, root, &path);
+ if (ret)
+ goto fail;
+
+ ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0,
+ root->root_key.objectid,
+ extent_key->objectid, extent_key->offset);
+ if (ret)
+ goto fail;
+
+ btrfs_release_path(&path);
+
+ key.objectid = extent_key->objectid; /* read the owning inode item into a stack copy */
+ key.offset = 0;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ ret = btrfs_lookup_inode(trans, root, &path, &key, 0);
+ if (ret)
+ goto fail;
+
+ leaf = path.nodes[0];
+ ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
+ read_extent_buffer(leaf, &inode, ptr, sizeof(inode));
+ btrfs_release_path(&path);
+
+ BUG_ON(num_bytes & (sectorsize - 1));
+ nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes; /* take the deleted extent out of nbytes */
+ btrfs_set_stack_inode_nbytes(&inode, nbytes);
+ datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM);
+
+ init_blk_iterate_data(&data, trans, root, &inode, extent_key->objectid,
+ datacsum);
+ data.first_block = extent_key->offset;
+
+ cur_offset = extent_key->offset;
+ while (num_bytes > 0) { /* one sector per iteration */
+ sector_end = bytenr + sectorsize - 1;
+ if (test_range_bit(reloc_tree, bytenr, sector_end,
+ EXTENT_LOCKED, 1)) {
+ ret = get_state_private(reloc_tree, bytenr, &new_pos); /* already relocated: reuse the stored position */
+ BUG_ON(ret);
+ } else {
+ ret = custom_alloc_extent(root, sectorsize, 0, &key, 0);
+ if (ret)
+ goto fail;
+ new_pos = key.objectid;
+
+ if (cur_offset == extent_key->offset) { /* only while still at the first sector */
+ fd = root->fs_info->fs_devices->latest_bdev;
+ readahead(fd, bytenr, num_bytes);
+ }
+ ret = copy_disk_extent(root, new_pos, bytenr,
+ sectorsize);
+ if (ret)
+ goto fail;
+ ret = set_extent_bits(reloc_tree, bytenr, sector_end,
+ EXTENT_LOCKED, GFP_NOFS); /* remember that this sector has been moved ... */
+ BUG_ON(ret);
+ ret = set_state_private(reloc_tree, bytenr, new_pos); /* ... and where it went */
+ BUG_ON(ret);
+ }
+
+ ret = block_iterate_proc(new_pos / sectorsize,
+ cur_offset / sectorsize, &data);
+ if (ret < 0)
+ goto fail;
+
+ cur_offset += sectorsize;
+ bytenr += sectorsize;
+ num_bytes -= sectorsize;
+ }
+
+ if (data.num_blocks > 0) { /* flush the blocks still pending in the iterator */
+ ret = record_file_blocks(&data, data.first_block,
+ data.disk_block, data.num_blocks);
+ if (ret)
+ goto fail;
+ }
+
+ key.objectid = extent_key->objectid; /* write the stack copy of the inode item back */
+ key.offset = 0;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
+ if (ret)
+ goto fail;
+
+ leaf = path.nodes[0];
+ ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
+ write_extent_buffer(leaf, &inode, ptr, sizeof(inode));
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(&path);
+
+fail:
+ btrfs_release_path(&path);
+ return ret;
+}
+/*
+ * Relocate all data extents that start inside [start_byte, end_byte).
+ *
+ * If an extent item that begins before @start_byte reaches into the range,
+ * the range is first extended backwards to the start of that extent.
+ *
+ * The range is then processed in passes that alternate between
+ * @image_root (even passes) and @fs_root (odd passes). In each pass every
+ * extent item in the range is counted; for those carrying an inline data
+ * ref that belongs to the current root, relocate_one_reference() is
+ * called. Passes are repeated until one finds no extent in the range or
+ * the pass limit is reached.
+ *
+ * Returns 0 if the range ended up free of extents, -1 if extents are still
+ * present after the last pass, or a negative error from a helper.
+ */
+static int relocate_extents_range(struct btrfs_root *fs_root,
+ struct btrfs_root *image_root,
+ u64 start_byte, u64 end_byte)
+{
+ struct btrfs_fs_info *info = fs_root->fs_info;
+ struct btrfs_root *extent_root = info->extent_root;
+ struct btrfs_root *cur_root = NULL;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_extent_data_ref *dref;
+ struct btrfs_extent_inline_ref *iref;
+ struct btrfs_extent_item *ei;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ struct btrfs_key extent_key;
+ struct btrfs_path path;
+ struct extent_io_tree reloc_tree;
+ unsigned long ptr;
+ unsigned long end;
+ u64 cur_byte;
+ u64 num_bytes;
+ u64 ref_root;
+ u64 num_extents;
+ int pass = 0;
+ int ret;
+
+ btrfs_init_path(&path);
+ extent_io_tree_init(&reloc_tree); /* shared by all passes */
+
+ key.objectid = start_byte;
+ key.offset = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+ if (ret < 0)
+ goto fail;
+ if (ret > 0) {
+ ret = btrfs_previous_item(extent_root, &path, 0,
+ BTRFS_EXTENT_ITEM_KEY);
+ if (ret < 0)
+ goto fail;
+ if (ret == 0) {
+ leaf = path.nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.objectid + key.offset > start_byte) /* previous extent overlaps the range start */
+ start_byte = key.objectid;
+ }
+ }
+ btrfs_release_path(&path);
+again:
+ cur_root = (pass % 2 == 0) ? image_root : fs_root;
+ num_extents = 0;
+
+ trans = btrfs_start_transaction(cur_root, 1);
+ BUG_ON(!trans);
+
+ cur_byte = start_byte;
+ while (1) {
+ key.objectid = cur_byte;
+ key.offset = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ ret = btrfs_search_slot(trans, extent_root,
+ &key, &path, 0, 0);
+ if (ret < 0)
+ goto fail;
+next:
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(extent_root, &path);
+ if (ret < 0)
+ goto fail;
+ if (ret > 0)
+ break; /* end of the extent tree */
+ leaf = path.nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.objectid < cur_byte ||
+ key.type != BTRFS_EXTENT_ITEM_KEY) { /* item before cur_byte or not an extent item */
+ path.slots[0]++;
+ goto next;
+ }
+ if (key.objectid >= end_byte)
+ break; /* past the end of the range */
+
+ num_extents++; /* counted even if this pass cannot relocate it */
+
+ cur_byte = key.objectid;
+ num_bytes = key.offset;
+ ei = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_extent_item);
+ BUG_ON(!(btrfs_extent_flags(leaf, ei) &
+ BTRFS_EXTENT_FLAG_DATA));
+
+ ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
+ end = ptr + btrfs_item_size_nr(leaf, path.slots[0]);
+
+ ptr += sizeof(struct btrfs_extent_item); /* inline refs follow the extent item */
+
+ while (ptr < end) { /* look for a data ref owned by cur_root */
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ key.type = btrfs_extent_inline_ref_type(leaf, iref);
+ BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ ref_root = btrfs_extent_data_ref_root(leaf, dref);
+ extent_key.objectid =
+ btrfs_extent_data_ref_objectid(leaf, dref);
+ extent_key.offset =
+ btrfs_extent_data_ref_offset(leaf, dref);
+ extent_key.type = BTRFS_EXTENT_DATA_KEY;
+ BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1);
+
+ if (ref_root == cur_root->root_key.objectid)
+ break;
+
+ ptr += btrfs_extent_inline_ref_size(key.type);
+ }
+
+ if (ptr >= end) { /* no ref from cur_root: leave it for another pass */
+ path.slots[0]++;
+ goto next;
+ }
+
+ ret = relocate_one_reference(trans, cur_root, cur_byte,
+ num_bytes, &extent_key,
+ &reloc_tree);
+ if (ret < 0) /* positive results are not treated as errors here */
+ goto fail;
+
+ cur_byte += num_bytes;
+ btrfs_release_path(&path);
+
+ if (trans->blocks_used >= 4096) { /* commit and restart to bound the transaction size */
+ ret = btrfs_commit_transaction(trans, cur_root);
+ BUG_ON(ret);
+ trans = btrfs_start_transaction(cur_root, 1);
+ BUG_ON(!trans);
+ }
+ }
+ btrfs_release_path(&path);
+
+ ret = btrfs_commit_transaction(trans, cur_root);
+ BUG_ON(ret);
+
+ if (num_extents > 0 && pass++ < 16) /* extents were still present: run another pass */
+ goto again;
+
+ ret = (num_extents > 0) ? -1 : 0;
+fail:
+ btrfs_release_path(&path);
+ extent_io_tree_cleanup(&reloc_tree);
+ return ret;
+}
+
+/*
+ * Relocate data out of the areas reserved for system use: every block
+ * group flagged BTRFS_BLOCK_GROUP_SYSTEM, and the BTRFS_STRIPE_LEN aligned
+ * stripe around each of the BTRFS_SUPER_MIRROR_MAX super block offsets.
+ *
+ * Returns 0 on success or the first non-zero result of
+ * relocate_extents_range().
+ */
+static int cleanup_sys_chunk(struct btrfs_root *fs_root,
+ struct btrfs_root *image_root)
+{
+ struct btrfs_block_group_cache *bg;
+ u64 cursor = 0;
+ u64 range_end;
+ int mirror;
+ int ret;
+
+ /* walk the block groups in order until the lookup finds none */
+ while ((bg = btrfs_lookup_block_group(fs_root->fs_info, cursor))) {
+ range_end = bg->key.objectid + bg->key.offset;
+ if (bg->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+ ret = relocate_extents_range(fs_root, image_root,
+ bg->key.objectid,
+ range_end);
+ if (ret)
+ return ret;
+ }
+ cursor = range_end;
+ }
+
+ /* stripes that contain a super block copy */
+ for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+ cursor = btrfs_sb_offset(mirror);
+ cursor &= ~((u64)BTRFS_STRIPE_LEN - 1);
+
+ ret = relocate_extents_range(fs_root, image_root,
+ cursor, cursor + BTRFS_STRIPE_LEN);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+/*
+ * Re-COW the chunk tree and rebuild the system chunk array of the super
+ * block.
+ *
+ * First every leaf of the chunk tree is searched with COW enabled. Then
+ * the sys array size in the super block copy is reset to 0 and every chunk
+ * item whose type has BTRFS_BLOCK_GROUP_SYSTEM set is added again with
+ * btrfs_add_system_chunk(). The transaction is committed at the end.
+ *
+ * Returns 0 on success or the non-zero result of the failing tree
+ * operation; in that case the transaction is not committed here.
+ */
+static int fixup_chunk_mapping(struct btrfs_root *root)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_root *chunk_root = info->chunk_root;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ struct btrfs_chunk chunk;
+ unsigned long ptr;
+ u32 size;
+ u64 type;
+ int ret;
+
+ btrfs_init_path(&path);
+
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+
+ /*
+ * recow the whole chunk tree. this will move all chunk tree blocks
+ * into system block group.
+ */
+ memset(&key, 0, sizeof(key)); /* start at the smallest possible key */
+ while (1) {
+ ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
+ if (ret < 0)
+ goto err;
+
+ ret = btrfs_next_leaf(chunk_root, &path);
+ if (ret < 0)
+ goto err;
+ if (ret > 0)
+ break; /* that was the last leaf */
+
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); /* continue from the key the path now points at */
+ btrfs_release_path(&path);
+ }
+ btrfs_release_path(&path);
+
+ /* fixup the system chunk array in super block */
+ btrfs_set_super_sys_array_size(info->super_copy, 0);
+
+ key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+ key.offset = 0;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+
+ ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0);
+ if (ret < 0)
+ goto err;
+ BUG_ON(ret != 0); /* a chunk item at offset 0 must exist */
+ while(1) {
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(chunk_root, &path);
+ if (ret < 0)
+ goto err;
+ if (ret > 0)
+ break;
+ leaf = path.nodes[0];
+ }
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.type != BTRFS_CHUNK_ITEM_KEY)
+ goto next;
+
+ ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
+ size = btrfs_item_size_nr(leaf, path.slots[0]);
+ BUG_ON(size != sizeof(chunk)); /* item must be exactly one struct btrfs_chunk */
+ read_extent_buffer(leaf, &chunk, ptr, size);
+ type = btrfs_stack_chunk_type(&chunk);
+
+ if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) /* only system chunks go into the super block */
+ goto next;
+
+ ret = btrfs_add_system_chunk(trans, chunk_root, &key,
+ &chunk, size);
+ if (ret)
+ goto err;
+next:
+ path.slots[0]++;
+ }
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+err:
+ btrfs_release_path(&path);
+ return ret;
+}
+
+/*
+ * Operation table for the "ext2" source filesystem: every callback slot of
+ * struct btrfs_convert_operations is bound to its ext2_* implementation.
+ */
+static const struct btrfs_convert_operations ext2_convert_ops = {
+ .name = "ext2",
+ .open_fs = ext2_open_fs,
+ .alloc_block = ext2_alloc_block,
+ .alloc_block_range = ext2_alloc_block_range,
+ .copy_inodes = ext2_copy_inodes,
+ .test_block = ext2_test_block,
+ .free_block = ext2_free_block,
+ .free_block_range = ext2_free_block_range,
+ .close_fs = ext2_close_fs,
+};
+
+/* Source filesystem backends that convert_open_fs() tries, in array order. */
+static const struct btrfs_convert_operations *convert_operations[] = {
+ &ext2_convert_ops,
+};
+
+/*
+ * Probe @devname with each registered backend.
+ *
+ * @cctx is zeroed first. The first backend whose open_fs() callback returns 0
+ * is stored in cctx->convert_ops and 0 is returned. If none succeeds, a
+ * message is printed to stderr and -1 is returned.
+ */
+static int convert_open_fs(const char *devname,
+			   struct btrfs_convert_context *cctx)
+{
+	int idx = 0;
+
+	memset(cctx, 0, sizeof(*cctx));
+
+	while (idx < ARRAY_SIZE(convert_operations)) {
+		const struct btrfs_convert_operations *ops =
+			convert_operations[idx];
+
+		if (ops->open_fs(cctx, devname) == 0) {
+			cctx->convert_ops = ops;
+			return 0;
+		}
+		idx++;
+	}
+
+	fprintf(stderr, "No file system found to convert.\n");
+	return -1;
+}
+
+/*
+ * Convert the filesystem on @devname to btrfs in place.
+ *
+ * @datacsum, @packing, @noxattr: forwarded to copy_inodes(); @datacsum is
+ *	also forwarded to create_image().
+ * @nodesize:	metadata node size; validated with btrfs_check_nodesize().
+ * @copylabel:	1 copies the label from cctx.volume_name, -1 uses @fslabel,
+ *		any other value leaves the label empty.
+ * @fslabel:	label used when @copylabel is -1; it is copied with strcpy(),
+ *		so the caller must keep it shorter than BTRFS_LABEL_SIZE.
+ * @progress:	non-zero starts a task that reports copied inodes.
+ * @features:	feature bits handed to make_btrfs().
+ *
+ * The temporary super block is written at blocks[0]; migrate_super_block()
+ * is what finally turns the device into a btrfs filesystem. Failures after
+ * that point print a warning instead of "conversion aborted".
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int do_convert(const char *devname, int datacsum, int packing, int noxattr,
+		u32 nodesize, int copylabel, const char *fslabel, int progress,
+		u64 features)
+{
+	int i, ret, blocks_per_node;
+	int fd = -1;
+	int is_btrfs = 0;
+	u32 blocksize;
+	u64 blocks[7];
+	u64 total_bytes;
+	u64 super_bytenr;
+	struct btrfs_root *root;
+	struct btrfs_root *image_root;
+	struct btrfs_convert_context cctx;
+	char *subvol_name = NULL;
+	struct task_ctx ctx;
+	char features_buf[64];
+	struct btrfs_mkfs_config mkfs_cfg;
+
+	init_convert_context(&cctx);
+	ret = convert_open_fs(devname, &cctx);
+	if (ret)
+		goto fail;
+
+	blocksize = cctx.blocksize;
+	total_bytes = (u64)blocksize * (u64)cctx.block_count;
+	if (blocksize < 4096) {
+		fprintf(stderr, "block size is too small\n");
+		goto fail;
+	}
+	if (btrfs_check_nodesize(nodesize, blocksize, features))
+		goto fail;
+	blocks_per_node = nodesize / blocksize;
+	/*
+	 * Seed ret so that the first range allocation below is asked to start
+	 * at block 0; after a successful call ret is 0, so later calls pass
+	 * blocks_per_node as the start hint.
+	 */
+	ret = -blocks_per_node;
+	for (i = 0; i < 7; i++) {
+		if (nodesize == blocksize)
+			ret = convert_alloc_block(&cctx, 0, blocks + i);
+		else
+			ret = convert_alloc_block_range(&cctx,
+					ret + blocks_per_node, blocks_per_node,
+					blocks + i);
+		if (ret) {
+			fprintf(stderr, "not enough free space\n");
+			goto fail;
+		}
+		/* turn the block number into a byte offset */
+		blocks[i] *= blocksize;
+	}
+	super_bytenr = blocks[0];
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "unable to open %s\n", devname);
+		goto fail;
+	}
+	btrfs_parse_features_to_string(features_buf, features);
+	if (features == BTRFS_MKFS_DEFAULT_FEATURES)
+		strcat(features_buf, " (default)");
+
+	printf("create btrfs filesystem:\n");
+	printf("\tblocksize: %u\n", blocksize);
+	printf("\tnodesize: %u\n", nodesize);
+	printf("\tfeatures: %s\n", features_buf);
+
+	mkfs_cfg.label = cctx.volume_name;
+	mkfs_cfg.fs_uuid = NULL;
+	memcpy(mkfs_cfg.blocks, blocks, sizeof(blocks));
+	mkfs_cfg.num_bytes = total_bytes;
+	mkfs_cfg.nodesize = nodesize;
+	mkfs_cfg.sectorsize = blocksize;
+	mkfs_cfg.stripesize = blocksize;
+	mkfs_cfg.features = features;
+
+	ret = make_btrfs(fd, &mkfs_cfg);
+	if (ret) {
+		fprintf(stderr, "unable to create initial ctree: %s\n",
+			strerror(-ret));
+		goto fail;
+	}
+	/* create a system chunk that maps the whole device */
+	ret = prepare_system_chunk(fd, super_bytenr);
+	if (ret) {
+		fprintf(stderr, "unable to update system chunk\n");
+		goto fail;
+	}
+	root = open_ctree_fd(fd, devname, super_bytenr, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "unable to open ctree\n");
+		goto fail;
+	}
+	ret = cache_free_extents(root, &cctx);
+	if (ret) {
+		fprintf(stderr, "error during cache_free_extents %d\n", ret);
+		goto fail;
+	}
+	root->fs_info->extent_ops = &extent_ops;
+	/* recover block allocation bitmap */
+	for (i = 0; i < 7; i++) {
+		blocks[i] /= blocksize;
+		if (nodesize == blocksize)
+			convert_free_block(&cctx, blocks[i]);
+		else
+			convert_free_block_range(&cctx, blocks[i],
+					blocks_per_node);
+	}
+	ret = init_btrfs(root);
+	if (ret) {
+		fprintf(stderr, "unable to setup the root tree\n");
+		goto fail;
+	}
+	printf("creating btrfs metadata.\n");
+	ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
+	ctx.cur_copy_inodes = 0;
+
+	if (progress) {
+		ctx.info = task_init(print_copied_inodes, after_copied_inodes, &ctx);
+		task_start(ctx.info);
+	}
+	ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
+	if (ret) {
+		fprintf(stderr, "error during copy_inodes %d\n", ret);
+		goto fail;
+	}
+	if (progress) {
+		task_stop(ctx.info);
+		task_deinit(ctx.info);
+	}
+
+	printf("creating %s image file.\n", cctx.convert_ops->name);
+	ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name);
+	if (ret < 0) {
+		fprintf(stderr, "error allocating subvolume name: %s_saved\n",
+			cctx.convert_ops->name);
+		goto fail;
+	}
+
+	image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
+
+	free(subvol_name);
+
+	if (!image_root) {
+		fprintf(stderr, "unable to create subvol\n");
+		goto fail;
+	}
+	ret = create_image(&cctx, image_root, "image", datacsum);
+	if (ret) {
+		fprintf(stderr, "error during create_image %d\n", ret);
+		goto fail;
+	}
+	memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
+	if (copylabel == 1) {
+		/*
+		 * Copy at most BTRFS_LABEL_SIZE - 1 bytes so that the last
+		 * byte zeroed above always terminates the label, even when
+		 * the source volume name fills the whole buffer.
+		 */
+		strncpy(root->fs_info->super_copy->label,
+			cctx.volume_name, BTRFS_LABEL_SIZE - 1);
+		fprintf(stderr, "copy label '%s'\n",
+			root->fs_info->super_copy->label);
+	} else if (copylabel == -1) {
+		strcpy(root->fs_info->super_copy->label, fslabel);
+		fprintf(stderr, "set label to '%s'\n", fslabel);
+	}
+
+	printf("cleaning up system chunk.\n");
+	ret = cleanup_sys_chunk(root, image_root);
+	if (ret) {
+		fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret);
+		goto fail;
+	}
+	ret = close_ctree(root);
+	if (ret) {
+		fprintf(stderr, "error during close_ctree %d\n", ret);
+		goto fail;
+	}
+	convert_close_fs(&cctx);
+	clean_convert_context(&cctx);
+
+	/*
+	 * If this step succeed, we get a mountable btrfs. Otherwise
+	 * the source fs is left unchanged.
+	 */
+	ret = migrate_super_block(fd, super_bytenr, blocksize);
+	if (ret) {
+		fprintf(stderr, "unable to migrate super block\n");
+		goto fail;
+	}
+	is_btrfs = 1;
+
+	root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "unable to open ctree\n");
+		goto fail;
+	}
+	/* move chunk tree into system chunk. */
+	ret = fixup_chunk_mapping(root);
+	if (ret) {
+		fprintf(stderr, "error during fixup_chunk_tree\n");
+		goto fail;
+	}
+	/* do not report success if the final write-out of the tree failed */
+	ret = close_ctree(root);
+	if (ret) {
+		fprintf(stderr, "error during close_ctree %d\n", ret);
+		goto fail;
+	}
+	close(fd);
+
+	printf("conversion complete.\n");
+	return 0;
+fail:
+	clean_convert_context(&cctx);
+	if (fd != -1)
+		close(fd);
+	if (is_btrfs)
+		fprintf(stderr,
+			"WARNING: an error occured during chunk mapping fixup, filesystem mountable but not finalized\n");
+	else
+		fprintf(stderr, "conversion aborted\n");
+	return -1;
+}
+
+/*
+ * Check whether the filesystem layout still permits a rollback.
+ *
+ * The filesystem must consist of exactly one device. Starting at
+ * BTRFS_SUPER_INFO_OFFSET every mapped logical range must resolve to a single
+ * stripe whose physical address equals the logical address. Ranges for which
+ * btrfs_map_block() reports -ENOENT are skipped, and a reported length of
+ * (u64)-1 ends the walk.
+ *
+ * Returns 0 when rollback is possible, -1 otherwise.
+ */
+static int may_rollback(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_multi_bio *bio = NULL;
+	u64 cur = BTRFS_SUPER_INFO_OFFSET;
+	u64 total;
+	u64 len;
+
+	if (btrfs_super_num_devices(fs_info->super_copy) != 1)
+		return -1;
+
+	total = btrfs_super_total_bytes(root->fs_info->super_copy);
+
+	do {
+		int err = btrfs_map_block(&fs_info->mapping_tree, WRITE, cur,
+					  &len, &bio, 0, NULL);
+
+		if (err == 0) {
+			int stripes = bio->num_stripes;
+			u64 phys = bio->stripes[0].physical;
+
+			kfree(bio);
+			/* must be a 1:1 mapping on a single stripe */
+			if (stripes != 1 || phys != cur)
+				return -1;
+		} else if (err != -ENOENT) {
+			return -1;
+		} else if (len == (u64)-1) {
+			/* removed block group at the tail */
+			break;
+		}
+		/* on -ENOENT with a finite length: removed block group in the middle */
+		cur += len;
+	} while (cur < total);
+
+	return 0;
+}
+
+/*
+ * Roll a converted filesystem on @devname back to the original filesystem.
+ *
+ * Steps, in the order the code performs them:
+ * 1. Open the device and the btrfs tree, and check may_rollback().
+ * 2. Locate the CONV_IMAGE_SUBVOL_OBJECTID subvolume and its "image" file,
+ *    and read the file size into total_bytes.
+ * 3. Walk the image file extents; every extent whose disk location is neither
+ *    0 nor equal to its file offset is recorded in io_tree (range marked
+ *    EXTENT_LOCKED, private value = disk location).
+ * 4. Require that the range [0, first_free) is fully recorded.
+ * 5. In a transaction: re-COW the chunk tree with system_allocs set to -1,
+ *    verify the used bytes of system block groups do not exceed first_free,
+ *    rewrite the system chunk in the super block copy, commit and close.
+ * 6. Zero the btrfs super block mirrors that lie below total_bytes.
+ * 7. Copy all recorded blocks back sector by sector, except the sector at
+ *    BTRFS_SUPER_INFO_OFFSET, which is copied last after an fsync().
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * NOTE(review): the 'fail' path neither closes the tree opened with
+ * open_ctree_fd() nor calls extent_io_tree_cleanup() -- confirm that relying
+ * on process exit is intended.
+ */
+static int do_rollback(const char *devname)
+{
+ int fd = -1;
+ int ret;
+ int i;
+ struct btrfs_root *root;
+ struct btrfs_root *image_root;
+ struct btrfs_root *chunk_root;
+ struct btrfs_dir_item *dir;
+ struct btrfs_inode_item *inode;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_trans_handle *trans;
+ struct extent_buffer *leaf;
+ struct btrfs_block_group_cache *cache1;
+ struct btrfs_block_group_cache *cache2;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ struct extent_io_tree io_tree;
+ char *buf = NULL;
+ char *name;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 root_dir;
+ u64 objectid;
+ u64 offset;
+ u64 start;
+ u64 end;
+ u64 sb_bytenr;
+ u64 first_free;
+ u64 total_bytes;
+ u32 sectorsize;
+
+ extent_io_tree_init(&io_tree);
+
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "unable to open %s\n", devname);
+ goto fail;
+ }
+ root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
+ if (!root) {
+ fprintf(stderr, "unable to open ctree\n");
+ goto fail;
+ }
+ ret = may_rollback(root);
+ if (ret < 0) {
+ fprintf(stderr, "unable to do rollback\n");
+ goto fail;
+ }
+
+ /* one-sector bounce buffer used for all raw copies below */
+ sectorsize = root->sectorsize;
+ buf = malloc(sectorsize);
+ if (!buf) {
+ fprintf(stderr, "unable to allocate memory\n");
+ goto fail;
+ }
+
+ btrfs_init_path(&path);
+
+ /* the image subvolume must still be linked from the fs tree */
+ key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = BTRFS_FS_TREE_OBJECTID;
+ ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
+ 0);
+ btrfs_release_path(&path);
+ if (ret > 0) {
+ fprintf(stderr,
+ "ERROR: unable to convert ext2 image subvolume, is it deleted?\n");
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: unable to open ext2_subvol, id=%llu: %s\n",
+ (unsigned long long)key.objectid, strerror(-ret));
+ goto fail;
+ }
+
+ key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ image_root = btrfs_read_fs_root(root->fs_info, &key);
+ if (!image_root || IS_ERR(image_root)) {
+ fprintf(stderr, "unable to open subvol %llu\n",
+ (unsigned long long)key.objectid);
+ goto fail;
+ }
+
+ /* find the file named "image" in the subvolume's root directory */
+ name = "image";
+ root_dir = btrfs_root_dirid(&root->root_item);
+ dir = btrfs_lookup_dir_item(NULL, image_root, &path,
+ root_dir, name, strlen(name), 0);
+ if (!dir || IS_ERR(dir)) {
+ fprintf(stderr, "unable to find file %s\n", name);
+ goto fail;
+ }
+ leaf = path.nodes[0];
+ btrfs_dir_item_key_to_cpu(leaf, dir, &key);
+ btrfs_release_path(&path);
+
+ objectid = key.objectid;
+
+ /* the size of the image file bounds the extent walk and the mirror zeroing */
+ ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
+ if (ret) {
+ fprintf(stderr, "unable to find inode item\n");
+ goto fail;
+ }
+ leaf = path.nodes[0];
+ inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
+ total_bytes = btrfs_inode_size(leaf, inode);
+ btrfs_release_path(&path);
+
+ key.objectid = objectid;
+ key.offset = 0;
+ btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+ ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
+ if (ret != 0) {
+ fprintf(stderr, "unable to find first file extent\n");
+ btrfs_release_path(&path);
+ goto fail;
+ }
+
+ /* build mapping tree for the relocated blocks */
+ for (offset = 0; offset < total_bytes; ) {
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ /* NOTE(review): the path was searched in image_root but 'root' is passed here -- confirm */
+ ret = btrfs_next_leaf(root, &path);
+ if (ret != 0)
+ break;
+ continue;
+ }
+
+ /* extents must be contiguous: the next key has to start exactly at 'offset' */
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.objectid != objectid || key.offset != offset ||
+ btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+ break;
+
+ fi = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+ break;
+ if (btrfs_file_extent_compression(leaf, fi) ||
+ btrfs_file_extent_encryption(leaf, fi) ||
+ btrfs_file_extent_other_encoding(leaf, fi))
+ break;
+
+ bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ /* skip holes and direct mapped extents */
+ if (bytenr == 0 || bytenr == offset)
+ goto next_extent;
+
+ bytenr += btrfs_file_extent_offset(leaf, fi);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+
+ /*
+ * a relocated range must lie within one block group, and that
+ * group must be a system group unless the range intersects a
+ * super block location
+ */
+ cache1 = btrfs_lookup_block_group(root->fs_info, offset);
+ cache2 = btrfs_lookup_block_group(root->fs_info,
+ offset + num_bytes - 1);
+ if (!cache1 || cache1 != cache2 ||
+ (!(cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ !intersect_with_sb(offset, num_bytes)))
+ break;
+
+ /* remember: original location 'offset' is currently stored at 'bytenr' */
+ set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
+ EXTENT_LOCKED, GFP_NOFS);
+ set_state_private(&io_tree, offset, bytenr);
+next_extent:
+ offset += btrfs_file_extent_num_bytes(leaf, fi);
+ path.slots[0]++;
+ }
+ btrfs_release_path(&path);
+
+ /* any early 'break' above leaves offset short of the file size */
+ if (offset < total_bytes) {
+ fprintf(stderr, "unable to build extent mapping\n");
+ goto fail;
+ }
+
+ /* first sector-aligned offset at or after BTRFS_SUPER_INFO_OFFSET + sectorsize */
+ first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
+ first_free &= ~((u64)sectorsize - 1);
+ /* backup for extent #0 should exist */
+ if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
+ fprintf(stderr, "no backup for the first extent\n");
+ goto fail;
+ }
+ /* force no allocation from system block group */
+ root->fs_info->system_allocs = -1;
+ trans = btrfs_start_transaction(root, 1);
+ BUG_ON(!trans);
+ /*
+ * recow the whole chunk tree, this will remove all chunk tree blocks
+ * from system block group
+ */
+ chunk_root = root->fs_info->chunk_root;
+ memset(&key, 0, sizeof(key));
+ while (1) {
+ ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
+ if (ret < 0)
+ break;
+
+ ret = btrfs_next_leaf(chunk_root, &path);
+ if (ret)
+ break;
+
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ btrfs_release_path(&path);
+ }
+ btrfs_release_path(&path);
+
+ /* sum the used bytes of every system block group */
+ offset = 0;
+ num_bytes = 0;
+ while(1) {
+ cache1 = btrfs_lookup_block_group(root->fs_info, offset);
+ if (!cache1)
+ break;
+
+ if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
+ num_bytes += btrfs_block_group_used(&cache1->item);
+
+ offset = cache1->key.objectid + cache1->key.offset;
+ }
+ /* only extent #0 left in system block group? */
+ if (num_bytes > first_free) {
+ fprintf(stderr, "unable to empty system block group\n");
+ goto fail;
+ }
+ /* create a system chunk that maps the whole device */
+ ret = prepare_system_chunk_sb(root->fs_info->super_copy);
+ if (ret) {
+ fprintf(stderr, "unable to update system chunk\n");
+ goto fail;
+ }
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+
+ ret = close_ctree(root);
+ if (ret) {
+ fprintf(stderr, "error during close_ctree %d\n", ret);
+ goto fail;
+ }
+
+ /* zero btrfs super block mirrors */
+ memset(buf, 0, sectorsize);
+ /* index 0 (the primary super block) is skipped here and overwritten at the very end */
+ for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ if (bytenr >= total_bytes)
+ break;
+ ret = pwrite(fd, buf, sectorsize, bytenr);
+ if (ret != sectorsize) {
+ fprintf(stderr,
+ "error during zeroing supreblock %d: %d\n",
+ i, ret);
+ goto fail;
+ }
+ }
+
+ /* NOTE(review): stays (u64)-1 if no recorded sector starts at BTRFS_SUPER_INFO_OFFSET -- confirm this cannot happen */
+ sb_bytenr = (u64)-1;
+ /* copy all relocated blocks back */
+ while(1) {
+ ret = find_first_extent_bit(&io_tree, 0, &start, &end,
+ EXTENT_LOCKED);
+ if (ret)
+ break;
+
+ ret = get_state_private(&io_tree, start, &bytenr);
+ BUG_ON(ret);
+
+ /* clearing the bits makes the next find_first_extent_bit() return the following range */
+ clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
+ GFP_NOFS);
+
+ while (start <= end) {
+ /* defer the primary super block sector; only remember where its backup is */
+ if (start == BTRFS_SUPER_INFO_OFFSET) {
+ sb_bytenr = bytenr;
+ goto next_sector;
+ }
+ ret = pread(fd, buf, sectorsize, bytenr);
+ if (ret < 0) {
+ fprintf(stderr, "error during pread %d\n", ret);
+ goto fail;
+ }
+ BUG_ON(ret != sectorsize);
+ ret = pwrite(fd, buf, sectorsize, start);
+ if (ret < 0) {
+ fprintf(stderr, "error during pwrite %d\n", ret);
+ goto fail;
+ }
+ BUG_ON(ret != sectorsize);
+next_sector:
+ start += sectorsize;
+ bytenr += sectorsize;
+ }
+ }
+
+ ret = fsync(fd);
+ if (ret) {
+ fprintf(stderr, "error during fsync %d\n", ret);
+ goto fail;
+ }
+ /*
+ * finally, overwrite btrfs super block.
+ */
+ ret = pread(fd, buf, sectorsize, sb_bytenr);
+ if (ret < 0) {
+ fprintf(stderr, "error during pread %d\n", ret);
+ goto fail;
+ }
+ BUG_ON(ret != sectorsize);
+ ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
+ if (ret < 0) {
+ fprintf(stderr, "error during pwrite %d\n", ret);
+ goto fail;
+ }
+ BUG_ON(ret != sectorsize);
+ ret = fsync(fd);
+ if (ret) {
+ fprintf(stderr, "error during fsync %d\n", ret);
+ goto fail;
+ }
+
+ close(fd);
+ free(buf);
+ extent_io_tree_cleanup(&io_tree);
+ printf("rollback complete.\n");
+ return 0;
+
+fail:
+ if (fd != -1)
+ close(fd);
+ free(buf);
+ fprintf(stderr, "rollback aborted.\n");
+ return -1;
+}
+
+/* Write the btrfs-convert usage summary and option list to stdout. */
+static void print_usage(void)
+{
+	static const char * const usage_lines[] = {
+		"usage: btrfs-convert [options] device\n",
+		"options:\n",
+		"\t-d|--no-datasum disable data checksum, sets NODATASUM\n",
+		"\t-i|--no-xattr ignore xattrs and ACLs\n",
+		"\t-n|--no-inline disable inlining of small files to metadata\n",
+		"\t-N|--nodesize SIZE set filesystem metadata nodesize\n",
+		"\t-r|--rollback roll back to the original filesystem\n",
+		"\t-l|--label LABEL set filesystem label\n",
+		"\t-L|--copy-label use label from converted filesystem\n",
+		"\t-p|--progress show converting progress (default)\n",
+		"\t-O|--features LIST comma separated list of filesystem features\n",
+		"\t--no-progress show only overview, not the detailed progress\n",
+	};
+	size_t line;
+
+	/* none of the lines contains a conversion, so fputs() prints them verbatim */
+	for (line = 0; line < sizeof(usage_lines) / sizeof(usage_lines[0]); line++)
+		fputs(usage_lines[line], stdout);
+}
+
+/*
+ * Entry point of btrfs-convert.
+ *
+ * Parses the command line, refuses to work on a mounted device and then runs
+ * either do_rollback() (-r) or do_convert(). Exactly one non-option argument,
+ * the device, is required.
+ *
+ * Returns 0 on success and 1 on any error. --help prints the usage text and
+ * returns 0; an unknown option prints it and returns 1.
+ */
+int main(int argc, char *argv[])
+{
+	int ret;
+	int packing = 1;
+	int noxattr = 0;
+	int datacsum = 1;
+	u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
+			BTRFS_MKFS_DEFAULT_NODE_SIZE);
+	int rollback = 0;
+	int copylabel = 0;
+	int usage_error = 0;
+	int progress = 1;
+	char *file;
+	char fslabel[BTRFS_LABEL_SIZE];
+	u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
+
+	while(1) {
+		enum { GETOPT_VAL_NO_PROGRESS = 256 };
+		static const struct option long_options[] = {
+			{ "no-progress", no_argument, NULL,
+				GETOPT_VAL_NO_PROGRESS },
+			{ "no-datasum", no_argument, NULL, 'd' },
+			{ "no-inline", no_argument, NULL, 'n' },
+			{ "no-xattr", no_argument, NULL, 'i' },
+			{ "rollback", no_argument, NULL, 'r' },
+			{ "features", required_argument, NULL, 'O' },
+			{ "progress", no_argument, NULL, 'p' },
+			{ "label", required_argument, NULL, 'l' },
+			{ "copy-label", no_argument, NULL, 'L' },
+			{ "nodesize", required_argument, NULL, 'N' },
+			{ "help", no_argument, NULL, GETOPT_VAL_HELP},
+			{ NULL, 0, NULL, 0 }
+		};
+		int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
+
+		if (c < 0)
+			break;
+		switch(c) {
+		case 'd':
+			datacsum = 0;
+			break;
+		case 'i':
+			noxattr = 1;
+			break;
+		case 'n':
+			packing = 0;
+			break;
+		case 'N':
+			nodesize = parse_size(optarg);
+			break;
+		case 'r':
+			rollback = 1;
+			break;
+		case 'l':
+			copylabel = -1;
+			if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
+				fprintf(stderr,
+				"WARNING: label too long, trimmed to %d bytes\n",
+					BTRFS_LABEL_SIZE - 1);
+			}
+			/* bounded copy plus explicit terminator: fslabel is always a valid string */
+			strncpy(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
+			fslabel[BTRFS_LABEL_SIZE - 1] = 0;
+			break;
+		case 'L':
+			copylabel = 1;
+			break;
+		case 'p':
+			progress = 1;
+			break;
+		case 'O': {
+			/* the parser gets a private copy of the argument */
+			char *orig = strdup(optarg);
+			char *tmp = orig;
+
+			/* strdup() returns NULL when out of memory; do not parse that */
+			if (!orig) {
+				fprintf(stderr, "ERROR: not enough memory\n");
+				exit(1);
+			}
+
+			tmp = btrfs_parse_fs_features(tmp, &features);
+			if (tmp) {
+				fprintf(stderr,
+					"Unrecognized filesystem feature '%s'\n",
+						tmp);
+				free(orig);
+				exit(1);
+			}
+			free(orig);
+			if (features & BTRFS_FEATURE_LIST_ALL) {
+				btrfs_list_all_fs_features(
+					~BTRFS_CONVERT_ALLOWED_FEATURES);
+				exit(0);
+			}
+			if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
+				char buf[64];
+
+				btrfs_parse_features_to_string(buf,
+					features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
+				fprintf(stderr,
+					"ERROR: features not allowed for convert: %s\n",
+					buf);
+				exit(1);
+			}
+
+			break;
+			}
+		case GETOPT_VAL_NO_PROGRESS:
+			progress = 0;
+			break;
+		case GETOPT_VAL_HELP:
+		default:
+			print_usage();
+			/* 0 for an explicit --help, 1 for an unrecognized option */
+			return c != GETOPT_VAL_HELP;
+		}
+	}
+	argc = argc - optind;
+	set_argv0(argv);
+	if (check_argc_exact(argc, 1)) {
+		print_usage();
+		return 1;
+	}
+
+	if (rollback && (!datacsum || noxattr || !packing)) {
+		fprintf(stderr,
+			"Usage error: -d, -i, -n options do not apply to rollback\n");
+		usage_error++;
+	}
+
+	if (usage_error) {
+		print_usage();
+		return 1;
+	}
+
+	file = argv[optind];
+	ret = check_mounted(file);
+	if (ret < 0) {
+		fprintf(stderr, "Could not check mount status: %s\n",
+			strerror(-ret));
+		return 1;
+	} else if (ret) {
+		fprintf(stderr, "%s is mounted\n", file);
+		return 1;
+	}
+
+	if (rollback) {
+		ret = do_rollback(file);
+	} else {
+		/* fslabel is only read by do_convert() when copylabel is -1 */
+		ret = do_convert(file, datacsum, packing, noxattr, nodesize,
+				copylabel, fslabel, progress, features);
+	}
+	if (ret)
+		return 1;
+	return 0;
+}
diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c
new file mode 100644
index 00000000..be5cd7ea
--- /dev/null
+++ b/btrfs-corrupt-block.c
@@ -0,0 +1,1310 @@
+/*
+ * Copyright (C) 2009 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <limits.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "volumes.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "utils.h"
+
+/* Maximum number of bytes strncmp() compares when matching a field name. */
+#define FIELD_BUF_LEN 80
+
+/*
+ * Overwrite a tree block with zeroes on disk.
+ *
+ * For every mirror of the block at logical address @bytenr the mapping is
+ * printed. When @copy is 0 every mirror is zeroed, otherwise only the mirror
+ * whose number equals @copy. The loop ends once the mirror number would
+ * exceed the number of copies reported by btrfs_num_copies().
+ *
+ * Returns the extent buffer used for the writes, or NULL if it could not be
+ * obtained.
+ */
+static struct extent_buffer *debug_corrupt_block(struct btrfs_root *root,
+		u64 bytenr, u32 blocksize, u64 copy)
+{
+	struct btrfs_multi_bio *bio = NULL;
+	struct extent_buffer *eb;
+	u64 map_len;
+	int mirror;
+
+	eb = btrfs_find_create_tree_block(root->fs_info, bytenr, blocksize);
+	if (!eb)
+		return NULL;
+
+	map_len = blocksize;
+	for (mirror = 1; ; mirror++) {
+		struct btrfs_device *dev;
+		int copies;
+		int ret;
+
+		ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+				      eb->start, &map_len, &bio,
+				      mirror, NULL);
+		BUG_ON(ret);
+
+		/* point the buffer at the device and offset of this mirror */
+		dev = bio->stripes[0].dev;
+		eb->fd = dev->fd;
+		dev->total_ios++;
+		eb->dev_bytenr = bio->stripes[0].physical;
+
+		printf("mirror %d logical %llu physical %llu device %s\n",
+		       mirror, (unsigned long long)bytenr,
+		       (unsigned long long)eb->dev_bytenr, dev->name);
+		kfree(bio);
+
+		if (!copy || mirror == copy) {
+			/* the read result is not checked; the data is overwritten anyway */
+			read_extent_from_disk(eb, 0, eb->len);
+			printf("corrupting %llu copy %d\n", eb->start,
+			       mirror);
+			memset(eb->data, 0, eb->len);
+			write_extent_to_disk(eb);
+			fsync(eb->fd);
+		}
+
+		copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+					  eb->start, eb->len);
+		if (copies == 1 || mirror + 1 > copies)
+			break;
+	}
+	return eb;
+}
+
+/* Write the btrfs-corrupt-block option list to stderr and exit with @ret. */
+static void print_usage(int ret)
+{
+	static const char * const usage_lines[] = {
+		"usage: btrfs-corrupt-block [options] device\n",
+		"\t-l Logical extent to be corrupted\n",
+		"\t-c Copy of the extent to be corrupted"
+		" (usually 1 or 2, default: 0)\n",
+		"\t-b Number of bytes to be corrupted\n",
+		"\t-e Extent to be corrupted\n",
+		"\t-E The whole extent tree to be corrupted\n",
+		"\t-u Given chunk item to be corrupted\n",
+		"\t-U The whole chunk tree to be corrupted\n",
+		"\t-i The inode item to corrupt (must also specify "
+		"the field to corrupt)\n",
+		"\t-x The file extent item to corrupt (must also "
+		"specify -i for the inode and -f for the field to corrupt)\n",
+		"\t-m The metadata block to corrupt (must also "
+		"specify -f for the field to corrupt)\n",
+		"\t-K The key to corrupt in the format "
+		"<num>,<num>,<num> (must also specify -f for the field)\n",
+		"\t-f The field in the item to corrupt\n",
+		"\t-I An item to corrupt (must also specify the field "
+		"to corrupt and a root+key for the item)\n",
+		"\t-D Corrupt a dir item, must specify key and field\n",
+		"\t-d Delete this item (must specify -K)\n",
+		"\t-r Operate on this root (only works with -d)\n",
+		"\t-C Delete a csum for the specified bytenr. When "
+		"used with -b it'll delete that many bytes, otherwise it's "
+		"just sectorsize\n",
+	};
+	size_t line;
+
+	/* none of the lines contains a conversion, so fputs() prints them verbatim */
+	for (line = 0; line < sizeof(usage_lines) / sizeof(usage_lines[0]); line++)
+		fputs(usage_lines[line], stderr);
+	exit(ret);
+}
+
+/*
+ * Copy the key of one randomly chosen slot of @eb over the key of another
+ * randomly chosen slot.
+ *
+ * Nothing happens when the block is empty or both random picks are the same
+ * slot. The buffer is marked dirty; without a transaction (@trans == NULL)
+ * the block is additionally checksummed and written to disk right away.
+ */
+static void corrupt_keys(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 struct extent_buffer *eb)
+{
+	struct btrfs_disk_key donor_key;
+	int nritems = btrfs_header_nritems(eb);
+	int target;
+	int donor;
+	u16 csum_size;
+
+	if (nritems == 0)
+		return;
+
+	target = rand() % nritems;
+	donor = rand() % nritems;
+	if (target == donor)
+		return;
+
+	fprintf(stderr,
+		"corrupting keys in block %llu slot %d swapping with %d\n",
+		(unsigned long long)eb->start, target, donor);
+
+	/* level 0 blocks hold item keys, higher levels hold node keys */
+	if (btrfs_header_level(eb) != 0) {
+		btrfs_node_key(eb, &donor_key, donor);
+		btrfs_set_node_key(eb, &donor_key, target);
+	} else {
+		btrfs_item_key(eb, &donor_key, donor);
+		btrfs_set_item_key(eb, &donor_key, target);
+	}
+	btrfs_mark_buffer_dirty(eb);
+
+	if (trans)
+		return;
+
+	csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+	csum_tree_block_size(eb, csum_size, 0);
+	write_extent_to_disk(eb);
+}
+
+
+/*
+ * Read the tree block at @bytenr and corrupt its keys outside a transaction.
+ *
+ * Returns 0 on success or -EIO when the block is not up to date after the
+ * read.
+ */
+static int corrupt_keys_in_block(struct btrfs_root *root, u64 bytenr)
+{
+	struct extent_buffer *block;
+	int ret = -EIO;
+
+	block = read_tree_block(root, bytenr, root->leafsize, 0);
+	if (extent_buffer_uptodate(block)) {
+		corrupt_keys(NULL, root, block);
+		free_extent_buffer(block);
+		ret = 0;
+	}
+	return ret;
+}
+
+/*
+ * Delete or zero the extent tree items that belong to @bytenr.
+ *
+ * One random decision per call selects the mode: when rand() % 3 is non-zero
+ * matching items are deleted, otherwise their payload is overwritten with
+ * zeroes. The extent tree is searched backwards from the highest possible
+ * key for @bytenr.
+ *
+ * Returns -ENOMEM when no path can be allocated, 0 otherwise.
+ *
+ * NOTE(review): @copy is not referenced in the body, and a negative result
+ * from btrfs_search_slot() only ends the loop -- the function still
+ * returns 0. Confirm both are intended.
+ */
+static int corrupt_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr, u64 copy)
+{
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ u32 item_size;
+ unsigned long ptr;
+ struct btrfs_path *path;
+ int ret;
+ int slot;
+ int should_del = rand() % 3;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /* start from the largest key that can exist for this objectid */
+ key.objectid = bytenr;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ while(1) {
+ ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+ &key, path, -1, 1);
+ if (ret < 0)
+ break;
+
+ /* no exact match: step back to the item before the returned slot */
+ if (ret > 0) {
+ if (path->slots[0] == 0)
+ break;
+ path->slots[0]--;
+ ret = 0;
+ }
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != bytenr)
+ break;
+
+ /* only extent items and the listed back reference types are touched */
+ if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+ key.type != BTRFS_EXTENT_DATA_REF_KEY &&
+ key.type != BTRFS_EXTENT_REF_V0_KEY &&
+ key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
+ key.type != BTRFS_SHARED_DATA_REF_KEY)
+ goto next;
+
+ if (should_del) {
+ fprintf(stderr,
+ "deleting extent record: key %llu %u %llu\n",
+ key.objectid, key.type, key.offset);
+
+ if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+ /* make sure this extent doesn't get
+ * reused for other purposes */
+ btrfs_pin_extent(root->fs_info,
+ key.objectid, key.offset);
+ }
+
+ btrfs_del_item(trans, root, path);
+ } else {
+ fprintf(stderr,
+ "corrupting extent record: key %llu %u %llu\n",
+ key.objectid, key.type, key.offset);
+ ptr = btrfs_item_ptr_offset(leaf, slot);
+ item_size = btrfs_item_size_nr(leaf, slot);
+ memset_extent_buffer(leaf, 0, ptr, item_size);
+ btrfs_mark_buffer_dirty(leaf);
+ }
+next:
+ btrfs_release_path(path);
+
+ /* continue below the key just handled; stop once the offset reaches 0 */
+ if (key.offset > 0)
+ key.offset--;
+ if (key.offset == 0)
+ break;
+ }
+
+ btrfs_free_path(path);
+ return 0;
+}
+
+/*
+ * Pick a random item of the extent tree leaf @eb and corrupt the extent
+ * records that share its objectid via corrupt_extent().
+ *
+ * A leaf without items is left alone.
+ */
+static void btrfs_corrupt_extent_leaf(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct extent_buffer *eb)
+{
+	u32 nr = btrfs_header_nritems(eb);
+	u32 victim;
+	u64 objectid;
+	struct btrfs_key key;
+
+	/* rand() % 0 is undefined behavior, and there is no item to pick */
+	if (nr == 0)
+		return;
+
+	victim = rand() % nr;
+	btrfs_item_key_to_cpu(eb, &key, victim);
+	objectid = key.objectid;
+	corrupt_extent(trans, root, objectid, 1);
+}
+
+/*
+ * Recursively walk the tree below @eb and corrupt extent records in the
+ * leaves that are reached.
+ *
+ * Level 1 nodes other than the root node are descended into only when
+ * rand() % 5 is 0. Children that cannot be read are skipped.
+ */
+static void btrfs_corrupt_extent_tree(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct extent_buffer *eb)
+{
+	int idx = 0;
+
+	if (!eb)
+		return;
+
+	if (btrfs_is_leaf(eb)) {
+		btrfs_corrupt_extent_leaf(trans, root, eb);
+		return;
+	}
+
+	/* rand() is only consulted for non-root level 1 nodes */
+	if (btrfs_header_level(eb) == 1 && eb != root->node && rand() % 5)
+		return;
+
+	/* the item count is re-read on every pass, as the walk may modify the tree */
+	while (idx < btrfs_header_nritems(eb)) {
+		struct extent_buffer *child;
+
+		child = read_tree_block(root, btrfs_node_blockptr(eb, idx),
+					root->leafsize,
+					btrfs_node_ptr_generation(eb, idx));
+		if (extent_buffer_uptodate(child)) {
+			btrfs_corrupt_extent_tree(trans, root, child);
+			free_extent_buffer(child);
+		}
+		idx++;
+	}
+}
+
+/* Inode item fields selectable by name; see convert_inode_field(). */
+enum btrfs_inode_field {
+ BTRFS_INODE_FIELD_ISIZE,
+ BTRFS_INODE_FIELD_NBYTES,
+ /* returned when the field name is not recognized */
+ BTRFS_INODE_FIELD_BAD,
+};
+
+/* File extent item fields selectable by name; see convert_file_extent_field(). */
+enum btrfs_file_extent_field {
+ BTRFS_FILE_EXTENT_DISK_BYTENR,
+ /* returned when the field name is not recognized */
+ BTRFS_FILE_EXTENT_BAD,
+};
+
+/* Dir item fields selectable by name; see convert_dir_item_field(). */
+enum btrfs_dir_item_field {
+ BTRFS_DIR_ITEM_NAME,
+ BTRFS_DIR_ITEM_LOCATION_OBJECTID,
+ /* returned when the field name is not recognized */
+ BTRFS_DIR_ITEM_BAD,
+};
+
+/* Metadata block fields selectable by name; see convert_metadata_block_field(). */
+enum btrfs_metadata_block_field {
+ BTRFS_METADATA_BLOCK_GENERATION,
+ BTRFS_METADATA_BLOCK_SHIFT_ITEMS,
+ /* returned when the field name is not recognized */
+ BTRFS_METADATA_BLOCK_BAD,
+};
+
+/* Item fields selectable by name; see convert_item_field(). */
+enum btrfs_item_field {
+ BTRFS_ITEM_OFFSET,
+ /* returned when the field name is not recognized */
+ BTRFS_ITEM_BAD,
+};
+
+/* Key members selectable by name; see convert_key_field(). */
+enum btrfs_key_field {
+ BTRFS_KEY_OBJECTID,
+ BTRFS_KEY_TYPE,
+ BTRFS_KEY_OFFSET,
+ /* returned when the field name is not recognized */
+ BTRFS_KEY_BAD,
+};
+
+/*
+ * Map the inode field name @field ("isize" or "nbytes") to its enum value;
+ * any other name yields BTRFS_INODE_FIELD_BAD.
+ */
+static enum btrfs_inode_field convert_inode_field(char *field)
+{
+	static const struct {
+		const char *name;
+		enum btrfs_inode_field id;
+	} known[] = {
+		{ "isize", BTRFS_INODE_FIELD_ISIZE },
+		{ "nbytes", BTRFS_INODE_FIELD_NBYTES },
+	};
+	size_t n;
+
+	for (n = 0; n < sizeof(known) / sizeof(known[0]); n++) {
+		if (strncmp(field, known[n].name, FIELD_BUF_LEN) == 0)
+			return known[n].id;
+	}
+	return BTRFS_INODE_FIELD_BAD;
+}
+
+/*
+ * Map the file extent field name @field ("disk_bytenr") to its enum value;
+ * any other name yields BTRFS_FILE_EXTENT_BAD.
+ */
+static enum btrfs_file_extent_field convert_file_extent_field(char *field)
+{
+	enum btrfs_file_extent_field id = BTRFS_FILE_EXTENT_BAD;
+
+	if (strncmp(field, "disk_bytenr", FIELD_BUF_LEN) == 0)
+		id = BTRFS_FILE_EXTENT_DISK_BYTENR;
+	return id;
+}
+
+/*
+ * Map the metadata block field name @field ("generation" or "shift_items")
+ * to its enum value; any other name yields BTRFS_METADATA_BLOCK_BAD.
+ */
+static enum btrfs_metadata_block_field
+convert_metadata_block_field(char *field)
+{
+	static const struct {
+		const char *name;
+		enum btrfs_metadata_block_field id;
+	} known[] = {
+		{ "generation", BTRFS_METADATA_BLOCK_GENERATION },
+		{ "shift_items", BTRFS_METADATA_BLOCK_SHIFT_ITEMS },
+	};
+	size_t n;
+
+	for (n = 0; n < sizeof(known) / sizeof(known[0]); n++) {
+		if (strncmp(field, known[n].name, FIELD_BUF_LEN) == 0)
+			return known[n].id;
+	}
+	return BTRFS_METADATA_BLOCK_BAD;
+}
+
+/*
+ * Map the key member name @field ("objectid", "type" or "offset") to its
+ * enum value; any other name yields BTRFS_KEY_BAD.
+ */
+static enum btrfs_key_field convert_key_field(char *field)
+{
+	static const struct {
+		const char *name;
+		enum btrfs_key_field id;
+	} known[] = {
+		{ "objectid", BTRFS_KEY_OBJECTID },
+		{ "type", BTRFS_KEY_TYPE },
+		{ "offset", BTRFS_KEY_OFFSET },
+	};
+	size_t n;
+
+	for (n = 0; n < sizeof(known) / sizeof(known[0]); n++) {
+		if (strncmp(field, known[n].name, FIELD_BUF_LEN) == 0)
+			return known[n].id;
+	}
+	return BTRFS_KEY_BAD;
+}
+
+/*
+ * Map the item field name @field ("offset") to its enum value; any other
+ * name yields BTRFS_ITEM_BAD.
+ */
+static enum btrfs_item_field convert_item_field(char *field)
+{
+	enum btrfs_item_field id = BTRFS_ITEM_BAD;
+
+	if (strncmp(field, "offset", FIELD_BUF_LEN) == 0)
+		id = BTRFS_ITEM_OFFSET;
+	return id;
+}
+
+/*
+ * Map the dir item field name @field ("name" or "location_objectid") to its
+ * enum value; any other name yields BTRFS_DIR_ITEM_BAD.
+ */
+static enum btrfs_dir_item_field convert_dir_item_field(char *field)
+{
+	static const struct {
+		const char *name;
+		enum btrfs_dir_item_field id;
+	} known[] = {
+		{ "name", BTRFS_DIR_ITEM_NAME },
+		{ "location_objectid", BTRFS_DIR_ITEM_LOCATION_OBJECTID },
+	};
+	size_t n;
+
+	for (n = 0; n < sizeof(known) / sizeof(known[0]); n++) {
+		if (strncmp(field, known[n].name, FIELD_BUF_LEN) == 0)
+			return known[n].id;
+	}
+	return BTRFS_DIR_ITEM_BAD;
+}
+
+/* Return a value drawn from rand() that differs from @orig. */
+static u64 generate_u64(u64 orig)
+{
+	u64 candidate = rand();
+
+	/* redraw until the value is different from the original */
+	while (candidate == orig)
+		candidate = rand();
+	return candidate;
+}
+
+/* Return a value drawn from rand() that differs from @orig. */
+static u32 generate_u32(u32 orig)
+{
+	u32 candidate = rand();
+
+	/* redraw until the value is different from the original */
+	while (candidate == orig)
+		candidate = rand();
+	return candidate;
+}
+
+/*
+ * Return a value drawn from rand(), converted to u8, that differs from
+ * @orig.
+ */
+static u8 generate_u8(u8 orig)
+{
+	u8 candidate = rand();
+
+	/* redraw until the converted value is different from the original */
+	while (candidate == orig)
+		candidate = rand();
+	return candidate;
+}
+
+/*
+ * Replace one member of an existing item key with a different random value.
+ *
+ * @root:	any root of the filesystem; the fs tree (fs_info->fs_root) is
+ *		the tree that is actually modified.
+ * @key:	key of the item to corrupt; it must exist. The selected
+ *		member of *key is overwritten with the new value.
+ * @field:	"objectid", "type" or "offset".
+ *
+ * The new key is installed with btrfs_set_item_key_unsafe(). The transaction
+ * is committed on every path that started one, including the error paths.
+ *
+ * Returns 0 on success, -EINVAL for an unknown field, -ENOMEM when no path
+ * can be allocated, -ENOENT when the key is not found, or the negative value
+ * from btrfs_start_transaction() / btrfs_search_slot().
+ */
+static int corrupt_key(struct btrfs_root *root, struct btrfs_key *key,
+		       char *field)
+{
+	enum btrfs_key_field corrupt_field = convert_key_field(field);
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	root = root->fs_info->fs_root;
+	if (corrupt_field == BTRFS_KEY_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		fprintf(stderr, "Couldn't find the key to corrupt\n");
+		ret = -ENOENT;
+		goto out;
+	}
+
+	switch (corrupt_field) {
+	case BTRFS_KEY_OBJECTID:
+		key->objectid = generate_u64(key->objectid);
+		break;
+	case BTRFS_KEY_TYPE:
+		key->type = generate_u8(key->type);
+		break;
+	case BTRFS_KEY_OFFSET:
+		/*
+		 * The new offset must differ from the old offset; comparing
+		 * against the objectid could hand back the unchanged offset.
+		 */
+		key->offset = generate_u64(key->offset);
+		break;
+	default:
+		fprintf(stderr, "Invalid field %s, %d\n", field,
+			corrupt_field);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	btrfs_set_item_key_unsafe(root, path, key);
+out:
+	btrfs_free_path(path);
+	btrfs_commit_transaction(trans, root);
+	return ret;
+}
+
+/*
+ * Corrupt one field of the dir item stored at @key in @root.
+ *
+ * @root:  tree that is searched and modified
+ * @key:   exact key of the dir item
+ * @field: "name" (the first byte of the name is incremented) or
+ *         "location_objectid" (the objectid of the location key is replaced
+ *         by a different value)
+ *
+ * The transaction started here is committed on every path that reaches the
+ * search, including the error paths.
+ *
+ * Returns 0 on success, -EINVAL for an unknown field or an unusable name
+ * length, -ENOENT when the key is not present, or another negative errno.
+ */
+static int corrupt_dir_item(struct btrfs_root *root, struct btrfs_key *key,
+			    char *field)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_dir_item *di;
+	struct btrfs_path *path;
+	char name[PATH_MAX];
+	struct btrfs_key location;
+	struct btrfs_disk_key disk_key;
+	unsigned long name_ptr;
+	enum btrfs_dir_item_field corrupt_field =
+		convert_dir_item_field(field);
+	u64 bogus;
+	u16 name_len;
+	int ret;
+
+	if (corrupt_field == BTRFS_DIR_ITEM_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
+		fprintf(stderr, "Error searching for dir item %d\n", ret);
+		goto out;
+	}
+
+	di = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_dir_item);
+
+	switch (corrupt_field) {
+	case BTRFS_DIR_ITEM_NAME:
+		name_len = btrfs_dir_name_len(path->nodes[0], di);
+		/*
+		 * The length comes from disk: refuse values that would
+		 * overflow the local buffer, and an empty name, which has no
+		 * first byte to modify.
+		 */
+		if (name_len == 0 || name_len > sizeof(name)) {
+			fprintf(stderr, "Invalid dir item name length %u\n",
+				(unsigned int)name_len);
+			ret = -EINVAL;
+			break;
+		}
+		/* The name bytes are read from directly behind the item. */
+		name_ptr = (unsigned long)(di + 1);
+		read_extent_buffer(path->nodes[0], name, name_ptr, name_len);
+		name[0]++;
+		write_extent_buffer(path->nodes[0], name, name_ptr, name_len);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+		break;
+	case BTRFS_DIR_ITEM_LOCATION_OBJECTID:
+		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
+		bogus = generate_u64(location.objectid);
+		location.objectid = bogus;
+		btrfs_cpu_key_to_disk(&disk_key, &location);
+		btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+out:
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Overwrite one field of an inode item with a value that differs from the
+ * stored one.
+ *
+ * The lookup key is (inode, INODE_ITEM, -1). When that exact key is absent
+ * the item in the preceding slot is used, provided its objectid equals
+ * @inode. This function neither starts nor commits @trans.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported field, -ENOENT when no
+ * item for @inode is found, -ENOMEM or a negative search error otherwise.
+ */
+static int corrupt_inode(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root, u64 inode, char *field)
+{
+	enum btrfs_inode_field which = convert_inode_field(field);
+	struct btrfs_inode_item *item;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret;
+
+	if (which == BTRFS_INODE_FIELD_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = inode;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		/* No exact match: fall back to the previous slot, if any. */
+		if (path->slots[0] == 0)
+			goto not_found;
+		path->slots[0]--;
+		ret = 0;
+	}
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	if (key.objectid != inode)
+		goto not_found;
+
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
+	if (which == BTRFS_INODE_FIELD_ISIZE)
+		btrfs_set_inode_size(leaf, item,
+				generate_u64(btrfs_inode_size(leaf, item)));
+	else if (which == BTRFS_INODE_FIELD_NBYTES)
+		btrfs_set_inode_nbytes(leaf, item,
+				generate_u64(btrfs_inode_nbytes(leaf, item)));
+	else
+		ret = -EINVAL;
+	btrfs_mark_buffer_dirty(leaf);
+	goto out;
+
+not_found:
+	fprintf(stderr, "Couldn't find inode %Lu\n", inode);
+	ret = -ENOENT;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Overwrite one field of the file extent item keyed
+ * (inode, EXTENT_DATA, extent) with a value that differs from the stored
+ * one. Only the disk bytenr field is handled by the switch below. This
+ * function neither starts nor commits @trans.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported field, -ENOENT when the
+ * exact key is absent, -ENOMEM or a negative search error otherwise.
+ */
+static int corrupt_file_extent(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root, u64 inode, u64 extent,
+			       char *field)
+{
+	enum btrfs_file_extent_field which = convert_file_extent_field(field);
+	struct btrfs_file_extent_item *item;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret = 0;
+
+	if (which == BTRFS_FILE_EXTENT_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = inode;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = extent;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret > 0) {
+		fprintf(stderr, "Couldn't find extent %llu for inode %llu\n",
+			extent, inode);
+		ret = -ENOENT;
+	}
+	if (ret < 0)
+		goto out;
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0],
+			      struct btrfs_file_extent_item);
+	if (which == BTRFS_FILE_EXTENT_DISK_BYTENR)
+		btrfs_set_file_extent_disk_bytenr(leaf, item,
+			generate_u64(btrfs_file_extent_disk_bytenr(leaf, item)));
+	else
+		ret = -EINVAL;
+	btrfs_mark_buffer_dirty(leaf);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+/*
+ * Deliberately damage the layout of leaf @eb: the data of the items from
+ * slot nritems / 2 up to the last one is moved by half of the leaf's free
+ * space towards lower offsets, and the offsets recorded for those items are
+ * lowered by the same amount.
+ *
+ * NOTE(review): with fewer than two items, "slot - 1" (and for an empty
+ * leaf "nritems - 1") is negative - confirm callers never pass such a leaf.
+ */
+static void shift_items(struct btrfs_root *root, struct extent_buffer *eb)
+{
+ int nritems = btrfs_header_nritems(eb);
+ int shift_space = btrfs_leaf_free_space(root, eb) / 2; /* distance to move */
+ int slot = nritems / 2; /* first item whose data is moved */
+ int i = 0;
+ unsigned int data_end = btrfs_item_offset_nr(eb, nritems - 1); /* offset of the last item's data */
+
+ /*
+  * Shift the item data up to and including slot back by shift space:
+  * the bytes between data_end and the offset of item slot - 1 are
+  * copied to a destination shift_space bytes lower.
+  */
+ memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end - shift_space,
+ btrfs_leaf_data(eb) + data_end,
+ btrfs_item_offset_nr(eb, slot - 1) - data_end);
+
+ /* Now update the item pointers. */
+ for (i = nritems - 1; i >= slot; i--) {
+ u32 offset = btrfs_item_offset_nr(eb, i);
+ offset -= shift_space;
+ btrfs_set_item_offset(eb, btrfs_item_nr(i), offset);
+ }
+}
+
+/*
+ * Corrupt a field of the tree block at logical address @block.
+ *
+ * The block is first read directly to learn its owner root, its level and
+ * its first key. The owner root is then searched down to that level with
+ * that key, and the block found by the search is the one that gets modified.
+ *
+ * @root:  any root of the filesystem, used for the initial read and to
+ *         reach fs_info
+ * @block: logical address of the metadata block
+ * @field: field name, translated by convert_metadata_block_field()
+ *
+ * Returns 0 on success, -EINVAL for an unknown field or an unreadable
+ * block, or a negative errno from the lookup helpers.
+ */
+static int corrupt_metadata_block(struct btrfs_root *root, u64 block,
+				  char *field)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct extent_buffer *eb;
+	struct btrfs_key key, root_key;
+	enum btrfs_metadata_block_field corrupt_field;
+	u64 root_objectid;
+	u64 orig, bogus;
+	u8 level;
+	int ret;
+
+	corrupt_field = convert_metadata_block_field(field);
+	if (corrupt_field == BTRFS_METADATA_BLOCK_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	eb = read_tree_block(root, block, root->leafsize, 0);
+	if (!extent_buffer_uptodate(eb)) {
+		/* Report the block that failed, not the field name. */
+		fprintf(stderr, "Couldn't read in tree block %llu\n",
+			(unsigned long long)block);
+		return -EINVAL;
+	}
+	root_objectid = btrfs_header_owner(eb);
+	level = btrfs_header_level(eb);
+	if (level)
+		btrfs_node_key_to_cpu(eb, &key, 0);
+	else
+		btrfs_item_key_to_cpu(eb, &key, 0);
+	free_extent_buffer(eb);
+
+	root_key.objectid = root_objectid;
+	root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root_key.offset = (u64)-1;
+
+	root = btrfs_read_fs_root(root->fs_info, &root_key);
+	if (IS_ERR(root)) {
+		/* The root that was looked up is root_objectid. */
+		fprintf(stderr, "Couldn't find owner root %llu\n",
+			(unsigned long long)root_objectid);
+		return PTR_ERR(root);
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		fprintf(stderr, "Couldn't start transaction %ld\n",
+			PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	/* Stop the search at the level the target block lives on. */
+	path->lowest_level = level;
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching to node %d\n", ret);
+		goto out;
+	}
+	eb = path->nodes[level];
+
+	ret = 0;
+	switch (corrupt_field) {
+	case BTRFS_METADATA_BLOCK_GENERATION:
+		orig = btrfs_header_generation(eb);
+		bogus = generate_u64(orig);
+		btrfs_set_header_generation(eb, bogus);
+		break;
+	case BTRFS_METADATA_BLOCK_SHIFT_ITEMS:
+		shift_items(root, path->nodes[level]);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	btrfs_mark_buffer_dirty(path->nodes[level]);
+out:
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Corrupt a field of the item header for the item stored at @key in @root.
+ * Only the item's data offset is handled by the switch below; it is
+ * replaced by a different 32-bit value.
+ *
+ * The transaction started here is committed on every path that reaches the
+ * search, including the error paths.
+ *
+ * Returns 0 on success, -EINVAL for an unknown field, -ENOENT when the key
+ * is not present (as delete_item() and corrupt_dir_item() do), -ENOMEM or
+ * another negative errno otherwise.
+ */
+static int corrupt_btrfs_item(struct btrfs_root *root, struct btrfs_key *key,
+			      char *field)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	enum btrfs_item_field corrupt_field;
+	u32 orig, bogus;
+	int ret;
+
+	corrupt_field = convert_item_field(field);
+	if (corrupt_field == BTRFS_ITEM_BAD) {
+		fprintf(stderr, "Invalid field %s\n", field);
+		return -EINVAL;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		fprintf(stderr, "Couldn't start transaction %ld\n",
+			PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+	if (ret != 0) {
+		/* A positive result means "not found", not success. */
+		if (ret > 0)
+			ret = -ENOENT;
+		fprintf(stderr, "Error searching to node %d\n", ret);
+		goto out;
+	}
+
+	switch (corrupt_field) {
+	case BTRFS_ITEM_OFFSET:
+		orig = btrfs_item_offset_nr(path->nodes[0], path->slots[0]);
+		bogus = generate_u32(orig);
+		btrfs_set_item_offset(path->nodes[0],
+				      btrfs_item_nr(path->slots[0]), bogus);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+out:
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Delete the item stored at exactly @key from @root inside a transaction
+ * of its own, which is committed whether or not the item was found.
+ *
+ * Returns the result of btrfs_del_item(), -ENOENT when the key is not
+ * present, -ENOMEM or another negative errno otherwise.
+ */
+static int delete_item(struct btrfs_root *root, struct btrfs_key *key)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path = btrfs_alloc_path();
+	int ret;
+
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		fprintf(stderr, "Couldn't start transaction %ld\n",
+			PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, key, path, -1, 1);
+	if (ret == 0) {
+		ret = btrfs_del_item(trans, root, path);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+	} else {
+		if (ret > 0)
+			ret = -ENOENT;
+		fprintf(stderr, "Error searching to node %d\n", ret);
+	}
+
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Remove the checksums covering [bytenr, bytenr + bytes) from the csum
+ * root of the filesystem @root belongs to. The transaction is committed
+ * even when the deletion fails.
+ *
+ * Returns the result of btrfs_del_csums(), or the error from starting the
+ * transaction.
+ */
+static int delete_csum(struct btrfs_root *root, u64 bytenr, u64 bytes)
+{
+	struct btrfs_trans_handle *trans;
+	int err;
+
+	root = root->fs_info->csum_root;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		fprintf(stderr, "Couldn't start transaction %ld\n",
+			PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	err = btrfs_del_csums(trans, root, bytenr, bytes);
+	if (err != 0)
+		fprintf(stderr, "Error deleting csums %d\n", err);
+
+	btrfs_commit_transaction(trans, root);
+	return err;
+}
+
+/*
+ * Corrupt or delete the item @path points at, without COW.
+ *
+ * Chunk recovery works by scanning the whole partition. If the item were
+ * modified with COW, recovery would find the old copy and simply restore
+ * it, which is still OK, but we want to test the ability to rebuild chunks,
+ * not only to restore old ones.
+ *
+ * @path: must point at a valid slot of a leaf that was searched without COW
+ * @del:  non-zero to delete the item, zero to overwrite its data with
+ *        zeroes; deletion is downgraded to zeroing for slot 0
+ *
+ * Returns 0 on success or the error returned by btrfs_del_item().
+ */
+static int corrupt_item_nocow(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, struct btrfs_path *path,
+			      int del)
+{
+	int ret = 0;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	unsigned long ptr;
+	int slot;
+	u32 item_size;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	/* Not deleting the first item of a leaf to keep leaf structure */
+	if (slot == 0)
+		del = 0;
+	/* Only accept valid eb */
+	BUG_ON(!leaf->data || slot >= btrfs_header_nritems(leaf));
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	if (del) {
+		fprintf(stdout, "Deleting key and data [%llu, %u, %llu].\n",
+			key.objectid, key.type, key.offset);
+		/* Report a failed deletion instead of always returning 0. */
+		ret = btrfs_del_item(trans, root, path);
+	} else {
+		fprintf(stdout, "Corrupting key and data [%llu, %u, %llu].\n",
+			key.objectid, key.type, key.offset);
+		ptr = btrfs_item_ptr_offset(leaf, slot);
+		item_size = btrfs_item_size_nr(leaf, slot);
+		memset_extent_buffer(leaf, 0, ptr, item_size);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+	return ret;
+}
+/*
+ * One pass over the chunk tree: starting from the highest possible key,
+ * walk backwards over every item of the given @type and corrupt or delete
+ * it; each item is deleted when rand() % 3 is non-zero. Slot 0 is never
+ * deleted because corrupt_item_nocow() downgrades that to corruption.
+ *
+ * Here, cow and ins_len must equal 0 for the following reasons:
+ * 1) chunk recover is based on disk scanning, so COW should be
+ *    disabled in case the original chunk being scanned and
+ *    recovered using the old chunk.
+ * 2) if cow = 0, ins_len must also be set to 0, or BUG_ON will be
+ *    triggered.
+ *
+ * Returns 0 on success (also when no item matched) or a non-zero error.
+ */
+static int corrupt_chunk_tree_pass(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct btrfs_key *key, int type)
+{
+	int ret;
+
+	ret = btrfs_search_slot(trans, root, key, path, 0, 0);
+	BUG_ON(ret == 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching tree\n");
+		return ret;
+	}
+
+	while (!btrfs_previous_item(root, path, 0, type)) {
+		ret = corrupt_item_nocow(trans, root, path, rand() % 3);
+		if (ret)
+			return ret;
+	}
+	/* The positive "not found" search result is not an error. */
+	return 0;
+}
+
+/*
+ * Corrupt or delete all DEV_ITEM items and then all CHUNK_ITEM items of the
+ * chunk tree @root, without COW. @trans is owned by the caller.
+ *
+ * Returns 0 on success, -ENOMEM or another non-zero error otherwise.
+ */
+static int corrupt_chunk_tree(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = (u64)-1;
+	key.offset = (u64)-1;
+	key.type = (u8)-1;
+
+	/* corrupt/del dev_item first */
+	ret = corrupt_chunk_tree_pass(trans, root, path, &key,
+				      BTRFS_DEV_ITEM_KEY);
+	if (ret)
+		goto free_out;
+	btrfs_release_path(path);
+
+	/* corrupt/del chunk then */
+	ret = corrupt_chunk_tree_pass(trans, root, path, &key,
+				      BTRFS_CHUNK_ITEM_KEY);
+free_out:
+	btrfs_free_path(path);
+	return ret;
+}
+/*
+ * Position @path on the chunk item whose key offset is @offset.
+ *
+ * Returns 0 when the item exists, a positive value when it does not and a
+ * negative value when the search itself failed; a message is printed in
+ * both failure cases.
+ */
+static int find_chunk_offset(struct btrfs_root *root,
+			     struct btrfs_path *path, u64 offset)
+{
+	struct btrfs_key chunk_key;
+	int ret;
+
+	chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
+	chunk_key.offset = offset;
+
+	/*
+	 * Both cow and ins_len are 0 on purpose:
+	 * 1) chunk recover is based on disk scanning, so COW should be
+	 *    disabled in case the original chunk being scanned and
+	 *    recovered using the old chunk.
+	 * 2) with cow = 0, a non-zero ins_len would trigger a BUG_ON.
+	 */
+	ret = btrfs_search_slot(NULL, root, &chunk_key, path, 0, 0);
+	if (ret > 0)
+		fprintf(stderr, "Can't find chunk with given offset %llu\n",
+			offset);
+	else if (ret < 0)
+		fprintf(stderr, "Error searching chunk\n");
+
+	return ret;
+}
+/*
+ * btrfs-corrupt-block entry point.
+ *
+ * Parses the options, opens the filesystem on the first positional argument
+ * with OPEN_CTREE_WRITES and performs the first requested corruption, tested
+ * in this order: extent record, extent tree, chunk record, chunk tree,
+ * inode / file extent, metadata block, dir item, csum deletion, item, item
+ * deletion, key. With none of these selected, the range
+ * [logical, logical + bytes), rounded up to whole sectors, is corrupted
+ * sector by sector.
+ *
+ * Returns the result of the selected operation.
+ */
+int main(int ac, char **av)
+{
+	struct cache_tree root_cache;
+	struct btrfs_key key;
+	struct btrfs_root *root;
+	struct extent_buffer *eb;
+	char *dev;
+	/* chunk offset can be 0,so change to (u64)-1 */
+	u64 logical = (u64)-1;
+	int ret = 0;
+	u64 copy = 0;
+	u64 bytes = 4096;
+	int extent_rec = 0;
+	int extent_tree = 0;
+	int corrupt_block_keys = 0;
+	int chunk_rec = 0;
+	int chunk_tree = 0;
+	int corrupt_item = 0;
+	int corrupt_di = 0;
+	int delete = 0;
+	u64 metadata_block = 0;
+	u64 inode = 0;
+	u64 file_extent = (u64)-1;
+	u64 root_objectid = 0;
+	u64 csum_bytenr = 0;
+	unsigned int key_type = 0;
+	char field[FIELD_BUF_LEN];
+
+	field[0] = '\0';
+	/* Fixed seed: the generated bogus values are reproducible. */
+	srand(128);
+	memset(&key, 0, sizeof(key));
+
+	while(1) {
+		int c;
+		static const struct option long_options[] = {
+			/* { "byte-count", 1, NULL, 'b' }, */
+			{ "logical", required_argument, NULL, 'l' },
+			{ "copy", required_argument, NULL, 'c' },
+			{ "bytes", required_argument, NULL, 'b' },
+			{ "extent-record", no_argument, NULL, 'e' },
+			{ "extent-tree", no_argument, NULL, 'E' },
+			{ "keys", no_argument, NULL, 'k' },
+			{ "chunk-record", no_argument, NULL, 'u' },
+			{ "chunk-tree", no_argument, NULL, 'U' },
+			{ "inode", required_argument, NULL, 'i'},
+			{ "file-extent", required_argument, NULL, 'x'},
+			{ "metadata-block", required_argument, NULL, 'm'},
+			{ "field", required_argument, NULL, 'f'},
+			{ "key", required_argument, NULL, 'K'},
+			{ "item", no_argument, NULL, 'I'},
+			{ "dir-item", no_argument, NULL, 'D'},
+			{ "delete", no_argument, NULL, 'd'},
+			/* Must take an argument like the short "r:" does. */
+			{ "root", required_argument, NULL, 'r'},
+			{ "csum", required_argument, NULL, 'C'},
+			{ "help", no_argument, NULL, GETOPT_VAL_HELP},
+			{ NULL, 0, NULL, 0 }
+		};
+
+		c = getopt_long(ac, av, "l:c:b:eEkuUi:f:x:m:K:IDdr:C:",
+				long_options, NULL);
+		if (c < 0)
+			break;
+		switch(c) {
+		case 'l':
+			logical = arg_strtou64(optarg);
+			break;
+		case 'c':
+			copy = arg_strtou64(optarg);
+			break;
+		case 'b':
+			bytes = arg_strtou64(optarg);
+			break;
+		case 'e':
+			extent_rec = 1;
+			break;
+		case 'E':
+			extent_tree = 1;
+			break;
+		case 'k':
+			corrupt_block_keys = 1;
+			break;
+		case 'u':
+			chunk_rec = 1;
+			break;
+		case 'U':
+			chunk_tree = 1;
+			break;
+		case 'i':
+			inode = arg_strtou64(optarg);
+			break;
+		case 'f':
+			/* snprintf always NUL-terminates, strncpy may not. */
+			snprintf(field, sizeof(field), "%s", optarg);
+			break;
+		case 'x':
+			file_extent = arg_strtou64(optarg);
+			break;
+		case 'm':
+			metadata_block = arg_strtou64(optarg);
+			break;
+		case 'K':
+			/*
+			 * Scan the type into a real unsigned int: writing
+			 * through a cast pointer to key.type would store
+			 * more bytes than that member holds.
+			 */
+			ret = sscanf(optarg, "%llu,%u,%llu",
+				     &key.objectid,
+				     &key_type,
+				     &key.offset);
+			if (ret != 3) {
+				fprintf(stderr, "error reading key "
+					"%d\n", errno);
+				print_usage(1);
+			}
+			key.type = key_type;
+			/* Do not leak the sscanf() count as exit status. */
+			ret = 0;
+			break;
+		case 'D':
+			corrupt_di = 1;
+			break;
+		case 'I':
+			corrupt_item = 1;
+			break;
+		case 'd':
+			delete = 1;
+			break;
+		case 'r':
+			root_objectid = arg_strtou64(optarg);
+			break;
+		case 'C':
+			csum_bytenr = arg_strtou64(optarg);
+			break;
+		case GETOPT_VAL_HELP:
+		default:
+			print_usage(c != GETOPT_VAL_HELP);
+		}
+	}
+	set_argv0(av);
+	ac = ac - optind;
+	if (check_argc_min(ac, 1))
+		print_usage(1);
+	dev = av[optind];
+
+	radix_tree_init();
+	cache_tree_init(&root_cache);
+
+	root = open_ctree(dev, 0, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	if (extent_rec) {
+		struct btrfs_trans_handle *trans;
+
+		if (logical == (u64)-1)
+			print_usage(1);
+		trans = btrfs_start_transaction(root, 1);
+		ret = corrupt_extent (trans, root, logical, 0);
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (extent_tree) {
+		struct btrfs_trans_handle *trans;
+		trans = btrfs_start_transaction(root, 1);
+		btrfs_corrupt_extent_tree(trans, root->fs_info->extent_root,
+					  root->fs_info->extent_root->node);
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (chunk_rec) {
+		struct btrfs_trans_handle *trans;
+		struct btrfs_path *path;
+		int del;
+
+		if (logical == (u64)-1)
+			print_usage(1);
+		del = rand() % 3;
+		path = btrfs_alloc_path();
+		if (!path) {
+			fprintf(stderr, "path allocation failed\n");
+			ret = -ENOMEM;
+			goto out_close;
+		}
+
+		ret = find_chunk_offset(root->fs_info->chunk_root, path,
+					logical);
+		if (ret != 0) {
+			btrfs_free_path(path);
+			goto out_close;
+		}
+		trans = btrfs_start_transaction(root, 1);
+		ret = corrupt_item_nocow(trans, root->fs_info->chunk_root,
+					 path, del);
+		if (ret < 0)
+			fprintf(stderr, "Failed to corrupt chunk record\n");
+		btrfs_commit_transaction(trans, root);
+		/* The path was leaked on this route before. */
+		btrfs_free_path(path);
+		goto out_close;
+	}
+	if (chunk_tree) {
+		struct btrfs_trans_handle *trans;
+		trans = btrfs_start_transaction(root, 1);
+		ret = corrupt_chunk_tree(trans, root->fs_info->chunk_root);
+		if (ret < 0)
+			fprintf(stderr, "Failed to corrupt chunk tree\n");
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (inode) {
+		struct btrfs_trans_handle *trans;
+
+		if (*field == 0)
+			print_usage(1);
+
+		trans = btrfs_start_transaction(root, 1);
+		if (file_extent == (u64)-1) {
+			printf("corrupting inode\n");
+			ret = corrupt_inode(trans, root, inode, field);
+		} else {
+			printf("corrupting file extent\n");
+			ret = corrupt_file_extent(trans, root, inode,
+						  file_extent, field);
+		}
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (metadata_block) {
+		if (*field == 0)
+			print_usage(1);
+		ret = corrupt_metadata_block(root, metadata_block, field);
+		goto out_close;
+	}
+	if (corrupt_di) {
+		if (!key.objectid || *field == 0)
+			print_usage(1);
+		ret = corrupt_dir_item(root, &key, field);
+		goto out_close;
+	}
+	if (csum_bytenr) {
+		ret = delete_csum(root, csum_bytenr, bytes);
+		goto out_close;
+	}
+	if (corrupt_item) {
+		if (!key.objectid)
+			print_usage(1);
+		ret = corrupt_btrfs_item(root, &key, field);
+		/*
+		 * Stop here like every other mode; falling through would
+		 * also run corrupt_key() on the same key and field.
+		 */
+		goto out_close;
+	}
+	if (delete) {
+		struct btrfs_root *target = root;
+
+		if (!key.objectid)
+			print_usage(1);
+		if (root_objectid) {
+			struct btrfs_key root_key;
+
+			root_key.objectid = root_objectid;
+			root_key.type = BTRFS_ROOT_ITEM_KEY;
+			root_key.offset = (u64)-1;
+
+			target = btrfs_read_fs_root(root->fs_info, &root_key);
+			if (IS_ERR(target)) {
+				fprintf(stderr, "Couldn't find root %llu\n",
+					(unsigned long long)root_objectid);
+				print_usage(1);
+			}
+		}
+		ret = delete_item(target, &key);
+		goto out_close;
+	}
+	if (key.objectid || key.offset || key.type) {
+		if (*field == 0)
+			print_usage(1);
+		ret = corrupt_key(root, &key, field);
+		goto out_close;
+	}
+	/*
+	 * If we made it here and we have extent set then we didn't specify
+	 * inode and we're screwed.
+	 */
+	if (file_extent != (u64)-1)
+		print_usage(1);
+
+	if (logical == (u64)-1)
+		print_usage(1);
+
+	if (bytes == 0)
+		bytes = root->sectorsize;
+
+	/* Round the length up to a whole number of sectors. */
+	bytes = (bytes + root->sectorsize - 1) / root->sectorsize;
+	bytes *= root->sectorsize;
+
+	while (bytes > 0) {
+		if (corrupt_block_keys) {
+			corrupt_keys_in_block(root, logical);
+		} else {
+			eb = debug_corrupt_block(root, logical,
+						 root->sectorsize, copy);
+			free_extent_buffer(eb);
+		}
+		logical += root->sectorsize;
+		bytes -= root->sectorsize;
+	}
+	/*
+	 * NOTE(review): this path returns without close_ctree(); confirm
+	 * that skipping it is intentional.
+	 */
+	return ret;
+out_close:
+	close_ctree(root);
+	return ret;
+}
diff --git a/btrfs-crc.c b/btrfs-crc.c
new file mode 100644
index 00000000..723e0b7a
--- /dev/null
+++ b/btrfs-crc.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "crc32c.h"
+#include "utils.h"
+
+/* Print the command synopsis on stdout and terminate with exit status 1. */
+void usage(void)
+{
+	static const char *const help[] = {
+		"usage: btrfs-crc filename\n",
+		" print out the btrfs crc for \"filename\"\n",
+		"usage: btrfs-crc filename -c crc [-s seed] [-l length]\n",
+		" brute force search for file names with the given crc\n",
+		" -s seed the random seed (default: random)\n",
+		" -l length the length of the file names (default: 10)\n",
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
+		printf("%s", help[i]);
+	exit(1);
+}
+
+/*
+ * btrfs-crc entry point.
+ *
+ * Without -c, prints crc32c(~1, ...) of the file name given as positional
+ * argument. With -c, generates random names of -l characters (printable,
+ * codes 33..126) from the -s seed forever and prints every name whose crc
+ * equals the requested value.
+ *
+ * Returns 0 after printing a single crc, 255 on invalid usage and -ENOMEM
+ * when the name buffer cannot be allocated.
+ */
+int main(int argc, char **argv)
+{
+	int c;
+	unsigned long checksum = 0;
+	char *str;
+	char *buf;
+	char *end;
+	int length = 10;
+	int seed = getpid() ^ getppid();
+	int loop = 0;
+	int i;
+
+	while ((c = getopt(argc, argv, "l:c:s:h")) != -1) {
+		switch (c) {
+		case 'l':
+			length = atol(optarg);
+			/* Zero or negative would reach malloc() below. */
+			if (length <= 0) {
+				fprintf(stderr, "invalid length: %s\n",
+					optarg);
+				return 255;
+			}
+			break;
+		case 'c':
+			/*
+			 * Base 0 keeps the decimal/octal/hex detection that
+			 * "%li" offered, but parses into the right type and
+			 * lets malformed input be rejected.
+			 */
+			checksum = strtoul(optarg, &end, 0);
+			if (end == optarg || *end != '\0') {
+				fprintf(stderr, "invalid crc: %s\n", optarg);
+				return 255;
+			}
+			loop = 1;
+			break;
+		case 's':
+			seed = atol(optarg);
+			break;
+		case 'h':
+			usage();
+			/* usage() calls exit(), so control never gets here */
+		case '?':
+			return 255;
+		}
+	}
+
+	set_argv0(argv);
+
+	if (!loop) {
+		if (check_argc_min(argc - optind, 1))
+			return 255;
+
+		/* Only read the positional argument once it is known to exist. */
+		str = argv[optind];
+		printf("%12u - %s\n", crc32c(~1, str, strlen(str)), str);
+		return 0;
+	}
+
+	buf = malloc(length);
+	if (!buf)
+		return -ENOMEM;
+	srand(seed);
+
+	/* Brute-force search; runs until the process is interrupted. */
+	while (1) {
+		for (i = 0; i < length; i++)
+			buf[i] = rand() % 94 + 33;
+		if (crc32c(~1, buf, length) == checksum)
+			printf("%12lu - %.*s\n", checksum, length, buf);
+	}
+
+	return 0;
+}
diff --git a/btrfs-debug-tree.c b/btrfs-debug-tree.c
new file mode 100644
index 00000000..266176f3
--- /dev/null
+++ b/btrfs-debug-tree.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "volumes.h"
+#include "utils.h"
+
+/*
+ * Print the btrfs-debug-tree help text and version string on stderr and
+ * exit with status @ret. Never returns, despite the int return type.
+ */
+static int print_usage(int ret)
+{
+	fprintf(stderr, "usage: btrfs-debug-tree [-e] [-d] [-r] [-R] [-u]\n");
+	/* The synopsis now lists -t, which is described further down. */
+	fprintf(stderr, " [-b block_num ] [-t tree_id] device\n");
+	fprintf(stderr, "\t-e : print detailed extents info\n");
+	fprintf(stderr, "\t-d : print info of btrfs device and root tree dirs"
+		" only\n");
+	fprintf(stderr, "\t-r : print info of roots only\n");
+	fprintf(stderr, "\t-R : print info of roots and root backups\n");
+	fprintf(stderr, "\t-u : print info of uuid tree only\n");
+	fprintf(stderr, "\t-b block_num : print info of the specified block"
+		" only\n");
+	fprintf(stderr,
+		"\t-t tree_id : print only the tree with the given id\n");
+	fprintf(stderr, "%s\n", PACKAGE_STRING);
+	exit(ret);
+}
+
+/*
+ * Recursively print every leaf below @eb. A leaf is printed with
+ * btrfs_print_leaf(); for a node, each child block is read, checked to be
+ * exactly one level lower (BUG() otherwise) and visited in turn. Children
+ * that cannot be read are skipped. A NULL @eb is ignored.
+ */
+static void print_extents(struct btrfs_root *root, struct extent_buffer *eb)
+{
+	struct extent_buffer *child;
+	u32 child_size;
+	u32 nritems;
+	int idx;
+
+	if (!eb)
+		return;
+
+	if (btrfs_is_leaf(eb)) {
+		btrfs_print_leaf(root, eb);
+		return;
+	}
+
+	child_size = btrfs_level_size(root, btrfs_header_level(eb) - 1);
+	nritems = btrfs_header_nritems(eb);
+	for (idx = 0; idx < nritems; idx++) {
+		child = read_tree_block(root, btrfs_node_blockptr(eb, idx),
+					child_size,
+					btrfs_node_ptr_generation(eb, idx));
+		if (!extent_buffer_uptodate(child))
+			continue;
+
+		/* A leaf may only hang below a level 1 node. */
+		if (btrfs_header_level(eb) != 1 && btrfs_is_leaf(child))
+			BUG();
+		if (btrfs_header_level(eb) - 1 != btrfs_header_level(child))
+			BUG();
+
+		print_extents(root, child);
+		free_extent_buffer(child);
+	}
+}
+
+/*
+ * Print every backup root slot stored in the superblock @super: generation
+ * and block number of the tree, extent, chunk, device, csum and fs roots,
+ * followed by the used bytes, total bytes and device count of the backup.
+ */
+static void print_old_roots(struct btrfs_super_block *super)
+{
+	int slot;
+
+	for (slot = 0; slot < BTRFS_NUM_BACKUP_ROOTS; slot++) {
+		struct btrfs_root_backup *b = &super->super_roots[slot];
+		unsigned long long gen;
+		unsigned long long bytenr;
+
+		printf("btrfs root backup slot %d\n", slot);
+
+		gen = btrfs_backup_tree_root_gen(b);
+		bytenr = btrfs_backup_tree_root(b);
+		printf("\ttree root gen %llu block %llu\n", gen, bytenr);
+
+		gen = btrfs_backup_extent_root_gen(b);
+		bytenr = btrfs_backup_extent_root(b);
+		printf("\t\textent root gen %llu block %llu\n", gen, bytenr);
+
+		gen = btrfs_backup_chunk_root_gen(b);
+		bytenr = btrfs_backup_chunk_root(b);
+		printf("\t\tchunk root gen %llu block %llu\n", gen, bytenr);
+
+		gen = btrfs_backup_dev_root_gen(b);
+		bytenr = btrfs_backup_dev_root(b);
+		printf("\t\tdevice root gen %llu block %llu\n", gen, bytenr);
+
+		gen = btrfs_backup_csum_root_gen(b);
+		bytenr = btrfs_backup_csum_root(b);
+		printf("\t\tcsum root gen %llu block %llu\n", gen, bytenr);
+
+		gen = btrfs_backup_fs_root_gen(b);
+		bytenr = btrfs_backup_fs_root(b);
+		printf("\t\tfs root gen %llu block %llu\n", gen, bytenr);
+
+		printf("\t\t%llu used %llu total %llu devices\n",
+		       (unsigned long long)btrfs_backup_bytes_used(b),
+		       (unsigned long long)btrfs_backup_total_bytes(b),
+		       (unsigned long long)btrfs_backup_num_devices(b));
+	}
+}
+/*
+ * btrfs-debug-tree entry point: parse the options, open the filesystem
+ * given as the single positional argument and print either one block (-b),
+ * or the root and chunk trees followed by every tree found through
+ * ROOT_ITEM keys in the tree root (and in the log root tree, if present),
+ * subject to the -e/-d/-r/-R/-u/-t filters. Ends with the superblock
+ * totals, fsid and version unless -e, -d or -u was given.
+ *
+ * Returns the result of close_ctree(); exits with 1 when the argument is
+ * not a block device or regular file, or cannot be opened.
+ */
+int main(int ac, char **av)
+{
+ struct btrfs_root *root;
+ struct btrfs_fs_info *info;
+ struct btrfs_path path;
+ struct btrfs_key key;
+ struct btrfs_root_item ri;
+ struct extent_buffer *leaf;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_key found_key;
+ char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+ int ret;
+ int slot;
+ int extent_only = 0;
+ int device_only = 0;
+ int uuid_tree_only = 0;
+ int roots_only = 0;
+ int root_backups = 0;
+ u64 block_only = 0;
+ struct btrfs_root *tree_root_scan; /* tree currently scanned for ROOT_ITEMs */
+ u64 tree_id = 0;
+
+ radix_tree_init();
+ /* Parse the options; unknown ones and --help print the usage and exit. */
+ while(1) {
+ int c;
+ static const struct option long_options[] = {
+ { "help", no_argument, NULL, GETOPT_VAL_HELP},
+ { NULL, 0, NULL, 0 }
+ };
+
+ c = getopt_long(ac, av, "deb:rRut:", long_options, NULL);
+ if (c < 0)
+ break;
+ switch(c) {
+ case 'e':
+ extent_only = 1;
+ break;
+ case 'd':
+ device_only = 1;
+ break;
+ case 'r':
+ roots_only = 1;
+ break;
+ case 'u':
+ uuid_tree_only = 1;
+ break;
+ case 'R':
+ roots_only = 1;
+ root_backups = 1;
+ break;
+ case 'b':
+ block_only = arg_strtou64(optarg);
+ break;
+ case 't':
+ tree_id = arg_strtou64(optarg);
+ break;
+ case GETOPT_VAL_HELP:
+ default:
+ print_usage(c != GETOPT_VAL_HELP);
+ }
+ }
+ set_argv0(av);
+ ac = ac - optind;
+ if (check_argc_exact(ac, 1))
+ print_usage(1);
+ /* Only block devices and regular files are accepted as the target. */
+ ret = check_arg_type(av[optind]);
+ if (ret != BTRFS_ARG_BLKDEV && ret != BTRFS_ARG_REG) {
+ fprintf(stderr, "'%s' is not a block device or regular file\n",
+ av[optind]);
+ exit(1);
+ }
+
+ info = open_ctree_fs_info(av[optind], 0, 0, OPEN_CTREE_PARTIAL);
+ if (!info) {
+ fprintf(stderr, "unable to open %s\n", av[optind]);
+ exit(1);
+ }
+
+ root = info->fs_root;
+ if (!root) {
+ fprintf(stderr, "unable to open %s\n", av[optind]);
+ exit(1);
+ }
+ /* -b: print just the requested block, then clean up and return. */
+ if (block_only) {
+ leaf = read_tree_block(root,
+ block_only,
+ root->leafsize, 0);
+ /* A non-leaf read with leafsize is dropped and re-read with nodesize. */
+ if (extent_buffer_uptodate(leaf) &&
+ btrfs_header_level(leaf) != 0) {
+ free_extent_buffer(leaf);
+ leaf = NULL;
+ }
+
+ if (!leaf) {
+ leaf = read_tree_block(root,
+ block_only,
+ root->nodesize, 0);
+ }
+ if (!extent_buffer_uptodate(leaf)) {
+ fprintf(stderr, "failed to read %llu\n",
+ (unsigned long long)block_only);
+ goto close_root;
+ }
+ btrfs_print_tree(root, leaf, 0);
+ free_extent_buffer(leaf);
+ goto close_root;
+ }
+ /* Without -e, -u or -t, start with the root tree and the chunk tree. */
+ if (!(extent_only || uuid_tree_only || tree_id)) {
+ if (roots_only) {
+ printf("root tree: %llu level %d\n",
+ (unsigned long long)info->tree_root->node->start,
+ btrfs_header_level(info->tree_root->node));
+ printf("chunk tree: %llu level %d\n",
+ (unsigned long long)info->chunk_root->node->start,
+ btrfs_header_level(info->chunk_root->node));
+ } else {
+ if (info->tree_root->node) {
+ printf("root tree\n");
+ btrfs_print_tree(info->tree_root,
+ info->tree_root->node, 1);
+ }
+
+ if (info->chunk_root->node) {
+ printf("chunk tree\n");
+ btrfs_print_tree(info->chunk_root,
+ info->chunk_root->node, 1);
+ }
+ }
+ }
+ tree_root_scan = info->tree_root;
+
+ btrfs_init_path(&path);
+again:
+ if (!extent_buffer_uptodate(tree_root_scan->node))
+ goto no_node;
+
+ /*
+ * Tree's that are not pointed by the tree of tree roots
+ */
+ if (tree_id && tree_id == BTRFS_ROOT_TREE_OBJECTID) {
+ if (!info->tree_root->node) {
+ error("cannot print root tree, invalid pointer");
+ goto no_node;
+ }
+ printf("root tree\n");
+ btrfs_print_tree(info->tree_root, info->tree_root->node, 1);
+ goto no_node;
+ }
+
+ if (tree_id && tree_id == BTRFS_CHUNK_TREE_OBJECTID) {
+ if (!info->chunk_root->node) {
+ error("cannot print chunk tree, invalid pointer");
+ goto no_node;
+ }
+ printf("chunk tree\n");
+ btrfs_print_tree(info->chunk_root, info->chunk_root->node, 1);
+ goto no_node;
+ }
+ /* Walk the items of tree_root_scan and handle every ROOT_ITEM found. */
+ key.offset = 0;
+ key.objectid = 0;
+ btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+ ret = btrfs_search_slot(NULL, tree_root_scan, &key, &path, 0, 0);
+ BUG_ON(ret < 0);
+ while(1) {
+ leaf = path.nodes[0];
+ slot = path.slots[0];
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(tree_root_scan, &path);
+ if (ret != 0)
+ break;
+ leaf = path.nodes[0];
+ slot = path.slots[0];
+ }
+ btrfs_item_key(leaf, &disk_key, path.slots[0]);
+ btrfs_disk_key_to_cpu(&found_key, &disk_key);
+ if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
+ unsigned long offset;
+ struct extent_buffer *buf;
+ int skip = extent_only | device_only | uuid_tree_only; /* non-zero: do not print this tree */
+
+ offset = btrfs_item_ptr_offset(leaf, slot);
+ read_extent_buffer(leaf, &ri, offset, sizeof(ri));
+ buf = read_tree_block(tree_root_scan,
+ btrfs_root_bytenr(&ri),
+ btrfs_level_size(tree_root_scan,
+ btrfs_root_level(&ri)),
+ 0);
+ if (!extent_buffer_uptodate(buf))
+ goto next;
+ if (tree_id && found_key.objectid != tree_id) {
+ free_extent_buffer(buf);
+ goto next;
+ }
+ /* Print the tree's label; some well-known trees clear the skip filter. */
+ switch(found_key.objectid) {
+ case BTRFS_ROOT_TREE_OBJECTID:
+ if (!skip)
+ printf("root");
+ break;
+ case BTRFS_EXTENT_TREE_OBJECTID:
+ if (!device_only && !uuid_tree_only)
+ skip = 0;
+ if (!skip)
+ printf("extent");
+ break;
+ case BTRFS_CHUNK_TREE_OBJECTID:
+ if (!skip) {
+ printf("chunk");
+ }
+ break;
+ case BTRFS_DEV_TREE_OBJECTID:
+ if (!uuid_tree_only)
+ skip = 0;
+ if (!skip)
+ printf("device");
+ break;
+ case BTRFS_FS_TREE_OBJECTID:
+ if (!skip) {
+ printf("fs");
+ }
+ break;
+ case BTRFS_ROOT_TREE_DIR_OBJECTID:
+ skip = 0;
+ printf("directory");
+ break;
+ case BTRFS_CSUM_TREE_OBJECTID:
+ if (!skip) {
+ printf("checksum");
+ }
+ break;
+ case BTRFS_ORPHAN_OBJECTID:
+ if (!skip) {
+ printf("orphan");
+ }
+ break;
+ case BTRFS_TREE_LOG_OBJECTID:
+ if (!skip) {
+ printf("log");
+ }
+ break;
+ case BTRFS_TREE_LOG_FIXUP_OBJECTID:
+ if (!skip) {
+ printf("log fixup");
+ }
+ break;
+ case BTRFS_TREE_RELOC_OBJECTID:
+ if (!skip) {
+ printf("reloc");
+ }
+ break;
+ case BTRFS_DATA_RELOC_TREE_OBJECTID:
+ if (!skip) {
+ printf("data reloc");
+ }
+ break;
+ case BTRFS_EXTENT_CSUM_OBJECTID:
+ if (!skip) {
+ printf("extent checksum");
+ }
+ break;
+ case BTRFS_QUOTA_TREE_OBJECTID:
+ if (!skip) {
+ printf("quota");
+ }
+ break;
+ case BTRFS_UUID_TREE_OBJECTID:
+ if (!extent_only && !device_only)
+ skip = 0;
+ if (!skip)
+ printf("uuid");
+ break;
+ case BTRFS_FREE_SPACE_TREE_OBJECTID:
+ if (!skip)
+ printf("free space");
+ break;
+ case BTRFS_MULTIPLE_OBJECTIDS:
+ if (!skip) {
+ printf("multiple");
+ }
+ break;
+ default:
+ if (!skip) {
+ printf("file");
+ }
+ }
+ if (extent_only && !skip) {
+ print_extents(tree_root_scan, buf);
+ } else if (!skip) {
+ printf(" tree ");
+ btrfs_print_key(&disk_key);
+ if (roots_only) {
+ printf(" %llu level %d\n",
+ (unsigned long long)buf->start,
+ btrfs_header_level(buf));
+ } else {
+ printf(" \n");
+ btrfs_print_tree(tree_root_scan, buf, 1);
+ }
+ }
+ free_extent_buffer(buf);
+ }
+next:
+ path.slots[0]++;
+ }
+no_node:
+ btrfs_release_path(&path);
+ /* After the tree root, repeat the scan once for the log root tree, if any. */
+ if (tree_root_scan == info->tree_root &&
+ info->log_root_tree) {
+ tree_root_scan = info->log_root_tree;
+ goto again;
+ }
+ /* -e, -d and -u suppress the trailing superblock summary. */
+ if (extent_only || device_only || uuid_tree_only)
+ goto close_root;
+
+ if (root_backups)
+ print_old_roots(info->super_copy);
+
+ printf("total bytes %llu\n",
+ (unsigned long long)btrfs_super_total_bytes(info->super_copy));
+ printf("bytes used %llu\n",
+ (unsigned long long)btrfs_super_bytes_used(info->super_copy));
+ uuidbuf[BTRFS_UUID_UNPARSED_SIZE - 1] = '\0';
+ uuid_unparse(info->super_copy->fsid, uuidbuf);
+ printf("uuid %s\n", uuidbuf);
+ printf("%s\n", PACKAGE_STRING);
+close_root:
+ ret = close_ctree(root);
+ btrfs_close_all_devices();
+ return ret;
+}
diff --git a/btrfs-debugfs b/btrfs-debugfs
new file mode 100755
index 00000000..cf1d285c
--- /dev/null
+++ b/btrfs-debugfs
@@ -0,0 +1,296 @@
+#!/usr/bin/env python2
+#
+# Simple python program to print out all the extents of a single file
+# LGPLv2 license
+# Copyright Facebook 2014
+
+import sys,os,struct,fcntl,ctypes,stat
+
+# Largest unsigned 64-bit and 32-bit values. They are used below as the
+# "no upper bound" value when a search key is initialised.
+maxu64 = (1L << 64) - 1
+maxu32 = (1L << 32) - 1
+
+# Item key type numbers. Each constant is preceded by a short note on
+# what items of that type hold.
+#
+# the inode (like from stat)
+BTRFS_INODE_ITEM_KEY = 1
+# backref to the directory
+BTRFS_INODE_REF_KEY = 12
+# backref to the directory v2
+BTRFS_INODE_EXTREF_KEY = 13
+# xattr items
+BTRFS_XATTR_ITEM_KEY = 24
+# orphans for list files
+BTRFS_ORPHAN_ITEM_KEY = 48
+# treelog items for dirs
+BTRFS_DIR_LOG_ITEM_KEY = 60
+BTRFS_DIR_LOG_INDEX_KEY = 72
+# dir items and dir indexes both hold filenames
+BTRFS_DIR_ITEM_KEY = 84
+BTRFS_DIR_INDEX_KEY = 96
+# these are the file extent pointers
+BTRFS_EXTENT_DATA_KEY = 108
+# csums
+BTRFS_EXTENT_CSUM_KEY = 128
+# root item for subvols and snapshots
+BTRFS_ROOT_ITEM_KEY = 132
+# root item backrefs
+BTRFS_ROOT_BACKREF_KEY = 144
+BTRFS_ROOT_REF_KEY = 156
+# each allocated extent has an extent item
+BTRFS_EXTENT_ITEM_KEY = 168
+# optimized extents for metadata only
+BTRFS_METADATA_ITEM_KEY = 169
+# backrefs for extents
+BTRFS_TREE_BLOCK_REF_KEY = 176
+BTRFS_EXTENT_DATA_REF_KEY = 178
+BTRFS_EXTENT_REF_V0_KEY = 180
+BTRFS_SHARED_BLOCK_REF_KEY = 182
+BTRFS_SHARED_DATA_REF_KEY = 184
+# one of these for each block group
+BTRFS_BLOCK_GROUP_ITEM_KEY = 192
+# dev extents records which part of each device is allocated
+BTRFS_DEV_EXTENT_KEY = 204
+# dev items describe devs
+BTRFS_DEV_ITEM_KEY = 216
+# one for each chunk
+BTRFS_CHUNK_ITEM_KEY = 228
+# qgroup info
+BTRFS_QGROUP_STATUS_KEY = 240
+BTRFS_QGROUP_INFO_KEY = 242
+BTRFS_QGROUP_LIMIT_KEY = 244
+BTRFS_QGROUP_RELATION_KEY = 246
+# records balance progress
+BTRFS_BALANCE_ITEM_KEY = 248
+# stats on device errors
+BTRFS_DEV_STATS_KEY = 249
+BTRFS_DEV_REPLACE_KEY = 250
+BTRFS_STRING_ITEM_KEY = 253
+
+# In the kernel sources this is the flattened form of
+# btrfs_ioctl_search_args_v2. It includes both the btrfs_ioctl_search_key
+# and the result buffer. We're using a 64K buffer size.
+#
+args_buffer_size = 65536
+class btrfs_ioctl_search_args(ctypes.Structure):
+ # _pack_ = 1: no padding is inserted between the fields below
+ _pack_ = 1
+ # The min_*/max_* pairs bound the search; nr_items is set by the caller
+ # before the ioctl and read back afterwards as the number of items
+ # returned (see print_file_extents). buf_size is the size of buf in
+ # bytes and buf receives the results.
+ _fields_ = [ ("tree_id", ctypes.c_ulonglong),
+ ("min_objectid", ctypes.c_ulonglong),
+ ("max_objectid", ctypes.c_ulonglong),
+ ("min_offset", ctypes.c_ulonglong),
+ ("max_offset", ctypes.c_ulonglong),
+ ("min_transid", ctypes.c_ulonglong),
+ ("max_transid", ctypes.c_ulonglong),
+ ("min_type", ctypes.c_uint),
+ ("max_type", ctypes.c_uint),
+ ("nr_items", ctypes.c_uint),
+ ("unused", ctypes.c_uint),
+ ("unused1", ctypes.c_ulonglong),
+ ("unused2", ctypes.c_ulonglong),
+ ("unused3", ctypes.c_ulonglong),
+ ("unused4", ctypes.c_ulonglong),
+ ("buf_size", ctypes.c_ulonglong),
+ ("buf", ctypes.c_ubyte * args_buffer_size),
+ ]
+
+# The search ioctl returns one header for each item. In the results
+# buffer every header is followed by 'len' bytes of item data (this is
+# how print_file_extents walks the buffer).
+#
+class btrfs_ioctl_search_header(ctypes.Structure):
+ # _pack_ = 1: no padding is inserted between the fields below
+ _pack_ = 1
+ _fields_ = [ ("transid", ctypes.c_ulonglong),
+ ("objectid", ctypes.c_ulonglong),
+ ("offset", ctypes.c_ulonglong),
+ ("type", ctypes.c_uint),
+ ("len", ctypes.c_uint),
+ ]
+
+# the type field in btrfs_file_extent_item
+BTRFS_FILE_EXTENT_INLINE = 0
+BTRFS_FILE_EXTENT_REG = 1
+BTRFS_FILE_EXTENT_PREALLOC = 2
+
+# Layout of one file extent item as copied out of the search results.
+# Declared little-endian and packed. The fields up to and including
+# 'type' occupy 21 bytes; print_one_extent relies on that figure to
+# compute the length of inline data.
+class btrfs_file_extent_item(ctypes.LittleEndianStructure):
+ _pack_ = 1
+ _fields_ = [ ("generation", ctypes.c_ulonglong),
+ ("ram_bytes", ctypes.c_ulonglong),
+ ("compression", ctypes.c_ubyte),
+ ("encryption", ctypes.c_ubyte),
+ ("other_encoding", ctypes.c_ubyte * 2),
+ ("type", ctypes.c_ubyte),
+ ("disk_bytenr", ctypes.c_ulonglong),
+ ("disk_num_bytes", ctypes.c_ulonglong),
+ ("offset", ctypes.c_ulonglong),
+ ("num_bytes", ctypes.c_ulonglong),
+ ]
+
+class btrfs_ioctl_search():
+ def __init__(self):
+ self.args = btrfs_ioctl_search_args()
+ self.args.tree_id = 0
+ self.args.min_objectid = 0
+ self.args.max_objectid = maxu64
+ self.args.min_offset = 0
+ self.args.max_offset = maxu64
+ self.args.min_transid = 0
+ self.args.max_transid = maxu64
+ self.args.min_type = 0
+ self.args.max_type = maxu32
+ self.args.nr_items = 0
+ self.args.buf_size = args_buffer_size
+
+ # magic encoded for x86_64 this is the v2 search ioctl
+ self.ioctl_num = 3228603409L
+
+ # the results of the search get stored into args.buf
+ def search(self, fd, nritems=65536):
+ self.args.nr_items = nritems
+ fcntl.ioctl(fd, self.ioctl_num, self.args, 1)
+
+# this moves the search key forward by one. If the end result is
+# still a valid search key (all mins less than all maxes), we return
+# True. Otherwise False
+#
+def advance_search(search):
+ if search.args.min_offset < maxu64:
+ search.args.min_offset += 1
+ elif search.args.min_type < 255:
+ search.args.min_type += 1
+ elif search.args.min_objectid < maxu64:
+ search.args.min_objectid += 1
+ else:
+ return False
+
+ if search.args.min_offset > search.args.max_offset:
+ return False
+ if search.args.min_type > search.args.max_type:
+ return False
+ if search.args.min_objectid > search.args.max_objectid:
+ return False
+
+ return True
+
+# given one search_header and one file_item, print the details. This
+# also tosses the [disk_bytenr,disk_num_bytes] into extent_hash to record
+# which extents were used by this file
+#
+def print_one_extent(header, fi, extent_hash):
+ # we're ignoring inline items for now
+ if fi.type == BTRFS_FILE_EXTENT_INLINE:
+ # header.len is the length of the item returned. We subtract
+ # the part of the file item header that is actually used (21 bytes)
+ # and we get the length of the inlined data.
+ # this may or may not be compressed
+ inline_len = header.len - 21
+ if fi.compression:
+ ram_bytes = fi.ram_bytes
+ else:
+ ram_bytes = inline_len
+ print "(%Lu %Lu): ram %Lu disk 0 disk_size %Lu -- inline" % \
+ (header.objectid, header.offset, ram_bytes, inline_len)
+ extent_hash[-1] = inline_len
+ return
+
+ if fi.disk_bytenr == 0:
+ tag = " -- hole"
+ else:
+ tag = ""
+ print "(%Lu %Lu): ram %Lu disk %Lu disk_size %Lu%s" % (header.objectid,
+ header.offset, fi.num_bytes, fi.disk_bytenr, fi.disk_num_bytes, tag)
+
+ if fi.disk_bytenr:
+ extent_hash[fi.disk_bytenr] = fi.disk_num_bytes
+
+# open 'filename' and run the search ioctl against it, printing all the extents
+# we find
+def print_file_extents(filename):
+ extent_hash = {}
+
+ s = btrfs_ioctl_search()
+ s.args.min_type = BTRFS_EXTENT_DATA_KEY
+ s.args.max_type = BTRFS_EXTENT_DATA_KEY
+
+ try:
+ fd = os.open(filename, os.O_RDONLY)
+ st = os.fstat(fd)
+ except Exception, e:
+ sys.stderr.write("Failed to open %s (%s)\n" % (filename, e))
+ return -1
+
+ if not stat.S_ISREG(st.st_mode):
+ sys.stderr.write("%s not a regular file\n" % filename)
+ return 0
+
+ s.args.min_objectid = st.st_ino
+ s.args.max_objectid = st.st_ino
+
+ size = st.st_size
+
+ while True:
+ try:
+ s.search(fd)
+ except Exception, e:
+ sys.stderr.write("Search ioctl failed for %s (%s)\n" % (filename, e))
+ return -1
+
+ if s.args.nr_items == 0:
+ break
+
+ # p is the results buffer from the kernel
+ p = ctypes.addressof(s.args.buf)
+ header = btrfs_ioctl_search_header()
+ header_size = ctypes.sizeof(header)
+ h = ctypes.addressof(header)
+ p_left = args_buffer_size
+
+ for x in xrange(0, s.args.nr_items):
+ # for each item, copy the header from the buffer into
+ # our header struct.
+ ctypes.memmove(h, p, header_size)
+ p += header_size
+ p_left -= header_size
+
+ # this would be a kernel bug it shouldn't be sending malformed
+ # items
+ if p_left <= 0:
+ break
+
+ if header.type == BTRFS_EXTENT_DATA_KEY:
+ fi = btrfs_file_extent_item()
+
+ # this would also be a kernel bug
+ if p_left < ctypes.sizeof(fi):
+ break
+
+ # Copy the file item out of the results buffer
+ ctypes.memmove(ctypes.addressof(fi), p, ctypes.sizeof(fi))
+ print_one_extent(header, fi, extent_hash)
+
+ p += header.len
+ p_left -= header.len
+ if p_left <= 0:
+ break
+
+ s.args.min_offset = header.offset
+
+ if not advance_search(s):
+ break
+
+ total_on_disk = 0
+ total_extents = 0
+ for x in extent_hash.itervalues():
+ total_on_disk += x
+ total_extents += 1
+
+ # don't divide by zero
+ if total_on_disk == 0:
+ total_on_disk = 1
+
+ print "file: %s extents %Lu disk size %Lu logical size %Lu ratio %.2f" % \
+ (filename, total_extents, total_on_disk, st.st_size,
+ float(st.st_size) / float(total_on_disk))
+ return 0
+
+if len(sys.argv) == 1:
+ sys.stderr.write("Usage: btrfs-debug filename ...\n")
+ sys.exit(1)
+
+for f in sys.argv[1:]:
+ print_file_extents(f)
diff --git a/btrfs-find-root.c b/btrfs-find-root.c
new file mode 100644
index 00000000..2d5bbb2a
--- /dev/null
+++ b/btrfs-find-root.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2011 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <zlib.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "volumes.h"
+#include "utils.h"
+#include "crc32c.h"
+#include "extent-cache.h"
+#include "find-root.h"
+
+/* Print the one-line command synopsis on stderr. */
+static void usage(void)
+{
+	fputs("Usage: find-roots [-a] [-o search_objectid] "
+	      "[ -g search_generation ] [ -l search_level ] <device>\n",
+	      stderr);
+}
+
+/*
+ * Look up the generation and level the superblock records for the tree
+ * identified by @objectid and report what was found on stdout.
+ *
+ * The superblock carries a level for the root, chunk and log trees and a
+ * generation for the root, chunk, log and uuid trees. For any other
+ * tree the gen/level could only come from its root item, which is not
+ * consulted here: currently we only believe things from the superblock.
+ *
+ * @ret_gen / @ret_level are written only when the corresponding value is
+ * available (and the pointer is non-NULL); otherwise they are left alone.
+ */
+static void get_root_gen_and_level(u64 objectid, struct btrfs_fs_info *fs_info,
+				   u64 *ret_gen, u8 *ret_level)
+{
+	const u64 no_gen = (u64)-1;
+	const u8 no_level = (u8)-1;
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	u64 found_gen = no_gen;
+	u8 found_level = no_level;
+
+	if (objectid == BTRFS_ROOT_TREE_OBJECTID) {
+		found_level = btrfs_super_root_level(sb);
+		found_gen = btrfs_super_generation(sb);
+	} else if (objectid == BTRFS_CHUNK_TREE_OBJECTID) {
+		found_level = btrfs_super_chunk_root_level(sb);
+		found_gen = btrfs_super_chunk_root_generation(sb);
+	} else if (objectid == BTRFS_TREE_LOG_OBJECTID) {
+		found_level = btrfs_super_log_root_level(sb);
+		found_gen = btrfs_super_log_root_transid(sb);
+	} else if (objectid == BTRFS_UUID_TREE_OBJECTID) {
+		/* the superblock has no level for the uuid tree */
+		found_gen = btrfs_super_uuid_tree_generation(sb);
+	}
+
+	if (found_gen == no_gen) {
+		printf("Superblock doesn't contain generation info for root %llu\n",
+		       objectid);
+	} else {
+		printf("Superblock thinks the generation is %llu\n", found_gen);
+		if (ret_gen)
+			*ret_gen = found_gen;
+	}
+
+	if (found_level == no_level) {
+		printf("Superblock doesn't contain the level info for root %llu\n",
+		       objectid);
+	} else {
+		printf("Superblock thinks the level is %u\n", found_level);
+		if (ret_level)
+			*ret_level = found_level;
+	}
+}
+
+/*
+ * Print one candidate tree block together with a verdict on how its
+ * generation/level compare with the values the filter expects. When the
+ * filter has no expected generation or no expected level (either is
+ * still the all-ones sentinel) the verdict is "unsure".
+ */
+static void print_one_result(struct cache_extent *tree_block,
+			     u8 level, u64 generation,
+			     struct btrfs_find_root_filter *filter)
+{
+	const int expectation_known = filter->match_gen != (u64)-1 &&
+				      filter->match_level != (u8)-1;
+
+	printf("Well block %llu(gen: %llu level: %u) seems good, ",
+	       tree_block->start, generation, level);
+
+	if (!expectation_known) {
+		printf("but we are unsure about the correct generation/level\n");
+		return;
+	}
+
+	if (level == filter->match_level && generation == filter->match_gen) {
+		printf("and it matches superblock\n");
+	} else {
+		printf("but generation/level doesn't match, want gen: %llu level: %u\n",
+		       filter->match_gen, filter->match_level);
+	}
+}
+
+/*
+ * Print every tree block collected in @result, walking the generation
+ * entries from last to first and, inside each, the blocks from last to
+ * first. Unless the filter asked to search everything, the entry whose
+ * generation and highest level equal the expected ones is skipped
+ * because the caller has already reported it.
+ */
+static void print_find_root_result(struct cache_tree *result,
+				   struct btrfs_find_root_filter *filter)
+{
+	struct cache_extent *gen_node;
+
+	for (gen_node = last_cache_extent(result); gen_node;
+	     gen_node = prev_cache_extent(gen_node)) {
+		struct btrfs_find_root_gen_cache *entry;
+		struct cache_extent *block;
+		u64 generation;
+		u8 level;
+
+		entry = container_of(gen_node,
+				     struct btrfs_find_root_gen_cache, cache);
+		level = entry->highest_level;
+		/* entries are keyed by generation */
+		generation = gen_node->start;
+
+		/* the exact match was printed before, don't repeat it */
+		if (!filter->search_all &&
+		    level == filter->match_level &&
+		    generation == filter->match_gen)
+			continue;
+
+		for (block = last_cache_extent(&entry->eb_tree); block;
+		     block = prev_cache_extent(block))
+			print_one_result(block, level, generation, filter);
+	}
+}
+
+/*
+ * Entry point: parse the options into a search filter, open the
+ * filesystem on the given device, search it for candidate roots of the
+ * requested tree and print what was found.
+ *
+ * Returns 0 when the search ran (whether or not an exact match was
+ * found) and the negative value from btrfs_find_root_search() when it
+ * failed. Bad usage or a failed open exits with status 1.
+ */
+int main(int argc, char **argv)
+{
+	static const struct option long_options[] = {
+		{ "help", no_argument, NULL, GETOPT_VAL_HELP},
+		{ NULL, 0, NULL, 0 }
+	};
+	struct btrfs_find_root_filter filter = {0};
+	struct btrfs_fs_info *fs_info;
+	struct cache_extent *found;
+	struct cache_tree result;
+	int opt;
+	int ret;
+
+	/* Default to search root tree, with no expected gen/level yet */
+	filter.objectid = BTRFS_ROOT_TREE_OBJECTID;
+	filter.match_gen = (u64)-1;
+	filter.match_level = (u8)-1;
+
+	while ((opt = getopt_long(argc, argv, "al:o:g:", long_options,
+				  NULL)) >= 0) {
+		switch (opt) {
+		case 'a':
+			filter.search_all = 1;
+			break;
+		case 'o':
+			filter.objectid = arg_strtou64(optarg);
+			break;
+		case 'g':
+			filter.generation = arg_strtou64(optarg);
+			break;
+		case 'l':
+			filter.level = arg_strtou64(optarg);
+			break;
+		case GETOPT_VAL_HELP:
+		default:
+			/* --help exits 0, anything unknown exits 1 */
+			usage();
+			exit(opt != GETOPT_VAL_HELP);
+		}
+	}
+
+	set_argv0(argv);
+	argc = argc - optind;
+	if (check_argc_min(argc, 1)) {
+		usage();
+		exit(1);
+	}
+
+	fs_info = open_ctree_fs_info(argv[optind], 0, 0,
+				     OPEN_CTREE_CHUNK_ROOT_ONLY |
+				     OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR);
+	if (!fs_info) {
+		error("open ctree failed");
+		exit(1);
+	}
+	cache_tree_init(&result);
+
+	/* seed the expected gen/level from the superblock where possible */
+	get_root_gen_and_level(filter.objectid, fs_info,
+			       &filter.match_gen, &filter.match_level);
+
+	ret = btrfs_find_root_search(fs_info, &filter, &result, &found);
+	if (ret < 0) {
+		fprintf(stderr, "Fail to search the tree root: %s\n",
+			strerror(-ret));
+	} else {
+		if (ret > 0) {
+			printf("Found tree root at %llu gen %llu level %u\n",
+			       found->start, filter.match_gen,
+			       filter.match_level);
+			ret = 0;
+		}
+		print_find_root_result(&result, &filter);
+	}
+
+	btrfs_find_root_free(&result);
+	close_ctree_fs_info(fs_info);
+	btrfs_close_all_devices();
+	return ret;
+}
diff --git a/btrfs-fragments.c b/btrfs-fragments.c
new file mode 100644
index 00000000..17768c3f
--- /dev/null
+++ b/btrfs-fragments.c
@@ -0,0 +1,452 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <uuid/uuid.h>
+#include <ctype.h>
+
+#include <gd.h>
+
+#undef ULONG_MAX
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+
+/* Non-zero when extents are drawn in per-tree colors (set by -c). */
+static int use_color;
+
+/*
+ * Write @im as a PNG file called @name inside directory @dir and then
+ * destroy the image. A NULL image is silently ignored. Exits the
+ * program if the output file cannot be created.
+ */
+static void
+push_im(gdImagePtr im, char *name, char *dir)
+{
+	char path[2000];
+	FILE *out;
+
+	if (im == NULL)
+		return;
+
+	snprintf(path, sizeof(path), "%s/%s", dir, name);
+
+	out = fopen(path, "w");
+	if (out == NULL) {
+		printf("unable to create file %s\n", path);
+		exit(1);
+	}
+	gdImagePng(im, out);
+	fclose(out);
+
+	gdImageDestroy(im);
+}
+
+/*
+ * Map the system/data/metadata bits of block group @flags to a short
+ * human-readable name. Data and metadata together are "mixed"; any
+ * other combination of the three bits is "invalid".
+ */
+static char *
+chunk_type(u64 flags)
+{
+	const u64 type = flags & (BTRFS_BLOCK_GROUP_SYSTEM |
+				  BTRFS_BLOCK_GROUP_DATA |
+				  BTRFS_BLOCK_GROUP_METADATA);
+
+	if (type == BTRFS_BLOCK_GROUP_SYSTEM)
+		return "system";
+	if (type == BTRFS_BLOCK_GROUP_DATA)
+		return "data";
+	if (type == BTRFS_BLOCK_GROUP_METADATA)
+		return "metadata";
+	if (type == (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA))
+		return "mixed";
+	return "invalid";
+}
+
+/*
+ * Append the HTML paragraph and <img> tag for one block group to @html.
+ * @name is the image file, @start/@len/@used/@flags describe the block
+ * group and @areas is the number of separate extent runs counted in it.
+ * Fragmentation is reported as twice the number of areas per 4096-byte
+ * block of the block group.
+ */
+static void
+print_bg(FILE *html, char *name, u64 start, u64 len, u64 used, u64 flags,
+	 u64 areas)
+{
+	const u64 blocks = len / 4096;
+	const double fragmentation = (double)areas / blocks * 2;
+	const double used_percent = 100.0 * used / len;
+
+	fprintf(html, "<p>%s chunk starts at %lld, size is %s, %.2f%% used, "
+		"%.2f%% fragmented</p>\n", chunk_type(flags), start,
+		pretty_size(len), used_percent, 100.0 * fragmentation);
+	fprintf(html, "<img src=\"%s\" border=\"1\" />\n", name);
+}
+
+/*
+ * Indexes into the per-image color table filled by init_colors(). Each
+ * value names the owner get_color() attributes an extent to; COLOR_MAX
+ * is the number of entries and is used to size the table.
+ */
+enum tree_colors {
+ COLOR_ROOT = 0,
+ COLOR_EXTENT,
+ COLOR_CHUNK,
+ COLOR_DEV,
+ COLOR_FS,
+ COLOR_CSUM,
+ COLOR_RELOC,
+ COLOR_DATA,
+ COLOR_UNKNOWN,
+ COLOR_MAX
+};
+
+/*
+ * Decide which enum tree_colors value an extent should be drawn with.
+ *
+ * @item points at the extent item as returned by the search ioctl and
+ * @len is the size of that item in bytes. Data extents are COLOR_DATA
+ * and anything referenced more than once is taken to be an fs tree
+ * block. Otherwise the first inline reference, expected directly after
+ * the extent item and its tree block info, decides: for a tree block
+ * ref the owning tree's objectid selects the color.
+ *
+ * An item too short to hold the fields being read yields COLOR_UNKNOWN
+ * instead of reading past the end of the item.
+ */
+static int
+get_color(struct btrfs_extent_item *item, int len)
+{
+	const size_t ref_start = sizeof(struct btrfs_extent_item) +
+				 sizeof(struct btrfs_tree_block_info);
+	u64 refs;
+	u64 flags;
+	u8 type;
+	u64 offset;
+	struct btrfs_extent_inline_ref *ref;
+
+	/* refs and flags live in the extent item itself */
+	if (len < 0 || (size_t)len < sizeof(struct btrfs_extent_item))
+		return COLOR_UNKNOWN;
+
+	refs = btrfs_stack_extent_refs(item);
+	flags = btrfs_stack_extent_flags(item);
+
+	if (flags & BTRFS_EXTENT_FLAG_DATA)
+		return COLOR_DATA;
+	if (refs > 1) {
+		/* this must be an fs tree */
+		return COLOR_FS;
+	}
+
+	/* the inline ref must fit inside the item before we look at it */
+	if ((size_t)len < ref_start + sizeof(*ref))
+		return COLOR_UNKNOWN;
+
+	ref = (void *)item + ref_start;
+	type = btrfs_stack_extent_inline_ref_type(ref);
+	offset = btrfs_stack_extent_inline_ref_offset(ref);
+
+	switch (type) {
+	case BTRFS_EXTENT_DATA_REF_KEY:
+		return COLOR_DATA;
+	case BTRFS_SHARED_BLOCK_REF_KEY:
+	case BTRFS_SHARED_DATA_REF_KEY:
+		return COLOR_FS;
+	case BTRFS_TREE_BLOCK_REF_KEY:
+		/* offset holds the objectid of the owning tree */
+		break;
+	default:
+		return COLOR_UNKNOWN;
+	}
+
+	switch (offset) {
+	case BTRFS_ROOT_TREE_OBJECTID:
+		return COLOR_ROOT;
+	case BTRFS_EXTENT_TREE_OBJECTID:
+		return COLOR_EXTENT;
+	case BTRFS_CHUNK_TREE_OBJECTID:
+		return COLOR_CHUNK;
+	case BTRFS_DEV_TREE_OBJECTID:
+		return COLOR_DEV;
+	case BTRFS_FS_TREE_OBJECTID:
+		return COLOR_FS;
+	case BTRFS_CSUM_TREE_OBJECTID:
+		return COLOR_CSUM;
+	case BTRFS_DATA_RELOC_TREE_OBJECTID:
+		return COLOR_RELOC;
+	}
+
+	return COLOR_UNKNOWN;
+}
+
+/*
+ * Allocate one palette entry in @im for every enum tree_colors value
+ * and store the resulting color indexes in @colors, which must have
+ * room for COLOR_MAX entries. Entries are allocated in enum order.
+ */
+static void
+init_colors(gdImagePtr im, int *colors)
+{
+	static const struct {
+		int r;
+		int g;
+		int b;
+	} rgb[COLOR_MAX] = {
+		[COLOR_ROOT]	= { 255,   0,   0 },
+		[COLOR_EXTENT]	= {   0, 255,   0 },
+		[COLOR_CHUNK]	= { 255,   0,   0 },
+		[COLOR_DEV]	= { 255,   0,   0 },
+		[COLOR_FS]	= {   0,   0,   0 },
+		[COLOR_CSUM]	= {   0,   0, 255 },
+		[COLOR_RELOC]	= { 128, 128, 128 },
+		[COLOR_DATA]	= { 100,   0,   0 },
+		[COLOR_UNKNOWN]	= {  50,  50,  50 },
+	};
+	int c;
+
+	for (c = COLOR_ROOT; c < COLOR_MAX; c++)
+		colors[c] = gdImageColorAllocate(im, rgb[c].r, rgb[c].g,
+						 rgb[c].b);
+}
+
+/*
+ * Walk tree 2 of the filesystem open on @fd with the tree search ioctl
+ * and, for every block group whose flags intersect @flags, write a PNG
+ * allocation map into directory @dir plus a matching entry in
+ * @dir/index.html.
+ *
+ * In each image one pixel stands for 4096 bytes and rows are 800 pixels
+ * wide. Extent items found after a block group item are painted into
+ * that block group's image.
+ *
+ * Returns the result of the last search ioctl: 0 when the walk
+ * completed, negative when the ioctl failed. Exits the program if
+ * index.html cannot be created.
+ */
+int
+list_fragments(int fd, u64 flags, char *dir)
+{
+ int ret;
+ struct btrfs_ioctl_search_args args;
+ struct btrfs_ioctl_search_key *sk = &args.key;
+ int i;
+ struct btrfs_ioctl_search_header *sh;
+ unsigned long off = 0;
+ int bgnum = 0;
+ /* description of the block group currently being drawn */
+ u64 bgstart = 0;
+ u64 bglen = 0;
+ u64 bgend = 0;
+ u64 bgflags = 0;
+ u64 bgused = 0;
+ /* an extent seen at exactly bgend, kept for the next block group */
+ u64 saved_extent = 0;
+ u64 saved_len = 0;
+ int saved_color = 0;
+ /* end of the previously drawn extent, used to count separate areas */
+ u64 last_end = 0;
+ u64 areas = 0;
+ long px;
+ char name[1000];
+ FILE *html;
+ int colors[COLOR_MAX];
+
+ gdImagePtr im = NULL;
+ int black = 0;
+ int width = 800;
+
+ snprintf(name, sizeof(name), "%s/index.html", dir);
+ html = fopen(name, "w");
+ if (!html) {
+ printf("unable to create %s\n", name);
+ exit(1);
+ }
+
+ fprintf(html, "<html><header>\n");
+ fprintf(html, "<title>Btrfs Block Group Allocation Map</title>\n");
+ fprintf(html, "<style type=\"text/css\">\n");
+ fprintf(html, "img {margin-left: 1em; margin-bottom: 2em;}\n");
+ fprintf(html, "</style>\n");
+ fprintf(html, "</header><body>\n");
+
+ memset(&args, 0, sizeof(args));
+
+ /* search all of tree 2 (presumably the extent tree - TODO confirm) */
+ sk->tree_id = 2;
+ sk->max_type = -1;
+ sk->min_type = 0;
+ sk->max_objectid = (u64)-1;
+ sk->max_offset = (u64)-1;
+ sk->max_transid = (u64)-1;
+
+ /* just a big number, doesn't matter much */
+ sk->nr_items = 4096;
+
+ while(1) {
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR: can't perform the search\n");
+ goto out_close;
+ }
+ /* the ioctl returns the number of item it found in nr_items */
+ if (sk->nr_items == 0)
+ break;
+
+ off = 0;
+ for (i = 0; i < sk->nr_items; i++) {
+ int j;
+
+ /* each result is a search header followed by sh->len bytes */
+ sh = (struct btrfs_ioctl_search_header *)(args.buf +
+ off);
+ off += sizeof(*sh);
+ if (sh->type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+ struct btrfs_block_group_item *bg;
+
+ /* a new block group starts: flush the previous image */
+ if (im) {
+ push_im(im, name, dir);
+ im = NULL;
+
+ print_bg(html, name, bgstart, bglen,
+ bgused, bgflags, areas);
+ }
+
+ ++bgnum;
+
+ bg = (struct btrfs_block_group_item *)
+ (args.buf + off);
+ bgflags = btrfs_block_group_flags(bg);
+ bgused = btrfs_block_group_used(bg);
+
+ printf("found block group %lld len %lld "
+ "flags %lld\n", sh->objectid,
+ sh->offset, bgflags);
+ if (!(bgflags & flags)) {
+ /* skip this block group */
+ sk->min_objectid = sh->objectid +
+ sh->offset;
+ sk->min_type = 0;
+ sk->min_offset = 0;
+ break;
+ }
+ /* one pixel per 4096 bytes, rows of 'width' pixels */
+ im = gdImageCreate(width,
+ (sh->offset / 4096 + 799) / width);
+
+ black = gdImageColorAllocate(im, 0, 0, 0);
+
+ for (j = 0; j < ARRAY_SIZE(colors); ++j)
+ colors[j] = black;
+
+ init_colors(im, colors);
+ bgstart = sh->objectid;
+ bglen = sh->offset;
+ bgend = bgstart + bglen;
+
+ snprintf(name, sizeof(name), "bg%d.png", bgnum);
+
+ last_end = bgstart;
+ /* draw the extent that was saved at the previous bgend */
+ if (saved_len) {
+ px = (saved_extent - bgstart) / 4096;
+ for (j = 0; j < saved_len / 4096; ++j) {
+ int x = (px + j) % width;
+ int y = (px + j) / width;
+ gdImageSetPixel(im, x, y,
+ saved_color);
+ }
+ last_end += saved_len;
+ }
+ areas = 0;
+ saved_len = 0;
+ }
+ if (im && sh->type == BTRFS_EXTENT_ITEM_KEY) {
+ int c;
+ struct btrfs_extent_item *item;
+
+ item = (struct btrfs_extent_item *)
+ (args.buf + off);
+
+ if (use_color)
+ c = colors[get_color(item, sh->len)];
+ else
+ c = black;
+ if (sh->objectid > bgend) {
+ printf("WARN: extent %lld is without "
+ "block group\n", sh->objectid);
+ goto skip;
+ }
+ /* starts exactly at the end: belongs to the next image */
+ if (sh->objectid == bgend) {
+ saved_extent = sh->objectid;
+ saved_len = sh->offset;
+ saved_color = c;
+ goto skip;
+ }
+ px = (sh->objectid - bgstart) / 4096;
+ for (j = 0; j < sh->offset / 4096; ++j) {
+ int x = (px + j) % width;
+ int y = (px + j) / width;
+ gdImageSetPixel(im, x, y, c);
+ }
+ /* a gap since the previous extent starts a new area */
+ if (sh->objectid != last_end)
+ ++areas;
+ last_end = sh->objectid + sh->offset;
+skip:;
+ }
+ off += sh->len;
+
+ /*
+ * record the mins in sk so we can make sure the
+ * next search doesn't repeat this root
+ */
+ sk->min_objectid = sh->objectid;
+ sk->min_type = sh->type;
+ sk->min_offset = sh->offset;
+ }
+ sk->nr_items = 4096;
+
+ /* increment by one */
+ if (++sk->min_offset == 0)
+ if (++sk->min_type == 0)
+ if (++sk->min_objectid == 0)
+ break;
+ }
+
+ /* flush the image of the last block group */
+ if (im) {
+ push_im(im, name, dir);
+ print_bg(html, name, bgstart, bglen, bgused, bgflags, areas);
+ }
+
+ if (use_color) {
+ fprintf(html, "<p>");
+ fprintf(html, "data - dark red, ");
+ fprintf(html, "fs tree - black, ");
+ fprintf(html, "extent tree - green, ");
+ fprintf(html, "csum tree - blue, ");
+ fprintf(html, "reloc tree - grey, ");
+ fprintf(html, "other trees - red, ");
+ fprintf(html, "unknown tree - dark grey");
+ fprintf(html, "</p>");
+ }
+ fprintf(html, "</body></html>\n");
+
+out_close:
+ fclose(html);
+
+ return ret;
+}
+
+/* Print the option summary on stdout and exit with status 1. */
+void
+usage(void)
+{
+	static const char *const help[] = {
+		"usage: btrfs-fragments [options] <path>\n",
+		" -c use color\n",
+		" -d print data chunks\n",
+		" -m print metadata chunks\n",
+		" -s print system chunks\n",
+		" (default is data+metadata)\n",
+		" -o <dir> output directory, default is html\n",
+	};
+	size_t line;
+
+	for (line = 0; line < sizeof(help) / sizeof(help[0]); line++)
+		fputs(help[line], stdout);
+	exit(1);
+}
+
+/*
+ * Entry point: parse the options, open the directory given as <path>
+ * and generate the block group maps with list_fragments().
+ *
+ * -c enables colors, -d/-m/-s select data/metadata/system block groups
+ * (data+metadata when none is given) and -o sets the output directory.
+ * Exits 0 on success and 1 on any failure or bad usage.
+ */
+int main(int argc, char **argv)
+{
+	char *path;
+	int fd;
+	int ret;
+	u64 flags = 0;
+	char *dir = "html";
+	DIR *dirstream = NULL;
+
+	while (1) {
+		/*
+		 * Every option handled in the switch below must be listed
+		 * here; without 'd' getopt rejects -d as unknown.
+		 */
+		int c = getopt(argc, argv, "cdmso:h");
+		if (c < 0)
+			break;
+		switch (c) {
+		case 'c':
+			use_color = 1;
+			break;
+		case 'd':
+			flags |= BTRFS_BLOCK_GROUP_DATA;
+			break;
+		case 'm':
+			flags |= BTRFS_BLOCK_GROUP_METADATA;
+			break;
+		case 's':
+			flags |= BTRFS_BLOCK_GROUP_SYSTEM;
+			break;
+		case 'o':
+			dir = optarg;
+			break;
+		case 'h':
+		default:
+			/* usage() does not return */
+			usage();
+		}
+	}
+
+	set_argv0(argv);
+	argc = argc - optind;
+	if (check_argc_min(argc, 1)) {
+		usage();
+		exit(1);
+	}
+
+	path = argv[optind++];
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		exit(1);
+
+	/* no type selected on the command line: draw data and metadata */
+	if (flags == 0)
+		flags = BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA;
+
+	ret = list_fragments(fd, flags, dir);
+	close_file_or_dir(fd, dirstream);
+	if (ret)
+		exit(1);
+
+	exit(0);
+}
diff --git a/btrfs-image.c b/btrfs-image.c
new file mode 100644
index 00000000..c7fa18fb
--- /dev/null
+++ b/btrfs-image.c
@@ -0,0 +1,2880 @@
+/*
+ * Copyright (C) 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <zlib.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "crc32c.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "utils.h"
+#include "volumes.h"
+#include "extent_io.h"
+
+/* Value stored in the 'magic' field of a cluster header (presumably
+ * checked when an image is read back - TODO confirm). */
+#define HEADER_MAGIC 0xbd5c25e27295668bULL
+#define MAX_PENDING_SIZE (256 * 1024)
+/* Size in bytes of the block holding a cluster header and its index
+ * items; BLOCK_MASK is the matching low-bits mask. */
+#define BLOCK_SIZE 1024
+#define BLOCK_MASK (BLOCK_SIZE - 1)
+
+/* Values for the 'compress' field of struct meta_cluster_header */
+#define COMPRESS_NONE 0
+#define COMPRESS_ZLIB 1
+
+/* One index entry of a cluster: a byte number and a size, packed. */
+struct meta_cluster_item {
+ __le64 bytenr;
+ __le32 size;
+} __attribute__ ((__packed__));
+
+/* Packed header at the start of each cluster; nritems presumably counts
+ * the index items that follow - TODO confirm. */
+struct meta_cluster_header {
+ __le64 magic;
+ __le64 bytenr;
+ __le32 nritems;
+ u8 compress;
+} __attribute__ ((__packed__));
+
+/* cluster header + index items + buffers */
+struct meta_cluster {
+ struct meta_cluster_header header;
+ struct meta_cluster_item items[];
+} __attribute__ ((__packed__));
+
+/* Number of index items that fit in one BLOCK_SIZE block after the
+ * cluster header. */
+#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
+ sizeof(struct meta_cluster_item))
+
+/*
+ * One chunk mapping: @bytes bytes starting at logical address @logical
+ * live at physical address @physical. The same object is linked into
+ * two rb-trees: through @l ordered by logical address (see chunk_cmp)
+ * and through @p ordered by physical address (see physical_cmp).
+ */
+struct fs_chunk {
+ u64 logical;
+ u64 physical;
+ u64 bytes;
+ struct rb_node l;
+ struct rb_node p;
+ struct list_head list;
+};
+
+/*
+ * One unit of work: a byte range (@start, @size) together with a data
+ * buffer of @bufsize bytes and an error status. It can be linked on two
+ * lists at once through @list and @ordered (presumably a work queue and
+ * an output-order queue - TODO confirm against the users).
+ */
+struct async_work {
+ struct list_head list;
+ struct list_head ordered;
+ u64 start;
+ u64 size;
+ u8 *buffer;
+ size_t bufsize;
+ int error;
+};
+
+/*
+ * State of one metadata dump: the source root, the output stream, the
+ * cluster being assembled, the worker threads with their mutex and
+ * condition variable, and the lists of work items.
+ */
+struct metadump_struct {
+ struct btrfs_root *root;
+ FILE *out;
+
+ struct meta_cluster *cluster;
+
+ pthread_t *threads;
+ size_t num_threads;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ /* original name -> substitute name entries (see find_collision) */
+ struct rb_root name_tree;
+
+ struct list_head list;
+ struct list_head ordered;
+ size_t num_items;
+ size_t num_ready;
+
+ u64 pending_start;
+ u64 pending_size;
+
+ int compress_level;
+ int done;
+ int data;
+ int sanitize_names;
+
+ int error;
+};
+
+/*
+ * One entry of the name substitution tree: @val is the original name,
+ * @sub the replacement generated for it and @len the length in bytes of
+ * both buffers (see find_collision). @n links the entry into the tree.
+ */
+struct name {
+ struct rb_node n;
+ char *val;
+ char *sub;
+ u32 len;
+};
+
+/*
+ * State of one restore: input and output streams, the worker threads
+ * with their mutex and condition variable, the chunk mappings and the
+ * option flags.
+ */
+struct mdrestore_struct {
+ FILE *in;
+ FILE *out;
+
+ pthread_t *threads;
+ size_t num_threads;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ /* fs_chunk entries by logical address (see logical_to_physical) */
+ struct rb_root chunk_tree;
+ /* presumably the same entries by physical address - TODO confirm */
+ struct rb_root physical_tree;
+ struct list_head list;
+ struct list_head overlapping_chunks;
+ size_t num_items;
+ u32 leafsize;
+ u64 devid;
+ u64 alloced_chunks;
+ u64 last_physical_offset;
+ u8 uuid[BTRFS_UUID_SIZE];
+ u8 fsid[BTRFS_FSID_SIZE];
+
+ int compress_method;
+ int done;
+ int error;
+ int old_restore;
+ int fixup_offset;
+ int multi_devices;
+ int clear_space_cache;
+ struct btrfs_fs_info *info;
+};
+
+static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
+ u64 search, u64 cluster_bytenr);
+static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
+
+/*
+ * Recompute the checksum of the @len byte block at @buf: crc32c over
+ * everything after the first BTRFS_CSUM_SIZE bytes, finalised and
+ * stored in the first BTRFS_CRC32_SIZE bytes of the block.
+ */
+static void csum_block(u8 *buf, size_t len)
+{
+	char digest[BTRFS_CRC32_SIZE];
+	u32 crc;
+
+	crc = crc32c(~(u32)0, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
+	btrfs_csum_final(crc, digest);
+	memcpy(buf, digest, BTRFS_CRC32_SIZE);
+}
+
+/*
+ * Return 1 when items with this key's type are dir items, dir indexes,
+ * inode refs, inode extrefs or xattr items, and 0 for any other type.
+ */
+static int has_name(struct btrfs_key *key)
+{
+	return key->type == BTRFS_DIR_ITEM_KEY ||
+	       key->type == BTRFS_DIR_INDEX_KEY ||
+	       key->type == BTRFS_INODE_REF_KEY ||
+	       key->type == BTRFS_INODE_EXTREF_KEY ||
+	       key->type == BTRFS_XATTR_ITEM_KEY;
+}
+
+/*
+ * Allocate @name_len bytes and fill them with characters drawn from
+ * rand() in the range 33..126, never using '/'. The buffer is not NUL
+ * terminated. Returns NULL if the allocation fails; otherwise the
+ * caller owns the buffer.
+ */
+static char *generate_garbage(u32 name_len)
+{
+	char *garbage = malloc(name_len);
+	u32 pos = 0;
+
+	if (garbage == NULL)
+		return NULL;
+
+	while (pos < name_len) {
+		char c = rand() % 94 + 33;
+
+		/* '/' cannot appear in a name, use the next character */
+		garbage[pos++] = (c == '/') ? c + 1 : c;
+	}
+
+	return garbage;
+}
+
+/*
+ * Ordering function for the name tree. @a is the node already in the
+ * tree and @b the one being inserted or searched for; @fuzz is unused.
+ *
+ * Names are ordered by their bytes and, when one name is a prefix of
+ * the other, by length. Without the length tie-break a name would match
+ * the entry of any other name it shares a prefix with, and the caller
+ * would get back a substitute of the wrong length.
+ *
+ * Returns <0, 0 or >0 when @b sorts before, equal to or after @a.
+ */
+static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
+{
+	struct name *entry = rb_entry(a, struct name, n);
+	struct name *ins = rb_entry(b, struct name, n);
+	u32 len;
+	int ret;
+
+	len = min(ins->len, entry->len);
+	ret = memcmp(ins->val, entry->val, len);
+	if (ret)
+		return ret;
+
+	/* common prefix is identical: the shorter name sorts first */
+	if (ins->len < entry->len)
+		return -1;
+	if (ins->len > entry->len)
+		return 1;
+	return 0;
+}
+
+/*
+ * Ordering function for chunks linked through their 'l' node, keyed by
+ * logical address. @a is the node in the tree, @b the one inserted or
+ * searched for. With @fuzz set, @b also compares equal when its logical
+ * address falls anywhere inside @a's range.
+ */
+static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
+{
+	struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
+	struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
+	const u64 want = ins->logical;
+
+	if (fuzz && want >= entry->logical &&
+	    want < entry->logical + entry->bytes)
+		return 0;
+
+	if (want == entry->logical)
+		return 0;
+	return want < entry->logical ? -1 : 1;
+}
+
+/*
+ * Ordering function for chunks linked through their 'p' node, keyed by
+ * physical address. @a is the node in the tree, @b the one inserted or
+ * searched for. With @fuzz set the two compare equal as soon as the
+ * start of either one lies inside the range of the other.
+ */
+static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
+{
+	struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
+	struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
+
+	if (fuzz) {
+		/* ins starts inside entry */
+		if (ins->physical >= entry->physical &&
+		    ins->physical < entry->physical + entry->bytes)
+			return 0;
+		/* entry starts inside ins */
+		if (entry->physical >= ins->physical &&
+		    entry->physical < ins->physical + ins->bytes)
+			return 0;
+	}
+
+	if (ins->physical == entry->physical)
+		return 0;
+	return ins->physical < entry->physical ? -1 : 1;
+}
+
+/*
+ * Link @ins into the rb-tree @root at the position given by @cmp, which
+ * is always called in fuzzy mode. Finding an entry that compares equal
+ * to @ins is treated as a fatal bug.
+ */
+static void tree_insert(struct rb_root *root, struct rb_node *ins,
+			int (*cmp)(struct rb_node *a, struct rb_node *b,
+				   int fuzz))
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		int order;
+
+		parent = *link;
+		order = cmp(parent, ins, 1);
+
+		if (order == 0)
+			BUG();
+		else if (order < 0)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(ins, parent, link);
+	rb_insert_color(ins, root);
+}
+
+/*
+ * Look up @search in the rb-tree @root using @cmp, passing @fuzz on to
+ * every comparison. Returns the first node that compares equal, or NULL
+ * when there is none.
+ */
+static struct rb_node *tree_search(struct rb_root *root,
+				   struct rb_node *search,
+				   int (*cmp)(struct rb_node *a,
+					      struct rb_node *b, int fuzz),
+				   int fuzz)
+{
+	struct rb_node *cur;
+
+	for (cur = root->rb_node; cur != NULL; ) {
+		int order = cmp(cur, search, fuzz);
+
+		if (order == 0)
+			return cur;
+		cur = (order < 0) ? cur->rb_left : cur->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Translate @logical into a physical address using the chunk tree of
+ * @mdres. The superblock offset maps to itself. When no chunk covers
+ * the address the logical address is returned unchanged (with a notice
+ * unless the input is stdin).
+ *
+ * On a successful lookup *@size is clamped so that the range does not
+ * run past the end of the chunk that was found.
+ */
+static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size)
+{
+	struct fs_chunk key;
+	struct fs_chunk *chunk;
+	struct rb_node *node;
+	u64 chunk_end;
+
+	if (logical == BTRFS_SUPER_INFO_OFFSET)
+		return logical;
+
+	/* only the logical address of the key takes part in the lookup */
+	key.logical = logical;
+	node = tree_search(&mdres->chunk_tree, &key.l, chunk_cmp, 1);
+	if (node == NULL) {
+		if (mdres->in != stdin)
+			printf("Couldn't find a chunk, using logical\n");
+		return logical;
+	}
+
+	chunk = rb_entry(node, struct fs_chunk, l);
+	chunk_end = chunk->logical + chunk->bytes;
+	if (chunk->logical > logical || chunk_end < logical)
+		BUG();
+
+	*size = min(*size, chunk_end - logical);
+	return chunk->physical + (logical - chunk->logical);
+}
+
+
+/*
+ * Return a replacement for @name that has the same length and, if one can
+ * be found, the same crc32c checksum.
+ *
+ * Ownership of @name passes to this function: it is either freed here or
+ * stored in md->name_tree.  Results are cached in md->name_tree, so the
+ * returned buffer belongs to the tree and must not be freed by the caller.
+ *
+ * The search walks candidate strings like an odometer, starting from all
+ * spaces, bumping characters up to 127 and skipping '/'.  If no colliding
+ * string exists, random printable garbage is used instead.
+ *
+ * Returns NULL on allocation failure.
+ */
+ static char *find_collision(struct metadump_struct *md, char *name,
+			    u32 name_len)
+{
+	struct name lookup;
+	struct name *rec;
+	struct rb_node *hit;
+	unsigned long checksum;
+	int found = 0;
+	int i;
+
+	/* already sanitized once? reuse the cached substitute */
+	lookup.val = name;
+	lookup.len = name_len;
+	hit = tree_search(&md->name_tree, &lookup.n, name_cmp, 0);
+	if (hit) {
+		rec = rb_entry(hit, struct name, n);
+		free(name);
+		return rec->sub;
+	}
+
+	rec = calloc(1, sizeof(struct name));
+	if (!rec) {
+		fprintf(stderr, "Couldn't sanitize name, enomem\n");
+		free(name);
+		return NULL;
+	}
+
+	rec->val = name;
+	rec->len = name_len;
+	rec->sub = malloc(name_len);
+	if (!rec->sub) {
+		fprintf(stderr, "Couldn't sanitize name, enomem\n");
+		free(rec);
+		free(name);
+		return NULL;
+	}
+
+	checksum = crc32c(~1, rec->val, name_len);
+	memset(rec->sub, ' ', name_len);
+	i = 0;
+	for (;;) {
+		/* a hit must share the checksum but differ from the input */
+		if (crc32c(~1, rec->sub, name_len) == checksum &&
+		    memcmp(rec->sub, rec->val, rec->len)) {
+			found = 1;
+			break;
+		}
+
+		if (rec->sub[i] != 127) {
+			/* plain increment of the lowest position */
+			rec->sub[i]++;
+			if (rec->sub[i] == '/')
+				rec->sub[i]++;
+			continue;
+		}
+
+		/* carry: find the next position that is not maxed out */
+		do {
+			i++;
+		} while (i < name_len && rec->sub[i] == 127);
+
+		/* every position is exhausted, give up */
+		if (i >= name_len)
+			break;
+
+		rec->sub[i]++;
+		if (rec->sub[i] == '/')
+			rec->sub[i]++;
+		/* reset all lower positions and start over from the left */
+		memset(rec->sub, ' ', i);
+		i = 0;
+	}
+
+	if (!found) {
+		fprintf(stderr, "Couldn't find a collision for '%.*s', "
+			"generating normal garbage, it won't match indexes\n",
+			rec->len, rec->val);
+		for (i = 0; i < name_len; i++) {
+			char c = rand() % 94 + 33;
+
+			if (c == '/')
+				c++;
+			rec->sub[i] = c;
+		}
+	}
+
+	tree_insert(&md->name_tree, &rec->n, name_cmp);
+	return rec->sub;
+}
+
+/*
+ * Overwrite every name stored in the dir item at @slot of @eb.
+ *
+ * An item may pack several btrfs_dir_item entries back to back; each one
+ * is visited in turn.  With md->sanitize_names > 1 the replacement comes
+ * from find_collision() (which takes ownership of the name copy and keeps
+ * the result in its cache), otherwise from generate_garbage(), whose
+ * result is freed here after use.
+ *
+ * On allocation failure a message is printed and the remaining entries are
+ * left untouched.
+ */
+ static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
+			      int slot)
+{
+	struct btrfs_dir_item *di;
+	unsigned long name_off;
+	char *copy;
+	char *replacement;
+	u32 item_len;
+	u32 done;
+	u32 entry_len;
+	u32 name_len;
+	int owns_replacement = (md->sanitize_names == 1);
+
+	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
+	item_len = btrfs_item_size_nr(eb, slot);
+
+	for (done = 0; done < item_len; done += entry_len) {
+		name_len = btrfs_dir_name_len(eb, di);
+		entry_len = sizeof(*di) + name_len +
+			    btrfs_dir_data_len(eb, di);
+		/* the name follows the fixed-size header directly */
+		name_off = (unsigned long)(di + 1);
+
+		if (md->sanitize_names > 1) {
+			copy = malloc(name_len);
+			if (!copy) {
+				fprintf(stderr, "Couldn't sanitize name, "
+					"enomem\n");
+				return;
+			}
+			read_extent_buffer(eb, copy, name_off, name_len);
+			replacement = find_collision(md, copy, name_len);
+		} else {
+			replacement = generate_garbage(name_len);
+		}
+		if (!replacement) {
+			fprintf(stderr, "Couldn't sanitize name, enomem\n");
+			return;
+		}
+
+		write_extent_buffer(eb, replacement, name_off, name_len);
+		di = (struct btrfs_dir_item *)((char *)di + entry_len);
+		if (owns_replacement)
+			free(replacement);
+	}
+}
+
+/*
+ * Overwrite every name stored in the inode ref item at @slot of @eb.
+ *
+ * @ext selects the layout: non-zero for btrfs_inode_extref entries, zero
+ * for btrfs_inode_ref entries.  Replacement names are produced the same
+ * way as for dir items: find_collision() when md->sanitize_names > 1,
+ * generate_garbage() (freed here) otherwise.
+ *
+ * On allocation failure a message is printed and the remaining entries are
+ * left untouched.
+ */
+ static void sanitize_inode_ref(struct metadump_struct *md,
+			       struct extent_buffer *eb, int slot, int ext)
+{
+	struct btrfs_inode_extref *extref;
+	struct btrfs_inode_ref *ref;
+	char *replacement, *copy;
+	unsigned long item_start;
+	unsigned long name_off;
+	u32 item_len;
+	u32 consumed = 0;
+	int len;
+	int owns_replacement = (md->sanitize_names == 1);
+
+	item_len = btrfs_item_size_nr(eb, slot);
+	item_start = btrfs_item_ptr_offset(eb, slot);
+
+	while (consumed < item_len) {
+		unsigned long entry = item_start + consumed;
+
+		if (ext) {
+			extref = (struct btrfs_inode_extref *)entry;
+			name_off = (unsigned long)(&extref->name);
+			len = btrfs_inode_extref_name_len(eb, extref);
+			consumed += sizeof(*extref);
+		} else {
+			ref = (struct btrfs_inode_ref *)entry;
+			len = btrfs_inode_ref_name_len(eb, ref);
+			name_off = (unsigned long)(ref + 1);
+			consumed += sizeof(*ref);
+		}
+		/* step over the name so the next pass sees the next entry */
+		consumed += len;
+
+		if (md->sanitize_names > 1) {
+			copy = malloc(len);
+			if (!copy) {
+				fprintf(stderr, "Couldn't sanitize name, "
+					"enomem\n");
+				return;
+			}
+			read_extent_buffer(eb, copy, name_off, len);
+			replacement = find_collision(md, copy, len);
+		} else {
+			replacement = generate_garbage(len);
+		}
+
+		if (!replacement) {
+			fprintf(stderr, "Couldn't sanitize name, enomem\n");
+			return;
+		}
+		write_extent_buffer(eb, replacement, name_off, len);
+		if (owns_replacement)
+			free(replacement);
+	}
+}
+
+/*
+ * Zero the data payload of the xattr item at @slot of @eb.  The payload
+ * sits right after the btrfs_dir_item header and the name; the name itself
+ * is left as is.  @md is not used.
+ */
+ static void sanitize_xattr(struct metadump_struct *md,
+			   struct extent_buffer *eb, int slot)
+{
+	struct btrfs_dir_item *di;
+	unsigned long value_off;
+	u32 value_len;
+	u32 name_len;
+
+	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
+	value_len = btrfs_dir_data_len(eb, di);
+	name_len = btrfs_dir_name_len(eb, di);
+
+	value_off = (unsigned long)((char *)(di + 1) + name_len);
+	memset_extent_buffer(eb, 0, value_off, value_len);
+}
+
+/*
+ * Sanitize the item at @slot inside the output copy @dst of tree block
+ * @src.
+ *
+ * @dst is copied into a temporary dummy extent buffer so the extent-buffer
+ * accessors can be used on it, the handler matching @key->type is run, and
+ * the result is copied back into @dst.  If the temporary buffer cannot be
+ * allocated a message is printed and @dst is left unmodified.
+ */
+ static void sanitize_name(struct metadump_struct *md, u8 *dst,
+			  struct extent_buffer *src, struct btrfs_key *key,
+			  int slot)
+{
+	struct extent_buffer *scratch;
+	int type = key->type;
+
+	scratch = alloc_dummy_eb(src->start, src->len);
+	if (!scratch) {
+		fprintf(stderr, "Couldn't sanitize name, no memory\n");
+		return;
+	}
+
+	memcpy(scratch->data, dst, scratch->len);
+
+	if (type == BTRFS_DIR_ITEM_KEY || type == BTRFS_DIR_INDEX_KEY)
+		sanitize_dir_item(md, scratch, slot);
+	else if (type == BTRFS_INODE_REF_KEY)
+		sanitize_inode_ref(md, scratch, slot, 0);
+	else if (type == BTRFS_INODE_EXTREF_KEY)
+		sanitize_inode_ref(md, scratch, slot, 1);
+	else if (type == BTRFS_XATTR_ITEM_KEY)
+		sanitize_xattr(md, scratch, slot);
+	/* any other key type carries nothing to sanitize here */
+
+	memcpy(dst, scratch->data, scratch->len);
+	free(scratch);
+}
+
+/*
+ * Scrub the leaf copy @dst of @src item by item:
+ *  - csum items are zeroed completely,
+ *  - items that carry names are sanitized when md->sanitize_names is set,
+ *  - the payload of inline file extents is zeroed.
+ * Everything else is left as copied.
+ */
+ static void zero_items(struct metadump_struct *md, u8 *dst,
+		       struct extent_buffer *src)
+{
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	u32 nritems = btrfs_header_nritems(src);
+	size_t len;
+	unsigned long inline_off;
+	int slot;
+
+	for (slot = 0; slot < nritems; slot++) {
+		btrfs_item_key_to_cpu(src, &key, slot);
+
+		if (key.type == BTRFS_CSUM_ITEM_KEY) {
+			len = btrfs_item_size_nr(src, slot);
+			memset(dst + btrfs_leaf_data(src) +
+			       btrfs_item_offset_nr(src, slot), 0, len);
+			continue;
+		}
+
+		if (md->sanitize_names && has_name(&key)) {
+			sanitize_name(md, dst, src, &key, slot);
+			continue;
+		}
+
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		/* only inline extents keep file data inside the leaf */
+		fi = btrfs_item_ptr(src, slot, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(src, fi) !=
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+
+		inline_off = btrfs_file_extent_inline_start(fi);
+		len = btrfs_file_extent_inline_item_len(src,
+							btrfs_item_nr(slot));
+		memset(dst + inline_off, 0, len);
+	}
+}
+
+/*
+ * Copy tree block @src into @dst and wipe everything that is not live
+ * metadata, then recompute the block checksum.
+ *
+ * The super block is copied verbatim.  For other blocks:
+ *  - empty block: everything after the header is zeroed,
+ *  - leaf: the gap between the item headers and the item data is zeroed
+ *    and the items are scrubbed by zero_items(),
+ *  - node: everything after the last key pointer is zeroed.
+ */
+ static void copy_buffer(struct metadump_struct *md, u8 *dst,
+			struct extent_buffer *src)
+{
+	size_t keep;
+	size_t gap_start;
+	size_t gap_len;
+	u32 nritems;
+
+	memcpy(dst, src->data, src->len);
+	if (src->start == BTRFS_SUPER_INFO_OFFSET)
+		return;
+
+	nritems = btrfs_header_nritems(src);
+
+	if (nritems == 0) {
+		keep = sizeof(struct btrfs_header);
+		memset(dst + keep, 0, src->len - keep);
+	} else if (btrfs_header_level(src) == 0) {
+		gap_start = btrfs_item_nr_offset(nritems);
+		gap_len = btrfs_leaf_data(src) +
+			  btrfs_item_offset_nr(src, nritems - 1) -
+			  gap_start;
+		memset(dst + gap_start, 0, gap_len);
+		zero_items(md, dst, src);
+	} else {
+		keep = offsetof(struct btrfs_node, ptrs) +
+		       sizeof(struct btrfs_key_ptr) * nritems;
+		memset(dst + keep, 0, src->len - keep);
+	}
+
+	csum_block(dst, src->len);
+}
+
+/*
+ * Worker thread body for the dump side.
+ *
+ * Takes async_work entries off md->list, compresses their buffer with
+ * zlib when md->compress_level > 0 (replacing async->buffer and
+ * async->bufsize with the compressed data) and then bumps md->num_ready.
+ * The thread exits once md->done is set and the list is empty, or after
+ * recording -ENOMEM in md->error when the output buffer cannot be
+ * allocated.
+ */
+ static void *dump_worker(void *data)
+{
+	struct metadump_struct *md = (struct metadump_struct *)data;
+	struct async_work *work;
+	int zret;
+
+	for (;;) {
+		pthread_mutex_lock(&md->mutex);
+		while (list_empty(&md->list)) {
+			if (md->done) {
+				pthread_mutex_unlock(&md->mutex);
+				pthread_exit(NULL);
+			}
+			pthread_cond_wait(&md->cond, &md->mutex);
+		}
+		work = list_entry(md->list.next, struct async_work, list);
+		list_del_init(&work->list);
+		pthread_mutex_unlock(&md->mutex);
+
+		if (md->compress_level > 0) {
+			u8 *raw = work->buffer;
+
+			work->bufsize = compressBound(work->size);
+			work->buffer = malloc(work->bufsize);
+			if (!work->buffer) {
+				fprintf(stderr, "Error allocing buffer\n");
+				pthread_mutex_lock(&md->mutex);
+				/* keep the first error that was recorded */
+				if (!md->error)
+					md->error = -ENOMEM;
+				pthread_mutex_unlock(&md->mutex);
+				pthread_exit(NULL);
+			}
+
+			zret = compress2(work->buffer,
+					 (unsigned long *)&work->bufsize,
+					 raw, work->size, md->compress_level);
+			if (zret != Z_OK)
+				work->error = 1;
+
+			free(raw);
+		}
+
+		pthread_mutex_lock(&md->mutex);
+		md->num_ready++;
+		pthread_mutex_unlock(&md->mutex);
+	}
+}
+
+/*
+ * Reset the in-memory cluster of @md so a new cluster beginning at file
+ * offset @start can be filled: item counters go back to zero and the
+ * cluster header gets its magic, offset, an item count of zero and the
+ * compression type derived from md->compress_level.
+ */
+ static void meta_cluster_init(struct metadump_struct *md, u64 start)
+{
+	struct meta_cluster_header *hdr = &md->cluster->header;
+
+	md->num_items = 0;
+	md->num_ready = 0;
+
+	hdr->magic = cpu_to_le64(HEADER_MAGIC);
+	hdr->bytenr = cpu_to_le64(start);
+	hdr->nritems = cpu_to_le32(0);
+	if (md->compress_level > 0)
+		hdr->compress = COMPRESS_ZLIB;
+	else
+		hdr->compress = COMPRESS_NONE;
+}
+
+/*
+ * Tear down @md: tell the workers to finish, join the first @num_threads
+ * entries of md->threads, destroy the synchronization objects and free the
+ * name cache, the thread array and the cluster buffer.
+ */
+ static void metadump_destroy(struct metadump_struct *md, int num_threads)
+{
+	struct rb_node *node;
+	int t;
+
+	/* wake every worker so it can notice md->done and exit */
+	pthread_mutex_lock(&md->mutex);
+	md->done = 1;
+	pthread_cond_broadcast(&md->cond);
+	pthread_mutex_unlock(&md->mutex);
+
+	for (t = 0; t < num_threads; t++)
+		pthread_join(md->threads[t], NULL);
+
+	pthread_cond_destroy(&md->cond);
+	pthread_mutex_destroy(&md->mutex);
+
+	for (node = rb_first(&md->name_tree); node;
+	     node = rb_first(&md->name_tree)) {
+		struct name *rec = rb_entry(node, struct name, n);
+
+		rb_erase(node, &md->name_tree);
+		free(rec->val);
+		free(rec->sub);
+		free(rec);
+	}
+
+	free(md->threads);
+	free(md->cluster);
+}
+
+/*
+ * Initialize @md for dumping @root to @out and start @num_threads worker
+ * threads running dump_worker().
+ *
+ * @compress_level: zlib level, > 0 enables compression
+ * @sanitize_names: name sanitizing mode; values > 1 need crc32c set up
+ *
+ * Returns 0 on success, -ENOMEM if a buffer cannot be allocated, or the
+ * (positive) error number returned by pthread_create().  On a thread
+ * creation failure everything set up so far is torn down again.
+ */
+ static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
+			 FILE *out, int num_threads, int compress_level,
+			 int sanitize_names)
+{
+	int i, ret = 0;
+
+	memset(md, 0, sizeof(*md));
+	md->cluster = calloc(1, BLOCK_SIZE);
+	if (!md->cluster)
+		return -ENOMEM;
+	md->threads = calloc(num_threads, sizeof(pthread_t));
+	/*
+	 * calloc() is allowed to return NULL for a zero-sized request, so a
+	 * NULL result only means failure when threads were asked for.
+	 */
+	if (!md->threads && num_threads != 0) {
+		free(md->cluster);
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&md->list);
+	INIT_LIST_HEAD(&md->ordered);
+	md->root = root;
+	md->out = out;
+	md->pending_start = (u64)-1;
+	md->compress_level = compress_level;
+	md->sanitize_names = sanitize_names;
+	if (sanitize_names > 1)
+		crc32c_optimization_init();
+
+	md->name_tree.rb_node = NULL;
+	md->num_threads = num_threads;
+	pthread_cond_init(&md->cond, NULL);
+	pthread_mutex_init(&md->mutex, NULL);
+	meta_cluster_init(md, 0);
+
+	if (!num_threads)
+		return 0;
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(md->threads + i, NULL, dump_worker, md);
+		if (ret)
+			break;
+	}
+
+	/*
+	 * Creation of thread i failed, so only threads 0 .. i-1 exist.
+	 * Joining one more would operate on a thread id that was never
+	 * initialized.
+	 */
+	if (ret)
+		metadump_destroy(md, i);
+
+	return ret;
+}
+
+/*
+ * Write @size zero bytes to @out as a single fwrite() item.  The zeroes
+ * come from a static BLOCK_SIZE buffer.
+ *
+ * Returns the fwrite() result: 1 when the item was written.
+ */
+ static int write_zero(FILE *out, size_t size)
+{
+	static char zeroes[BLOCK_SIZE];
+	size_t items;
+
+	items = fwrite(zeroes, size, 1, out);
+	return items;
+}
+
+/*
+ * Write the current cluster (index block followed by all queued buffers)
+ * to md->out.
+ *
+ * Called with md->mutex held; the mutex is dropped while sleeping for the
+ * workers to finish.  On return *@next holds the file offset following the
+ * cluster, rounded up to the next block boundary (the index-block write
+ * failure path returns early without setting it).
+ *
+ * Returns 0 on success, the error recorded by a worker thread, or -EIO
+ * when writing fails.
+ */
+ static int write_buffers(struct metadump_struct *md, u64 *next)
+{
+	struct meta_cluster_header *header = &md->cluster->header;
+	struct meta_cluster_item *item;
+	struct async_work *async;
+	u64 bytenr = 0;
+	u32 nritems = 0;
+	int ret;
+	int err = 0;
+
+	if (list_empty(&md->ordered))
+		goto out;
+
+	/* wait until all buffers are compressed */
+	while (!err && md->num_items > md->num_ready) {
+		struct timespec ts = {
+			.tv_sec = 0,
+			.tv_nsec = 10000000,
+		};
+		pthread_mutex_unlock(&md->mutex);
+		nanosleep(&ts, NULL);
+		pthread_mutex_lock(&md->mutex);
+		err = md->error;
+	}
+
+	if (err) {
+		/* md->error holds a negative errno, strerror() wants it positive */
+		fprintf(stderr, "One of the threads errored out %s\n",
+				strerror(-err));
+		goto out;
+	}
+
+	/* setup and write index block */
+	list_for_each_entry(async, &md->ordered, ordered) {
+		item = md->cluster->items + nritems;
+		item->bytenr = cpu_to_le64(async->start);
+		item->size = cpu_to_le32(async->bufsize);
+		nritems++;
+	}
+	header->nritems = cpu_to_le32(nritems);
+
+	ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
+	if (ret != 1) {
+		fprintf(stderr, "Error writing out cluster: %d\n", errno);
+		return -EIO;
+	}
+
+	/* write buffers */
+	bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
+	while (!list_empty(&md->ordered)) {
+		async = list_entry(md->ordered.next, struct async_work,
+				   ordered);
+		list_del_init(&async->ordered);
+
+		bytenr += async->bufsize;
+		/*
+		 * After the first failure the remaining buffers are only
+		 * released; the error is reported once.
+		 */
+		if (!err) {
+			ret = fwrite(async->buffer, async->bufsize, 1,
+				     md->out);
+			if (ret != 1) {
+				err = -EIO;
+				fprintf(stderr,
+					"Error writing out cluster: %d\n",
+					errno);
+			}
+		}
+
+		free(async->buffer);
+		free(async);
+	}
+
+	/* zero unused space in the last block */
+	if (!err && bytenr & BLOCK_MASK) {
+		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
+
+		bytenr += size;
+		ret = write_zero(md->out, size);
+		if (ret != 1) {
+			fprintf(stderr, "Error zeroing out buffer: %d\n",
+				errno);
+			err = -EIO;
+		}
+	}
+out:
+	*next = bytenr;
+	return err;
+}
+
+/*
+ * Fill async->buffer with async->size bytes of data starting at logical
+ * address async->start.
+ *
+ * Reading proceeds piecewise via read_extent_data(); when a read fails the
+ * next mirror is tried, continuing from the position reached so far.
+ *
+ * Returns 0 when everything was read, -EIO otherwise.
+ */
+ static int read_data_extent(struct metadump_struct *md,
+			    struct async_work *async)
+{
+	struct btrfs_root *root = md->root;
+	u64 remaining = async->size;
+	u64 logical = async->start;
+	u64 filled = 0;
+	u64 piece;
+	int num_copies;
+	int mirror;
+	int ret;
+
+	num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, logical,
+				      remaining);
+
+	/* Try our best to read data, just like read_tree_block() */
+	for (mirror = 0; mirror < num_copies && remaining; mirror++) {
+		while (remaining) {
+			piece = remaining;
+			ret = read_extent_data(root,
+					(char *)(async->buffer + filled),
+					logical, &piece, mirror);
+			if (ret < 0)
+				break;
+			filled += piece;
+			logical += piece;
+			remaining -= piece;
+		}
+	}
+
+	return remaining ? -EIO : 0;
+}
+
+/*
+ * Return the file descriptor of the first device on the device list of
+ * the filesystem that @root belongs to.
+ */
+ static int get_dev_fd(struct btrfs_root *root)
+{
+	struct list_head *devices = &root->fs_info->fs_devices->devices;
+	struct btrfs_device *first;
+
+	first = list_first_entry(devices, struct btrfs_device, dev_list);
+	return first->fd;
+}
+
+/*
+ * Turn the pending extent of @md (if any) into an async_work item, queue
+ * it, and write out the cluster once it is full or when @done is set.
+ *
+ * The pending range is read as raw data (md->data set), straight from the
+ * device (super block), or block by block through read_tree_block() and
+ * copy_buffer() for metadata.
+ *
+ * Returns 0 on success, -ENOMEM / -EIO or the error from
+ * read_data_extent() / write_buffers() on failure.
+ */
+ static int flush_pending(struct metadump_struct *md, int done)
+{
+	struct async_work *async = NULL;
+	struct extent_buffer *eb;
+	u64 blocksize = md->root->nodesize;
+	u64 start;
+	u64 size;
+	size_t offset;
+	int ret = 0;
+
+	if (md->pending_size) {
+		async = calloc(1, sizeof(*async));
+		if (!async)
+			return -ENOMEM;
+
+		async->start = md->pending_start;
+		async->size = md->pending_size;
+		async->bufsize = async->size;
+		async->buffer = malloc(async->bufsize);
+		if (!async->buffer) {
+			free(async);
+			return -ENOMEM;
+		}
+		offset = 0;
+		start = async->start;
+		size = async->size;
+
+		if (md->data) {
+			ret = read_data_extent(md, async);
+			if (ret)
+				goto drop_async;
+		}
+
+		/*
+		 * Balance can make the mapping not cover the super block, so
+		 * just copy directly from one of the devices.
+		 */
+		if (start == BTRFS_SUPER_INFO_OFFSET) {
+			int fd = get_dev_fd(md->root);
+
+			ret = pread64(fd, async->buffer, size, start);
+			/*
+			 * Check for failure before comparing against the
+			 * unsigned size: a plain "ret < size" converts -1 to
+			 * a huge value and lets the error slip through.
+			 */
+			if (ret < 0 || (u64)ret < size) {
+				fprintf(stderr, "Error reading superblock\n");
+				ret = -EIO;
+				goto drop_async;
+			}
+			size = 0;
+			ret = 0;
+		}
+
+		while (!md->data && size > 0) {
+			u64 this_read = min(blocksize, size);
+			eb = read_tree_block(md->root, start, this_read, 0);
+			if (!extent_buffer_uptodate(eb)) {
+				fprintf(stderr,
+					"Error reading metadata block\n");
+				ret = -EIO;
+				goto drop_async;
+			}
+			copy_buffer(md, async->buffer + offset, eb);
+			free_extent_buffer(eb);
+			start += this_read;
+			offset += this_read;
+			size -= this_read;
+		}
+
+		md->pending_start = (u64)-1;
+		md->pending_size = 0;
+	} else if (!done) {
+		return 0;
+	}
+
+	pthread_mutex_lock(&md->mutex);
+	if (async) {
+		list_add_tail(&async->ordered, &md->ordered);
+		md->num_items++;
+		if (md->compress_level > 0) {
+			/* hand the buffer to a worker for compression */
+			list_add_tail(&async->list, &md->list);
+			pthread_cond_signal(&md->cond);
+		} else {
+			md->num_ready++;
+		}
+	}
+	if (md->num_items >= ITEMS_PER_CLUSTER || done) {
+		ret = write_buffers(md, &start);
+		if (ret)
+			fprintf(stderr, "Error writing buffers %d\n",
+				errno);
+		else
+			meta_cluster_init(md, start);
+	}
+	pthread_mutex_unlock(&md->mutex);
+	return ret;
+
+drop_async:
+	/* the work item never got queued, release it here */
+	free(async->buffer);
+	free(async);
+	return ret;
+}
+
+/*
+ * Append the extent [@start, @start + @size) to the pending range of @md.
+ *
+ * The pending range is flushed first when the new extent is of a different
+ * kind (@data), would push the range past MAX_PENDING_SIZE, or does not
+ * start right where the pending range ends.
+ *
+ * Returns 0 on success or the error from flush_pending().
+ */
+ static int add_extent(u64 start, u64 size, struct metadump_struct *md,
+		      int data)
+{
+	int kind_changed = md->data != data;
+	int too_big = md->pending_size + size > MAX_PENDING_SIZE;
+	int not_adjacent = md->pending_start + md->pending_size != start;
+
+	if (kind_changed || too_big || not_adjacent) {
+		int ret = flush_pending(md, 0);
+
+		if (ret)
+			return ret;
+		md->pending_start = start;
+	}
+
+	readahead_tree_block(md->root, start, size, 0);
+	md->pending_size += size;
+	md->data = data;
+	return 0;
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+/*
+ * V0 extent tree helper: starting after the current slot of @path, scan
+ * the items belonging to @bytenr for a V0 extent ref.
+ *
+ * Returns 1 when the first such ref has an objectid below
+ * BTRFS_FIRST_FREE_OBJECTID, 0 when it does not or no ref is found, and a
+ * negative error from btrfs_next_leaf().  @path is advanced as a side
+ * effect.
+ */
+ static int is_tree_block(struct btrfs_root *extent_root,
+			 struct btrfs_path *path, u64 bytenr)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_extent_ref_v0 *ref_item;
+	struct btrfs_key key;
+	int ret;
+
+	for (;;) {
+		path->slots[0]++;
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(extent_root, path);
+			if (ret < 0)
+				return ret;
+			if (ret > 0)
+				return 0;
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid != bytenr)
+			return 0;
+		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
+			continue;
+
+		/* only the first ref of the extent is looked at */
+		ref_item = btrfs_item_ptr(leaf, path->slots[0],
+					  struct btrfs_extent_ref_v0);
+		if (btrfs_ref_objectid_v0(leaf, ref_item) <
+		    BTRFS_FIRST_FREE_OBJECTID)
+			return 1;
+		return 0;
+	}
+}
+#endif
+
+/*
+ * Queue tree block @eb for dumping and recurse into the blocks below it.
+ *
+ * Nodes are followed through their block pointers.  Leaves are only
+ * descended into when @root_tree is set, in which case every ROOT_ITEM is
+ * followed to the root block it points at (that subtree is then walked
+ * with root_tree cleared).
+ *
+ * Returns 0 on success, -EIO if a block cannot be read, or the error from
+ * add_extent().
+ */
+ static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
+			    struct metadump_struct *metadump, int root_tree)
+{
+	struct extent_buffer *child;
+	struct btrfs_root_item *ri;
+	struct btrfs_key key;
+	const char *read_err;
+	u64 bytenr;
+	int child_root_tree;
+	int level;
+	int nritems;
+	int i;
+	int ret;
+
+	ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0);
+	if (ret) {
+		fprintf(stderr, "Error adding metadata block\n");
+		return ret;
+	}
+
+	level = btrfs_header_level(eb);
+	if (level == 0 && !root_tree)
+		return 0;
+
+	nritems = btrfs_header_nritems(eb);
+	for (i = 0; i < nritems; i++) {
+		if (level == 0) {
+			btrfs_item_key_to_cpu(eb, &key, i);
+			if (key.type != BTRFS_ROOT_ITEM_KEY)
+				continue;
+			ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
+			bytenr = btrfs_disk_root_bytenr(eb, ri);
+			read_err = "Error reading log root block\n";
+			child_root_tree = 0;
+		} else {
+			bytenr = btrfs_node_blockptr(eb, i);
+			read_err = "Error reading log block\n";
+			child_root_tree = root_tree;
+		}
+
+		child = read_tree_block(root, bytenr, root->leafsize, 0);
+		if (!extent_buffer_uptodate(child)) {
+			fprintf(stderr, "%s", read_err);
+			return -EIO;
+		}
+		ret = copy_tree_blocks(root, child, metadump,
+				       child_root_tree);
+		free_extent_buffer(child);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Queue all blocks of the log trees, if the super block records a log
+ * root.  @path is not used.
+ *
+ * Returns 0 when there is no log or on success, -EIO when a log root is
+ * recorded but the log root tree was not set up, or the error from
+ * copy_tree_blocks().
+ */
+ static int copy_log_trees(struct btrfs_root *root,
+			  struct metadump_struct *metadump,
+			  struct btrfs_path *path)
+{
+	struct btrfs_root *log_root;
+
+	if (btrfs_super_log_root(root->fs_info->super_copy) == 0)
+		return 0;
+
+	log_root = root->fs_info->log_root_tree;
+	if (!log_root || !log_root->node) {
+		fprintf(stderr, "Error copying tree log, it wasn't setup\n");
+		return -EIO;
+	}
+
+	return copy_tree_blocks(root, log_root->node, metadump, 1);
+}
+
+/*
+ * Queue the data extents referenced by regular file extent items in the
+ * tree root (these back the free space cache) for dumping as data.
+ *
+ * The whole tree root is scanned from the lowest EXTENT_DATA key on;
+ * items of other types and non-regular extents are skipped.
+ *
+ * Returns 0 on success or a negative error from the tree search, leaf
+ * iteration or add_extent().
+ */
+ static int copy_space_cache(struct btrfs_root *root,
+			    struct metadump_struct *metadump,
+			    struct btrfs_path *path)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	u64 disk_bytenr, disk_len;
+	int ret;
+
+	root = root->fs_info->tree_root;
+
+	key.objectid = 0;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching for free space inode %d\n",
+			ret);
+		return ret;
+	}
+
+	leaf = path->nodes[0];
+
+	/* every pass that reaches the end or hits 'continue' moves one slot on */
+	for (;; path->slots[0]++) {
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0) {
+				fprintf(stderr, "Error going to next leaf "
+					"%d\n", ret);
+				return ret;
+			}
+			if (ret > 0)
+				break;
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) !=
+		    BTRFS_FILE_EXTENT_REG)
+			continue;
+
+		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		disk_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		ret = add_extent(disk_bytenr, disk_len, metadump, 1);
+		if (ret) {
+			fprintf(stderr, "Error adding space cache blocks %d\n",
+				ret);
+			btrfs_release_path(path);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the extent tree and queue every extent flagged as a tree block for
+ * dumping.
+ *
+ * The scan starts right behind the primary super block.  After an extent
+ * item has been handled, 'bytenr' is moved past it and the slot is then
+ * advanced by the "key.objectid < bytenr" test at the top of the loop, so
+ * a zero-length extent would never be stepped over; such an item is
+ * rejected as corruption.
+ *
+ * Returns 0 on success or a negative error.  @path is released before
+ * returning, except when the initial search fails.
+ */
+ static int copy_from_extent_tree(struct metadump_struct *metadump,
+				 struct btrfs_path *path)
+{
+	struct btrfs_root *extent_root;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_key key;
+	u64 bytenr;
+	u64 num_bytes;
+	int ret;
+
+	extent_root = metadump->root->fs_info->extent_root;
+	bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching extent root %d\n", ret);
+		return ret;
+	}
+	ret = 0;
+
+	leaf = path->nodes[0];
+
+	while (1) {
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(extent_root, path);
+			if (ret < 0) {
+				fprintf(stderr, "Error going to next leaf %d"
+					"\n", ret);
+				break;
+			}
+			if (ret > 0) {
+				ret = 0;
+				break;
+			}
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid < bytenr ||
+		    (key.type != BTRFS_EXTENT_ITEM_KEY &&
+		     key.type != BTRFS_METADATA_ITEM_KEY)) {
+			path->slots[0]++;
+			continue;
+		}
+
+		bytenr = key.objectid;
+		if (key.type == BTRFS_METADATA_ITEM_KEY)
+			num_bytes = extent_root->leafsize;
+		else
+			num_bytes = key.offset;
+
+		/* without this the loop would revisit the same item forever */
+		if (num_bytes == 0) {
+			fprintf(stderr, "Error: extent length 0 at bytenr "
+				"%llu key type %d\n",
+				(unsigned long long)bytenr, key.type);
+			ret = -EIO;
+			break;
+		}
+
+		if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
+			ei = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_extent_item);
+			if (btrfs_extent_flags(leaf, ei) &
+			    BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+				ret = add_extent(bytenr, num_bytes, metadump,
+						 0);
+				if (ret) {
+					fprintf(stderr, "Error adding block "
+						"%d\n", ret);
+					break;
+				}
+			}
+		} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			ret = is_tree_block(extent_root, path, bytenr);
+			if (ret < 0) {
+				fprintf(stderr, "Error checking tree block "
+					"%d\n", ret);
+				break;
+			}
+
+			if (ret) {
+				ret = add_extent(bytenr, num_bytes, metadump,
+						 0);
+				if (ret) {
+					fprintf(stderr, "Error adding block "
+						"%d\n", ret);
+					break;
+				}
+			}
+			ret = 0;
+#else
+			fprintf(stderr, "Either extent tree corruption or "
+				"you haven't built with V0 support\n");
+			ret = -EIO;
+			break;
+#endif
+		}
+		bytenr += num_bytes;
+	}
+
+	btrfs_release_path(path);
+
+	return ret;
+}
+
+/*
+ * Create a metadata image of the filesystem at @input and write it to
+ * @out.
+ *
+ * @num_threads:    number of compression worker threads
+ * @compress_level: zlib level, > 0 enables compression
+ * @sanitize:       name sanitizing mode passed to metadump_init()
+ * @walk_trees:     when set, collect blocks by walking the chunk and root
+ *                  trees instead of scanning the extent tree
+ *
+ * Whatever has been queued is always flushed and the dump state torn down
+ * before returning.  Returns 0 on success; otherwise the first error that
+ * occurred, or the close_ctree() result if nothing else failed.
+ */
+ static int create_metadump(const char *input, FILE *out, int num_threads,
+			   int compress_level, int sanitize, int walk_trees)
+{
+	struct btrfs_root *root;
+	struct btrfs_path *path = NULL;
+	struct metadump_struct metadump;
+	int ret;
+	int err = 0;
+
+	root = open_ctree(input, 0, 0);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		return -EIO;
+	}
+
+	BUG_ON(root->nodesize != root->leafsize);
+
+	ret = metadump_init(&metadump, root, out, num_threads,
+			    compress_level, sanitize);
+	if (ret) {
+		fprintf(stderr, "Error initing metadump %d\n", ret);
+		close_ctree(root);
+		return ret;
+	}
+
+	ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
+			&metadump, 0);
+	if (ret) {
+		fprintf(stderr, "Error adding metadata %d\n", ret);
+		err = ret;
+		goto out;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Out of memory allocing path\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (walk_trees) {
+		ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
+				       &metadump, 1);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
+
+		ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
+				       &metadump, 1);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
+	} else {
+		ret = copy_from_extent_tree(&metadump, path);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
+	}
+
+	ret = copy_log_trees(root, &metadump, path);
+	if (ret) {
+		err = ret;
+		goto out;
+	}
+
+	ret = copy_space_cache(root, &metadump, path);
+	/* record the failure now, 'ret' is reused by the flush below */
+	if (ret)
+		err = ret;
+out:
+	ret = flush_pending(&metadump, 1);
+	if (ret) {
+		if (!err)
+			err = ret;
+		fprintf(stderr, "Error flushing pending %d\n", ret);
+	}
+
+	metadump_destroy(&metadump, num_threads);
+
+	btrfs_free_path(path);
+	ret = close_ctree(root);
+	return err ? err : ret;
+}
+
+/*
+ * Rewrite the super block in @buffer for an old-style restore: set the
+ * METADUMP flag and replace the system chunk array with a single chunk
+ * item at key offset 0 with length (u64)-1, one stripe at offset 0 on the
+ * super block's own device.  The checksum is recomputed at the end.
+ */
+ static void update_super_old(u8 *buffer)
+{
+	struct btrfs_super_block *sb = (struct btrfs_super_block *)buffer;
+	struct btrfs_disk_key *dkey;
+	struct btrfs_chunk *sys_chunk;
+	u32 sectorsize = btrfs_super_sectorsize(sb);
+	u64 flags = btrfs_super_flags(sb);
+
+	btrfs_set_super_flags(sb, flags | BTRFS_SUPER_FLAG_METADUMP);
+
+	/* the array holds one key immediately followed by one chunk */
+	dkey = (struct btrfs_disk_key *)(sb->sys_chunk_array);
+	sys_chunk = (struct btrfs_chunk *)(sb->sys_chunk_array +
+					   sizeof(struct btrfs_disk_key));
+
+	btrfs_set_disk_key_objectid(dkey, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+	btrfs_set_disk_key_type(dkey, BTRFS_CHUNK_ITEM_KEY);
+	btrfs_set_disk_key_offset(dkey, 0);
+
+	btrfs_set_stack_chunk_length(sys_chunk, (u64)-1);
+	btrfs_set_stack_chunk_owner(sys_chunk, BTRFS_EXTENT_TREE_OBJECTID);
+	btrfs_set_stack_chunk_stripe_len(sys_chunk, BTRFS_STRIPE_LEN);
+	btrfs_set_stack_chunk_type(sys_chunk, BTRFS_BLOCK_GROUP_SYSTEM);
+	btrfs_set_stack_chunk_io_align(sys_chunk, sectorsize);
+	btrfs_set_stack_chunk_io_width(sys_chunk, sectorsize);
+	btrfs_set_stack_chunk_sector_size(sys_chunk, sectorsize);
+	btrfs_set_stack_chunk_num_stripes(sys_chunk, 1);
+	btrfs_set_stack_chunk_sub_stripes(sys_chunk, 0);
+
+	sys_chunk->stripe.devid = sb->dev_item.devid;
+	btrfs_set_stack_stripe_offset(&sys_chunk->stripe, 0);
+	memcpy(sys_chunk->stripe.dev_uuid, sb->dev_item.uuid,
+	       BTRFS_UUID_SIZE);
+
+	btrfs_set_super_sys_array_size(sb,
+				       sizeof(*dkey) + sizeof(*sys_chunk));
+	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
+}
+
+/*
+ * Rewrite the super block in @buffer for restoring onto a single device.
+ *
+ * Every chunk in the system chunk array is compacted in place into a
+ * single-stripe SYSTEM chunk on the super block's own device, with the
+ * stripe offset taken from logical_to_physical().  The METADUMP_V2 flag is
+ * set, the cache generation is cleared when mdres->clear_space_cache is
+ * set, and the checksum is recomputed.
+ *
+ * Returns 0 on success, -EIO when the array holds a key that is not a
+ * chunk item.
+ */
+ static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
+{
+	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
+	struct btrfs_chunk *chunk;
+	struct btrfs_disk_key *disk_key;
+	struct btrfs_key key;
+	u64 flags = btrfs_super_flags(super);
+	u32 new_array_size = 0;
+	u32 array_size;
+	u32 cur = 0;
+	u8 *rd, *wr;
+	int old_num_stripes;
+
+	/* entries shrink or keep their size, so the write side never passes the read side */
+	wr = rd = super->sys_chunk_array;
+	array_size = btrfs_super_sys_array_size(super);
+
+	while (cur < array_size) {
+		u64 physical, size = 0;
+
+		disk_key = (struct btrfs_disk_key *)rd;
+		btrfs_disk_key_to_cpu(&key, disk_key);
+
+		new_array_size += sizeof(*disk_key);
+		memmove(wr, rd, sizeof(*disk_key));
+
+		wr += sizeof(*disk_key);
+		rd += sizeof(*disk_key);
+		cur += sizeof(*disk_key);
+
+		if (key.type != BTRFS_CHUNK_ITEM_KEY) {
+			fprintf(stderr, "Bogus key in the sys chunk array "
+				"%d\n", key.type);
+			return -EIO;
+		}
+
+		/* remember the original stripe count before it is overwritten */
+		old_num_stripes = btrfs_stack_chunk_num_stripes(
+						(struct btrfs_chunk *)rd);
+		chunk = (struct btrfs_chunk *)wr;
+
+		memmove(wr, rd, sizeof(*chunk));
+		btrfs_set_stack_chunk_num_stripes(chunk, 1);
+		btrfs_set_stack_chunk_sub_stripes(chunk, 0);
+		btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
+		chunk->stripe.devid = super->dev_item.devid;
+		physical = logical_to_physical(mdres, key.offset, &size);
+		if (size != (u64)-1)
+			btrfs_set_stack_stripe_offset(&chunk->stripe,
+						      physical);
+		memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
+		       BTRFS_UUID_SIZE);
+		new_array_size += sizeof(*chunk);
+
+		wr += sizeof(*chunk);
+		rd += btrfs_chunk_item_size(old_num_stripes);
+		cur += btrfs_chunk_item_size(old_num_stripes);
+	}
+
+	if (mdres->clear_space_cache)
+		btrfs_set_super_cache_generation(super, 0);
+
+	flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
+	btrfs_set_super_flags(super, flags);
+	btrfs_set_super_sys_array_size(super, new_array_size);
+	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed, stand-alone extent buffer with @size bytes of data
+ * space, describing the block at @bytenr.  The result is released with
+ * plain free().
+ *
+ * Returns NULL when the allocation fails.
+ */
+ static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
+{
+	struct extent_buffer *eb;
+
+	eb = calloc(1, sizeof(struct extent_buffer) + size);
+	if (eb) {
+		eb->start = bytenr;
+		eb->len = size;
+	}
+	return eb;
+}
+
+/*
+ * Resize the item at @slot of leaf @eb to @new_size, dropping bytes from
+ * the end of its data.
+ *
+ * The data offsets of @slot and of all items after it are shifted by the
+ * size difference, and the item data from the end of the data area up to
+ * the kept part of @slot is moved accordingly.  Nothing happens when the
+ * size is unchanged.
+ */
+ static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
+{
+	struct btrfs_item *item;
+	u32 nritems;
+	u32 old_size;
+	u32 slot_data_start;
+	u32 shrink;
+	u32 data_end;
+	int i;
+
+	old_size = btrfs_item_size_nr(eb, slot);
+	if (old_size == new_size)
+		return;
+
+	/* sample the layout before any offset gets rewritten */
+	nritems = btrfs_header_nritems(eb);
+	data_end = btrfs_item_offset_nr(eb, nritems - 1);
+	slot_data_start = btrfs_item_offset_nr(eb, slot);
+	shrink = old_size - new_size;
+
+	for (i = slot; i < nritems; i++) {
+		item = btrfs_item_nr(i);
+		btrfs_set_item_offset(eb, item,
+				      btrfs_item_offset(eb, item) + shrink);
+	}
+
+	memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + shrink,
+			      btrfs_leaf_data(eb) + data_end,
+			      slot_data_start + new_size - data_end);
+
+	btrfs_set_item_size(eb, btrfs_item_nr(slot), new_size);
+}
+
+/*
+ * Rewrite chunk items in chunk tree leaves found in @buffer so they
+ * describe a single-device layout.
+ *
+ * @buffer holds @size bytes that start at logical address async->start.
+ * It is processed in mdres->leafsize steps; nothing is done unless @size
+ * is a multiple of the leaf size.  Processing stops at the first block
+ * whose header bytenr or fsid does not match.  In chunk tree leaves every
+ * chunk item is truncated to one stripe, its RAID profile bits are
+ * dropped, and the stripe is pointed at mdres->devid / mdres->uuid at the
+ * offset given by logical_to_physical(); the block checksum is then
+ * recomputed.
+ *
+ * Returns 0, or -ENOMEM when the scratch extent buffer cannot be
+ * allocated.
+ */
+ static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
+				  struct async_work *async, u8 *buffer,
+				  size_t size)
+{
+	struct extent_buffer *eb;
+	size_t size_left = size;
+	u64 bytenr = async->start;
+	int i;
+
+	if (size_left % mdres->leafsize)
+		return 0;
+
+	eb = alloc_dummy_eb(bytenr, mdres->leafsize);
+	if (!eb)
+		return -ENOMEM;
+
+	for (; size_left; size_left -= mdres->leafsize,
+			  buffer += mdres->leafsize,
+			  bytenr += mdres->leafsize) {
+		eb->start = bytenr;
+		memcpy(eb->data, buffer, mdres->leafsize);
+
+		/* not a tree block of this filesystem: stop looking */
+		if (btrfs_header_bytenr(eb) != bytenr)
+			break;
+		if (memcmp(mdres->fsid,
+			   eb->data + offsetof(struct btrfs_header, fsid),
+			   BTRFS_FSID_SIZE))
+			break;
+
+		/* only leaves of the chunk tree need fixing */
+		if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID ||
+		    btrfs_header_level(eb) != 0)
+			continue;
+
+		for (i = 0; i < btrfs_header_nritems(eb); i++) {
+			struct btrfs_chunk chunk;
+			struct btrfs_key key;
+			u64 type, physical, avail;
+
+			btrfs_item_key_to_cpu(eb, &key, i);
+			if (key.type != BTRFS_CHUNK_ITEM_KEY)
+				continue;
+			truncate_item(eb, i, sizeof(chunk));
+			read_extent_buffer(eb, &chunk,
+					   btrfs_item_ptr_offset(eb, i),
+					   sizeof(chunk));
+
+			avail = 0;
+			physical = logical_to_physical(mdres, key.offset,
+						       &avail);
+
+			/* Zero out the RAID profile */
+			type = btrfs_stack_chunk_type(&chunk);
+			type &= (BTRFS_BLOCK_GROUP_DATA |
+				 BTRFS_BLOCK_GROUP_SYSTEM |
+				 BTRFS_BLOCK_GROUP_METADATA |
+				 BTRFS_BLOCK_GROUP_DUP);
+			btrfs_set_stack_chunk_type(&chunk, type);
+
+			btrfs_set_stack_chunk_num_stripes(&chunk, 1);
+			btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
+			btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid);
+			if (avail != (u64)-1)
+				btrfs_set_stack_stripe_offset(&chunk.stripe,
+							      physical);
+			memcpy(chunk.stripe.dev_uuid, mdres->uuid,
+			       BTRFS_UUID_SIZE);
+			write_extent_buffer(eb, &chunk,
+					    btrfs_item_ptr_offset(eb, i),
+					    sizeof(chunk));
+		}
+
+		memcpy(buffer, eb->data, eb->len);
+		csum_block(buffer, eb->len);
+	}
+
+	free(eb);
+	return 0;
+}
+
+/*
+ * Write the backup copies of the super block held in @buf to @fd.
+ *
+ * For every mirror index from 1 up, the super block's bytenr field is set
+ * to the mirror offset, the checksum recomputed and the block written
+ * there.  Mirrors that would not fit within the device size are not
+ * written, and the first failed or short write ends the loop.  Problems
+ * are only reported on stderr.
+ */
+ static void write_backup_supers(int fd, u8 *buf)
+{
+	struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
+	struct stat st;
+	u64 dev_size;
+	u64 bytenr;
+	int mirror;
+	int ret;
+
+	if (fstat(fd, &st)) {
+		fprintf(stderr, "Couldn't stat restore point, won't be able "
+			"to write backup supers: %d\n", errno);
+		return;
+	}
+
+	dev_size = btrfs_device_size(fd, &st);
+
+	for (mirror = 1; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+		bytenr = btrfs_sb_offset(mirror);
+		if (bytenr + BTRFS_SUPER_INFO_SIZE > dev_size)
+			break;
+
+		btrfs_set_super_bytenr(super, bytenr);
+		csum_block(buf, BTRFS_SUPER_INFO_SIZE);
+		ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
+		if (ret >= BTRFS_SUPER_INFO_SIZE)
+			continue;
+
+		if (ret < 0)
+			fprintf(stderr, "Problem writing out backup "
+				"super block %d, err %d\n", mirror, errno);
+		else
+			fprintf(stderr, "Short write writing out "
+				"backup super block\n");
+		break;
+	}
+}
+
+/*
+ * Worker thread body for the restore path.
+ *
+ * Repeatedly takes one async_work item off mdres->list, decompresses it when
+ * the image is zlib-compressed, applies the super block / chunk tree block
+ * fixups for single-device restores and writes the result to mdres->out (or
+ * through write_data_to_disk() when mdres->fixup_offset is set).  The first
+ * error seen is stored in mdres->error under mdres->mutex, and
+ * mdres->num_items is decremented for every item consumed.
+ *
+ * The thread leaves its loop once mdres->done is set and there is nothing
+ * it can process.  @data is the shared struct mdrestore_struct.
+ */
+static void *restore_worker(void *data)
+{
+	struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
+	struct async_work *async;
+	size_t size;
+	u8 *buffer;
+	u8 *outbuf;
+	int outfd;
+	int ret;
+	int compress_size = MAX_PENDING_SIZE * 4;
+
+	outfd = fileno(mdres->out);
+	buffer = malloc(compress_size);
+	if (!buffer) {
+		fprintf(stderr, "Error allocing buffer\n");
+		pthread_mutex_lock(&mdres->mutex);
+		if (!mdres->error)
+			mdres->error = -ENOMEM;
+		pthread_mutex_unlock(&mdres->mutex);
+		pthread_exit(NULL);
+	}
+
+	while (1) {
+		u64 bytenr;
+		off_t offset = 0;
+		int err = 0;
+
+		/* wait until the leaf size is known and an item is queued */
+		pthread_mutex_lock(&mdres->mutex);
+		while (!mdres->leafsize || list_empty(&mdres->list)) {
+			if (mdres->done) {
+				pthread_mutex_unlock(&mdres->mutex);
+				goto out;
+			}
+			pthread_cond_wait(&mdres->cond, &mdres->mutex);
+		}
+		async = list_entry(mdres->list.next, struct async_work, list);
+		list_del_init(&async->list);
+		pthread_mutex_unlock(&mdres->mutex);
+
+		if (mdres->compress_method == COMPRESS_ZLIB) {
+			size = compress_size;
+			ret = uncompress(buffer, (unsigned long *)&size,
+					 async->buffer, async->bufsize);
+			if (ret != Z_OK) {
+				fprintf(stderr, "Error decompressing %d\n",
+					ret);
+				err = -EIO;
+			}
+			outbuf = buffer;
+		} else {
+			outbuf = async->buffer;
+			size = async->bufsize;
+		}
+
+		if (!mdres->multi_devices) {
+			if (async->start == BTRFS_SUPER_INFO_OFFSET) {
+				if (mdres->old_restore) {
+					update_super_old(outbuf);
+				} else {
+					ret = update_super(mdres, outbuf);
+					if (ret)
+						err = ret;
+				}
+			} else if (!mdres->old_restore) {
+				ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
+				if (ret)
+					err = ret;
+			}
+		}
+
+		if (!mdres->fixup_offset) {
+			while (size) {
+				u64 chunk_size = size;
+				if (!mdres->multi_devices && !mdres->old_restore)
+					bytenr = logical_to_physical(mdres,
+							async->start + offset,
+							&chunk_size);
+				else
+					bytenr = async->start + offset;
+
+				ret = pwrite64(outfd, outbuf+offset, chunk_size,
+					       bytenr);
+				if (ret != chunk_size) {
+					if (ret < 0) {
+						/*
+						 * Grab errno before fprintf()
+						 * can overwrite it and store
+						 * it negated, like the other
+						 * error codes set here.
+						 */
+						err = -errno;
+						fprintf(stderr, "Error writing to "
+							"device %d\n", -err);
+					} else {
+						fprintf(stderr, "Short write\n");
+						err = -EIO;
+					}
+					break;
+				}
+				size -= chunk_size;
+				offset += chunk_size;
+			}
+		} else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
+			ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
+			if (ret) {
+				printk("Error write data\n");
+				exit(1);
+			}
+		}
+
+
+		/* backup super blocks are already there at fixup_offset stage */
+		if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
+			write_backup_supers(outfd, outbuf);
+
+		/* publish the outcome; only the first error is kept */
+		pthread_mutex_lock(&mdres->mutex);
+		if (err && !mdres->error)
+			mdres->error = err;
+		mdres->num_items--;
+		pthread_mutex_unlock(&mdres->mutex);
+
+		free(async->buffer);
+		free(async);
+	}
+out:
+	free(buffer);
+	pthread_exit(NULL);
+}
+
+/*
+ * Tear down a restore context set up by mdrestore_init().
+ *
+ * @num_threads is the number of entries of mdres->threads that hold a
+ * successfully created thread and therefore have to be joined.
+ *
+ * The workers are stopped and joined BEFORE the chunk entries are freed:
+ * a worker that is still draining the queue is handed @mdres for
+ * logical_to_physical() (which presumably walks these trees - confirm), so
+ * freeing the entries first would let it touch freed memory.
+ */
+static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
+{
+	struct rb_node *n;
+	int i;
+
+	/* tell every worker to exit once it runs out of work */
+	pthread_mutex_lock(&mdres->mutex);
+	mdres->done = 1;
+	pthread_cond_broadcast(&mdres->cond);
+	pthread_mutex_unlock(&mdres->mutex);
+
+	for (i = 0; i < num_threads; i++)
+		pthread_join(mdres->threads[i], NULL);
+
+	/* no worker is left, the chunk bookkeeping can go now */
+	while ((n = rb_first(&mdres->chunk_tree))) {
+		struct fs_chunk *entry;
+
+		entry = rb_entry(n, struct fs_chunk, l);
+		rb_erase(n, &mdres->chunk_tree);
+		rb_erase(&entry->p, &mdres->physical_tree);
+		free(entry);
+	}
+
+	pthread_cond_destroy(&mdres->cond);
+	pthread_mutex_destroy(&mdres->mutex);
+	free(mdres->threads);
+}
+
+/*
+ * Initialise a restore context and start its worker threads.
+ *
+ * @mdres is zeroed and filled from the remaining arguments; @num_threads
+ * restore_worker() threads are then created (none when @num_threads is 0).
+ *
+ * Returns 0 on success, -ENOMEM when the thread array cannot be allocated,
+ * or the non-zero value returned by pthread_create().  On failure nothing is
+ * left for the caller to clean up in @mdres.
+ */
+static int mdrestore_init(struct mdrestore_struct *mdres,
+			  FILE *in, FILE *out, int old_restore,
+			  int num_threads, int fixup_offset,
+			  struct btrfs_fs_info *info, int multi_devices)
+{
+	int i, ret = 0;
+
+	memset(mdres, 0, sizeof(*mdres));
+	pthread_cond_init(&mdres->cond, NULL);
+	pthread_mutex_init(&mdres->mutex, NULL);
+	INIT_LIST_HEAD(&mdres->list);
+	INIT_LIST_HEAD(&mdres->overlapping_chunks);
+	mdres->in = in;
+	mdres->out = out;
+	mdres->old_restore = old_restore;
+	mdres->chunk_tree.rb_node = NULL;
+	mdres->fixup_offset = fixup_offset;
+	mdres->info = info;
+	mdres->multi_devices = multi_devices;
+	mdres->clear_space_cache = 0;
+	mdres->last_physical_offset = 0;
+	mdres->alloced_chunks = 0;
+
+	if (!num_threads)
+		return 0;
+
+	mdres->num_threads = num_threads;
+	mdres->threads = calloc(num_threads, sizeof(pthread_t));
+	if (!mdres->threads) {
+		/* the caller does not call mdrestore_destroy() on failure */
+		pthread_cond_destroy(&mdres->cond);
+		pthread_mutex_destroy(&mdres->mutex);
+		return -ENOMEM;
+	}
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(mdres->threads + i, NULL, restore_worker,
+				     mdres);
+		if (ret)
+			break;
+	}
+	/*
+	 * Creation of thread i failed, so only threads [0, i) exist; joining
+	 * slot i as well would hand pthread_join() an ID that was never
+	 * created.
+	 */
+	if (ret)
+		mdrestore_destroy(mdres, i);
+	return ret;
+}
+
+/*
+ * Seed the restore context from the super block carried by @async.
+ *
+ * Does nothing when mdres->leafsize is already set.  Otherwise the item is
+ * decompressed first if the image uses zlib, and the leaf size, fsid, device
+ * uuid and devid are copied from the super block into @mdres.
+ *
+ * Returns 0 on success, -ENOMEM or -EIO on failure.
+ */
+static int fill_mdres_info(struct mdrestore_struct *mdres,
+			   struct async_work *async)
+{
+	struct btrfs_super_block *sb;
+	u8 *scratch = NULL;
+	u8 *raw = async->buffer;
+	int zret;
+
+	/* We've already been initialized */
+	if (mdres->leafsize != 0)
+		return 0;
+
+	if (mdres->compress_method == COMPRESS_ZLIB) {
+		size_t out_len = MAX_PENDING_SIZE * 2;
+
+		scratch = malloc(MAX_PENDING_SIZE * 2);
+		if (scratch == NULL)
+			return -ENOMEM;
+		zret = uncompress(scratch, (unsigned long *)&out_len,
+				  async->buffer, async->bufsize);
+		if (zret != Z_OK) {
+			fprintf(stderr, "Error decompressing %d\n", zret);
+			free(scratch);
+			return -EIO;
+		}
+		raw = scratch;
+	}
+
+	sb = (struct btrfs_super_block *)raw;
+	mdres->leafsize = btrfs_super_leafsize(sb);
+	memcpy(mdres->fsid, sb->fsid, BTRFS_FSID_SIZE);
+	memcpy(mdres->uuid, sb->dev_item.uuid, BTRFS_UUID_SIZE);
+	mdres->devid = le64_to_cpu(sb->dev_item.devid);
+
+	/* scratch is NULL for uncompressed images; free(NULL) is a no-op */
+	free(scratch);
+	return 0;
+}
+
+/*
+ * Queue every item of one image cluster for the restore workers.
+ *
+ * @cluster is the already-read cluster header block.  The payload of each
+ * item is read from mdres->in into a fresh async_work, which is appended to
+ * mdres->list (waking one worker).  An item located at
+ * BTRFS_SUPER_INFO_OFFSET is first passed to fill_mdres_info().  The padding
+ * up to the next BLOCK_SIZE boundary is consumed afterwards and the resulting
+ * image offset is stored in @next.
+ *
+ * Returns 0 on success or a negative error code; items queued before the
+ * failure stay queued.
+ */
+static int add_cluster(struct meta_cluster *cluster,
+		       struct mdrestore_struct *mdres, u64 *next)
+{
+	struct meta_cluster_header *hdr = &cluster->header;
+	struct async_work *work = NULL;
+	u64 pos;
+	u32 count;
+	u32 idx;
+	int ret;
+
+	mdres->compress_method = hdr->compress;
+	pos = le64_to_cpu(hdr->bytenr) + BLOCK_SIZE;
+	count = le32_to_cpu(hdr->nritems);
+
+	for (idx = 0; idx < count; idx++) {
+		struct meta_cluster_item *entry = &cluster->items[idx];
+
+		work = calloc(1, sizeof(*work));
+		if (work == NULL) {
+			fprintf(stderr, "Error allocating async\n");
+			return -ENOMEM;
+		}
+		work->start = le64_to_cpu(entry->bytenr);
+		work->bufsize = le32_to_cpu(entry->size);
+		work->buffer = malloc(work->bufsize);
+		if (work->buffer == NULL) {
+			fprintf(stderr, "Error allocing async buffer\n");
+			ret = -ENOMEM;
+			goto discard;
+		}
+		if (fread(work->buffer, work->bufsize, 1, mdres->in) != 1) {
+			fprintf(stderr, "Error reading buffer %d\n", errno);
+			ret = -EIO;
+			goto discard;
+		}
+		pos += work->bufsize;
+
+		pthread_mutex_lock(&mdres->mutex);
+		ret = 0;
+		if (work->start == BTRFS_SUPER_INFO_OFFSET)
+			ret = fill_mdres_info(mdres, work);
+		if (ret) {
+			fprintf(stderr, "Error setting up restore\n");
+			pthread_mutex_unlock(&mdres->mutex);
+			goto discard;
+		}
+		list_add_tail(&work->list, &mdres->list);
+		mdres->num_items++;
+		pthread_cond_signal(&mdres->cond);
+		pthread_mutex_unlock(&mdres->mutex);
+	}
+
+	/* swallow the padding that aligns the next cluster to BLOCK_SIZE */
+	if (pos & BLOCK_MASK) {
+		char padding[BLOCK_MASK];
+		size_t pad_len = BLOCK_SIZE - (pos & BLOCK_MASK);
+
+		pos += pad_len;
+		if (fread(padding, pad_len, 1, mdres->in) != 1) {
+			fprintf(stderr, "Error reading in buffer %d\n", errno);
+			return -EIO;
+		}
+	}
+	*next = pos;
+	return 0;
+
+discard:
+	/* the item was never queued, so it is still ours to free */
+	free(work->buffer);
+	free(work);
+	return ret;
+}
+
+/*
+ * Block until the workers have consumed every queued item or one of them
+ * has reported an error.
+ *
+ * mdres->error and mdres->num_items are sampled under mdres->mutex every
+ * 10 milliseconds.  Returns the value of mdres->error (0 when all items were
+ * processed without error).
+ */
+static int wait_for_worker(struct mdrestore_struct *mdres)
+{
+	const struct timespec poll_interval = {
+		.tv_sec = 0,
+		.tv_nsec = 10000000,
+	};
+	int status;
+
+	for (;;) {
+		pthread_mutex_lock(&mdres->mutex);
+		status = mdres->error;
+		if (status || !(mdres->num_items > 0)) {
+			pthread_mutex_unlock(&mdres->mutex);
+			return status;
+		}
+		pthread_mutex_unlock(&mdres->mutex);
+		nanosleep(&poll_interval, NULL);
+	}
+}
+
+/*
+ * Parse one chunk tree block found inside an image item.
+ *
+ * @buffer:         uncompressed payload of the image item
+ * @bytenr:         logical address of the tree block to parse
+ * @item_bytenr:    logical address of the first byte of @buffer
+ * @bufsize:        number of valid bytes in @buffer
+ * @cluster_bytenr: image offset of the cluster the item came from, handed
+ *                  on to search_for_chunk_blocks() for child blocks
+ *
+ * The block must carry @bytenr, the restored fsid and the chunk tree owner
+ * in its header.  For a node, every child block is looked up through
+ * search_for_chunk_blocks().  For a leaf, every CHUNK_ITEM is recorded as a
+ * struct fs_chunk in mdres->chunk_tree and either mdres->physical_tree or,
+ * when tree_search() reports an existing entry, mdres->overlapping_chunks.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
+			    u64 bytenr, u64 item_bytenr, u32 bufsize,
+			    u64 cluster_bytenr)
+{
+	struct extent_buffer *eb;
+	u64 offset;
+	int ret = 0;
+	int i;
+
+	/*
+	 * The image is untrusted input: make sure a whole, leaf-aligned block
+	 * for @bytenr really lies inside @buffer instead of stepping through
+	 * it until the addresses happen to match.
+	 */
+	if (!mdres->leafsize || bytenr < item_bytenr) {
+		fprintf(stderr, "Chunk block lies outside of the image item\n");
+		return -EIO;
+	}
+	offset = bytenr - item_bytenr;
+	if (offset % mdres->leafsize || offset > bufsize ||
+	    bufsize - offset < mdres->leafsize) {
+		fprintf(stderr, "Chunk block lies outside of the image item\n");
+		return -EIO;
+	}
+
+	eb = alloc_dummy_eb(bytenr, mdres->leafsize);
+	if (!eb) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(eb->data, buffer + offset, mdres->leafsize);
+	if (btrfs_header_bytenr(eb) != bytenr) {
+		fprintf(stderr, "Eb bytenr doesn't match found bytenr\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
+		   BTRFS_FSID_SIZE)) {
+		fprintf(stderr, "Fsid doesn't match\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
+		fprintf(stderr, "Does not belong to the chunk tree\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	for (i = 0; i < btrfs_header_nritems(eb); i++) {
+		struct btrfs_chunk chunk;
+		struct fs_chunk *fs_chunk;
+		struct btrfs_key key;
+
+		/* interior node: descend into each child block */
+		if (btrfs_header_level(eb)) {
+			u64 blockptr = btrfs_node_blockptr(eb, i);
+
+			ret = search_for_chunk_blocks(mdres, blockptr,
+						      cluster_bytenr);
+			if (ret)
+				break;
+			continue;
+		}
+
+		/* Yay a leaf!  We loves leafs! */
+		btrfs_item_key_to_cpu(eb, &key, i);
+		if (key.type != BTRFS_CHUNK_ITEM_KEY)
+			continue;
+
+		fs_chunk = malloc(sizeof(struct fs_chunk));
+		if (!fs_chunk) {
+			fprintf(stderr, "Erorr allocating chunk\n");
+			ret = -ENOMEM;
+			break;
+		}
+		memset(fs_chunk, 0, sizeof(*fs_chunk));
+		read_extent_buffer(eb, &chunk, btrfs_item_ptr_offset(eb, i),
+				   sizeof(chunk));
+
+		fs_chunk->logical = key.offset;
+		fs_chunk->physical = btrfs_stack_stripe_offset(&chunk.stripe);
+		fs_chunk->bytes = btrfs_stack_chunk_length(&chunk);
+		INIT_LIST_HEAD(&fs_chunk->list);
+		/* chunks clashing with a known one are remapped later */
+		if (tree_search(&mdres->physical_tree, &fs_chunk->p,
+				physical_cmp, 1) != NULL)
+			list_add(&fs_chunk->list, &mdres->overlapping_chunks);
+		else
+			tree_insert(&mdres->physical_tree, &fs_chunk->p,
+				    physical_cmp);
+		if (fs_chunk->physical + fs_chunk->bytes >
+		    mdres->last_physical_offset)
+			mdres->last_physical_offset = fs_chunk->physical +
+				fs_chunk->bytes;
+		mdres->alloced_chunks += fs_chunk->bytes;
+		tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
+	}
+out:
+	free(eb);
+	return ret;
+}
+
+/*
+ * Locate the image item that contains logical address @search and hand it
+ * to read_chunk_block().
+ *
+ * The image is scanned cluster by cluster starting at image offset
+ * @cluster_bytenr.  Every item payload is read (and inflated when the image
+ * is zlib-compressed); the first item whose [bytenr, bytenr + size) range
+ * covers @search ends the scan.  If the end of the image is hit after
+ * starting from a non-zero offset, the scan restarts once from offset 0.
+ *
+ * Returns 0 when the block was found and parsed, a negative error code
+ * otherwise.
+ */
+static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
+ u64 search, u64 cluster_bytenr)
+{
+ struct meta_cluster *cluster;
+ struct meta_cluster_header *header;
+ struct meta_cluster_item *item;
+ u64 current_cluster = cluster_bytenr, bytenr;
+ u64 item_bytenr;
+ u32 bufsize, nritems, i;
+ u32 max_size = MAX_PENDING_SIZE * 2;
+ u8 *buffer, *tmp = NULL;
+ int ret = 0;
+
+ cluster = malloc(BLOCK_SIZE);
+ if (!cluster) {
+ fprintf(stderr, "Error allocating cluster\n");
+ return -ENOMEM;
+ }
+
+ buffer = malloc(max_size);
+ if (!buffer) {
+ fprintf(stderr, "Error allocing buffer\n");
+ free(cluster);
+ return -ENOMEM;
+ }
+
+ if (mdres->compress_method == COMPRESS_ZLIB) { /* tmp holds the compressed bytes, buffer the inflated ones */
+ tmp = malloc(max_size);
+ if (!tmp) {
+ fprintf(stderr, "Error allocing tmp buffer\n");
+ free(cluster);
+ free(buffer);
+ return -ENOMEM;
+ }
+ }
+
+ bytenr = current_cluster;
+ while (1) {
+ if (fseek(mdres->in, current_cluster, SEEK_SET)) {
+ fprintf(stderr, "Error seeking: %d\n", errno);
+ ret = -EIO;
+ break;
+ }
+
+ ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
+ if (ret == 0) { /* no complete cluster header could be read */
+ if (cluster_bytenr != 0) { /* started mid-image: rescan once from offset 0 */
+ cluster_bytenr = 0;
+ current_cluster = 0;
+ bytenr = 0;
+ continue;
+ }
+ printf("ok this is where we screwed up?\n");
+ ret = -EIO;
+ break;
+ } else if (ret < 0) { /* NOTE(review): ret is fread()'s item count, so this branch looks unreachable */
+ fprintf(stderr, "Error reading image\n");
+ break;
+ }
+ ret = 0;
+
+ header = &cluster->header;
+ if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
+ le64_to_cpu(header->bytenr) != current_cluster) {
+ fprintf(stderr, "bad header in metadump image\n");
+ ret = -EIO;
+ break;
+ }
+
+ bytenr += BLOCK_SIZE; /* account for the cluster header block */
+ nritems = le32_to_cpu(header->nritems);
+ for (i = 0; i < nritems; i++) {
+ size_t size;
+
+ item = &cluster->items[i];
+ bufsize = le32_to_cpu(item->size);
+ item_bytenr = le64_to_cpu(item->bytenr);
+
+ if (bufsize > max_size) { /* would not fit the buffers allocated above */
+ fprintf(stderr, "item %u size %u too big\n",
+ i, bufsize);
+ ret = -EIO;
+ break;
+ }
+
+ if (mdres->compress_method == COMPRESS_ZLIB) {
+ ret = fread(tmp, bufsize, 1, mdres->in);
+ if (ret != 1) {
+ fprintf(stderr, "Error reading: %d\n",
+ errno);
+ ret = -EIO;
+ break;
+ }
+
+ size = max_size;
+ ret = uncompress(buffer,
+ (unsigned long *)&size, tmp,
+ bufsize);
+ if (ret != Z_OK) {
+ fprintf(stderr, "Error decompressing "
+ "%d\n", ret);
+ ret = -EIO;
+ break;
+ }
+ } else {
+ ret = fread(buffer, bufsize, 1, mdres->in);
+ if (ret != 1) {
+ fprintf(stderr, "Error reading: %d\n",
+ errno);
+ ret = -EIO;
+ break;
+ }
+ size = bufsize;
+ }
+ ret = 0;
+
+ if (item_bytenr <= search &&
+ item_bytenr + size > search) { /* this item covers the wanted address */
+ ret = read_chunk_block(mdres, buffer, search,
+ item_bytenr, size,
+ current_cluster);
+ if (!ret)
+ ret = 1; /* positive means "found"; mapped back to 0 below */
+ break;
+ }
+ bytenr += bufsize;
+ }
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+ if (bytenr & BLOCK_MASK) /* round up to the BLOCK_SIZE boundary used as the next cluster offset */
+ bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
+ current_cluster = bytenr;
+ }
+
+ free(tmp);
+ free(buffer);
+ free(cluster);
+ return ret;
+}
+
+/*
+ * Read the super block from the first cluster of the image and collect the
+ * chunk tree it points to.
+ *
+ * @cluster is caller-provided scratch space of BLOCK_SIZE bytes.  The items
+ * of the first cluster are skipped until the one at BTRFS_SUPER_INFO_OFFSET;
+ * its payload (inflated for zlib images) supplies the chunk root address,
+ * leaf size, fsid, device uuid and devid, which are stored in @mdres.  The
+ * chunk tree is then gathered through search_for_chunk_blocks().
+ *
+ * Returns 0 immediately when reading from stdin (which cannot be seeked),
+ * otherwise 0 on success or a negative error code.
+ */
+static int build_chunk_tree(struct mdrestore_struct *mdres,
+			    struct meta_cluster *cluster)
+{
+	struct meta_cluster_header *hdr = &cluster->header;
+	struct meta_cluster_item *sb_item = NULL;
+	struct btrfs_super_block *sb;
+	u64 chunk_root;
+	u32 count, idx, sb_len;
+	u8 *raw;
+	int ret;
+
+	/* We can't seek with stdin so don't bother doing this */
+	if (mdres->in == stdin)
+		return 0;
+
+	ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
+	if (ret <= 0) {
+		fprintf(stderr, "Error reading in cluster: %d\n", errno);
+		return -EIO;
+	}
+
+	if (le64_to_cpu(hdr->magic) != HEADER_MAGIC ||
+	    le64_to_cpu(hdr->bytenr) != 0) {
+		fprintf(stderr, "bad header in metadump image\n");
+		return -EIO;
+	}
+
+	mdres->compress_method = hdr->compress;
+	count = le32_to_cpu(hdr->nritems);
+	for (idx = 0; idx < count; idx++) {
+		struct meta_cluster_item *cur = &cluster->items[idx];
+
+		if (le64_to_cpu(cur->bytenr) == BTRFS_SUPER_INFO_OFFSET) {
+			sb_item = cur;
+			break;
+		}
+		/* not the super block: step over this item's payload */
+		if (fseek(mdres->in, le32_to_cpu(cur->size), SEEK_CUR)) {
+			fprintf(stderr, "Error seeking: %d\n", errno);
+			return -EIO;
+		}
+	}
+	if (sb_item == NULL) {
+		fprintf(stderr, "Huh, didn't find the super?\n");
+		return -EINVAL;
+	}
+
+	sb_len = le32_to_cpu(sb_item->size);
+	raw = malloc(sb_len);
+	if (raw == NULL) {
+		fprintf(stderr, "Error allocing buffer\n");
+		return -ENOMEM;
+	}
+	if (fread(raw, sb_len, 1, mdres->in) != 1) {
+		fprintf(stderr, "Error reading buffer: %d\n", errno);
+		free(raw);
+		return -EIO;
+	}
+
+	if (mdres->compress_method == COMPRESS_ZLIB) {
+		size_t out_len = MAX_PENDING_SIZE * 2;
+		u8 *plain = malloc(MAX_PENDING_SIZE * 2);
+
+		if (plain == NULL) {
+			free(raw);
+			return -ENOMEM;
+		}
+		ret = uncompress(plain, (unsigned long *)&out_len,
+				 raw, sb_len);
+		/* the compressed copy is not needed on either outcome */
+		free(raw);
+		if (ret != Z_OK) {
+			fprintf(stderr, "Error decompressing %d\n", ret);
+			free(plain);
+			return -EIO;
+		}
+		raw = plain;
+	}
+
+	pthread_mutex_lock(&mdres->mutex);
+	sb = (struct btrfs_super_block *)raw;
+	chunk_root = btrfs_super_chunk_root(sb);
+	mdres->leafsize = btrfs_super_leafsize(sb);
+	memcpy(mdres->fsid, sb->fsid, BTRFS_FSID_SIZE);
+	memcpy(mdres->uuid, sb->dev_item.uuid, BTRFS_UUID_SIZE);
+	mdres->devid = le64_to_cpu(sb->dev_item.devid);
+	free(raw);
+	pthread_mutex_unlock(&mdres->mutex);
+
+	return search_for_chunk_blocks(mdres, chunk_root, 0);
+}
+
+/*
+ * Report whether any super block mirror starts inside the byte range
+ * [@physical, @physical + @bytes).
+ *
+ * Returns 1 if one of the BTRFS_SUPER_MIRROR_MAX mirror offsets falls in the
+ * range, 0 otherwise.
+ */
+static int range_contains_super(u64 physical, u64 bytes)
+{
+	int mirror = 0;
+
+	while (mirror < BTRFS_SUPER_MIRROR_MAX) {
+		u64 sb_start = btrfs_sb_offset(mirror);
+
+		if (physical <= sb_start && sb_start < physical + bytes)
+			return 1;
+		mirror++;
+	}
+
+	return 0;
+}
+
+/*
+ * Give every chunk on mdres->overlapping_chunks a new physical location.
+ *
+ * Each chunk is taken off the list, moved to mdres->last_physical_offset
+ * (which then advances by the chunk's length) and inserted into
+ * mdres->physical_tree.  If the chunk's old range held a super block mirror,
+ * mdres->clear_space_cache is set and a notice is printed.
+ */
+static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
+{
+	struct list_head *pending = &mdres->overlapping_chunks;
+
+	for (;;) {
+		struct fs_chunk *moved;
+
+		if (list_empty(pending))
+			break;
+
+		moved = list_first_entry(pending, struct fs_chunk, list);
+		list_del_init(&moved->list);
+
+		/* checked against the old location, before it is replaced */
+		if (range_contains_super(moved->physical, moved->bytes)) {
+			fprintf(stderr, "Remapping a chunk that had a super "
+				"mirror inside of it, clearing space cache "
+				"so we don't end up with corruption\n");
+			mdres->clear_space_cache = 1;
+		}
+
+		moved->physical = mdres->last_physical_offset;
+		tree_insert(&mdres->physical_tree, &moved->p, physical_cmp);
+		mdres->last_physical_offset += moved->bytes;
+	}
+}
+/*
+ * Reduce the device items of a restored filesystem to the single device
+ * described by its super block.
+ *
+ * Inside one transaction the super copy's dev_item gets @dev_size as total
+ * bytes and mdres->alloced_chunks as bytes used.  The chunk root is then
+ * walked for DEV_ITEM keys: items whose devid differs from the super block's
+ * are deleted, the matching one receives the same two values.
+ *
+ * Returns 0 on success, -ENOMEM, the transaction start error or the commit
+ * error.  Search, next-leaf and delete failures terminate the program.
+ */
+static int fixup_devices(struct btrfs_fs_info *fs_info,
+ struct mdrestore_struct *mdres, off_t dev_size)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_dev_item *dev_item;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_root *root = fs_info->chunk_root;
+ struct btrfs_key key;
+ u64 devid, cur_devid;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ fprintf(stderr, "Error alloc'ing path\n");
+ return -ENOMEM;
+ }
+
+ trans = btrfs_start_transaction(fs_info->tree_root, 1);
+ if (IS_ERR(trans)) {
+ fprintf(stderr, "Error starting transaction %ld\n",
+ PTR_ERR(trans));
+ btrfs_free_path(path);
+ return PTR_ERR(trans);
+ }
+
+ dev_item = &fs_info->super_copy->dev_item; /* the in-memory super block copy is updated first */
+
+ devid = btrfs_stack_device_id(dev_item); /* the one device that is kept */
+
+ btrfs_set_stack_device_total_bytes(dev_item, dev_size);
+ btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
+
+ key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+ key.type = BTRFS_DEV_ITEM_KEY;
+ key.offset = 0;
+
+again: /* re-entered after every deletion, with key holding the deleted item's key */
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0) {
+ fprintf(stderr, "search failed %d\n", ret);
+ exit(1);
+ }
+
+ while (1) {
+ leaf = path->nodes[0];
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) { /* ran off this leaf: move on to the next one */
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ fprintf(stderr, "Error going to next leaf "
+ "%d\n", ret);
+ exit(1);
+ }
+ if (ret > 0) { /* no further leaf */
+ ret = 0;
+ break;
+ }
+ leaf = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.type > BTRFS_DEV_ITEM_KEY) /* key type is past DEV_ITEM: stop scanning */
+ break;
+ if (key.type != BTRFS_DEV_ITEM_KEY) {
+ path->slots[0]++;
+ continue;
+ }
+
+ dev_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_dev_item);
+ cur_devid = btrfs_device_id(leaf, dev_item);
+ if (devid != cur_devid) { /* item of another device: delete it and restart the search */
+ ret = btrfs_del_item(trans, root, path);
+ if (ret) {
+ fprintf(stderr, "Error deleting item %d\n",
+ ret);
+ exit(1);
+ }
+ btrfs_release_path(path);
+ goto again;
+ }
+
+ btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
+ btrfs_set_device_bytes_used(leaf, dev_item,
+ mdres->alloced_chunks);
+ btrfs_mark_buffer_dirty(leaf);
+ path->slots[0]++;
+ }
+
+ btrfs_free_path(path);
+ ret = btrfs_commit_transaction(trans, fs_info->tree_root);
+ if (ret) {
+ fprintf(stderr, "Commit failed %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Restore the metadump image @input onto the already opened target @out.
+ *
+ * @input:         image path, or "-" for stdin
+ * @out:           stream of the target device/file
+ * @old_restore:   non-zero to leave the chunk tree alone
+ * @num_threads:   number of restore worker threads
+ * @fixup_offset:  non-zero for the second pass of a multi-device restore;
+ *                 @target is then opened for writing and blocks are written
+ *                 through it
+ * @target:        path of the target
+ * @multi_devices: non-zero when restoring onto several devices
+ *
+ * For a plain single-device restore the chunk tree is collected first and,
+ * after all clusters were written, the device items are fixed up to match
+ * the target's size.
+ *
+ * Returns 0 on success and non-zero on failure.  Every failure past the
+ * opening of @input leaves through the cleanup labels, so the workers, the
+ * cluster buffer and the input stream are always released.
+ */
+static int restore_metadump(const char *input, FILE *out, int old_restore,
+			    int num_threads, int fixup_offset,
+			    const char *target, int multi_devices)
+{
+	struct meta_cluster *cluster = NULL;
+	struct meta_cluster_header *header;
+	struct mdrestore_struct mdrestore;
+	struct btrfs_fs_info *info = NULL;
+	u64 bytenr = 0;
+	FILE *in = NULL;
+	int ret = 0;
+	int worker_ret;
+
+	if (!strcmp(input, "-")) {
+		in = stdin;
+	} else {
+		in = fopen(input, "r");
+		if (!in) {
+			perror("unable to open metadump image");
+			return 1;
+		}
+	}
+
+	/* NOTE: open with write mode */
+	if (fixup_offset) {
+		BUG_ON(!target);
+		info = open_ctree_fs_info(target, 0, 0,
+					  OPEN_CTREE_WRITES |
+					  OPEN_CTREE_RESTORE |
+					  OPEN_CTREE_PARTIAL);
+		if (!info) {
+			fprintf(stderr, "%s: open ctree failed\n", __func__);
+			ret = -EIO;
+			goto failed_open;
+		}
+	}
+
+	cluster = malloc(BLOCK_SIZE);
+	if (!cluster) {
+		fprintf(stderr, "Error allocating cluster\n");
+		ret = -ENOMEM;
+		goto failed_info;
+	}
+
+	ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
+			     fixup_offset, info, multi_devices);
+	if (ret) {
+		fprintf(stderr, "Error initing mdrestore %d\n", ret);
+		goto failed_cluster;
+	}
+
+	if (!multi_devices && !old_restore) {
+		ret = build_chunk_tree(&mdrestore, cluster);
+		if (ret)
+			goto out;
+		if (!list_empty(&mdrestore.overlapping_chunks))
+			remap_overlapping_chunks(&mdrestore);
+	}
+
+	if (in != stdin && fseek(in, 0, SEEK_SET)) {
+		fprintf(stderr, "Error seeking %d\n", errno);
+		/* ret is still 0 here; do not report success */
+		ret = -EIO;
+		goto out;
+	}
+
+	while (!mdrestore.error) {
+		ret = fread(cluster, BLOCK_SIZE, 1, in);
+		if (!ret)
+			break;
+
+		header = &cluster->header;
+		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
+		    le64_to_cpu(header->bytenr) != bytenr) {
+			fprintf(stderr, "bad header in metadump image\n");
+			ret = -EIO;
+			break;
+		}
+		ret = add_cluster(cluster, &mdrestore, &bytenr);
+		if (ret) {
+			fprintf(stderr, "Error adding cluster\n");
+			break;
+		}
+	}
+	/*
+	 * Let the workers drain what was queued, but keep an error from the
+	 * loop above: the workers' status must not paper over it.
+	 */
+	worker_ret = wait_for_worker(&mdrestore);
+	if (!ret)
+		ret = worker_ret;
+
+	if (!ret && !multi_devices && !old_restore) {
+		struct btrfs_root *root;
+		struct btrfs_fs_info *restored;
+		struct stat st;
+
+		root = open_ctree_fd(fileno(out), target, 0,
+					  OPEN_CTREE_PARTIAL |
+					  OPEN_CTREE_WRITES |
+					  OPEN_CTREE_NO_DEVICES);
+		if (!root) {
+			fprintf(stderr, "unable to open %s\n", target);
+			ret = -EIO;
+			goto out;
+		}
+		/*
+		 * Kept apart from 'info', which the cleanup below closes
+		 * when fixup_offset is set.
+		 */
+		restored = root->fs_info;
+
+		if (stat(target, &st)) {
+			fprintf(stderr, "statting %s failed\n", target);
+			close_ctree(restored->chunk_root);
+			ret = 1;
+			goto out;
+		}
+
+		ret = fixup_devices(restored, &mdrestore, st.st_size);
+		close_ctree(restored->chunk_root);
+	}
+out:
+	mdrestore_destroy(&mdrestore, num_threads);
+failed_cluster:
+	free(cluster);
+failed_info:
+	if (fixup_offset && info)
+		close_ctree(info->chunk_root);
+failed_open:
+	if (in != stdin)
+		fclose(in);
+	return ret;
+}
+
+/*
+ * Write a super block for device @cur_devid onto @other_dev.
+ *
+ * The device item with id @cur_devid is looked up in the chunk root of
+ * @info.  A copy of info->super_copy gets its embedded dev_item replaced by
+ * the values of that item, is re-checksummed and written to @other_dev at
+ * BTRFS_SUPER_INFO_OFFSET, followed by the backup copies.
+ *
+ * Returns 0 on success and -EIO on any failure.
+ */
+static int update_disk_super_on_device(struct btrfs_fs_info *info,
+				       const char *other_dev, u64 cur_devid)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_path path;
+	struct btrfs_dev_item *dev_item;
+	struct btrfs_super_block *disk_super;
+	char dev_uuid[BTRFS_UUID_SIZE];
+	char fs_uuid[BTRFS_UUID_SIZE];
+	u64 devid, type, io_align, io_width;
+	u64 sector_size, total_bytes, bytes_used;
+	char buf[BTRFS_SUPER_INFO_SIZE];
+	int fp = -1;
+	int ret;
+
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = cur_devid;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0);
+	if (ret) {
+		fprintf(stderr, "ERROR: search key failed\n");
+		/* an inexact match still leaves the path populated */
+		btrfs_release_path(&path);
+		ret = -EIO;
+		goto out;
+	}
+
+	leaf = path.nodes[0];
+	dev_item = btrfs_item_ptr(leaf, path.slots[0],
+				  struct btrfs_dev_item);
+
+	devid = btrfs_device_id(leaf, dev_item);
+	if (devid != cur_devid) {
+		printk("ERROR: devid %llu mismatch with %llu\n", devid, cur_devid);
+		btrfs_release_path(&path);
+		ret = -EIO;
+		goto out;
+	}
+
+	type = btrfs_device_type(leaf, dev_item);
+	io_align = btrfs_device_io_align(leaf, dev_item);
+	io_width = btrfs_device_io_width(leaf, dev_item);
+	sector_size = btrfs_device_sector_size(leaf, dev_item);
+	total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+	bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+	read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
+	read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
+
+	btrfs_release_path(&path);
+
+	printk("update disk super on %s devid=%llu\n", other_dev, devid);
+
+	/* update other devices' super block */
+	fp = open(other_dev, O_CREAT | O_RDWR, 0600);
+	if (fp < 0) {
+		fprintf(stderr, "ERROR: could not open %s\n", other_dev);
+		ret = -EIO;
+		goto out;
+	}
+
+	memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
+
+	disk_super = (struct btrfs_super_block *)buf;
+	dev_item = &disk_super->dev_item;
+
+	btrfs_set_stack_device_type(dev_item, type);
+	btrfs_set_stack_device_id(dev_item, devid);
+	btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
+	btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
+	btrfs_set_stack_device_io_align(dev_item, io_align);
+	btrfs_set_stack_device_io_width(dev_item, io_width);
+	btrfs_set_stack_device_sector_size(dev_item, sector_size);
+	memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
+	memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
+	csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
+
+	ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
+	if (ret != BTRFS_SUPER_INFO_SIZE) {
+		/* the reason is in errno, ret itself is just -1 */
+		if (ret < 0)
+			fprintf(stderr, "ERROR: cannot write superblock: %s\n", strerror(errno));
+		else
+			fprintf(stderr, "ERROR: cannot write superblock\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	write_backup_supers(fp, (u8 *)buf);
+
+	/*
+	 * ret still holds the byte count of the write above; callers treat
+	 * any non-zero value as failure.
+	 */
+	ret = 0;
+
+out:
+	if (fp != -1)
+		close(fp);
+	return ret;
+}
+
+/*
+ * Print the btrfs-image usage text to stderr and terminate the process with
+ * exit status @ret.  Does not return.
+ */
+static void print_usage(int ret)
+{
+	static const char * const usage_lines[] = {
+		"usage: btrfs-image [options] source target\n",
+		"\t-r \trestore metadump image\n",
+		"\t-c value\tcompression level (0 ~ 9)\n",
+		"\t-t value\tnumber of threads (1 ~ 32)\n",
+		"\t-o \tdon't mess with the chunk tree when restoring\n",
+		"\t-s \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n",
+		"\t-w \twalk all trees instead of using extent tree, do this if your extent tree is broken\n",
+		"\t-m \trestore for multiple devices\n",
+		"\n",
+		"\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n",
+		"\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n",
+	};
+	size_t line;
+
+	for (line = 0; line < sizeof(usage_lines) / sizeof(usage_lines[0]); line++)
+		fputs(usage_lines[line], stderr);
+	exit(ret);
+}
+
+/*
+ * btrfs-image entry point.
+ *
+ * Without -r/-m a metadump of "source" is created in "target"; with -r the
+ * image "source" is restored onto "target"; with -m it is restored onto
+ * several target devices, whose super blocks are written individually
+ * before a second pass maps the metadata blocks onto them.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int main(int argc, char *argv[])
+{
+	char *source;
+	char *target;
+	u64 num_threads = 0;
+	u64 compress_level = 0;
+	int create = 1;
+	int old_restore = 0;
+	int walk_trees = 0;
+	int multi_devices = 0;
+	int ret;
+	int sanitize = 0;
+	int dev_cnt = 0;
+	int usage_error = 0;
+	FILE *out;
+
+	while (1) {
+		static const struct option long_options[] = {
+			{ "help", no_argument, NULL, GETOPT_VAL_HELP},
+			{ NULL, 0, NULL, 0 }
+		};
+		int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
+		if (c < 0)
+			break;
+		switch (c) {
+		case 'r':
+			create = 0;
+			break;
+		case 't':
+			num_threads = arg_strtou64(optarg);
+			if (num_threads > 32)
+				print_usage(1);
+			break;
+		case 'c':
+			compress_level = arg_strtou64(optarg);
+			if (compress_level > 9)
+				print_usage(1);
+			break;
+		case 'o':
+			old_restore = 1;
+			break;
+		case 's':
+			sanitize++;
+			break;
+		case 'w':
+			walk_trees = 1;
+			break;
+		case 'm':
+			create = 0;
+			multi_devices = 1;
+			break;
+		case GETOPT_VAL_HELP:
+		default:
+			/* print_usage() exits: status 0 for --help, 1 otherwise */
+			print_usage(c != GETOPT_VAL_HELP);
+		}
+	}
+
+	argc = argc - optind;
+	set_argv0(argv);
+	if (check_argc_min(argc, 2))
+		print_usage(1);
+
+	/* everything after the source argument is a target device */
+	dev_cnt = argc - 1;
+
+	if (create) {
+		if (old_restore) {
+			fprintf(stderr, "Usage error: create and restore cannot be used at the same time\n");
+			usage_error++;
+		}
+	} else {
+		if (walk_trees || sanitize || compress_level) {
+			fprintf(stderr, "Usage error: use -w, -s, -c options for restore makes no sense\n");
+			usage_error++;
+		}
+		if (multi_devices && dev_cnt < 2) {
+			fprintf(stderr, "Usage error: not enough devices specified for -m option\n");
+			usage_error++;
+		}
+		if (!multi_devices && dev_cnt != 1) {
+			fprintf(stderr, "Usage error: accepts only 1 device without -m option\n");
+			usage_error++;
+		}
+	}
+
+	if (usage_error)
+		print_usage(1);
+
+	source = argv[optind];
+	target = argv[optind + 1];
+
+	if (create && !strcmp(target, "-")) {
+		out = stdout;
+	} else {
+		out = fopen(target, "w+");
+		if (!out) {
+			perror("unable to create target file");
+			exit(1);
+		}
+	}
+
+	/* threads are used for compressed dumps and for every restore */
+	if (compress_level > 0 || create == 0) {
+		if (num_threads == 0) {
+			long tmp = sysconf(_SC_NPROCESSORS_ONLN);
+
+			if (tmp <= 0)
+				tmp = 1;
+			num_threads = tmp;
+		}
+	} else {
+		num_threads = 0;
+	}
+
+	if (create) {
+		ret = check_mounted(source);
+		if (ret < 0) {
+			fprintf(stderr, "Could not check mount status: %s\n",
+				strerror(-ret));
+			exit(1);
+		} else if (ret)
+			fprintf(stderr,
+		"WARNING: The device is mounted. Make sure the filesystem is quiescent.\n");
+
+		ret = create_metadump(source, out, num_threads,
+				      compress_level, sanitize, walk_trees);
+	} else {
+		ret = restore_metadump(source, out, old_restore, num_threads,
+				       0, target, multi_devices);
+	}
+	if (ret) {
+		printk("%s failed (%s)\n", (create) ? "create" : "restore",
+		       strerror(errno));
+		goto out;
+	}
+
+	/* extended support for multiple devices */
+	if (!create && multi_devices) {
+		struct btrfs_fs_info *info;
+		u64 total_devs;
+		int i;
+
+		info = open_ctree_fs_info(target, 0, 0,
+					  OPEN_CTREE_PARTIAL |
+					  OPEN_CTREE_RESTORE);
+		if (!info) {
+			fprintf(stderr, "unable to open %s error = %s\n",
+				target, strerror(errno));
+			/* leave through the common cleanup so 'out' is closed */
+			ret = 1;
+			goto out;
+		}
+
+		total_devs = btrfs_super_num_devices(info->super_copy);
+		if (total_devs != dev_cnt) {
+			printk("it needs %llu devices but has only %d\n",
+				total_devs, dev_cnt);
+			close_ctree(info->chunk_root);
+			/* ret is still 0 from the restore; this is a failure */
+			ret = 1;
+			goto out;
+		}
+
+		/* update super block on other disks */
+		for (i = 2; i <= dev_cnt; i++) {
+			ret = update_disk_super_on_device(info,
+					argv[optind + i], (u64)i);
+			if (ret) {
+				printk("update disk super failed devid=%d (error=%d)\n",
+					i, ret);
+				close_ctree(info->chunk_root);
+				exit(1);
+			}
+		}
+
+		close_ctree(info->chunk_root);
+
+		/* fix metadata block to map correct chunk */
+		ret = restore_metadump(source, out, 0, num_threads, 1,
+				       target, 1);
+		if (ret) {
+			fprintf(stderr, "fix metadump failed (error=%d)\n",
+				ret);
+			exit(1);
+		}
+	}
+out:
+	if (out == stdout) {
+		fflush(out);
+	} else {
+		fclose(out);
+		/* do not leave a partial image behind after a failed dump */
+		if (ret && create) {
+			int unlink_ret;
+
+			unlink_ret = unlink(target);
+			if (unlink_ret)
+				fprintf(stderr,
+					"unlink output file failed : %s\n",
+					strerror(errno));
+		}
+	}
+
+	btrfs_close_all_devices();
+
+	return !!ret;
+}
diff --git a/btrfs-list.c b/btrfs-list.c
new file mode 100644
index 00000000..2da54bf7
--- /dev/null
+++ b/btrfs-list.c
@@ -0,0 +1,1914 @@
+/*
+ * Copyright (C) 2010 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <libgen.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "utils.h"
+#include "ioctl.h"
+#include <uuid/uuid.h>
+#include "btrfs-list.h"
+#include "rbtree-utils.h"
+
+#define BTRFS_LIST_NFILTERS_INCREASE (2 * BTRFS_LIST_FILTER_MAX)
+#define BTRFS_LIST_NCOMPS_INCREASE (2 * BTRFS_LIST_COMP_MAX)
+
+/*
+ * Container for a red-black tree of struct root_info entries.  Every root
+ * that is found gets stored in such a tree so it can be searched for later;
+ * the same wrapper type is also used for the sorted output tree.
+ */
+struct root_lookup {
+ struct rb_root root; /* tree head; root.rb_node is NULL while the tree is empty */
+};
+
+/*
+ * Printable columns.  The table is indexed with enum btrfs_list_column_enum
+ * values; .name is the label used by the default and table layouts and
+ * .need_print is switched on by btrfs_list_setup_print_column().  The last
+ * entry is an all-NULL terminator.
+ */
+static struct {
+	char *name;
+	char *column_name;
+	int need_print;
+} btrfs_list_columns[] = {
+	{ .name = "ID",            .column_name = "ID",            .need_print = 0 },
+	{ .name = "gen",           .column_name = "Gen",           .need_print = 0 },
+	{ .name = "cgen",          .column_name = "CGen",          .need_print = 0 },
+	{ .name = "parent",        .column_name = "Parent",        .need_print = 0 },
+	{ .name = "top level",     .column_name = "Top Level",     .need_print = 0 },
+	{ .name = "otime",         .column_name = "OTime",         .need_print = 0 },
+	{ .name = "parent_uuid",   .column_name = "Parent UUID",   .need_print = 0 },
+	{ .name = "received_uuid", .column_name = "Received UUID", .need_print = 0 },
+	{ .name = "uuid",          .column_name = "UUID",          .need_print = 0 },
+	{ .name = "path",          .column_name = "Path",          .need_print = 0 },
+	{ .name = NULL,            .column_name = NULL,            .need_print = 0 },
+};
+
+static btrfs_list_filter_func all_filter_funcs[];
+static btrfs_list_comp_func all_comp_funcs[];
+
+/*
+ * Mark @column for printing.  Passing BTRFS_LIST_ALL marks every column;
+ * any value outside [0, BTRFS_LIST_ALL] trips the BUG_ON.
+ */
+void btrfs_list_setup_print_column(enum btrfs_list_column_enum column)
+{
+	int first;
+	int last;
+	int idx;
+
+	BUG_ON(column < 0 || column > BTRFS_LIST_ALL);
+
+	if (column < BTRFS_LIST_ALL) {
+		/* a single, specific column */
+		first = column;
+		last = column;
+	} else {
+		/* the whole table, terminator excluded */
+		first = 0;
+		last = BTRFS_LIST_ALL - 1;
+	}
+
+	for (idx = first; idx <= last; idx++)
+		btrfs_list_columns[idx].need_print = 1;
+}
+
+/* Put @tree into the empty state by clearing its root node pointer. */
+static void root_lookup_init(struct root_lookup *tree)
+{
+ tree->root.rb_node = NULL;
+}
+
+/*
+ * Three-way comparison of two roots by root_id.
+ * Returns -1, 0 or 1; the sign is flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_rootid(struct root_info *entry1,
+				  struct root_info *entry2,
+				  int is_descending)
+{
+	const int order = (entry1->root_id > entry2->root_id) -
+			  (entry1->root_id < entry2->root_id);
+
+	return is_descending ? -order : order;
+}
+
+/*
+ * Three-way comparison of two roots by their gen field.
+ * Returns -1, 0 or 1; the sign is flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_gen(struct root_info *entry1,
+			       struct root_info *entry2,
+			       int is_descending)
+{
+	const int order = (entry1->gen > entry2->gen) -
+			  (entry1->gen < entry2->gen);
+
+	return is_descending ? -order : order;
+}
+
+/*
+ * Three-way comparison of two roots by their ogen field.
+ * Returns -1, 0 or 1; the sign is flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_ogen(struct root_info *entry1,
+				struct root_info *entry2,
+				int is_descending)
+{
+	const int order = (entry1->ogen > entry2->ogen) -
+			  (entry1->ogen < entry2->ogen);
+
+	return is_descending ? -order : order;
+}
+
+/*
+ * Three-way comparison of two roots by full_path (strcmp order), normalised
+ * to -1, 0 or 1; the sign is flipped when @is_descending is non-zero.
+ * Both entries must already have full_path set.
+ */
+static int comp_entry_with_path(struct root_info *entry1,
+				struct root_info *entry2,
+				int is_descending)
+{
+	const int cmp = strcmp(entry1->full_path, entry2->full_path);
+	const int order = (cmp > 0) - (cmp < 0);
+
+	return is_descending ? -order : order;
+}
+
+/*
+ * Comparison callbacks, indexed by enum btrfs_list_comp_enum.
+ * btrfs_list_setup_comparer() picks its callback from this table.
+ */
+static btrfs_list_comp_func all_comp_funcs[] = {
+ [BTRFS_LIST_COMP_ROOTID] = comp_entry_with_rootid,
+ [BTRFS_LIST_COMP_OGEN] = comp_entry_with_ogen,
+ [BTRFS_LIST_COMP_GEN] = comp_entry_with_gen,
+ [BTRFS_LIST_COMP_PATH] = comp_entry_with_path,
+};
+
+/*
+ * Sort key names, indexed by enum btrfs_list_comp_enum and matched against
+ * user input by btrfs_list_get_sort_item().  The BTRFS_LIST_COMP_MAX slot
+ * is an explicit NULL terminator.
+ */
+static char *all_sort_items[] = {
+ [BTRFS_LIST_COMP_ROOTID] = "rootid",
+ [BTRFS_LIST_COMP_OGEN] = "ogen",
+ [BTRFS_LIST_COMP_GEN] = "gen",
+ [BTRFS_LIST_COMP_PATH] = "path",
+ [BTRFS_LIST_COMP_MAX] = NULL,
+};
+
+/*
+ * Translate a sort key name into its index in all_sort_items[].
+ * Returns the index on an exact match, -1 when the name is unknown.
+ */
+static int btrfs_list_get_sort_item(char *sort_name)
+{
+	int item = 0;
+
+	while (item < BTRFS_LIST_COMP_MAX) {
+		if (!strcmp(sort_name, all_sort_items[item]))
+			return item;
+		item++;
+	}
+
+	return -1;
+}
+
+/*
+ * Allocate a zeroed comparer set with room for BTRFS_LIST_NCOMPS_INCREASE
+ * comparers.  Never returns NULL: the process exits on allocation failure.
+ * Release the result with btrfs_list_free_comparer_set().
+ */
+struct btrfs_list_comparer_set *btrfs_list_alloc_comparer_set(void)
+{
+	struct btrfs_list_comparer_set *new_set;
+	int bytes;
+
+	bytes = sizeof(struct btrfs_list_comparer_set) +
+		BTRFS_LIST_NCOMPS_INCREASE * sizeof(struct btrfs_list_comparer);
+
+	new_set = calloc(1, bytes);
+	if (new_set) {
+		new_set->total = BTRFS_LIST_NCOMPS_INCREASE;
+		return new_set;
+	}
+
+	fprintf(stderr, "memory allocation failed\n");
+	exit(1);
+}
+
+/* Release a comparer set; the set is a single allocation, so one free() suffices. */
+void btrfs_list_free_comparer_set(struct btrfs_list_comparer_set *comp_set)
+{
+ free(comp_set);
+}
+
+/*
+ * Append one comparer to *@comp_set, growing the set by
+ * BTRFS_LIST_NCOMPS_INCREASE slots when it is full.  Growing may move the
+ * set, in which case *@comp_set is updated.  Exits the process if the
+ * reallocation fails; otherwise returns 0.
+ */
+static int btrfs_list_setup_comparer(struct btrfs_list_comparer_set **comp_set,
+		enum btrfs_list_comp_enum comparer, int is_descending)
+{
+	struct btrfs_list_comparer_set *set = *comp_set;
+	struct btrfs_list_comparer *slot;
+
+	BUG_ON(!set);
+	BUG_ON(comparer >= BTRFS_LIST_COMP_MAX);
+	BUG_ON(set->ncomps > set->total);
+
+	if (set->ncomps == set->total) {
+		struct btrfs_list_comparer_set *grown;
+		int bytes;
+
+		bytes = set->total + BTRFS_LIST_NCOMPS_INCREASE;
+		bytes = sizeof(*set) + bytes * sizeof(struct btrfs_list_comparer);
+
+		grown = realloc(set, bytes);
+		if (!grown) {
+			fprintf(stderr, "memory allocation failed\n");
+			/* realloc() left the old block untouched */
+			free(set);
+			exit(1);
+		}
+		set = grown;
+
+		/* only the freshly added tail needs zeroing */
+		memset(&set->comps[set->total], 0,
+		       BTRFS_LIST_NCOMPS_INCREASE *
+		       sizeof(struct btrfs_list_comparer));
+		set->total += BTRFS_LIST_NCOMPS_INCREASE;
+		*comp_set = set;
+	}
+
+	slot = &set->comps[set->ncomps];
+	BUG_ON(slot->comp_func);
+
+	slot->comp_func = all_comp_funcs[comparer];
+	slot->is_descending = is_descending;
+	set->ncomps++;
+	return 0;
+}
+
+/*
+ * Compare two roots with every comparer in @set, in order; the first
+ * non-zero result wins.  When all comparers tie (or @set is NULL or empty)
+ * and root_id was not among the keys, ascending root_id breaks the tie.
+ */
+static int sort_comp(struct root_info *entry1, struct root_info *entry2,
+		     struct btrfs_list_comparer_set *set)
+{
+	int saw_rootid = 0;
+	int result = 0;
+	int idx;
+
+	if (set) {
+		for (idx = 0; idx < set->ncomps; idx++) {
+			btrfs_list_comp_func cmp = set->comps[idx].comp_func;
+
+			if (!cmp)
+				break;
+
+			result = cmp(entry1, entry2,
+				     set->comps[idx].is_descending);
+			if (result)
+				return result;
+
+			if (cmp == comp_entry_with_rootid)
+				saw_rootid = 1;
+		}
+	}
+
+	if (!saw_rootid)
+		result = comp_entry_with_rootid(entry1, entry2, 0);
+
+	return result;
+}
+
+/*
+ * Link @ins into @sort_tree through its sort_node, ordered by sort_comp()
+ * with @comp_set.  Returns 0, or -EEXIST when an entry comparing equal is
+ * already in the tree (nothing is inserted then).
+ */
+static int sort_tree_insert(struct root_lookup *sort_tree,
+			    struct root_info *ins,
+			    struct btrfs_list_comparer_set *comp_set)
+{
+	struct rb_node **link = &sort_tree->root.rb_node;
+	struct rb_node *above = NULL;
+
+	while (*link) {
+		struct root_info *node;
+		int order;
+
+		above = *link;
+		node = rb_entry(above, struct root_info, sort_node);
+
+		order = sort_comp(ins, node, comp_set);
+		if (!order)
+			return -EEXIST;
+
+		link = order < 0 ? &above->rb_left : &above->rb_right;
+	}
+
+	rb_link_node(&ins->sort_node, above, link);
+	rb_insert_color(&ins->sort_node, &sort_tree->root);
+	return 0;
+}
+
+/*
+ * Insert a new root into the lookup tree through its rb_node.  The tree is
+ * keyed by root_id alone.  Returns 0 on success, or -EEXIST without
+ * inserting when a root with the same root_id is already present.
+ */
+static int root_tree_insert(struct root_lookup *root_tree,
+			    struct root_info *ins)
+{
+	struct rb_node **link = &root_tree->root.rb_node;
+	struct rb_node *above = NULL;
+
+	while (*link) {
+		struct root_info *node;
+		int order;
+
+		above = *link;
+		node = rb_entry(above, struct root_info, rb_node);
+
+		order = comp_entry_with_rootid(ins, node, 0);
+		if (!order)
+			return -EEXIST;
+
+		link = order < 0 ? &above->rb_left : &above->rb_right;
+	}
+
+	rb_link_node(&ins->rb_node, above, link);
+	rb_insert_color(&ins->rb_node, &root_tree->root);
+	return 0;
+}
+
+/*
+ * Look up the root with the given root id in the lookup tree.
+ * Returns the matching entry, or NULL when the id is not present.
+ */
+static struct root_info *root_tree_search(struct root_lookup *root_tree,
+					  u64 root_id)
+{
+	struct rb_node *cursor = root_tree->root.rb_node;
+
+	while (cursor) {
+		struct root_info *node;
+
+		node = rb_entry(cursor, struct root_info, rb_node);
+
+		if (root_id < node->root_id)
+			cursor = cursor->rb_left;
+		else if (root_id > node->root_id)
+			cursor = cursor->rb_right;
+		else
+			return node;
+	}
+
+	return NULL;
+}
+
+/*
+ * Merge new information into the root that is already stored under
+ * @root_id.  A zero/NULL argument means "leave that field alone".  The name
+ * is replaced by a fresh NUL-terminated copy of the first @name_len bytes
+ * of @name.  When no creation generation is known, @root_offset is used
+ * for ogen instead.
+ *
+ * Returns 0 on success, -ENOENT if the root is not in the tree; exits the
+ * process on allocation failure.
+ */
+static int update_root(struct root_lookup *root_lookup,
+		       u64 root_id, u64 ref_tree, u64 root_offset, u64 flags,
+		       u64 dir_id, char *name, int name_len, u64 ogen, u64 gen,
+		       time_t ot, void *uuid, void *puuid, void *ruuid)
+{
+	struct root_info *ri = root_tree_search(root_lookup, root_id);
+
+	if (!ri || ri->root_id != root_id)
+		return -ENOENT;
+
+	if (name && name_len > 0) {
+		free(ri->name);
+
+		ri->name = malloc(name_len + 1);
+		if (!ri->name) {
+			fprintf(stderr, "memory allocation failed\n");
+			exit(1);
+		}
+		strncpy(ri->name, name, name_len);
+		ri->name[name_len] = 0;
+	}
+
+	/* location of the root inside its referring tree */
+	if (ref_tree)
+		ri->ref_tree = ref_tree;
+	if (dir_id)
+		ri->dir_id = dir_id;
+	if (root_offset)
+		ri->root_offset = root_offset;
+
+	if (flags)
+		ri->flags = flags;
+	if (ot)
+		ri->otime = ot;
+
+	/* generations; ogen falls back to the key offset when still unset */
+	if (gen)
+		ri->gen = gen;
+	if (ogen)
+		ri->ogen = ogen;
+	if (!ri->ogen && root_offset)
+		ri->ogen = root_offset;
+
+	if (uuid)
+		memcpy(&ri->uuid, uuid, BTRFS_UUID_SIZE);
+	if (puuid)
+		memcpy(&ri->puuid, puuid, BTRFS_UUID_SIZE);
+	if (ruuid)
+		memcpy(&ri->ruuid, ruuid, BTRFS_UUID_SIZE);
+
+	return 0;
+}
+
+/*
+ * add_root - update an existing root, or allocate a new root and insert it
+ *	      into the lookup tree.
+ * root_id: object id of the root
+ * ref_tree: object id of the referring root.
+ * root_offset: offset value of the root's key
+ * dir_id: inode id of the directory in ref_tree where this root can be found.
+ * name: the name of root_id in that directory
+ * name_len: the length of name
+ * ogen: the original generation of the root
+ * gen: the current generation of the root
+ * ot: the original time (creation time) of the root
+ * uuid: uuid of the root
+ * puuid: uuid of the root parent if any
+ * ruuid: uuid of the received subvol, if any
+ *
+ * Zero/NULL arguments leave the corresponding field untouched (zero for a
+ * new root).  Returns 0; the process exits if memory cannot be allocated
+ * or the new root cannot be inserted.
+ */
+static int add_root(struct root_lookup *root_lookup,
+		    u64 root_id, u64 ref_tree, u64 root_offset, u64 flags,
+		    u64 dir_id, char *name, int name_len, u64 ogen, u64 gen,
+		    time_t ot, void *uuid, void *puuid, void *ruuid)
+{
+	struct root_info *ri;
+	int ret;
+
+	/* the common case: the root is already known, just merge the data */
+	ret = update_root(root_lookup, root_id, ref_tree, root_offset, flags,
+			  dir_id, name, name_len, ogen, gen, ot,
+			  uuid, puuid, ruuid);
+	if (!ret)
+		return 0;
+
+	ri = calloc(1, sizeof(*ri));
+	if (!ri) {
+		/* diagnostics belong on stderr, like everywhere else here */
+		fprintf(stderr, "memory allocation failed\n");
+		exit(1);
+	}
+	ri->root_id = root_id;
+
+	ret = root_tree_insert(root_lookup, ri);
+	if (ret) {
+		fprintf(stderr, "failed to insert tree %llu\n",
+			(unsigned long long)root_id);
+		exit(1);
+	}
+
+	/*
+	 * The zeroed entry is in the tree now, so the field-by-field merge
+	 * in update_root() fills it in exactly as it would an existing root;
+	 * this keeps the two paths from drifting apart.
+	 */
+	return update_root(root_lookup, root_id, ref_tree, root_offset, flags,
+			   dir_id, name, name_len, ogen, gen, ot,
+			   uuid, puuid, ruuid);
+}
+
+/*
+ * rb_free_nodes() callback: release one root_info, reached through its
+ * rb_node, together with the three strings it owns.
+ */
+static void __free_root_info(struct rb_node *node)
+{
+	struct root_info *info = rb_entry(node, struct root_info, rb_node);
+
+	free(info->full_path);
+	free(info->path);
+	free(info->name);
+	free(info);
+}
+
+/* Free every root_info stored in @root_tree, using __free_root_info() per node. */
+static inline void __free_all_subvolumn(struct root_lookup *root_tree)
+{
+ rb_free_nodes(&root_tree->root, __free_root_info);
+}
+
+/*
+ * for a given root_info, search through the root_lookup tree to construct
+ * the full path name to it.
+ *
+ * This can't be called until all the root_info->path fields are filled
+ * in by lookup_ino_path
+ *
+ * The walk goes from @ri up through the referring trees and stops at
+ * @top_id or at BTRFS_FS_TREE_OBJECTID.  On success ri->full_path owns the
+ * newly allocated path, ri->top_id is set if it was still zero, and 0 is
+ * returned.  Returns -ENOENT when the chain passes through a root with no
+ * referring tree or a tree that is not in @rl (i.e. a deleted subvolume).
+ * Exits the process on allocation failure.
+ */
+static int resolve_root(struct root_lookup *rl, struct root_info *ri,
+			u64 top_id)
+{
+	char *full_path = NULL;
+	int len = 0;
+	struct root_info *found;
+
+	/*
+	 * we go backwards from the root_info object and add pathnames
+	 * from parent directories as we go.
+	 */
+	found = ri;
+	while (1) {
+		char *tmp;
+		u64 next;
+		int add_len;
+
+		/*
+		 * ref_tree = 0 indicates the subvolume
+		 * has been deleted.
+		 */
+		if (!found->ref_tree) {
+			free(full_path);
+			return -ENOENT;
+		}
+
+		add_len = strlen(found->path);
+
+		if (full_path) {
+			/* room for / and for null */
+			tmp = malloc(add_len + 2 + len);
+			if (!tmp) {
+				perror("malloc failed");
+				exit(1);
+			}
+			/* new layout: <found->path> '/' <old full_path> '\0' */
+			memcpy(tmp, found->path, add_len);
+			tmp[add_len] = '/';
+			memcpy(tmp + add_len + 1, full_path, len);
+			tmp[add_len + len + 1] = '\0';
+			free(full_path);
+			full_path = tmp;
+			len += add_len + 1;
+		} else {
+			full_path = strdup(found->path);
+			if (!full_path) {
+				/*
+				 * Without this check a failed strdup() would
+				 * silently drop the innermost component or
+				 * leave ri->full_path NULL.
+				 */
+				perror("strdup failed");
+				exit(1);
+			}
+			len = add_len;
+		}
+		if (!ri->top_id)
+			ri->top_id = found->ref_tree;
+
+		next = found->ref_tree;
+		if (next == top_id)
+			break;
+		/*
+		 * if the ref_tree = BTRFS_FS_TREE_OBJECTID,
+		 * we are at the top
+		 */
+		if (next == BTRFS_FS_TREE_OBJECTID)
+			break;
+		/*
+		 * if the ref_tree wasn't in our tree of roots, the
+		 * subvolume was deleted.
+		 */
+		found = root_tree_search(rl, next);
+		if (!found) {
+			free(full_path);
+			return -ENOENT;
+		}
+	}
+
+	ri->full_path = full_path;
+
+	return 0;
+}
+
+/*
+ * for a single root_info, ask the kernel to give us a path name
+ * inside its ref_tree for the dir_id where it lives.
+ *
+ * This fills in root_info->path with the path to the directory and
+ * appends this root's name.
+ *
+ * Returns 0 when ri->path is set (or was set already), -ENOENT when the
+ * root has no referring tree or the kernel reports ENOENT (ri->ref_tree is
+ * then cleared), and the negative ioctl return value for any other
+ * failure.  Exits the process on allocation failure.
+ */
+static int lookup_ino_path(int fd, struct root_info *ri)
+{
+	struct btrfs_ioctl_ino_lookup_args args;
+	int ret;
+
+	if (ri->path)
+		return 0;
+	if (!ri->ref_tree)
+		return -ENOENT;
+
+	memset(&args, 0, sizeof(args));
+	args.treeid = ri->ref_tree;
+	args.objectid = ri->dir_id;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	if (ret < 0) {
+		if (errno != ENOENT) {
+			fprintf(stderr, "ERROR: Failed to lookup path for root %llu - %s\n",
+				(unsigned long long)ri->ref_tree,
+				strerror(errno));
+			return ret;
+		}
+		ri->ref_tree = 0;
+		return -ENOENT;
+	}
+
+	if (!args.name[0]) {
+		/* we're at the root of ref_tree */
+		ri->path = strdup(ri->name);
+		if (!ri->path) {
+			perror("strdup failed");
+			exit(1);
+		}
+		return 0;
+	}
+
+	/*
+	 * we're in a subdirectory of ref_tree, the kernel ioctl
+	 * puts a / in there for us
+	 */
+	ri->path = malloc(strlen(ri->name) + strlen(args.name) + 1);
+	if (!ri->path) {
+		perror("malloc failed");
+		exit(1);
+	}
+	strcpy(ri->path, args.name);
+	strcat(ri->path, ri->name);
+	return 0;
+}
+
+/*
+ * Finding the generation for a given path is a two step process.
+ * First we use the inode lookup ioctl on @fd to find out the id of the
+ * root that contains it.
+ *
+ * Then we use the tree search ioctl to scan all the root items for that
+ * root id and return the highest generation we can find.
+ *
+ * Returns 0 if either ioctl fails (an error is printed) or if no root item
+ * was found.
+ */
+static u64 find_root_gen(int fd)
+{
+ struct btrfs_ioctl_ino_lookup_args ino_args;
+ int ret;
+ struct btrfs_ioctl_search_args args;
+ struct btrfs_ioctl_search_key *sk = &args.key;
+ struct btrfs_ioctl_search_header sh;
+ unsigned long off = 0;
+ u64 max_found = 0;
+ int i;
+
+ memset(&ino_args, 0, sizeof(ino_args));
+ ino_args.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+ /* this ioctl fills in ino_args->treeid */
+ ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR: Failed to lookup path for dirid %llu - %s\n",
+ (unsigned long long)BTRFS_FIRST_FREE_OBJECTID,
+ strerror(errno));
+ return 0;
+ }
+
+ memset(&args, 0, sizeof(args));
+
+ /* search tree id 1, the same tree the subvolume scan below uses */
+ sk->tree_id = 1;
+
+ /*
+ * there may be more than one ROOT_ITEM key if there are
+ * snapshots pending deletion, we have to loop through
+ * them.
+ */
+ sk->min_objectid = ino_args.treeid;
+ sk->max_objectid = ino_args.treeid;
+ sk->max_type = BTRFS_ROOT_ITEM_KEY;
+ sk->min_type = BTRFS_ROOT_ITEM_KEY;
+ sk->max_offset = (u64)-1;
+ sk->max_transid = (u64)-1;
+ sk->nr_items = 4096;
+
+ /*
+ * NOTE(review): unlike __list_subvol_search(), nr_items is not reset to
+ * 4096 between iterations, so each later search asks for at most as many
+ * items as the previous one returned. The loop still terminates once a
+ * search returns no items; presumably harmless, confirm.
+ */
+ while (1) {
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR: can't perform the search - %s\n",
+ strerror(errno));
+ return 0;
+ }
+ /* the ioctl returns the number of item it found in nr_items */
+ if (sk->nr_items == 0)
+ break;
+
+ off = 0;
+ for (i = 0; i < sk->nr_items; i++) {
+ struct btrfs_root_item *item;
+
+ /* each result is a search header followed by sh.len bytes of item data */
+ memcpy(&sh, args.buf + off, sizeof(sh));
+ off += sizeof(sh);
+ item = (struct btrfs_root_item *)(args.buf + off);
+ off += sh.len;
+
+ /* remember the last key seen so the next search resumes after it */
+ sk->min_objectid = sh.objectid;
+ sk->min_type = sh.type;
+ sk->min_offset = sh.offset;
+
+ if (sh.objectid > ino_args.treeid)
+ break;
+
+ if (sh.objectid == ino_args.treeid &&
+ sh.type == BTRFS_ROOT_ITEM_KEY) {
+ max_found = max(max_found,
+ btrfs_root_generation(item));
+ }
+ }
+ /* step past the last key; stop if the offset cannot be advanced */
+ if (sk->min_offset < (u64)-1)
+ sk->min_offset++;
+ else
+ break;
+
+ /* stop once the results have moved off the wanted key type or root */
+ if (sk->min_type != BTRFS_ROOT_ITEM_KEY)
+ break;
+ if (sk->min_objectid != ino_args.treeid)
+ break;
+ }
+ return max_found;
+}
+
+/*
+ * Pass in a directory id and this will return the full path of that
+ * directory inside its subvolume root, as reported by the inode lookup
+ * ioctl on @fd.
+ *
+ * Returns a newly allocated string the caller must free, NULL if the
+ * directory is the subvolume root itself, or an ERR_PTR when the ioctl or
+ * the allocation fails.
+ */
+static char *__ino_resolve(int fd, u64 dirid)
+{
+	struct btrfs_ioctl_ino_lookup_args args;
+	char *dir_path;
+	int ret;
+
+	memset(&args, 0, sizeof(args));
+	args.objectid = dirid;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: Failed to lookup path for dirid %llu - %s\n",
+			(unsigned long long)dirid, strerror(errno));
+		return ERR_PTR(ret);
+	}
+
+	/* an empty name means we're at the root of the tree */
+	if (!args.name[0])
+		return NULL;
+
+	/*
+	 * we're in a subdirectory, the kernel ioctl
+	 * puts a / in there for us
+	 */
+	dir_path = strdup(args.name);
+	if (!dir_path) {
+		perror("malloc failed");
+		return ERR_PTR(-ENOMEM);
+	}
+	return dir_path;
+}
+
+/*
+ * Simple string builder: returns a newly allocated concatenation of
+ * @dirid (a directory path, may be NULL) and @name.  No separator is
+ * inserted.  Returns NULL if the allocation fails; the caller frees the
+ * result.
+ */
+static char *build_name(char *dirid, char *name)
+{
+	size_t dir_len = dirid ? strlen(dirid) : 0;
+	size_t name_len = strlen(name);
+	char *joined;
+
+	joined = malloc(dir_len + name_len + 1);
+	if (!joined)
+		return NULL;
+
+	if (dir_len)
+		memcpy(joined, dirid, dir_len);
+	/* the + 1 carries the terminating NUL of @name along */
+	memcpy(joined + dir_len, name, name_len + 1);
+	return joined;
+}
+
+/*
+ * given an inode number, this returns the full path name inside the subvolume
+ * to that file/directory.  cache_dirid and cache_name are used to
+ * cache the results so we can avoid tree searches if a later call goes
+ * to the same directory or file name
+ *
+ * On success the result is a newly allocated string the caller must free,
+ * and *cache_dirid / *cache_name describe the parent directory
+ * (*cache_name is owned by the cache; a previous value is freed when it is
+ * replaced).  Returns NULL when the search fails, no inode ref is found,
+ * the parent directory cannot be resolved or memory runs out; the cache is
+ * left untouched in all of those cases.
+ */
+static char *ino_resolve(int fd, u64 ino, u64 *cache_dirid, char **cache_name)
+{
+	u64 dirid;
+	char *dirname;
+	char *name;
+	char *full;
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	struct btrfs_inode_ref *ref;
+	int namelen;
+
+	memset(&args, 0, sizeof(args));
+
+	sk->tree_id = 0;
+
+	/*
+	 * step one, we search for the inode back ref.  We just use the first
+	 * one
+	 */
+	sk->min_objectid = ino;
+	sk->max_objectid = ino;
+	sk->max_type = BTRFS_INODE_REF_KEY;
+	sk->max_offset = (u64)-1;
+	sk->min_type = BTRFS_INODE_REF_KEY;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 1;
+
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: can't perform the search - %s\n",
+			strerror(errno));
+		return NULL;
+	}
+	/* the ioctl returns the number of item it found in nr_items */
+	if (sk->nr_items == 0)
+		return NULL;
+
+	sh = (struct btrfs_ioctl_search_header *)args.buf;
+	if (sh->type != BTRFS_INODE_REF_KEY)
+		return NULL;
+
+	/* the key offset of an inode ref is the parent directory id */
+	dirid = sh->offset;
+
+	/* the name follows the ref structure and is not NUL-terminated */
+	ref = (struct btrfs_inode_ref *)(sh + 1);
+	namelen = btrfs_stack_inode_ref_name_len(ref);
+	name = strndup((char *)(ref + 1), namelen);
+	if (!name) {
+		fprintf(stderr, "memory allocation failed\n");
+		return NULL;
+	}
+
+	if (dirid == *cache_dirid && *cache_name) {
+		/* use our cached value */
+		dirname = *cache_name;
+	} else {
+		/*
+		 * the inode backref gives us the file name and the parent
+		 * directory id.  From here we use __ino_resolve to get the
+		 * path to the parent
+		 */
+		dirname = __ino_resolve(fd, dirid);
+		if (IS_ERR(dirname)) {
+			/*
+			 * an error pointer must never reach build_name() or
+			 * the cache; __ino_resolve() already reported it
+			 */
+			free(name);
+			return NULL;
+		}
+	}
+
+	full = build_name(dirname, name);
+	if (*cache_name && dirname != *cache_name)
+		free(*cache_name);
+
+	*cache_name = dirname;
+	*cache_dirid = dirid;
+	free(name);
+
+	return full;
+}
+
+/*
+ * Find the id of the default subvolume of the filesystem behind @fd.
+ *
+ * Returns the (negative) ioctl result if the tree search fails, in which
+ * case *@default_id is not written.  Otherwise returns 0 and stores the
+ * root id the 'default' dir item points to, or 0 when no such item was
+ * found.
+ */
+int btrfs_list_get_default_subvolume(int fd, u64 *default_id)
+{
+	static const char default_name[] = "default";
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	u64 found = 0;
+	int ret;
+
+	memset(&args, 0, sizeof(args));
+
+	/*
+	 * search for a dir item with a name 'default' in the tree of
+	 * tree roots, it should point us to a default root
+	 */
+	sk->tree_id = 1;
+
+	/* don't worry about ancient format and request only one item */
+	sk->nr_items = 1;
+
+	sk->max_objectid = BTRFS_ROOT_TREE_DIR_OBJECTID;
+	sk->min_objectid = BTRFS_ROOT_TREE_DIR_OBJECTID;
+	sk->max_type = BTRFS_DIR_ITEM_KEY;
+	sk->min_type = BTRFS_DIR_ITEM_KEY;
+	sk->max_offset = (u64)-1;
+	sk->max_transid = (u64)-1;
+
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+	if (ret < 0)
+		return ret;
+
+	/* the ioctl returns the number of items it found in nr_items */
+	if (sk->nr_items == 0)
+		goto out;
+
+	sh = (struct btrfs_ioctl_search_header *)args.buf;
+
+	if (sh->type == BTRFS_DIR_ITEM_KEY) {
+		struct btrfs_dir_item *di;
+		int name_len;
+		char *name;
+
+		di = (struct btrfs_dir_item *)(sh + 1);
+		name_len = btrfs_stack_dir_name_len(di);
+		name = (char *)(di + 1);
+
+		/*
+		 * The on-disk name is not NUL-terminated, so the length has
+		 * to match as well: a bare strncmp() over name_len bytes
+		 * would also accept "def", "d" or an empty name.
+		 */
+		if ((size_t)name_len == sizeof(default_name) - 1 &&
+		    !strncmp(default_name, name, name_len))
+			found = btrfs_disk_key_objectid(&di->location);
+	}
+
+out:
+	*default_id = found;
+	return 0;
+}
+
+/*
+ * Scan tree id 1 through the tree search ioctl on @fd and record every root
+ * it describes in @root_lookup (which is reinitialised first).
+ *
+ * ROOT_BACKREF items supply the referring tree, directory id and name of a
+ * root; ROOT_ITEM items supply generation, flags and, when the item is
+ * larger than struct btrfs_root_item_v0, creation time, creation transid
+ * and the three uuids. Both kinds are merged per root id by add_root().
+ *
+ * Returns 0 on success or the negative ioctl result on failure.
+ */
+static int __list_subvol_search(int fd, struct root_lookup *root_lookup)
+{
+ int ret;
+ struct btrfs_ioctl_search_args args;
+ struct btrfs_ioctl_search_key *sk = &args.key;
+ struct btrfs_ioctl_search_header sh;
+ struct btrfs_root_ref *ref;
+ struct btrfs_root_item *ri;
+ unsigned long off = 0;
+ int name_len;
+ char *name;
+ u64 dir_id;
+ u64 gen = 0;
+ u64 ogen;
+ u64 flags;
+ int i;
+ time_t t;
+ u8 uuid[BTRFS_UUID_SIZE];
+ u8 puuid[BTRFS_UUID_SIZE];
+ u8 ruuid[BTRFS_UUID_SIZE];
+
+ root_lookup_init(root_lookup);
+ memset(&args, 0, sizeof(args));
+
+ /* search in the tree of tree roots */
+ sk->tree_id = 1;
+
+ /*
+ * limit the key types to the range from root items up to root
+ * backrefs; both kinds are handled in the loop below and items of
+ * any other type that come back are skipped.
+ */
+ sk->max_type = BTRFS_ROOT_BACKREF_KEY;
+ sk->min_type = BTRFS_ROOT_ITEM_KEY;
+
+ sk->min_objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+ /*
+ * set all the other params to the max, we'll take any objectid
+ * and any trans
+ */
+ sk->max_objectid = BTRFS_LAST_FREE_OBJECTID;
+ sk->max_offset = (u64)-1;
+ sk->max_transid = (u64)-1;
+
+ /* just a big number, doesn't matter much */
+ sk->nr_items = 4096;
+
+ while(1) {
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ if (ret < 0)
+ return ret;
+ /* the ioctl returns the number of item it found in nr_items */
+ if (sk->nr_items == 0)
+ break;
+
+ off = 0;
+
+ /*
+ * for each item, pull the key out of the header and then
+ * read the root_ref or root_item it contains
+ */
+ for (i = 0; i < sk->nr_items; i++) {
+ memcpy(&sh, args.buf + off, sizeof(sh));
+ off += sizeof(sh);
+ if (sh.type == BTRFS_ROOT_BACKREF_KEY) {
+ /* the name bytes directly follow the root_ref structure */
+ ref = (struct btrfs_root_ref *)(args.buf + off);
+ name_len = btrfs_stack_root_ref_name_len(ref);
+ name = (char *)(ref + 1);
+ dir_id = btrfs_stack_root_ref_dirid(ref);
+
+ /* for a backref the key offset is passed as the referring tree id */
+ add_root(root_lookup, sh.objectid, sh.offset,
+ 0, 0, dir_id, name, name_len, 0, 0, 0,
+ NULL, NULL, NULL);
+ } else if (sh.type == BTRFS_ROOT_ITEM_KEY) {
+ ri = (struct btrfs_root_item *)(args.buf + off);
+ gen = btrfs_root_generation(ri);
+ flags = btrfs_root_flags(ri);
+ /* only items larger than the v0 layout carry otime, otransid and uuids */
+ if(sh.len >
+ sizeof(struct btrfs_root_item_v0)) {
+ t = btrfs_stack_timespec_sec(&ri->otime);
+ ogen = btrfs_root_otransid(ri);
+ memcpy(uuid, ri->uuid, BTRFS_UUID_SIZE);
+ memcpy(puuid, ri->parent_uuid, BTRFS_UUID_SIZE);
+ memcpy(ruuid, ri->received_uuid, BTRFS_UUID_SIZE);
+ } else {
+ t = 0;
+ ogen = 0;
+ memset(uuid, 0, BTRFS_UUID_SIZE);
+ memset(puuid, 0, BTRFS_UUID_SIZE);
+ memset(ruuid, 0, BTRFS_UUID_SIZE);
+ }
+
+ /* for a root item the key offset is passed as root_offset */
+ add_root(root_lookup, sh.objectid, 0,
+ sh.offset, flags, 0, NULL, 0, ogen,
+ gen, t, uuid, puuid, ruuid);
+ }
+
+ off += sh.len;
+
+ /*
+ * record the mins in sk so we can make sure the
+ * next search doesn't repeat this root
+ */
+ sk->min_objectid = sh.objectid;
+ sk->min_type = sh.type;
+ sk->min_offset = sh.offset;
+ }
+ sk->nr_items = 4096;
+ /*
+ * advance the search key just past the last item seen, carrying
+ * from offset into type and from type into objectid on overflow
+ */
+ sk->min_offset++;
+ if (!sk->min_offset) /* overflow */
+ sk->min_type++;
+ else
+ continue;
+
+ if (sk->min_type > BTRFS_ROOT_BACKREF_KEY) {
+ sk->min_type = BTRFS_ROOT_ITEM_KEY;
+ sk->min_objectid++;
+ } else
+ continue;
+
+ if (sk->min_objectid > sk->max_objectid)
+ break;
+ }
+
+ return 0;
+}
+
+/* Filter: keep only the root whose root_id equals @data. */
+static int filter_by_rootid(struct root_info *ri, u64 data)
+{
+ return ri->root_id == data;
+}
+
+/*
+ * Filter: keep roots with a non-zero root_offset; @data is unused.
+ * root_offset is taken from the offset of the root item key; presumably it
+ * is non-zero only for snapshots - TODO confirm.
+ */
+static int filter_snapshot(struct root_info *ri, u64 data)
+{
+ return !!ri->root_offset;
+}
+
+/*
+ * Filter: keep roots that have at least one of the bits in @flags set.
+ * The 64-bit mask result is reduced to 0/1 before it is returned as int,
+ * so flag bits above bit 31 are not lost to truncation.
+ */
+static int filter_flags(struct root_info *ri, u64 flags)
+{
+	return (ri->flags & flags) != 0;
+}
+
+/* Filter: keep roots whose gen is at least @data. */
+static int filter_gen_more(struct root_info *ri, u64 data)
+{
+ return ri->gen >= data;
+}
+
+/* Filter: keep roots whose gen is at most @data. */
+static int filter_gen_less(struct root_info *ri, u64 data)
+{
+ return ri->gen <= data;
+}
+
+/* Filter: keep roots whose gen equals @data. */
+static int filter_gen_equal(struct root_info *ri, u64 data)
+{
+ return ri->gen == data;
+}
+
+/* Filter: keep roots whose ogen is at least @data. */
+static int filter_cgen_more(struct root_info *ri, u64 data)
+{
+ return ri->ogen >= data;
+}
+
+/* Filter: keep roots whose ogen is at most @data. */
+static int filter_cgen_less(struct root_info *ri, u64 data)
+{
+ return ri->ogen <= data;
+}
+
+/* Filter: keep roots whose ogen equals @data. */
+static int filter_cgen_equal(struct root_info *ri, u64 data)
+{
+ return ri->ogen == data;
+}
+
+/* Filter: keep roots whose top_id equals @data. */
+static int filter_topid_equal(struct root_info *ri, u64 data)
+{
+ return ri->top_id == data;
+}
+
+/*
+ * Not a real filter: always accepts the root.  As a side effect, a root
+ * that has a full_path and whose top_id differs from @data gets its
+ * full_path replaced by "<FS_TREE>/" followed by the old path.
+ * Exits the process on allocation failure.
+ */
+static int filter_full_path(struct root_info *ri, u64 data)
+{
+	const char prefix[] = "<FS_TREE>";
+	int prefix_len;
+	int path_len;
+	char *prefixed;
+
+	if (!ri->full_path || ri->top_id == data)
+		return 1;
+
+	prefix_len = strlen(prefix);
+	path_len = strlen(ri->full_path);
+
+	/* prefix + '/' + path + NUL */
+	prefixed = malloc(path_len + prefix_len + 2);
+	if (!prefixed) {
+		fprintf(stderr, "memory allocation failed\n");
+		exit(1);
+	}
+
+	memcpy(prefixed, prefix, prefix_len);
+	prefixed[prefix_len] = '/';
+	memcpy(prefixed + prefix_len + 1, ri->full_path, path_len);
+	prefixed[prefix_len + 1 + path_len] = '\0';
+
+	free(ri->full_path);
+	ri->full_path = prefixed;
+	return 1;
+}
+
+/*
+ * Filter: keep roots whose parent uuid equals the uuid that @data points
+ * to. @data is not a number here: it is cast back to a pointer and handed
+ * to uuid_compare(), so the caller must pass the address of a uuid that
+ * stays valid while the filter is in use.
+ */
+static int filter_by_parent(struct root_info *ri, u64 data)
+{
+ return !uuid_compare(ri->puuid, (u8 *)(unsigned long)data);
+}
+
+/* Filter: keep roots that have been marked deleted; @data is unused. */
+static int filter_deleted(struct root_info *ri, u64 data)
+{
+ return ri->deleted;
+}
+
+/*
+ * Filter callbacks, indexed by enum btrfs_list_filter_enum.
+ * btrfs_list_setup_filter() picks its callback from this table.
+ */
+static btrfs_list_filter_func all_filter_funcs[] = {
+ [BTRFS_LIST_FILTER_ROOTID] = filter_by_rootid,
+ [BTRFS_LIST_FILTER_SNAPSHOT_ONLY] = filter_snapshot,
+ [BTRFS_LIST_FILTER_FLAGS] = filter_flags,
+ [BTRFS_LIST_FILTER_GEN_MORE] = filter_gen_more,
+ [BTRFS_LIST_FILTER_GEN_LESS] = filter_gen_less,
+ [BTRFS_LIST_FILTER_GEN_EQUAL] = filter_gen_equal,
+ [BTRFS_LIST_FILTER_CGEN_MORE] = filter_cgen_more,
+ [BTRFS_LIST_FILTER_CGEN_LESS] = filter_cgen_less,
+ [BTRFS_LIST_FILTER_CGEN_EQUAL] = filter_cgen_equal,
+ [BTRFS_LIST_FILTER_TOPID_EQUAL] = filter_topid_equal,
+ [BTRFS_LIST_FILTER_FULL_PATH] = filter_full_path,
+ [BTRFS_LIST_FILTER_BY_PARENT] = filter_by_parent,
+ [BTRFS_LIST_FILTER_DELETED] = filter_deleted,
+};
+
+/*
+ * Allocate a zeroed filter set with room for BTRFS_LIST_NFILTERS_INCREASE
+ * filters.  Never returns NULL: the process exits on allocation failure.
+ * Release the result with btrfs_list_free_filter_set().
+ */
+struct btrfs_list_filter_set *btrfs_list_alloc_filter_set(void)
+{
+	struct btrfs_list_filter_set *new_set;
+	int bytes;
+
+	bytes = sizeof(struct btrfs_list_filter_set) +
+		BTRFS_LIST_NFILTERS_INCREASE * sizeof(struct btrfs_list_filter);
+
+	new_set = calloc(1, bytes);
+	if (new_set) {
+		new_set->total = BTRFS_LIST_NFILTERS_INCREASE;
+		return new_set;
+	}
+
+	fprintf(stderr, "memory allocation failed\n");
+	exit(1);
+}
+
+/* Release a filter set; the set is a single allocation, so one free() suffices. */
+void btrfs_list_free_filter_set(struct btrfs_list_filter_set *filter_set)
+{
+ free(filter_set);
+}
+
+/*
+ * Append one filter with its argument @data to *@filter_set, growing the
+ * set by BTRFS_LIST_NFILTERS_INCREASE slots when it is full.  Growing may
+ * move the set, in which case *@filter_set is updated.  Adding
+ * BTRFS_LIST_FILTER_DELETED also switches the set to "only deleted" mode.
+ * Exits the process if the reallocation fails; otherwise returns 0.
+ */
+int btrfs_list_setup_filter(struct btrfs_list_filter_set **filter_set,
+			    enum btrfs_list_filter_enum filter, u64 data)
+{
+	struct btrfs_list_filter_set *set = *filter_set;
+	struct btrfs_list_filter *slot;
+
+	BUG_ON(!set);
+	BUG_ON(filter >= BTRFS_LIST_FILTER_MAX);
+	BUG_ON(set->nfilters > set->total);
+
+	if (set->nfilters == set->total) {
+		struct btrfs_list_filter_set *grown;
+		int bytes;
+
+		bytes = set->total + BTRFS_LIST_NFILTERS_INCREASE;
+		bytes = sizeof(*set) + bytes * sizeof(struct btrfs_list_filter);
+
+		grown = realloc(set, bytes);
+		if (!grown) {
+			fprintf(stderr, "memory allocation failed\n");
+			/* realloc() left the old block untouched */
+			free(set);
+			exit(1);
+		}
+		set = grown;
+
+		/* only the freshly added tail needs zeroing */
+		memset(&set->filters[set->total], 0,
+		       BTRFS_LIST_NFILTERS_INCREASE *
+		       sizeof(struct btrfs_list_filter));
+		set->total += BTRFS_LIST_NFILTERS_INCREASE;
+		*filter_set = set;
+	}
+
+	slot = &set->filters[set->nfilters];
+	BUG_ON(slot->filter_func);
+
+	if (filter == BTRFS_LIST_FILTER_DELETED)
+		set->only_deleted = 1;
+
+	slot->filter_func = all_filter_funcs[filter];
+	slot->data = data;
+	set->nfilters++;
+	return 0;
+}
+
+/*
+ * Decide whether @ri passes @set.  Returns 1 to keep the root, 0 to drop
+ * it.  A NULL set accepts everything.  Otherwise the root's deleted state
+ * must match the set's only_deleted mode and every registered filter must
+ * accept the root.
+ */
+static int filter_root(struct root_info *ri,
+		       struct btrfs_list_filter_set *set)
+{
+	int idx;
+
+	if (!set)
+		return 1;
+
+	/* deleted roots are shown only in only_deleted mode, and vice versa */
+	if (!!set->only_deleted != !!ri->deleted)
+		return 0;
+
+	for (idx = 0; idx < set->nfilters; idx++) {
+		btrfs_list_filter_func accept = set->filters[idx].filter_func;
+
+		if (!accept)
+			break;
+		if (!accept(ri, set->filters[idx].data))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Walk @all_subvols from the highest root id down, resolve the full path
+ * of every root relative to @top_id, and insert the roots accepted by
+ * @filter_set into @sort_tree (reinitialised first), ordered by @comp_set.
+ *
+ * Roots whose path cannot be resolved (-ENOENT from resolve_root()) are
+ * flagged as deleted and get the placeholder path "DELETED".
+ * Exits the process on allocation failure.
+ */
+static void __filter_and_sort_subvol(struct root_lookup *all_subvols,
+				    struct root_lookup *sort_tree,
+				    struct btrfs_list_filter_set *filter_set,
+				    struct btrfs_list_comparer_set *comp_set,
+				    u64 top_id)
+{
+	struct rb_node *n;
+	struct root_info *entry;
+	int ret;
+
+	root_lookup_init(sort_tree);
+
+	n = rb_last(&all_subvols->root);
+	while (n) {
+		entry = rb_entry(n, struct root_info, rb_node);
+
+		ret = resolve_root(all_subvols, entry, top_id);
+		if (ret == -ENOENT) {
+			entry->full_path = strdup("DELETED");
+			if (!entry->full_path) {
+				/*
+				 * a NULL full_path would crash the path
+				 * comparer and trip the BUG_ON in the
+				 * path column printer later on
+				 */
+				perror("strdup failed");
+				exit(1);
+			}
+			entry->deleted = 1;
+		}
+		ret = filter_root(entry, filter_set);
+		if (ret)
+			sort_tree_insert(sort_tree, entry, comp_set);
+		n = rb_prev(n);
+	}
+}
+
+/*
+ * Fill in the path of every root in @root_lookup via lookup_ino_path().
+ * -ENOENT results are tolerated; any other failure stops the walk and is
+ * returned to the caller.  Returns 0 when the whole tree was processed.
+ */
+static int __list_subvol_fill_paths(int fd, struct root_lookup *root_lookup)
+{
+	struct rb_node *cursor;
+
+	for (cursor = rb_first(&root_lookup->root); cursor;
+	     cursor = rb_next(cursor)) {
+		struct root_info *info;
+		int err;
+
+		info = rb_entry(cursor, struct root_info, rb_node);
+		err = lookup_ino_path(fd, info);
+		if (err && err != -ENOENT)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the value of one column of @subv to stdout, without any separator
+ * or newline.  A zero creation time and all-zero uuids are shown as "-".
+ * @column must be a real column (not BTRFS_LIST_ALL); for BTRFS_LIST_PATH
+ * the full_path must have been resolved already.
+ */
+static void print_subvolume_column(struct root_info *subv,
+				   enum btrfs_list_column_enum column)
+{
+	char tstr[256];
+	char uuidparse[BTRFS_UUID_UNPARSED_SIZE];
+
+	BUG_ON(column >= BTRFS_LIST_ALL || column < 0);
+
+	switch (column) {
+	case BTRFS_LIST_OBJECTID:
+		printf("%llu", subv->root_id);
+		break;
+	case BTRFS_LIST_GENERATION:
+		printf("%llu", subv->gen);
+		break;
+	case BTRFS_LIST_OGENERATION:
+		printf("%llu", subv->ogen);
+		break;
+	case BTRFS_LIST_PARENT:
+		printf("%llu", subv->ref_tree);
+		break;
+	case BTRFS_LIST_TOP_LEVEL:
+		printf("%llu", subv->top_id);
+		break;
+	case BTRFS_LIST_OTIME:
+		if (!subv->otime) {
+			fputs("-", stdout);
+		} else {
+			struct tm tm;
+
+			localtime_r(&subv->otime, &tm);
+			strftime(tstr, sizeof(tstr), "%Y-%m-%d %X", &tm);
+			fputs(tstr, stdout);
+		}
+		break;
+	case BTRFS_LIST_UUID:
+		if (uuid_is_null(subv->uuid)) {
+			fputs("-", stdout);
+		} else {
+			uuid_unparse(subv->uuid, uuidparse);
+			fputs(uuidparse, stdout);
+		}
+		break;
+	case BTRFS_LIST_PUUID:
+		if (uuid_is_null(subv->puuid)) {
+			fputs("-", stdout);
+		} else {
+			uuid_unparse(subv->puuid, uuidparse);
+			fputs(uuidparse, stdout);
+		}
+		break;
+	case BTRFS_LIST_RUUID:
+		if (uuid_is_null(subv->ruuid)) {
+			fputs("-", stdout);
+		} else {
+			uuid_unparse(subv->ruuid, uuidparse);
+			fputs(uuidparse, stdout);
+		}
+		break;
+	case BTRFS_LIST_PATH:
+		BUG_ON(!subv->full_path);
+		fputs(subv->full_path, stdout);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Raw layout: print every enabled column of @subv back to back on one
+ * line, each one preceded by @raw_prefix when a prefix was supplied.
+ */
+static void print_single_volume_info_raw(struct root_info *subv, char *raw_prefix)
+{
+	int col = 0;
+
+	while (col < BTRFS_LIST_ALL) {
+		if (btrfs_list_columns[col].need_print) {
+			if (raw_prefix != NULL)
+				printf("%s", raw_prefix);
+			print_subvolume_column(subv, col);
+		}
+		col++;
+	}
+	printf("\n");
+}
+
+/*
+ * Table layout: print every enabled column of @subv separated by tabs.
+ * The top-level column is followed by two tabs, the path column (the
+ * last one) by none.
+ */
+static void print_single_volume_info_table(struct root_info *subv)
+{
+	int col;
+
+	for (col = 0; col < BTRFS_LIST_ALL; col++) {
+		if (!btrfs_list_columns[col].need_print)
+			continue;
+
+		print_subvolume_column(subv, col);
+
+		switch (col) {
+		case BTRFS_LIST_PATH:
+			break;
+		case BTRFS_LIST_TOP_LEVEL:
+			printf("\t");
+			printf("\t");
+			break;
+		default:
+			printf("\t");
+			break;
+		}
+	}
+	printf("\n");
+}
+
+/*
+ * Default layout: print "<column name> <value>" for every enabled column
+ * of @subv, separated by single spaces, then a newline.
+ */
+static void print_single_volume_info_default(struct root_info *subv)
+{
+	int col = 0;
+
+	while (col < BTRFS_LIST_ALL) {
+		if (btrfs_list_columns[col].need_print) {
+			printf("%s ", btrfs_list_columns[col].name);
+			print_subvolume_column(subv, col);
+
+			/* the path column is the last one: no separator */
+			if (col != BTRFS_LIST_PATH)
+				printf(" ");
+		}
+		col++;
+	}
+	printf("\n");
+}
+
+/*
+ * Print the two header lines of the table layout: the names of all
+ * enabled columns, then a row of dashes underlining each name.  Every
+ * cell is followed by a tab and each line ends with a newline.
+ *
+ * The dashes are written directly instead of being assembled in a
+ * fixed-size buffer, so a column name of any length is safe.
+ */
+static void print_all_volume_info_tab_head(void)
+{
+	int i;
+	size_t len;
+
+	for (i = 0; i < BTRFS_LIST_ALL; i++) {
+		if (btrfs_list_columns[i].need_print)
+			printf("%s\t", btrfs_list_columns[i].name);
+	}
+	printf("\n");
+
+	for (i = 0; i < BTRFS_LIST_ALL; i++) {
+		if (!btrfs_list_columns[i].need_print)
+			continue;
+
+		/* underline as wide as the column name */
+		for (len = strlen(btrfs_list_columns[i].name); len > 0; len--)
+			putchar('-');
+		printf("\t");
+	}
+	printf("\n");
+}
+
+/*
+ * Print every entry of the sorted tree using the requested layout.
+ * The table layout gets a header first; @raw_prefix is only used by the
+ * raw layout.  An unknown layout value prints nothing per entry.
+ */
+static void print_all_volume_info(struct root_lookup *sorted_tree,
+				  int layout, char *raw_prefix)
+{
+	struct rb_node *node;
+	struct root_info *ri;
+
+	if (layout == BTRFS_LIST_LAYOUT_TABLE)
+		print_all_volume_info_tab_head();
+
+	for (node = rb_first(&sorted_tree->root); node; node = rb_next(node)) {
+		/* entries are linked into the sorted tree via sort_node */
+		ri = rb_entry(node, struct root_info, sort_node);
+
+		if (layout == BTRFS_LIST_LAYOUT_DEFAULT)
+			print_single_volume_info_default(ri);
+		else if (layout == BTRFS_LIST_LAYOUT_TABLE)
+			print_single_volume_info_table(ri);
+		else if (layout == BTRFS_LIST_LAYOUT_RAW)
+			print_single_volume_info_raw(ri, raw_prefix);
+	}
+}
+
+/*
+ * Populate @root_lookup for the filesystem behind @fd: first search for
+ * the roots, then fill in the path of each one within the subvolume that
+ * references it.  Returns 0 on success or the failing step's result; a
+ * failed search is also reported on stderr.
+ */
+static int btrfs_list_subvols(int fd, struct root_lookup *root_lookup)
+{
+	int ret = __list_subvol_search(fd, root_lookup);
+
+	if (!ret)
+		return __list_subvol_fill_paths(fd, root_lookup);
+
+	fprintf(stderr, "ERROR: can't perform the search - %s\n",
+		strerror(errno));
+	return ret;
+}
+
+/*
+ * List the subvolumes of the filesystem behind @fd, filter and sort them
+ * with @filter_set / @comp_set, and print them in the given @layout
+ * (one of BTRFS_LIST_LAYOUT_*).
+ *
+ * When @full_path is non-zero the root id of @fd's path is looked up and
+ * used as top id; otherwise the top id is 0.  @raw_prefix is handed to the
+ * raw layout printer.  Returns 0 on success or the first failing step's
+ * result.
+ */
+int btrfs_list_subvols_print(int fd, struct btrfs_list_filter_set *filter_set,
+			     struct btrfs_list_comparer_set *comp_set,
+			     int layout, int full_path, char *raw_prefix)
+{
+	struct root_lookup root_lookup;
+	struct root_lookup root_sort;
+	u64 top_id = 0;
+	int ret;
+
+	if (full_path) {
+		ret = btrfs_list_get_path_rootid(fd, &top_id);
+		if (ret)
+			return ret;
+	}
+
+	ret = btrfs_list_subvols(fd, &root_lookup);
+	if (ret)
+		return ret;
+
+	__filter_and_sort_subvol(&root_lookup, &root_sort, filter_set,
+				 comp_set, top_id);
+	print_all_volume_info(&root_sort, layout, raw_prefix);
+	__free_all_subvolumn(&root_lookup);
+
+	return 0;
+}
+
+/*
+ * strdup() that passes a NULL input through as NULL.  The caller owns
+ * (and must free) a non-NULL result.
+ */
+static char *strdup_or_null(const char *s)
+{
+	return s ? strdup(s) : NULL;
+}
+
+/*
+ * Find the listed subvolume that compares equal to @the_ri (according to
+ * comp_entry_with_rootid()) and copy its information into @the_ri.
+ *
+ * On a match the fixed-size members of the entry (everything laid out
+ * before `path` in struct root_info) are copied verbatim and the three
+ * string members are duplicated, so the caller owns the_ri->path,
+ * the_ri->name and the_ri->full_path afterwards.
+ *
+ * Returns 0 on a match, the error of the rootid lookup / listing if one of
+ * those fails, or -ENOENT when no entry matched and at least one entry
+ * could not be resolved.
+ *
+ * NOTE(review): if no entry matches and every entry resolved fine, the
+ * function still returns 0 with @the_ri untouched - confirm callers cope.
+ * NOTE(review): the verbatim copy includes rb_node and sort_node, which
+ * presumably point into the tree released below - confirm they are never
+ * used by callers.
+ */
+int btrfs_get_subvol(int fd, struct root_info *the_ri)
+{
+ int ret, rr;
+ struct root_lookup rl;
+ struct rb_node *rbn;
+ struct root_info *ri;
+ u64 root_id;
+
+ ret = btrfs_list_get_path_rootid(fd, &root_id);
+ if (ret)
+ return ret;
+
+ ret = btrfs_list_subvols(fd, &rl);
+ if (ret)
+ return ret;
+
+ rbn = rb_first(&rl.root);
+ while(rbn) {
+ ri = rb_entry(rbn, struct root_info, rb_node);
+ rr = resolve_root(&rl, ri, root_id);
+ if (rr == -ENOENT) {
+ /* remember the failure, but keep looking at the other entries */
+ ret = -ENOENT;
+ rbn = rb_next(rbn);
+ continue;
+ }
+ if (!comp_entry_with_rootid(the_ri, ri, 0)) {
+ /* copy all members up to (not including) the string pointers */
+ memcpy(the_ri, ri, offsetof(struct root_info, path));
+ the_ri->path = strdup_or_null(ri->path);
+ the_ri->name = strdup_or_null(ri->name);
+ the_ri->full_path = strdup_or_null(ri->full_path);
+ /* a match overrides any -ENOENT recorded for earlier entries */
+ ret = 0;
+ break;
+ }
+ rbn = rb_next(rbn);
+ }
+ __free_all_subvolumn(&rl);
+ return ret;
+}
+
+/*
+ * Print one line describing the file extent @item found under search
+ * header @sh: inode, file offset, length, disk start/offset, generation,
+ * flags (COMPRESS, PREALLOC, INLINE or NONE) and the resolved file name.
+ *
+ * @cache_ino / @cache_full_name cache the name resolved for the most
+ * recent inode so consecutive extents of the same file need only one
+ * ino_resolve() call; @cache_dirid / @cache_dir_name are passed through to
+ * ino_resolve().  The cached strings stay owned by the caller's cache
+ * variables.
+ *
+ * Returns 0 on success, -EIO if the name cannot be resolved or the extent
+ * type is not one of REG, PREALLOC or INLINE.
+ */
+static int print_one_extent(int fd, struct btrfs_ioctl_search_header *sh,
+ struct btrfs_file_extent_item *item,
+ u64 found_gen, u64 *cache_dirid,
+ char **cache_dir_name, u64 *cache_ino,
+ char **cache_full_name)
+{
+ u64 len = 0;
+ u64 disk_start = 0;
+ u64 disk_offset = 0;
+ u8 type;
+ int compressed = 0;
+ int flags = 0;
+ char *name = NULL;
+
+ /* reuse the cached name for the same inode, otherwise drop the cache */
+ if (sh->objectid == *cache_ino) {
+ name = *cache_full_name;
+ } else if (*cache_full_name) {
+ free(*cache_full_name);
+ *cache_full_name = NULL;
+ }
+ if (!name) {
+ name = ino_resolve(fd, sh->objectid, cache_dirid,
+ cache_dir_name);
+ /* the cache is updated even when the lookup returned NULL */
+ *cache_full_name = name;
+ *cache_ino = sh->objectid;
+ }
+ if (!name)
+ return -EIO;
+
+ type = btrfs_stack_file_extent_type(item);
+ compressed = btrfs_stack_file_extent_compression(item);
+
+ if (type == BTRFS_FILE_EXTENT_REG ||
+ type == BTRFS_FILE_EXTENT_PREALLOC) {
+ disk_start = btrfs_stack_file_extent_disk_bytenr(item);
+ disk_offset = btrfs_stack_file_extent_offset(item);
+ len = btrfs_stack_file_extent_num_bytes(item);
+ } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+ /* inline extents are reported with disk start/offset 0 */
+ disk_start = 0;
+ disk_offset = 0;
+ len = btrfs_stack_file_extent_ram_bytes(item);
+ } else {
+ printf("unhandled extent type %d for inode %llu "
+ "file offset %llu gen %llu\n",
+ type,
+ (unsigned long long)sh->objectid,
+ (unsigned long long)sh->offset,
+ (unsigned long long)found_gen);
+
+ return -EIO;
+ }
+ printf("inode %llu file offset %llu len %llu disk start %llu "
+ "offset %llu gen %llu flags ",
+ (unsigned long long)sh->objectid,
+ (unsigned long long)sh->offset,
+ (unsigned long long)len,
+ (unsigned long long)disk_start,
+ (unsigned long long)disk_offset,
+ (unsigned long long)found_gen);
+
+ /* `flags` counts the flag names printed so far, to place the '|' */
+ if (compressed) {
+ printf("COMPRESS");
+ flags++;
+ }
+ if (type == BTRFS_FILE_EXTENT_PREALLOC) {
+ printf("%sPREALLOC", flags ? "|" : "");
+ flags++;
+ }
+ if (type == BTRFS_FILE_EXTENT_INLINE) {
+ printf("%sINLINE", flags ? "|" : "");
+ flags++;
+ }
+ if (!flags)
+ printf("NONE");
+
+ printf(" %s\n", name);
+ return 0;
+}
+
+/*
+ * Search tree @root_id for items changed since transaction @oldest_gen and
+ * print every file extent whose generation is at least @oldest_gen (one
+ * line per extent, via print_one_extent()).  Finishes by printing the
+ * value returned by find_root_gen() as "transid marker".
+ *
+ * Returns the result of the last BTRFS_IOC_TREE_SEARCH ioctl; a failing
+ * ioctl is reported on stderr.  Failures of print_one_extent() are not
+ * propagated.
+ */
+int btrfs_list_find_updated_files(int fd, u64 root_id, u64 oldest_gen)
+{
+ int ret;
+ struct btrfs_ioctl_search_args args;
+ struct btrfs_ioctl_search_key *sk = &args.key;
+ struct btrfs_ioctl_search_header sh;
+ struct btrfs_file_extent_item *item;
+ unsigned long off = 0;
+ u64 found_gen;
+ u64 max_found = 0;
+ int i;
+ u64 cache_dirid = 0;
+ u64 cache_ino = 0;
+ char *cache_dir_name = NULL;
+ char *cache_full_name = NULL;
+ struct btrfs_file_extent_item backup;
+
+ memset(&backup, 0, sizeof(backup));
+ memset(&args, 0, sizeof(args));
+
+ sk->tree_id = root_id;
+
+ /*
+ * set all the other params to the max, we'll take any objectid
+ * and any trans
+ */
+ sk->max_objectid = (u64)-1;
+ sk->max_offset = (u64)-1;
+ sk->max_transid = (u64)-1;
+ sk->max_type = BTRFS_EXTENT_DATA_KEY;
+ sk->min_transid = oldest_gen;
+ /* just a big number, doesn't matter much */
+ sk->nr_items = 4096;
+
+ max_found = find_root_gen(fd);
+ while(1) {
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR: can't perform the search - %s\n",
+ strerror(errno));
+ break;
+ }
+ /* the ioctl returns the number of item it found in nr_items */
+ if (sk->nr_items == 0)
+ break;
+
+ off = 0;
+
+ /*
+ * for each item, pull the key out of the header and then
+ * look at the file extent item that follows it
+ */
+ for (i = 0; i < sk->nr_items; i++) {
+ /* copy the header out: it may be unaligned inside args.buf */
+ memcpy(&sh, args.buf + off, sizeof(sh));
+ off += sizeof(sh);
+
+ /*
+ * just in case the item was too big, pass something other
+ * than garbage
+ */
+ if (sh.len == 0)
+ item = &backup;
+ else
+ item = (struct btrfs_file_extent_item *)(args.buf +
+ off);
+ found_gen = btrfs_stack_file_extent_generation(item);
+ if (sh.type == BTRFS_EXTENT_DATA_KEY &&
+ found_gen >= oldest_gen) {
+ print_one_extent(fd, &sh, item, found_gen,
+ &cache_dirid, &cache_dir_name,
+ &cache_ino, &cache_full_name);
+ }
+ off += sh.len;
+
+ /*
+ * record the mins in sk so we can make sure the
+ * next search doesn't return this item again
+ */
+ sk->min_objectid = sh.objectid;
+ sk->min_offset = sh.offset;
+ sk->min_type = sh.type;
+ }
+ /* reset the request size and step just past the last key seen */
+ sk->nr_items = 4096;
+ if (sk->min_offset < (u64)-1)
+ sk->min_offset++;
+ else if (sk->min_objectid < (u64)-1) {
+ sk->min_objectid++;
+ sk->min_offset = 0;
+ sk->min_type = 0;
+ } else
+ break;
+ }
+ free(cache_dir_name);
+ free(cache_full_name);
+ printf("transid marker was %llu\n", (unsigned long long)max_found);
+ return ret;
+}
+
+/*
+ * Return the full path of subvolume @root, as seen from the subvolume
+ * that @fd lives in.
+ *
+ * Returns a string the caller must free, NULL when @root is not listed or
+ * cannot be resolved (-ENOENT from resolve_root()), or an ERR_PTR() value
+ * when one of the lookup steps fails.
+ */
+char *btrfs_list_path_for_root(int fd, u64 root)
+{
+	struct root_lookup root_lookup;
+	struct rb_node *n;
+	char *ret_path = NULL;
+	int ret;
+	u64 top_id;
+
+	ret = btrfs_list_get_path_rootid(fd, &top_id);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = __list_subvol_search(fd, &root_lookup);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	ret = __list_subvol_fill_paths(fd, &root_lookup);
+	if (ret < 0) {
+		/* the search succeeded, so the tree holds entries to release */
+		__free_all_subvolumn(&root_lookup);
+		return ERR_PTR(ret);
+	}
+
+	n = rb_last(&root_lookup.root);
+	while (n) {
+		struct root_info *entry;
+
+		entry = rb_entry(n, struct root_info, rb_node);
+		ret = resolve_root(&root_lookup, entry, top_id);
+		if (ret == -ENOENT && entry->root_id == root) {
+			ret_path = NULL;
+			break;
+		}
+		if (entry->root_id == root) {
+			/*
+			 * take ownership of the string so that freeing the
+			 * tree below does not release it
+			 */
+			ret_path = entry->full_path;
+			entry->full_path = NULL;
+		}
+
+		n = rb_prev(n);
+	}
+	__free_all_subvolumn(&root_lookup);
+
+	return ret_path;
+}
+
+/*
+ * Parse a comma separated list of sort keys and register one comparer per
+ * key in @comps.
+ *
+ * Each key is one of the names in all_sort_items, optionally preceded by
+ * '+' (ascending, the default) or '-' (descending).  @opt_arg is modified
+ * in place by strtok().
+ *
+ * Returns 0 on success and -1 as soon as an unknown key is met.  Only '+'
+ * and '-' are accepted as prefix; any other leading character makes the
+ * key unknown.
+ */
+int btrfs_list_parse_sort_string(char *opt_arg,
+				 struct btrfs_list_comparer_set **comps)
+{
+	char *token;
+
+	while ((token = strtok(opt_arg, ",")) != NULL) {
+		char **item;
+		char *name = token;
+		int descending = 0;
+		int what_to_sort;
+
+		/* strtok() continues on the same string when given NULL */
+		opt_arg = NULL;
+
+		if (*name == '+') {
+			name++;
+		} else if (*name == '-') {
+			descending = 1;
+			name++;
+		}
+
+		for (item = all_sort_items; *item; item++) {
+			if (strcmp(*item, name) == 0)
+				break;
+		}
+		if (!*item)
+			return -1;
+
+		what_to_sort = btrfs_list_get_sort_item(name);
+		btrfs_list_setup_comparer(comps, what_to_sort, descending);
+	}
+
+	return 0;
+}
+
+/*
+ * Parse the argument of a filter condition and register the filter.
+ *
+ * @type is the base ("equal") filter.  A leading '+' selects the filter
+ * two enumerators after @type and a leading '-' the one right after it;
+ * the rest of the string is parsed as an unsigned 64-bit number with
+ * arg_strtou64().  Always returns 0.
+ */
+int btrfs_list_parse_filter_string(char *opt_arg,
+				   struct btrfs_list_filter_set **filters,
+				   enum btrfs_list_filter_enum type)
+{
+	char *number = opt_arg;
+	u64 arg;
+
+	if (*opt_arg == '+') {
+		type += 2;
+		number++;
+	} else if (*opt_arg == '-') {
+		type += 1;
+		number++;
+	}
+
+	arg = arg_strtou64(number);
+	btrfs_list_setup_filter(filters, type, arg);
+
+	return 0;
+}
+
+/*
+ * Store in @treeid the tree id reported by BTRFS_IOC_INO_LOOKUP for
+ * objectid BTRFS_FIRST_FREE_OBJECTID on @fd.
+ *
+ * Returns 0 on success.  On failure the ioctl's negative return value is
+ * passed back, a message is printed on stderr and @treeid is left alone.
+ */
+int btrfs_list_get_path_rootid(int fd, u64 *treeid)
+{
+	struct btrfs_ioctl_ino_lookup_args args;
+	int ret;
+
+	memset(&args, 0, sizeof(args));
+	args.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	if (ret >= 0) {
+		*treeid = args.treeid;
+		return 0;
+	}
+
+	fprintf(stderr,
+		"ERROR: can't perform the search - %s\n",
+		strerror(errno));
+	return ret;
+}
diff --git a/btrfs-list.h b/btrfs-list.h
new file mode 100644
index 00000000..13f44c3a
--- /dev/null
+++ b/btrfs-list.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2012 FUJITSU LIMITED. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_LIST_H__
+#define __BTRFS_LIST_H__
+
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#include "rbtree.h"
+#include "ioctl.h"
+#else
+#include <btrfs/kerncompat.h>
+#include <btrfs/rbtree.h>
+#include <btrfs/ioctl.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+#include <time.h>
+
+#define BTRFS_LIST_LAYOUT_DEFAULT 0
+#define BTRFS_LIST_LAYOUT_TABLE 1
+#define BTRFS_LIST_LAYOUT_RAW 2
+
+/*
+ * one of these for each root we find.
+ *
+ * Layout matters: btrfs_get_subvol() copies everything located before
+ * `path` with a single memcpy() and duplicates the string members after
+ * it, so keep fixed-size members above `path`.
+ */
+struct root_info {
+ /* links the entry into the lookup tree walked when listing roots */
+ struct rb_node rb_node;
+ /* links the entry into the sorted tree used for printing */
+ struct rb_node sort_node;
+
+ /* this root's id */
+ u64 root_id;
+
+ /* equal the offset of the root's key */
+ u64 root_offset;
+
+ /* flags of the root */
+ u64 flags;
+
+ /* the id of the root that references this one */
+ u64 ref_tree;
+
+ /* the dir id we're in from ref_tree */
+ u64 dir_id;
+
+ /* printed as the "top level" column; presumably the containing root - TODO confirm */
+ u64 top_id;
+
+ /* generation when the root is created or last updated */
+ u64 gen;
+
+ /* creation generation of this root */
+ u64 ogen;
+
+ /* creation time of this root in sec; 0 is printed as "-" */
+ time_t otime;
+
+ /* an all-zero UUID is printed as "-" */
+ u8 uuid[BTRFS_UUID_SIZE];
+ u8 puuid[BTRFS_UUID_SIZE];
+ u8 ruuid[BTRFS_UUID_SIZE];
+
+ /* path from the subvol we live in to this root, including the
+ * root's name. This is null until we do the extra lookup ioctl.
+ */
+ char *path;
+
+ /* the name of this root in the directory it lives in */
+ char *name;
+
+ /* path printed in listings; set to "DELETED" when the root cannot be resolved */
+ char *full_path;
+
+ /* set to 1 together with the "DELETED" full_path */
+ int deleted;
+};
+
+/*
+ * Callback types used by the filter and comparer sets below.
+ * A filter receives a root and a u64 (presumably the `data` stored next
+ * to it in struct btrfs_list_filter - TODO confirm).  A comparer receives
+ * two roots and an int (presumably the is_descending flag - TODO confirm).
+ */
+typedef int (*btrfs_list_filter_func)(struct root_info *, u64);
+typedef int (*btrfs_list_comp_func)(struct root_info *, struct root_info *,
+ int);
+
+/* One filter: a callback plus the u64 value registered together with it. */
+struct btrfs_list_filter {
+ btrfs_list_filter_func filter_func;
+ u64 data;
+};
+
+/* One sort key: a comparison callback and its sort direction. */
+struct btrfs_list_comparer {
+ btrfs_list_comp_func comp_func;
+ /* non-zero when the key was requested with a '-' prefix */
+ int is_descending;
+};
+
+/*
+ * Variable-length collection of filters; the entries follow the header
+ * in the same allocation.
+ */
+struct btrfs_list_filter_set {
+ /* presumably the allocated capacity of filters[] - TODO confirm */
+ int total;
+ /* presumably the number of filters[] entries in use - TODO confirm */
+ int nfilters;
+ int only_deleted;
+ struct btrfs_list_filter filters[0];
+};
+
+/*
+ * Variable-length collection of comparers; the entries follow the header
+ * in the same allocation.
+ */
+struct btrfs_list_comparer_set {
+ /* presumably the allocated capacity of comps[] - TODO confirm */
+ int total;
+ /* presumably the number of comps[] entries in use - TODO confirm */
+ int ncomps;
+ struct btrfs_list_comparer comps[0];
+};
+
+/*
+ * Columns of a subvolume listing, in printing order.  The values index
+ * the column table; BTRFS_LIST_ALL is the number of real columns and is
+ * used as the loop bound by the printers.  BTRFS_LIST_PATH is the last
+ * column, so it gets no trailing separator.
+ */
+enum btrfs_list_column_enum {
+ BTRFS_LIST_OBJECTID,
+ BTRFS_LIST_GENERATION,
+ BTRFS_LIST_OGENERATION,
+ BTRFS_LIST_PARENT,
+ BTRFS_LIST_TOP_LEVEL,
+ BTRFS_LIST_OTIME,
+ BTRFS_LIST_PUUID,
+ BTRFS_LIST_RUUID,
+ BTRFS_LIST_UUID,
+ BTRFS_LIST_PATH,
+ BTRFS_LIST_ALL,
+};
+
+/*
+ * Available filters.  For GEN and CGEN the order EQUAL, LESS, MORE is
+ * relied upon: btrfs_list_parse_filter_string() adds 1 to the base value
+ * for a '-' prefix and 2 for a '+' prefix.
+ */
+enum btrfs_list_filter_enum {
+ BTRFS_LIST_FILTER_ROOTID,
+ BTRFS_LIST_FILTER_SNAPSHOT_ONLY,
+ BTRFS_LIST_FILTER_FLAGS,
+ BTRFS_LIST_FILTER_GEN,
+ /* alias of the base value, see the comment above */
+ BTRFS_LIST_FILTER_GEN_EQUAL = BTRFS_LIST_FILTER_GEN,
+ BTRFS_LIST_FILTER_GEN_LESS,
+ BTRFS_LIST_FILTER_GEN_MORE,
+ BTRFS_LIST_FILTER_CGEN,
+ /* alias of the base value, see the comment above */
+ BTRFS_LIST_FILTER_CGEN_EQUAL = BTRFS_LIST_FILTER_CGEN,
+ BTRFS_LIST_FILTER_CGEN_LESS,
+ BTRFS_LIST_FILTER_CGEN_MORE,
+ BTRFS_LIST_FILTER_TOPID_EQUAL,
+ BTRFS_LIST_FILTER_FULL_PATH,
+ BTRFS_LIST_FILTER_BY_PARENT,
+ BTRFS_LIST_FILTER_DELETED,
+ BTRFS_LIST_FILTER_MAX,
+};
+
+/* Sort keys; BTRFS_LIST_COMP_MAX terminates the list and is not a key. */
+enum btrfs_list_comp_enum {
+ BTRFS_LIST_COMP_ROOTID,
+ BTRFS_LIST_COMP_OGEN,
+ BTRFS_LIST_COMP_GEN,
+ BTRFS_LIST_COMP_PATH,
+ BTRFS_LIST_COMP_MAX,
+};
+
+/*
+ * Parse a comma separated list of sort keys ('+'/'-' prefix selects the
+ * direction); returns 0, or -1 on an unknown key.
+ */
+int btrfs_list_parse_sort_string(char *optarg,
+ struct btrfs_list_comparer_set **comps);
+/*
+ * Parse a numeric filter argument; a '+' or '-' prefix selects the MORE or
+ * LESS variant of @type.  Always returns 0.
+ */
+int btrfs_list_parse_filter_string(char *optarg,
+ struct btrfs_list_filter_set **filters,
+ enum btrfs_list_filter_enum type);
+void btrfs_list_setup_print_column(enum btrfs_list_column_enum column);
+struct btrfs_list_filter_set *btrfs_list_alloc_filter_set(void);
+void btrfs_list_free_filter_set(struct btrfs_list_filter_set *filter_set);
+int btrfs_list_setup_filter(struct btrfs_list_filter_set **filter_set,
+ enum btrfs_list_filter_enum filter, u64 data);
+struct btrfs_list_comparer_set *btrfs_list_alloc_comparer_set(void);
+void btrfs_list_free_comparer_set(struct btrfs_list_comparer_set *comp_set);
+
+/*
+ * List, filter, sort and print the subvolumes.  Despite its name here,
+ * the `is_tab_result` argument is the layout selector (one of
+ * BTRFS_LIST_LAYOUT_*); the definition calls it `layout`.
+ */
+int btrfs_list_subvols_print(int fd, struct btrfs_list_filter_set *filter_set,
+ struct btrfs_list_comparer_set *comp_set,
+ int is_tab_result, int full_path, char *raw_prefix);
+/* Print the file extents of tree @root_id changed since @oldest_gen. */
+int btrfs_list_find_updated_files(int fd, u64 root_id, u64 oldest_gen);
+int btrfs_list_get_default_subvolume(int fd, u64 *default_id);
+/*
+ * Returns a path the caller must free, NULL if the root is not found, or
+ * an ERR_PTR() value on failure.
+ */
+char *btrfs_list_path_for_root(int fd, u64 root);
+/* Stores the id of the tree that @fd belongs to in @treeid. */
+int btrfs_list_get_path_rootid(int fd, u64 *treeid);
+/*
+ * Fills @the_ri from the matching listed subvolume; the caller owns the
+ * duplicated path, name and full_path strings.
+ */
+int btrfs_get_subvol(int fd, struct root_info *the_ri);
+
+#endif
diff --git a/btrfs-map-logical.c b/btrfs-map-logical.c
new file mode 100644
index 00000000..0161b5c6
--- /dev/null
+++ b/btrfs-map-logical.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2009 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "volumes.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "utils.h"
+
+#define BUFFER_SIZE (64 * 1024)
+
+/*
+ * We write the mirror info to stdout, unless the extent data itself is
+ * being dumped to stdout, in which case the info goes to stderr.
+ */
+static FILE *info_file;
+
+/*
+ * Find an extent item (EXTENT_ITEM or METADATA_ITEM) in the extent tree
+ * near *@logical_ret.
+ *
+ * With @search_foward set, the first such item whose objectid is not
+ * below the given logical address is taken; otherwise the search walks
+ * backwards and takes the first one whose objectid is not above it.
+ *
+ * On success (return 0) *@logical_ret is set to the item's objectid and,
+ * if @len_ret is not NULL, *@len_ret to its length: the leaf size for a
+ * metadata item, the key offset otherwise.  A negative return is an
+ * error; any other non-zero value comes from the item iteration helpers
+ * and is treated by the caller as "nothing found".  The outputs are left
+ * untouched unless 0 is returned.
+ */
+static int map_one_extent(struct btrfs_fs_info *fs_info,
+ u64 *logical_ret, u64 *len_ret, int search_foward)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ u64 logical;
+ u64 len = 0;
+ int ret = 0;
+
+ BUG_ON(!logical_ret);
+ logical = *logical_ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /* type 0 / offset 0 is used as the search key for this address */
+ key.objectid = logical;
+ key.type = 0;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path,
+ 0, 0);
+ if (ret < 0)
+ goto out;
+ /* an exact hit on the (logical, 0, 0) key is not expected */
+ BUG_ON(ret == 0);
+ ret = 0;
+
+again:
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ /* keep stepping while the item is on the wrong side or not an extent */
+ if ((search_foward && key.objectid < logical) ||
+ (!search_foward && key.objectid > logical) ||
+ (key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_METADATA_ITEM_KEY)) {
+ if (!search_foward)
+ ret = btrfs_previous_extent_item(fs_info->extent_root,
+ path, 0);
+ else
+ ret = btrfs_next_item(fs_info->extent_root, path);
+ if (ret)
+ goto out;
+ goto again;
+ }
+ logical = key.objectid;
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ len = fs_info->tree_root->leafsize;
+ else
+ len = key.offset;
+
+out:
+ btrfs_free_path(path);
+ if (!ret) {
+ *logical_ret = logical;
+ if (len_ret)
+ *len_ret = len;
+ }
+ return ret;
+}
+
+/*
+ * Print, for one mirror, where the logical range [@logical,
+ * @logical + @len) lives on disk.  The range is mapped piece by piece
+ * with btrfs_map_block(); for every piece one line per stripe is written
+ * to info_file, giving the stripe's own physical offset and device name.
+ *
+ * Returns 0 on success or the error returned by btrfs_map_block(), which
+ * is also reported on info_file.
+ */
+static int __print_mapping_info(struct btrfs_fs_info *fs_info, u64 logical,
+				u64 len, int mirror_num)
+{
+	struct btrfs_multi_bio *multi = NULL;
+	u64 cur_offset = 0;
+	u64 cur_len;
+	int ret = 0;
+
+	while (cur_offset < len) {
+		struct btrfs_device *device;
+		int i;
+
+		/* ask for the rest; cur_len comes back as the mapped length */
+		cur_len = len - cur_offset;
+		ret = btrfs_map_block(&fs_info->mapping_tree, READ,
+				      logical + cur_offset, &cur_len,
+				      &multi, mirror_num, NULL);
+		if (ret) {
+			fprintf(info_file,
+				"Error: fails to map mirror%d logical %llu: %s\n",
+				mirror_num, logical, strerror(-ret));
+			return ret;
+		}
+		for (i = 0; i < multi->num_stripes; i++) {
+			device = multi->stripes[i].dev;
+			/* report the offset of this stripe, not of stripe 0 */
+			fprintf(info_file,
+				"mirror %d logical %llu physical %llu device %s\n",
+				mirror_num,
+				(unsigned long long)(logical + cur_offset),
+				(unsigned long long)multi->stripes[i].physical,
+				device->name);
+		}
+		kfree(multi);
+		multi = NULL;
+		cur_offset += cur_len;
+	}
+	return ret;
+}
+
+/*
+ * Print the mapping of [@logical, @logical + @len) for every copy of the
+ * data, mirror numbers starting at 1.
+ *
+ * The caller *MUST* make sure the whole range lies inside one extent.
+ * Otherwise the following can happen and end in -ENOENT:
+ * |<-----given parameter------>|
+ * |<------ Extent A ----->|
+ *
+ * Returns the result for the last mirror printed; stops at the first
+ * negative one.
+ */
+static int print_mapping_info(struct btrfs_fs_info *fs_info, u64 logical,
+			      u64 len)
+{
+	int copies = btrfs_num_copies(&fs_info->mapping_tree, logical, len);
+	int mirror = 1;
+	int ret = 0;
+
+	while (mirror <= copies) {
+		ret = __print_mapping_info(fs_info, logical, len, mirror);
+		if (ret < 0)
+			break;
+		mirror++;
+	}
+	return ret;
+}
+
+/*
+ * Copy the content of [@logical, @logical + @length) as read from copy
+ * @mirror to @out_fd, in chunks of at most BUFFER_SIZE bytes.
+ *
+ * Same requirement as print_mapping_info(): the range must lie inside a
+ * single extent.
+ *
+ * Returns 0 on success and a negative errno value on failure (which is
+ * also reported on stderr).  A short or empty write is reported as
+ * -EINTR.
+ */
+static int write_extent_content(struct btrfs_fs_info *fs_info, int out_fd,
+				u64 logical, u64 length, int mirror)
+{
+	char buffer[BUFFER_SIZE];
+	u64 cur_offset = 0;
+	u64 cur_len;
+	int ret = 0;
+
+	while (cur_offset < length) {
+		ssize_t written;
+
+		cur_len = min_t(u64, length - cur_offset, BUFFER_SIZE);
+		ret = read_extent_data(fs_info->tree_root, buffer,
+				       logical + cur_offset, &cur_len, mirror);
+		if (ret < 0) {
+			fprintf(stderr,
+				"Failed to read extent at [%llu, %llu]: %s\n",
+				logical, logical + length, strerror(-ret));
+			return ret;
+		}
+
+		written = write(out_fd, buffer, cur_len);
+		if (written < 0)
+			ret = -errno;		/* write() reports via errno */
+		else if ((u64)written != cur_len)
+			ret = -EINTR;		/* incomplete write */
+		else
+			ret = 0;
+		if (ret < 0) {
+			fprintf(stderr, "output file write failed: %s\n",
+				strerror(-ret));
+			return ret;
+		}
+		cur_offset += cur_len;
+	}
+	/* never hand the byte count of the last write() to the caller */
+	return 0;
+}
+
+static void print_usage(void) __attribute__((noreturn));
+/* Print the command line synopsis on stderr and exit with status 1. */
+static void print_usage(void)
+{
+	static const char * const usage_lines[] = {
+		"usage: btrfs-map-logical [options] device\n",
+		"\t-l Logical extent to map\n",
+		"\t-c Copy of the extent to read (usually 1 or 2)\n",
+		"\t-o Output file to hold the extent\n",
+		"\t-b Number of bytes to read\n",
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
+		fputs(usage_lines[i], stderr);
+	exit(1);
+}
+
+/*
+ * btrfs-map-logical: print where the extents covering a logical byte
+ * range are stored on the devices and optionally dump their content.
+ *
+ * Options: -l logical address (mandatory, non-zero), -b number of bytes
+ * (defaults to the node size), -c copy/mirror to read when dumping,
+ * -o output file ("-" dumps to stdout and moves the info to stderr).
+ *
+ * Exit status is 0 on success and non-zero on failure, including the case
+ * where the output file cannot be opened or truncated.
+ */
+int main(int ac, char **av)
+{
+	struct cache_tree root_cache;
+	struct btrfs_root *root;
+	char *dev;
+	char *output_file = NULL;
+	u64 copy = 0;
+	u64 logical = 0;
+	u64 bytes = 0;
+	u64 cur_logical = 0;
+	u64 cur_len = 0;
+	int out_fd = -1;
+	int found = 0;
+	int ret = 0;
+
+	while(1) {
+		int c;
+		static const struct option long_options[] = {
+			/* { "byte-count", 1, NULL, 'b' }, */
+			{ "logical", required_argument, NULL, 'l' },
+			{ "copy", required_argument, NULL, 'c' },
+			{ "output", required_argument, NULL, 'o' },
+			{ "bytes", required_argument, NULL, 'b' },
+			{ NULL, 0, NULL, 0}
+		};
+
+		c = getopt_long(ac, av, "l:c:o:b:", long_options, NULL);
+		if (c < 0)
+			break;
+		switch(c) {
+			case 'l':
+				logical = arg_strtou64(optarg);
+				break;
+			case 'c':
+				copy = arg_strtou64(optarg);
+				break;
+			case 'b':
+				bytes = arg_strtou64(optarg);
+				break;
+			case 'o':
+				/* the last -o wins; do not leak earlier ones */
+				free(output_file);
+				output_file = strdup(optarg);
+				break;
+			default:
+				print_usage();
+		}
+	}
+	set_argv0(av);
+	ac = ac - optind;
+	if (check_argc_min(ac, 1))
+		print_usage();
+	if (logical == 0)
+		print_usage();
+
+	dev = av[optind];
+
+	radix_tree_init();
+	cache_tree_init(&root_cache);
+
+	root = open_ctree(dev, 0, 0);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		free(output_file);
+		exit(1);
+	}
+
+	info_file = stdout;
+	if (output_file) {
+		if (strcmp(output_file, "-") == 0) {
+			/* data goes to stdout, so move the info to stderr */
+			out_fd = 1;
+			info_file = stderr;
+		} else {
+			out_fd = open(output_file, O_RDWR | O_CREAT, 0600);
+			if (out_fd < 0) {
+				fprintf(stderr,
+					"ERROR: cannot open output file %s: %s\n",
+					output_file, strerror(errno));
+				ret = 1;
+				goto close;
+			}
+			ret = ftruncate(out_fd, 0);
+			if (ret) {
+				fprintf(stderr,
+					"ERROR: cannot truncate output file %s: %s\n",
+					output_file, strerror(errno));
+				ret = 1;
+				close(out_fd);
+				goto close;
+			}
+			info_file = stdout;
+		}
+	}
+
+	if (bytes == 0)
+		bytes = root->nodesize;
+	cur_logical = logical;
+	cur_len = bytes;
+
+	/* First find the nearest extent */
+	ret = map_one_extent(root->fs_info, &cur_logical, &cur_len, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to find extent at [%llu,%llu): %s\n",
+			cur_logical, cur_logical + cur_len, strerror(-ret));
+		goto out_close_fd;
+	}
+	/*
+	 * Normally, search backward should be OK, but for special case like
+	 * given logical is quite small where no extents are before it,
+	 * we need to search forward.
+	 */
+	if (ret > 0) {
+		ret = map_one_extent(root->fs_info, &cur_logical, &cur_len, 1);
+		if (ret < 0) {
+			fprintf(stderr,
+				"Failed to find extent at [%llu,%llu): %s\n",
+				cur_logical, cur_logical + cur_len,
+				strerror(-ret));
+			goto out_close_fd;
+		}
+		if (ret > 0) {
+			fprintf(stderr,
+				"Failed to find any extent at [%llu,%llu)\n",
+				cur_logical, cur_logical + cur_len);
+			goto out_close_fd;
+		}
+	}
+
+	/* walk forward over every extent overlapping the requested range */
+	while (cur_logical + cur_len >= logical && cur_logical < logical +
+	       bytes) {
+		u64 real_logical;
+		u64 real_len;
+
+		found = 1;
+		ret = map_one_extent(root->fs_info, &cur_logical, &cur_len, 1);
+		if (ret < 0)
+			goto out_close_fd;
+		if (ret > 0)
+			break;
+		/* clamp the extent to the part the user asked for */
+		real_logical = max(logical, cur_logical);
+		real_len = min(logical + bytes, cur_logical + cur_len) -
+			   real_logical;
+
+		ret = print_mapping_info(root->fs_info, real_logical, real_len);
+		if (ret < 0)
+			goto out_close_fd;
+		if (output_file && out_fd != -1) {
+			ret = write_extent_content(root->fs_info, out_fd,
+					real_logical, real_len, copy);
+			if (ret < 0)
+				goto out_close_fd;
+		}
+
+		cur_logical += cur_len;
+	}
+
+	if (!found) {
+		fprintf(stderr, "No extent found at range [%llu,%llu)\n",
+			logical, logical + bytes);
+	}
+out_close_fd:
+	/* never close stdout when it was used for the dump */
+	if (output_file && out_fd != 1)
+		close(out_fd);
+close:
+	free(output_file);
+	close_ctree(root);
+	if (ret < 0)
+		ret = 1;
+	btrfs_close_all_devices();
+	return ret;
+}
diff --git a/btrfs-select-super.c b/btrfs-select-super.c
new file mode 100644
index 00000000..df741532
--- /dev/null
+++ b/btrfs-select-super.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "volumes.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "utils.h"
+
+/*
+ * Print the synopsis and the package version on stderr, then exit with
+ * status 1.
+ */
+static void print_usage(void)
+{
+	fputs("usage: btrfs-select-super -s number dev\n", stderr);
+	fputs(PACKAGE_STRING, stderr);
+	fputs("\n", stderr);
+	exit(1);
+}
+
+/*
+ * btrfs-select-super: open the filesystem using the superblock copy
+ * chosen with -s and write that superblock over all copies.
+ *
+ * The device must not be mounted.  Returns the result of
+ * write_all_supers() on the normal path, 1 if the filesystem cannot be
+ * opened.
+ *
+ * NOTE(review): the mount-check failure paths return negative values
+ * from main(), which the shell sees as an unrelated non-zero status.
+ */
+int main(int ac, char **av)
+{
+ struct btrfs_root *root;
+ int ret;
+ u64 num = 0;
+ u64 bytenr = 0;
+
+ while(1) {
+ int c;
+ c = getopt(ac, av, "s:");
+ if (c < 0)
+ break;
+ switch(c) {
+ case 's':
+ num = arg_strtou64(optarg);
+ if (num >= BTRFS_SUPER_MIRROR_MAX) {
+ fprintf(stderr,
+ "ERROR: super mirror should be less than: %d\n",
+ BTRFS_SUPER_MIRROR_MAX);
+ exit(1);
+ }
+ bytenr = btrfs_sb_offset(((int)num));
+ break;
+ default:
+ print_usage();
+ }
+ }
+ set_argv0(av);
+ ac = ac - optind;
+
+ if (check_argc_exact(ac, 1))
+ print_usage();
+
+ /* bytenr stays 0 only when -s was never given */
+ if (bytenr == 0) {
+ fprintf(stderr, "Please select the super copy with -s\n");
+ print_usage();
+ }
+
+ radix_tree_init();
+
+ if((ret = check_mounted(av[optind])) < 0) {
+ fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
+ return ret;
+ } else if(ret) {
+ fprintf(stderr, "%s is currently mounted. Aborting.\n", av[optind]);
+ return -EBUSY;
+ }
+
+ root = open_ctree(av[optind], bytenr, 1);
+
+ if (!root) {
+ fprintf(stderr, "Open ctree failed\n");
+ return 1;
+ }
+
+ /* make the super writing code think we've read the first super */
+ root->fs_info->super_bytenr = BTRFS_SUPER_INFO_OFFSET;
+ ret = write_all_supers(root);
+
+ /* we don't close the ctree or anything, because we don't want a real
+ * transaction commit. We just want the super copy we pulled off the
+ * disk to overwrite all the other copies
+ *
+ * NOTE(review): close_ctree() is nevertheless called below - confirm
+ * which of the two is intended.
+ */
+ printf("using SB copy %llu, bytenr %llu\n", (unsigned long long)num,
+ (unsigned long long)bytenr);
+ close_ctree(root);
+ btrfs_close_all_devices();
+ return ret;
+}
diff --git a/btrfs-show-super.c b/btrfs-show-super.c
new file mode 100644
index 00000000..051bd115
--- /dev/null
+++ b/btrfs-show-super.c
@@ -0,0 +1,542 @@
+/*
+ * Copyright (C) 2012 STRATO AG. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <uuid/uuid.h>
+#include <errno.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "utils.h"
+#include "crc32c.h"
+
+static void print_usage(void);
+static void dump_superblock(struct btrfs_super_block *sb, int full);
+int main(int argc, char **argv);
+static int load_and_dump_sb(char *, int fd, u64 sb_bytenr, int full, int force);
+
+
+/*
+ * Write the btrfs-show-super option summary and the package version
+ * string to stderr. Does not exit; callers decide what to do next.
+ */
+static void print_usage(void)
+{
+	fputs("usage: btrfs-show-super [-i super_mirror|-a|-f|-F] dev [dev..]\n",
+	      stderr);
+	fputs("\t-f : print full superblock information\n", stderr);
+	fputs("\t-a : print information of all superblocks\n", stderr);
+	fputs("\t-i <super_mirror> : specify which mirror to print out\n",
+	      stderr);
+	fputs("\t-F : attempt to dump superblocks with bad magic\n", stderr);
+	fputs("\t-s <bytenr> : specify alternate superblock offset\n", stderr);
+	fprintf(stderr, "%s\n", PACKAGE_STRING);
+}
+
+/*
+ * Entry point of btrfs-show-super.
+ *
+ * Options:
+ *   -i <mirror>  dump the given superblock mirror
+ *   -a           dump all superblock mirrors
+ *   -f           also print the sys chunk array and backup roots
+ *   -F           dump even when the magic does not match
+ *   -s <bytenr>  dump the superblock at an explicit byte offset
+ *
+ * Every remaining argument is a device or image to read. The exit status
+ * is 0 when every requested superblock was read, 1 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	int opt;
+	int all = 0;
+	int full = 0;
+	int force = 0;
+	int ret = 0;
+	char *filename;
+	int fd = -1;
+	int i;
+	u64 arg;
+	u64 sb_bytenr = btrfs_sb_offset(0);
+
+	while ((opt = getopt(argc, argv, "fFai:s:")) != -1) {
+		switch (opt) {
+		case 'i':
+			arg = arg_strtou64(optarg);
+			if (arg >= BTRFS_SUPER_MIRROR_MAX) {
+				fprintf(stderr,
+					"Illegal super_mirror %llu\n",
+					(unsigned long long)arg);
+				print_usage();
+				exit(1);
+			}
+			sb_bytenr = btrfs_sb_offset(arg);
+			break;
+
+		case 'a':
+			all = 1;
+			break;
+		case 'f':
+			full = 1;
+			break;
+		case 'F':
+			force = 1;
+			break;
+		case 's':
+			sb_bytenr = arg_strtou64(optarg);
+			/* an explicit offset overrides an earlier -a */
+			all = 0;
+			break;
+		default:
+			print_usage();
+			exit(1);
+		}
+	}
+
+	set_argv0(argv);
+	if (check_argc_min(argc - optind, 1)) {
+		print_usage();
+		exit(1);
+	}
+
+	for (i = optind; i < argc; i++) {
+		filename = argv[i];
+		fd = open(filename, O_RDONLY, 0666);
+		if (fd < 0) {
+			fprintf(stderr, "Could not open %s\n", filename);
+			exit(1);
+		}
+
+		if (all) {
+			int idx;
+			for (idx = 0; idx < BTRFS_SUPER_MIRROR_MAX; idx++) {
+				sb_bytenr = btrfs_sb_offset(idx);
+				if (load_and_dump_sb(filename, fd,
+						sb_bytenr, full, force)) {
+					close(fd);
+					exit(1);
+				}
+
+				putchar('\n');
+			}
+		} else {
+			/*
+			 * Keep going with the remaining devices, but do not
+			 * report success if this superblock could not be read.
+			 */
+			if (load_and_dump_sb(filename, fd, sb_bytenr, full,
+					     force))
+				ret = 1;
+			putchar('\n');
+		}
+		close(fd);
+	}
+
+	exit(ret);
+}
+
+/*
+ * Read BTRFS_SUPER_INFO_SIZE bytes from fd at byte offset sb_bytenr and
+ * print the superblock found there.
+ *
+ * filename is only used in messages. full is passed on to
+ * dump_superblock(). When force is zero a superblock whose magic does not
+ * match BTRFS_MAGIC is reported on stderr instead of being dumped.
+ *
+ * Returns 1 when the read failed or was short, 0 otherwise. A read that
+ * hits end of device right at sb_bytenr is not an error: the device is
+ * simply too small to hold this copy.
+ */
+static int load_and_dump_sb(char *filename, int fd, u64 sb_bytenr, int full,
+		int force)
+{
+	u8 super_block_data[BTRFS_SUPER_INFO_SIZE];
+	struct btrfs_super_block *sb;
+	ssize_t ret;
+
+	sb = (struct btrfs_super_block *)super_block_data;
+
+	/*
+	 * A read at end of file returns 0 without touching errno, so clear
+	 * any stale value first; otherwise the check below cannot tell
+	 * "device too short" from a leftover error.
+	 */
+	errno = 0;
+	ret = pread64(fd, super_block_data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
+	if (ret != BTRFS_SUPER_INFO_SIZE) {
+		/* check if the disk is too short for further superblocks */
+		if (ret == 0 && errno == 0)
+			return 0;
+
+		fprintf(stderr,
+			"ERROR: Failed to read the superblock on %s at %llu\n",
+			filename, (unsigned long long)sb_bytenr);
+		fprintf(stderr,
+			"ERROR: error = '%s', errno = %d\n", strerror(errno), errno);
+		return 1;
+	}
+	printf("superblock: bytenr=%llu, device=%s\n",
+	       (unsigned long long)sb_bytenr, filename);
+	printf("---------------------------------------------------------\n");
+	if (btrfs_super_magic(sb) != BTRFS_MAGIC && !force) {
+		fprintf(stderr,
+			"ERROR: bad magic on superblock on %s at %llu\n",
+			filename, (unsigned long long)sb_bytenr);
+	} else {
+		dump_superblock(sb, full);
+	}
+	return 0;
+}
+
+/*
+ * Recompute the checksum of a superblock buffer and compare it with the
+ * one stored at its start.
+ *
+ * The checksum covers the BTRFS_SUPER_INFO_SIZE buffer minus its leading
+ * BTRFS_CSUM_SIZE bytes; only the first csum_size bytes are compared.
+ *
+ * Returns 1 when the stored and computed checksums are equal, else 0.
+ */
+static int check_csum_sblock(void *sb, int csum_size)
+{
+	char computed[BTRFS_CSUM_SIZE];
+	char *payload = (char *)sb + BTRFS_CSUM_SIZE;
+	u32 seed = ~(u32)0;
+	u32 crc;
+
+	crc = btrfs_csum_data(NULL, payload, seed,
+			      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+	btrfs_csum_final(crc, computed);
+
+	return memcmp(sb, computed, csum_size) == 0;
+}
+
+/*
+ * Print every item stored in the sys_chunk_array of a superblock.
+ *
+ * The array is a packed sequence of (btrfs_disk_key, btrfs_chunk) pairs.
+ * The superblock is copied into a temporary extent buffer so that the
+ * extent-buffer based chunk accessors can be used; "chunk" is therefore
+ * an offset into that buffer, not a dereferenceable pointer.
+ *
+ * Parsing stops with an error message as soon as the array turns out to
+ * be inconsistent (claimed size larger than the on-disk array, truncated
+ * item, unexpected key type, zero stripes).
+ */
+static void print_sys_chunk_array(struct btrfs_super_block *sb)
+{
+	struct extent_buffer *buf;
+	struct btrfs_disk_key *disk_key;
+	struct btrfs_chunk *chunk;
+	u8 *array_ptr;
+	unsigned long sb_array_offset;
+	u32 num_stripes;
+	u32 array_size;
+	u32 len = 0;
+	u32 cur_offset;
+	struct btrfs_key key;
+	int item;
+
+	buf = malloc(sizeof(*buf) + sizeof(*sb));
+	if (!buf) {
+		fprintf(stderr, "%s\n", strerror(ENOMEM));
+		exit(1);
+	}
+	write_extent_buffer(buf, sb, 0, sizeof(*sb));
+	array_size = btrfs_super_sys_array_size(sb);
+
+	/*
+	 * The size is read from a superblock that may be corrupted (this
+	 * tool even dumps blocks with bad magic or checksum), so never
+	 * trust it beyond the fixed size of the on-disk array.
+	 */
+	if (array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+		printk(
+	"ERROR: sys_array_size %u shouldn't exceed %u bytes\n",
+			array_size, (u32)BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+		goto out;
+	}
+
+	array_ptr = sb->sys_chunk_array;
+	sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
+	cur_offset = 0;
+	item = 0;
+
+	while (cur_offset < array_size) {
+		disk_key = (struct btrfs_disk_key *)array_ptr;
+		len = sizeof(*disk_key);
+		if (cur_offset + len > array_size)
+			goto out_short_read;
+
+		btrfs_disk_key_to_cpu(&key, disk_key);
+
+		array_ptr += len;
+		sb_array_offset += len;
+		cur_offset += len;
+
+		printf("\titem %d ", item);
+		btrfs_print_key(disk_key);
+		putchar('\n');
+
+		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+			chunk = (struct btrfs_chunk *)sb_array_offset;
+			/*
+			 * At least one btrfs_chunk with one stripe must be
+			 * present, exact stripe count check comes afterwards
+			 */
+			len = btrfs_chunk_item_size(1);
+			if (cur_offset + len > array_size)
+				goto out_short_read;
+
+			print_chunk(buf, chunk);
+			num_stripes = btrfs_chunk_num_stripes(buf, chunk);
+			if (!num_stripes) {
+				printk(
+	"ERROR: invalid number of stripes %u in sys_array at offset %u\n",
+					num_stripes, cur_offset);
+				break;
+			}
+			len = btrfs_chunk_item_size(num_stripes);
+			if (cur_offset + len > array_size)
+				goto out_short_read;
+		} else {
+			printk(
+	"ERROR: unexpected item type %u in sys_array at offset %u\n",
+				(u32)key.type, cur_offset);
+			break;
+		}
+		array_ptr += len;
+		sb_array_offset += len;
+		cur_offset += len;
+
+		item++;
+	}
+	goto out;
+
+out_short_read:
+	printk("ERROR: sys_array too short to read %u bytes at offset %u\n",
+			len, cur_offset);
+out:
+	free(buf);
+}
+
+/*
+ * Tell whether a backup root slot is unused.
+ *
+ * Returns 1 when backup is NULL or when both its tree_root and
+ * tree_root_gen fields are zero, 0 otherwise.
+ */
+static int empty_backup(struct btrfs_root_backup *backup)
+{
+	if (!backup)
+		return 1;
+
+	return backup->tree_root == 0 && backup->tree_root_gen == 0;
+}
+
+/*
+ * Print bytenr, generation and level of one tree root recorded in
+ * "backup". "fmt" is the complete printf format for the line and "name"
+ * selects the btrfs_backup_<name>{,_gen,_level} accessors.
+ */
+#define PRINT_BACKUP_ROOT(fmt, name)				\
+	printf(fmt,						\
+	       btrfs_backup_##name(backup),			\
+	       btrfs_backup_##name##_gen(backup),		\
+	       btrfs_backup_##name##_level(backup))
+
+/*
+ * Dump one backup root entry: the six tree roots it records followed by
+ * its total_bytes, bytes_used and num_devices counters.
+ */
+static void print_root_backup(struct btrfs_root_backup *backup)
+{
+	PRINT_BACKUP_ROOT("\t\tbackup_tree_root:\t%llu\tgen: %llu\tlevel: %d\n",
+			  tree_root);
+	PRINT_BACKUP_ROOT("\t\tbackup_chunk_root:\t%llu\tgen: %llu\tlevel: %d\n",
+			  chunk_root);
+	PRINT_BACKUP_ROOT("\t\tbackup_extent_root:\t%llu\tgen: %llu\tlevel: %d\n",
+			  extent_root);
+	PRINT_BACKUP_ROOT("\t\tbackup_fs_root:\t\t%llu\tgen: %llu\tlevel: %d\n",
+			  fs_root);
+	PRINT_BACKUP_ROOT("\t\tbackup_dev_root:\t%llu\tgen: %llu\tlevel: %d\n",
+			  dev_root);
+	PRINT_BACKUP_ROOT("\t\tbackup_csum_root:\t%llu\tgen: %llu\tlevel: %d\n",
+			  csum_root);
+
+	printf("\t\tbackup_total_bytes:\t%llu\n",
+	       btrfs_backup_total_bytes(backup));
+	printf("\t\tbackup_bytes_used:\t%llu\n",
+	       btrfs_backup_bytes_used(backup));
+	printf("\t\tbackup_num_devices:\t%llu\n",
+	       btrfs_backup_num_devices(backup));
+	putchar('\n');
+}
+
+#undef PRINT_BACKUP_ROOT
+
+/*
+ * Walk the BTRFS_NUM_BACKUP_ROOTS slots of sb->super_roots and print each
+ * one that empty_backup() does not consider unused, prefixed by its index.
+ */
+static void print_backup_roots(struct btrfs_super_block *sb)
+{
+	int slot;
+
+	for (slot = 0; slot < BTRFS_NUM_BACKUP_ROOTS; slot++) {
+		struct btrfs_root_backup *entry = &sb->super_roots[slot];
+
+		if (empty_backup(entry))
+			continue;
+
+		printf("\tbackup %d:\n", slot);
+		print_root_backup(entry);
+	}
+}
+
+/*
+ * One row of a flag-name table: maps a flag bit mask to the text that
+ * __print_readable_flag() prints when that bit is set.
+ */
+struct readable_flag_entry {
+ /* bit mask tested against the flag word */
+ u64 bit;
+ /* text printed for this bit */
+ char *output;
+};
+
+/*
+ * Build a readable_flag_entry from a BTRFS_FEATURE_INCOMPAT_<bit_name>
+ * constant; the printed name is the bare <bit_name>.
+ */
+#define DEF_INCOMPAT_FLAG_ENTRY(bit_name)		\
+	{BTRFS_FEATURE_INCOMPAT_##bit_name, #bit_name}
+
+/* Names of the incompat feature bits, in the order they are printed. */
+static struct readable_flag_entry incompat_flags_array[] = {
+	DEF_INCOMPAT_FLAG_ENTRY(MIXED_BACKREF),
+	DEF_INCOMPAT_FLAG_ENTRY(DEFAULT_SUBVOL),
+	DEF_INCOMPAT_FLAG_ENTRY(MIXED_GROUPS),
+	DEF_INCOMPAT_FLAG_ENTRY(COMPRESS_LZO),
+	DEF_INCOMPAT_FLAG_ENTRY(COMPRESS_LZOv2),
+	DEF_INCOMPAT_FLAG_ENTRY(BIG_METADATA),
+	DEF_INCOMPAT_FLAG_ENTRY(EXTENDED_IREF),
+	DEF_INCOMPAT_FLAG_ENTRY(RAID56),
+	DEF_INCOMPAT_FLAG_ENTRY(SKINNY_METADATA),
+	DEF_INCOMPAT_FLAG_ENTRY(NO_HOLES)
+};
+/* Same element-count idiom as super_flags_num */
+static const int incompat_flags_num = ARRAY_SIZE(incompat_flags_array);
+
+/*
+ * Build readable_flag_entry rows from BTRFS_HEADER_FLAG_<bit_name> and
+ * BTRFS_SUPER_FLAG_<bit_name> constants; the printed name is the bare
+ * <bit_name> in both cases.
+ */
+#define DEF_HEADER_FLAG_ENTRY(bit_name) \
+ {BTRFS_HEADER_FLAG_##bit_name, #bit_name}
+#define DEF_SUPER_FLAG_ENTRY(bit_name) \
+ {BTRFS_SUPER_FLAG_##bit_name, #bit_name}
+
+/* Names of the superblock flag bits, in the order they are printed */
+static struct readable_flag_entry super_flags_array[] = {
+ DEF_HEADER_FLAG_ENTRY(WRITTEN),
+ DEF_HEADER_FLAG_ENTRY(RELOC),
+ DEF_SUPER_FLAG_ENTRY(CHANGING_FSID),
+ DEF_SUPER_FLAG_ENTRY(SEEDING),
+ DEF_SUPER_FLAG_ENTRY(METADUMP),
+ DEF_SUPER_FLAG_ENTRY(METADUMP_V2)
+};
+static const int super_flags_num = ARRAY_SIZE(super_flags_array);
+
+/*
+ * Union of every bit listed in super_flags_array; any other bit set in
+ * the superblock flags is reported as an unknown flag.
+ */
+#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
+ BTRFS_HEADER_FLAG_RELOC |\
+ BTRFS_SUPER_FLAG_CHANGING_FSID |\
+ BTRFS_SUPER_FLAG_SEEDING |\
+ BTRFS_SUPER_FLAG_METADUMP |\
+ BTRFS_SUPER_FLAG_METADUMP_V2)
+
+/*
+ * Print the names of the bits set in "flag" as a parenthesised list.
+ *
+ * "array" holds "array_size" name entries that are tried in order. Bits
+ * of "flag" that are not part of "supported_flags" are summarised in one
+ * trailing "unknown flag" item. Nothing is printed when flag is zero.
+ */
+static void __print_readable_flag(u64 flag, struct readable_flag_entry *array,
+		int array_size, u64 supported_flags)
+{
+	int idx;
+	int printed = 0;
+	u64 unknown;
+
+	if (flag == 0)
+		return;
+
+	printf("\t\t\t( ");
+	for (idx = 0; idx < array_size; idx++) {
+		struct readable_flag_entry *cur = &array[idx];
+
+		if (!(flag & cur->bit))
+			continue;
+
+		/* every item after the first one starts on its own line */
+		if (printed)
+			printf("|\n\t\t\t %s ", cur->output);
+		else
+			printf("%s ", cur->output);
+		printed = 1;
+	}
+
+	unknown = flag & ~supported_flags;
+	if (unknown) {
+		if (printed)
+			printf("|\n\t\t\t unknown flag: 0x%llx ", unknown);
+		else
+			printf("unknown flag: 0x%llx ", unknown);
+	}
+	printf(")\n");
+}
+
+/*
+ * Print the names of the incompat feature bits set in flag, using
+ * incompat_flags_array for the names and BTRFS_FEATURE_INCOMPAT_SUPP as
+ * the set of known bits.
+ */
+static void print_readable_incompat_flag(u64 flag)
+{
+	/* no "return <expr>;" here: ISO C forbids it in a void function */
+	__print_readable_flag(flag, incompat_flags_array,
+			      incompat_flags_num,
+			      BTRFS_FEATURE_INCOMPAT_SUPP);
+}
+
+/*
+ * Print the names of the superblock flag bits set in flag, using
+ * super_flags_array for the names and BTRFS_SUPER_FLAG_SUPP as the set of
+ * known bits.
+ */
+static void print_readable_super_flag(u64 flag)
+{
+	/* no "return <expr>;" here: ISO C forbids it in a void function */
+	__print_readable_flag(flag, super_flags_array,
+			      super_flags_num, BTRFS_SUPER_FLAG_SUPP);
+}
+
+/*
+ * Print the fields of a superblock to stdout, one "name<TAB>value" line
+ * per field.
+ *
+ * The checksum, the magic and dev_item.fsid are each followed by a
+ * "[match]" / "[DON'T MATCH]" verdict. Non-printable bytes of the magic
+ * and of the label are shown as '.'. When full is non-zero the sys chunk
+ * array and the backup roots are printed as well.
+ */
+static void dump_superblock(struct btrfs_super_block *sb, int full)
+{
+	int i;
+	char *s, buf[BTRFS_UUID_UNPARSED_SIZE];
+	u8 *p;
+
+	printf("csum\t\t\t0x");
+	for (i = 0, p = sb->csum; i < btrfs_super_csum_size(sb); i++)
+		printf("%02x", p[i]);
+	if (check_csum_sblock(sb, btrfs_super_csum_size(sb)))
+		printf(" [match]");
+	else
+		printf(" [DON'T MATCH]");
+	putchar('\n');
+
+	printf("bytenr\t\t\t%llu\n",
+		(unsigned long long)btrfs_super_bytenr(sb));
+	printf("flags\t\t\t0x%llx\n",
+		(unsigned long long)btrfs_super_flags(sb));
+	print_readable_super_flag(btrfs_super_flags(sb));
+
+	/*
+	 * The <ctype.h> functions are only defined for values representable
+	 * as unsigned char (or EOF), and these bytes come straight from
+	 * disk, so convert before calling isprint().
+	 */
+	printf("magic\t\t\t");
+	s = (char *) &sb->magic;
+	for (i = 0; i < 8; i++)
+		putchar(isprint((unsigned char)s[i]) ? s[i] : '.');
+	if (btrfs_super_magic(sb) == BTRFS_MAGIC)
+		printf(" [match]\n");
+	else
+		printf(" [DON'T MATCH]\n");
+
+	uuid_unparse(sb->fsid, buf);
+	printf("fsid\t\t\t%s\n", buf);
+
+	printf("label\t\t\t");
+	s = sb->label;
+	for (i = 0; i < BTRFS_LABEL_SIZE && s[i]; i++)
+		putchar(isprint((unsigned char)s[i]) ? s[i] : '.');
+	putchar('\n');
+
+	printf("generation\t\t%llu\n",
+	       (unsigned long long)btrfs_super_generation(sb));
+	printf("root\t\t\t%llu\n", (unsigned long long)btrfs_super_root(sb));
+	printf("sys_array_size\t\t%llu\n",
+	       (unsigned long long)btrfs_super_sys_array_size(sb));
+	printf("chunk_root_generation\t%llu\n",
+	       (unsigned long long)btrfs_super_chunk_root_generation(sb));
+	printf("root_level\t\t%llu\n",
+	       (unsigned long long)btrfs_super_root_level(sb));
+	printf("chunk_root\t\t%llu\n",
+	       (unsigned long long)btrfs_super_chunk_root(sb));
+	printf("chunk_root_level\t%llu\n",
+	       (unsigned long long)btrfs_super_chunk_root_level(sb));
+	printf("log_root\t\t%llu\n",
+	       (unsigned long long)btrfs_super_log_root(sb));
+	printf("log_root_transid\t%llu\n",
+	       (unsigned long long)btrfs_super_log_root_transid(sb));
+	printf("log_root_level\t\t%llu\n",
+	       (unsigned long long)btrfs_super_log_root_level(sb));
+	printf("total_bytes\t\t%llu\n",
+	       (unsigned long long)btrfs_super_total_bytes(sb));
+	printf("bytes_used\t\t%llu\n",
+	       (unsigned long long)btrfs_super_bytes_used(sb));
+	printf("sectorsize\t\t%llu\n",
+	       (unsigned long long)btrfs_super_sectorsize(sb));
+	printf("nodesize\t\t%llu\n",
+	       (unsigned long long)btrfs_super_nodesize(sb));
+	printf("leafsize\t\t%llu\n",
+	       (unsigned long long)btrfs_super_leafsize(sb));
+	printf("stripesize\t\t%llu\n",
+	       (unsigned long long)btrfs_super_stripesize(sb));
+	printf("root_dir\t\t%llu\n",
+	       (unsigned long long)btrfs_super_root_dir(sb));
+	printf("num_devices\t\t%llu\n",
+	       (unsigned long long)btrfs_super_num_devices(sb));
+	printf("compat_flags\t\t0x%llx\n",
+	       (unsigned long long)btrfs_super_compat_flags(sb));
+	printf("compat_ro_flags\t\t0x%llx\n",
+	       (unsigned long long)btrfs_super_compat_ro_flags(sb));
+	printf("incompat_flags\t\t0x%llx\n",
+	       (unsigned long long)btrfs_super_incompat_flags(sb));
+	print_readable_incompat_flag(btrfs_super_incompat_flags(sb));
+	printf("csum_type\t\t%llu\n",
+	       (unsigned long long)btrfs_super_csum_type(sb));
+	printf("csum_size\t\t%llu\n",
+	       (unsigned long long)btrfs_super_csum_size(sb));
+	printf("cache_generation\t%llu\n",
+	       (unsigned long long)btrfs_super_cache_generation(sb));
+	printf("uuid_tree_generation\t%llu\n",
+	       (unsigned long long)btrfs_super_uuid_tree_generation(sb));
+
+	uuid_unparse(sb->dev_item.uuid, buf);
+	printf("dev_item.uuid\t\t%s\n", buf);
+
+	uuid_unparse(sb->dev_item.fsid, buf);
+	printf("dev_item.fsid\t\t%s %s\n", buf,
+		!memcmp(sb->dev_item.fsid, sb->fsid, BTRFS_FSID_SIZE) ?
+			"[match]" : "[DON'T MATCH]");
+
+	printf("dev_item.type\t\t%llu\n", (unsigned long long)
+	       btrfs_stack_device_type(&sb->dev_item));
+	printf("dev_item.total_bytes\t%llu\n", (unsigned long long)
+	       btrfs_stack_device_total_bytes(&sb->dev_item));
+	printf("dev_item.bytes_used\t%llu\n", (unsigned long long)
+	       btrfs_stack_device_bytes_used(&sb->dev_item));
+	printf("dev_item.io_align\t%u\n", (unsigned int)
+	       btrfs_stack_device_io_align(&sb->dev_item));
+	printf("dev_item.io_width\t%u\n", (unsigned int)
+	       btrfs_stack_device_io_width(&sb->dev_item));
+	printf("dev_item.sector_size\t%u\n", (unsigned int)
+	       btrfs_stack_device_sector_size(&sb->dev_item));
+	printf("dev_item.devid\t\t%llu\n", (unsigned long long)
+	       btrfs_stack_device_id(&sb->dev_item));
+	printf("dev_item.dev_group\t%u\n", (unsigned int)
+	       btrfs_stack_device_group(&sb->dev_item));
+	printf("dev_item.seek_speed\t%u\n", (unsigned int)
+	       btrfs_stack_device_seek_speed(&sb->dev_item));
+	printf("dev_item.bandwidth\t%u\n", (unsigned int)
+	       btrfs_stack_device_bandwidth(&sb->dev_item));
+	printf("dev_item.generation\t%llu\n", (unsigned long long)
+	       btrfs_stack_device_generation(&sb->dev_item));
+	if (full) {
+		printf("sys_chunk_array[%d]:\n", BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+		print_sys_chunk_array(sb);
+		printf("backup_roots[%d]:\n", BTRFS_NUM_BACKUP_ROOTS);
+		print_backup_roots(sb);
+	}
+}
diff --git a/btrfs-zero-log.c b/btrfs-zero-log.c
new file mode 100644
index 00000000..058e9b19
--- /dev/null
+++ b/btrfs-zero-log.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "kerncompat.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "utils.h"
+
+/* Print the btrfs-zero-log synopsis on stderr and exit with status 1. */
+__attribute__((noreturn)) static void print_usage(void)
+{
+	fputs("usage: btrfs-zero-log dev\n", stderr);
+	exit(1);
+}
+
+/*
+ * Entry point of the deprecated btrfs-zero-log tool.
+ *
+ * Takes exactly one argument, the device. Refuses to run on a mounted
+ * device, then sets the superblock's log_root and log_root_level to 0 and
+ * commits a transaction.
+ *
+ * Returns 0 on success and 1 on failure, including a failed commit.
+ */
+int main(int ac, char **av)
+{
+	struct btrfs_root *root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_super_block *sb;
+	int ret;
+
+	set_argv0(av);
+	if (check_argc_exact(ac, 2))
+		print_usage();
+
+	radix_tree_init();
+
+	printf("WARNING: this utility is deprecated, please use 'btrfs rescue zero-log'\n\n");
+
+	if ((ret = check_mounted(av[1])) < 0) {
+		fprintf(stderr, "ERROR: could not check mount status: %s\n", strerror(-ret));
+		goto out;
+	} else if (ret) {
+		fprintf(stderr, "ERROR: %s is currently mounted\n", av[1]);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	root = open_ctree(av[1], 0, OPEN_CTREE_WRITES | OPEN_CTREE_PARTIAL);
+	if (!root) {
+		fprintf(stderr, "ERROR: cannot open ctree\n");
+		return 1;
+	}
+
+	sb = root->fs_info->super_copy;
+	printf("Clearing log on %s, previous log_root %llu, level %u\n",
+			av[1],
+			(unsigned long long)btrfs_super_log_root(sb),
+			(unsigned)btrfs_super_log_root_level(sb));
+	/*
+	 * NOTE(review): the handle returned by btrfs_start_transaction() is
+	 * not checked, as in the original code - confirm how that function
+	 * reports failure before adding a check.
+	 */
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_super_log_root(sb, 0);
+	btrfs_set_super_log_root_level(sb, 0);
+	/* a failed commit means the log was not cleared: report it */
+	ret = btrfs_commit_transaction(trans, root);
+	if (ret)
+		fprintf(stderr, "ERROR: failed to commit transaction\n");
+	close_ctree(root);
+out:
+	return !!ret;
+}
diff --git a/btrfs.c b/btrfs.c
new file mode 100644
index 00000000..cc705153
--- /dev/null
+++ b/btrfs.c
@@ -0,0 +1,248 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "volumes.h"
+#include "crc32c.h"
+#include "commands.h"
+#include "utils.h"
+
+/*
+ * NULL-terminated usage text of the top-level "btrfs" command; entry 0 is
+ * the synopsis line (check_options() prints it for unknown options).
+ */
+static const char * const btrfs_cmd_group_usage[] = {
+ "btrfs [--help] [--version] <group> [<group>...] <command> [<args>]",
+ NULL
+};
+
+/* Info string stored next to the usage text in btrfs_cmd_group */
+static const char btrfs_cmd_group_info[] =
+ "Use --help as an argument for information on a specific group or command.";
+
+/*
+ * If str starts with prefix, return a pointer to the first character of
+ * str after the prefix (the terminating NUL when both are equal).
+ * Otherwise return NULL.
+ */
+static inline const char *skip_prefix(const char *str, const char *prefix)
+{
+	size_t plen = strlen(prefix);
+
+	if (strncmp(str, prefix, plen) != 0)
+		return NULL;
+
+	return str + plen;
+}
+
+/*
+ * Look up arg among the commands of grp.
+ *
+ * A command whose token equals arg is returned immediately. Otherwise,
+ * every command for which prefixcmp(token, arg) is zero (arg abbreviates
+ * the token) is a candidate; the last candidate is returned when there is
+ * exactly one.
+ *
+ * Returns 0 and stores the command in *cmd_ret on success, -1 when
+ * nothing matched and -2 when the abbreviation is ambiguous. *cmd_ret is
+ * left untouched on failure.
+ */
+static int parse_one_token(const char *arg, const struct cmd_group *grp,
+			   const struct cmd_struct **cmd_ret)
+{
+	const struct cmd_struct *cur;
+	const struct cmd_struct *abbrev = NULL;
+	int ambiguous = 0;
+
+	for (cur = grp->commands; cur->token; cur++) {
+		const char *tail = skip_prefix(arg, cur->token);
+
+		if (tail) {
+			/* token is a prefix of arg: only an exact match counts */
+			if (*tail == '\0') {
+				*cmd_ret = cur;
+				return 0;
+			}
+			continue;
+		}
+
+		if (prefixcmp(cur->token, arg) != 0)
+			continue;
+
+		/*
+		 * A second abbreviation candidate makes the input
+		 * ambiguous unless an exact match still turns up later.
+		 */
+		if (abbrev)
+			ambiguous = 1;
+		abbrev = cur;
+	}
+
+	if (ambiguous)
+		return -2;
+	if (!abbrev)
+		return -1;
+
+	*cmd_ret = abbrev;
+	return 0;
+}
+
+/*
+ * Resolve arg to a command of grp via parse_one_token().
+ *
+ * Returns the matching command. When parse_one_token() reports -1 or -2
+ * the corresponding help_*_token() helper is called and, if it returns,
+ * cmd is still NULL.
+ */
+static const struct cmd_struct *
+parse_command_token(const char *arg, const struct cmd_group *grp)
+{
+ const struct cmd_struct *cmd = NULL;
+
+ switch(parse_one_token(arg, grp, &cmd)) {
+ case -1:
+ help_unknown_token(arg, grp);
+ /*
+ * NOTE(review): there is no break here, so control would fall
+ * through into the -2 case if help_unknown_token() returned;
+ * presumably both helpers exit the process - confirm.
+ */
+ case -2:
+ help_ambiguous_token(arg, grp);
+ }
+
+ return cmd;
+}
+
+/*
+ * Handle "<cmd> --help".
+ *
+ * When argv[1] is "--help", print the help of cmd and exit with status 0:
+ * the group help when cmd has a nested group (cmd->next), the usage of
+ * the command itself otherwise. Returns without doing anything in all
+ * other cases.
+ */
+static void handle_help_options_next_level(const struct cmd_struct *cmd,
+		int argc, char **argv)
+{
+	if (argc < 2 || strcmp(argv[1], "--help") != 0)
+		return;
+
+	if (cmd->next)
+		help_command_group(cmd->next, argc - 1, argv + 1);
+	else
+		usage_command(cmd, 1, 0);
+
+	exit(0);
+}
+
+/*
+ * Dispatch a sub-command of the group grp.
+ *
+ * argv[0] is the group's own name and argv[1] the sub-command. With no
+ * sub-command the group usage is printed and the process exits with
+ * status 1. Otherwise the sub-command is resolved, "--help" is handled,
+ * and the command handler is called with argc/argv shifted by one.
+ *
+ * Returns whatever the command handler returns.
+ */
+int handle_command_group(const struct cmd_group *grp, int argc,
+			 char **argv)
+{
+	const struct cmd_struct *cmd;
+	int sub_argc = argc - 1;
+	char **sub_argv = argv + 1;
+
+	if (sub_argc < 1) {
+		usage_command_group(grp, 0, 0);
+		exit(1);
+	}
+
+	cmd = parse_command_token(sub_argv[0], grp);
+
+	handle_help_options_next_level(cmd, sub_argc, sub_argv);
+
+	fixup_argv0(sub_argv, cmd->token);
+	return cmd->fn(sub_argc, sub_argv);
+}
+
+static const struct cmd_group btrfs_cmd_group;
+
+/* NULL-terminated usage text of "btrfs help" */
+static const char * const cmd_help_usage[] = {
+ "btrfs help [--full]",
+ "Display help information",
+ "",
+ "--full display detailed help on every command",
+ NULL
+};
+
+/*
+ * "btrfs help": hand argc/argv to help_command_group() for the top-level
+ * command group. Always returns 0.
+ */
+static int cmd_help(int argc, char **argv)
+{
+ help_command_group(&btrfs_cmd_group, argc, argv);
+ return 0;
+}
+
+/* NULL-terminated usage text of "btrfs version" */
+static const char * const cmd_version_usage[] = {
+ "btrfs version",
+ "Display btrfs-progs version",
+ NULL
+};
+
+/*
+ * "btrfs version": print PACKAGE_STRING on stdout. The arguments are
+ * ignored. Always returns 0.
+ */
+static int cmd_version(int argc, char **argv)
+{
+ printf("%s\n", PACKAGE_STRING);
+ return 0;
+}
+
+/*
+ * Reject unknown global options.
+ *
+ * argv[0] is the first word after the program name. Anything that does
+ * not start with '-' is left for the command parser, and "--help" and
+ * "--version" are accepted. Any other option is reported on stderr
+ * together with the synopsis and the process exits with status 129.
+ */
+static void check_options(int argc, char **argv)
+{
+	const char *opt;
+
+	if (argc == 0)
+		return;
+
+	opt = argv[0];
+	if (opt[0] != '-')
+		return;
+	if (strcmp(opt, "--help") == 0 || strcmp(opt, "--version") == 0)
+		return;
+
+	fprintf(stderr, "Unknown option: %s\n", opt);
+	fprintf(stderr, "usage: %s\n",
+		btrfs_cmd_group.usagestr[0]);
+	exit(129);
+}
+
+/*
+ * Top-level command table of the "btrfs" binary.
+ *
+ * Each row appears to be { token, handler, usage text, nested command
+ * group, flags } - confirm against struct cmd_struct in commands.h. Rows
+ * with a nested group carry no usage text of their own, and
+ * NULL_CMD_STRUCT terminates the list.
+ */
+static const struct cmd_group btrfs_cmd_group = {
+ btrfs_cmd_group_usage, btrfs_cmd_group_info, {
+ { "subvolume", cmd_subvolume, NULL, &subvolume_cmd_group, 0 },
+ { "filesystem", cmd_filesystem, NULL, &filesystem_cmd_group, 0 },
+ { "balance", cmd_balance, NULL, &balance_cmd_group, 0 },
+ { "device", cmd_device, NULL, &device_cmd_group, 0 },
+ { "scrub", cmd_scrub, NULL, &scrub_cmd_group, 0 },
+ { "check", cmd_check, cmd_check_usage, NULL, 0 },
+ { "rescue", cmd_rescue, NULL, &rescue_cmd_group, 0 },
+ { "restore", cmd_restore, cmd_restore_usage, NULL, 0 },
+ { "inspect-internal", cmd_inspect, NULL, &inspect_cmd_group, 0 },
+ { "property", cmd_property, NULL, &property_cmd_group, 0 },
+ { "send", cmd_send, cmd_send_usage, NULL, 0 },
+ { "receive", cmd_receive, cmd_receive_usage, NULL, 0 },
+ { "quota", cmd_quota, NULL, &quota_cmd_group, 0 },
+ { "qgroup", cmd_qgroup, NULL, &qgroup_cmd_group, 0 },
+ { "replace", cmd_replace, NULL, &replace_cmd_group, 0 },
+ { "help", cmd_help, cmd_help_usage, NULL, 0 },
+ { "version", cmd_version, cmd_version_usage, NULL, 0 },
+ NULL_CMD_STRUCT
+ },
+};
+
+/*
+ * Entry point of the "btrfs" multi-call binary.
+ *
+ * When invoked under the name "btrfsck" the program behaves as
+ * "btrfs check". Otherwise the program name is dropped, unknown global
+ * options are rejected, and a leading "--" is stripped from the first
+ * word so that "--help" and "--version" resolve to the "help" and
+ * "version" commands.
+ *
+ * The process exits with the value returned by the command handler.
+ */
+int main(int argc, char **argv)
+{
+	const struct cmd_struct *cmd;
+	const char *slash;
+	const char *bname;
+	int ret;
+
+	/* basename of argv[0] */
+	slash = strrchr(argv[0], '/');
+	bname = slash ? slash + 1 : argv[0];
+
+	if (strcmp(bname, "btrfsck") == 0) {
+		argv[0] = "check";
+	} else {
+		argc--;
+		argv++;
+		check_options(argc, argv);
+		if (argc <= 0) {
+			usage_command_group_short(&btrfs_cmd_group);
+			exit(1);
+		}
+		if (prefixcmp(argv[0], "--") == 0)
+			argv[0] += 2;
+	}
+
+	cmd = parse_command_token(argv[0], &btrfs_cmd_group);
+
+	handle_help_options_next_level(cmd, argc, argv);
+
+	crc32c_optimization_init();
+
+	fixup_argv0(argv, cmd->token);
+
+	ret = cmd->fn(argc, argv);
+
+	btrfs_close_all_devices();
+
+	exit(ret);
+}
diff --git a/btrfsck.h b/btrfsck.h
new file mode 100644
index 00000000..e16f52f5
--- /dev/null
+++ b/btrfsck.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2013 FUJITSU LIMITED. All rights reserved.
+ * Written by Miao Xie <miaox@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_CHECK_H__
+#define __BTRFS_CHECK_H__
+
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#include "ctree.h"
+#include "extent-cache.h"
+#include "list.h"
+#else
+#include <btrfs/kerncompat.h>
+#include <btrfs/ctree.h>
+#include <btrfs/extent-cache.h>
+#include <btrfs/list.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+/*
+ * In-memory record of one block group, created from on-disk data by
+ * btrfs_new_block_group_record().
+ */
+struct block_group_record {
+ /* node linking this record into a block_group_tree's cache tree */
+ struct cache_extent cache;
+ /* Used to identify the orphan block groups */
+ struct list_head list;
+
+ u64 generation;
+
+ /* NOTE(review): objectid/type/offset look like the item's key - confirm */
+ u64 objectid;
+ u8 type;
+ u64 offset;
+
+ u64 flags;
+};
+
+/*
+ * Container for block group records: a cache tree plus a list head, both
+ * set up by block_group_tree_init().
+ */
+struct block_group_tree {
+ struct cache_tree tree;
+ struct list_head block_groups;
+};
+
+/* In-memory record of one device, indexed through an rb-tree node */
+struct device_record {
+ struct rb_node node;
+ u64 devid;
+
+ u64 generation;
+
+ /* NOTE(review): objectid/type/offset look like the item's key - confirm */
+ u64 objectid;
+ u8 type;
+ u64 offset;
+
+ u64 total_byte;
+ u64 byte_used;
+
+ /* NOTE(review): presumably the usage computed by the checker - confirm */
+ u64 real_used;
+};
+
+/* One stripe of a chunk_record: device id, offset and device UUID */
+struct stripe {
+ u64 devid;
+ u64 offset;
+ u8 dev_uuid[BTRFS_UUID_SIZE];
+};
+
+/*
+ * In-memory record of one chunk, created from on-disk data by
+ * btrfs_new_chunk_record(). The record is variable-sized: num_stripes
+ * stripe entries follow the fixed part, see btrfs_chunk_record_size().
+ */
+struct chunk_record {
+ struct cache_extent cache;
+
+ struct list_head list;
+ /* NOTE(review): presumably the device extents of this chunk - confirm */
+ struct list_head dextents;
+ struct block_group_record *bg_rec;
+
+ u64 generation;
+
+ /* NOTE(review): objectid/type/offset look like the item's key - confirm */
+ u64 objectid;
+ u8 type;
+ u64 offset;
+
+ u64 owner;
+ u64 length;
+ u64 type_flags;
+ u64 stripe_len;
+ u16 num_stripes;
+ u16 sub_stripes;
+ u32 io_align;
+ u32 io_width;
+ u32 sector_size;
+ /* trailing array (zero-length array extension) of stripe entries */
+ struct stripe stripes[0];
+};
+
+/*
+ * In-memory record of one device extent, created from on-disk data by
+ * btrfs_new_device_extent_record().
+ */
+struct device_extent_record {
+ struct cache_extent cache;
+ /*
+ * Used to identify the orphan device extents (the device extents
+ * don't belong to a chunk or a device)
+ */
+ struct list_head chunk_list;
+ struct list_head device_list;
+
+ u64 generation;
+
+ /* NOTE(review): objectid/type/offset look like the item's key - confirm */
+ u64 objectid;
+ u8 type;
+ u64 offset;
+
+ /* sic: "objecteid" is the field's actual (misspelled) name */
+ u64 chunk_objecteid;
+ u64 chunk_offset;
+ u64 length;
+};
+
+/*
+ * Container for device extent records: a cache tree plus two orphan
+ * lists, all set up by device_extent_tree_init().
+ */
+struct device_extent_tree {
+ struct cache_tree tree;
+ /*
+ * The idea is:
+ * When checking the chunk information, we move the device extents
+ * that have a chunk to that chunk's device extents list. After the
+ * check, if there are still some device extents in no_chunk_orphans,
+ * it means there are some device extents which don't belong to any
+ * chunk.
+ *
+ * The usage of no_device_orphans is the same as the first one, but it
+ * is for the device information check.
+ */
+ struct list_head no_chunk_orphans;
+ struct list_head no_device_orphans;
+};
+
+/*
+ * Size in bytes of a chunk_record that carries num_stripes trailing
+ * stripe entries.
+ */
+static inline unsigned long btrfs_chunk_record_size(int num_stripes)
+{
+	unsigned long stripes_size = sizeof(struct stripe) * num_stripes;
+
+	return sizeof(struct chunk_record) + stripes_size;
+}
+void free_chunk_cache_tree(struct cache_tree *chunk_cache);
+
+/*
+ * Validate the stripe count of a chunk of the given type. Callers divide
+ * by values derived from num_stripes, so an invalid count could otherwise
+ * end in a division by zero.
+ *
+ * Rejected: zero stripes, RAID5 with one stripe or fewer, RAID6 with two
+ * stripes or fewer.
+ *
+ * return < 0 for an invalid combination
+ * return 0 for a valid combination
+ */
+static inline int check_num_stripes(u64 type, int num_stripes)
+{
+	int min_stripes = 1;
+
+	if (num_stripes == 0)
+		return -1;
+
+	if ((type & BTRFS_BLOCK_GROUP_RAID5) && num_stripes < min_stripes + 1)
+		return -1;
+
+	if ((type & BTRFS_BLOCK_GROUP_RAID6) && num_stripes < min_stripes + 2)
+		return -1;
+
+	return 0;
+}
+
+u64 calc_stripe_length(u64 type, u64 length, int num_stripes);
+/* For block group tree */
+/* Initialise the cache tree and the block_groups list head of tree */
+static inline void block_group_tree_init(struct block_group_tree *tree)
+{
+ cache_tree_init(&tree->tree);
+ INIT_LIST_HEAD(&tree->block_groups);
+}
+
+int insert_block_group_record(struct block_group_tree *tree,
+ struct block_group_record *bg_rec);
+void free_block_group_tree(struct block_group_tree *tree);
+
+/* For device extent tree */
+/* Initialise the cache tree and both orphan list heads of tree */
+static inline void device_extent_tree_init(struct device_extent_tree *tree)
+{
+ cache_tree_init(&tree->tree);
+ INIT_LIST_HEAD(&tree->no_chunk_orphans);
+ INIT_LIST_HEAD(&tree->no_device_orphans);
+}
+
+int insert_device_extent_record(struct device_extent_tree *tree,
+ struct device_extent_record *de_rec);
+void free_device_extent_tree(struct device_extent_tree *tree);
+
+
+/* Create various in-memory record by on-disk data */
+struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
+ struct btrfs_key *key,
+ int slot);
+struct block_group_record *
+btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
+ int slot);
+struct device_extent_record *
+btrfs_new_device_extent_record(struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot);
+
+int check_chunks(struct cache_tree *chunk_cache,
+ struct block_group_tree *block_group_cache,
+ struct device_extent_tree *dev_extent_cache,
+ struct list_head *good, struct list_head *bad,
+ struct list_head *rebuild, int silent);
+#endif
diff --git a/btrfstune.c b/btrfstune.c
new file mode 100644
index 00000000..0907aa9e
--- /dev/null
+++ b/btrfstune.c
@@ -0,0 +1,554 @@
+/*
+ * Copyright (C) 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <uuid/uuid.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "utils.h"
+#include "volumes.h"
+
+static char *device;
+static int force = 0;
+
+/*
+ * Set or clear BTRFS_SUPER_FLAG_SEEDING in the superblock flags.
+ *
+ * @root:     root whose fs_info holds the superblock copy to modify
+ * @set_flag: non-zero to set the seeding flag, zero to clear it
+ *
+ * Returns 1 (after printing a message) when the flag is already in the
+ * requested state, except that setting an already-set flag returns 0
+ * silently when the global 'force' is non-zero.  Otherwise returns the
+ * result of btrfs_commit_transaction().
+ */
+static int update_seeding_flag(struct btrfs_root *root, int set_flag)
+{
+	struct btrfs_super_block *sb = root->fs_info->super_copy;
+	u64 flags = btrfs_super_flags(sb);
+	int seeding = !!(flags & BTRFS_SUPER_FLAG_SEEDING);
+	struct btrfs_trans_handle *trans;
+
+	if (set_flag && seeding) {
+		/* Nothing to change; only reported as a failure if not forced. */
+		if (force)
+			return 0;
+		fprintf(stderr, "seeding flag is already set on %s\n", device);
+		return 1;
+	}
+	if (!set_flag && !seeding) {
+		fprintf(stderr, "seeding flag is not set on %s\n",
+			device);
+		return 1;
+	}
+
+	if (set_flag) {
+		flags |= BTRFS_SUPER_FLAG_SEEDING;
+	} else {
+		flags &= ~BTRFS_SUPER_FLAG_SEEDING;
+		fprintf(stderr, "Warning: Seeding flag cleared.\n");
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_super_flags(sb, flags);
+	return btrfs_commit_transaction(trans, root);
+}
+
+/*
+ * OR @flags into the superblock's incompat flags and commit a transaction.
+ *
+ * Returns the result of btrfs_commit_transaction().
+ */
+static int set_super_incompat_flags(struct btrfs_root *root, u64 flags)
+{
+	struct btrfs_super_block *sb = root->fs_info->super_copy;
+	u64 merged = btrfs_super_incompat_flags(sb) | flags;
+	struct btrfs_trans_handle *trans;
+
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_super_incompat_flags(sb, merged);
+	return btrfs_commit_transaction(trans, root);
+}
+
+/*
+ * Make the header of tree block @eb carry fs_info->new_fsid and
+ * fs_info->new_chunk_tree_uuid, writing the block out only if at least one
+ * of the two differs from what the header already holds.
+ *
+ * Returns 0 when nothing had to change, otherwise the result of
+ * write_tree_block().
+ */
+static int change_header_uuid(struct btrfs_root *root, struct extent_buffer *eb)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	int fsid_differs;
+	int chunk_uuid_differs;
+
+	fsid_differs = memcmp_extent_buffer(eb, fs_info->new_fsid,
+					    btrfs_header_fsid(),
+					    BTRFS_FSID_SIZE) != 0;
+	chunk_uuid_differs =
+		memcmp_extent_buffer(eb, fs_info->new_chunk_tree_uuid,
+				     btrfs_header_chunk_tree_uuid(eb),
+				     BTRFS_UUID_SIZE) != 0;
+	if (!fsid_differs && !chunk_uuid_differs)
+		return 0;
+
+	if (fsid_differs)
+		write_extent_buffer(eb, fs_info->new_fsid, btrfs_header_fsid(),
+				    BTRFS_FSID_SIZE);
+	if (chunk_uuid_differs)
+		write_extent_buffer(eb, fs_info->new_chunk_tree_uuid,
+				    btrfs_header_chunk_tree_uuid(eb),
+				    BTRFS_UUID_SIZE);
+
+	return write_tree_block(NULL, root, eb);
+}
+
+/*
+ * Walk every item of the extent tree and, for each extent/metadata item
+ * flagged as a tree block, read that block and update its header uuids
+ * through change_header_uuid().
+ *
+ * Returns 0 once the whole tree was walked, or a negative error.
+ */
+static int change_extents_uuid(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_key key = {0, 0, 0};
+	struct btrfs_path *path = btrfs_alloc_path();
+	int ret;
+
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * Deliberately done without a transaction: reserving space for one
+	 * could make the uuid change impossible on a nearly full filesystem.
+	 */
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	for (;;) {
+		struct extent_buffer *leaf = path->nodes[0];
+		int slot = path->slots[0];
+		int is_extent_item;
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		is_extent_item = key.type == BTRFS_EXTENT_ITEM_KEY ||
+				 key.type == BTRFS_METADATA_ITEM_KEY;
+		if (is_extent_item) {
+			struct btrfs_extent_item *ei;
+
+			ei = btrfs_item_ptr(leaf, slot,
+					    struct btrfs_extent_item);
+			if (btrfs_extent_flags(leaf, ei) &
+			    BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+				u64 bytenr = key.objectid;
+				struct extent_buffer *eb;
+
+				eb = read_tree_block(root, bytenr,
+						     root->nodesize, 0);
+				if (IS_ERR(eb)) {
+					fprintf(stderr, "Failed to read tree block: %llu\n",
+						bytenr);
+					ret = PTR_ERR(eb);
+					goto out;
+				}
+				ret = change_header_uuid(root, eb);
+				free_extent_buffer(eb);
+				if (ret < 0) {
+					fprintf(stderr, "Failed to change uuid of tree block: %llu\n",
+						bytenr);
+					goto out;
+				}
+			}
+		}
+
+		ret = btrfs_next_item(root, path);
+		if (ret)
+			break;
+	}
+	/* A positive value from btrfs_next_item() ends the walk normally. */
+	if (ret > 0)
+		ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Update the fsid stored in the dev item at @slot of leaf @eb to
+ * fs_info->new_fsid, writing the leaf out only when the value differs.
+ *
+ * Returns 0 when the item already carries the new fsid, otherwise the
+ * result of write_tree_block().
+ */
+static int change_device_uuid(struct btrfs_root *root, struct extent_buffer *eb,
+			      int slot)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_dev_item *di = btrfs_item_ptr(eb, slot,
+						   struct btrfs_dev_item);
+	unsigned long fsid_offset = (unsigned long)btrfs_device_fsid(di);
+
+	if (memcmp_extent_buffer(eb, fs_info->new_fsid, fsid_offset,
+				 BTRFS_FSID_SIZE) == 0)
+		return 0;
+
+	write_extent_buffer(eb, fs_info->new_fsid, fsid_offset,
+			    BTRFS_FSID_SIZE);
+	return write_tree_block(NULL, root, eb);
+}
+
+/*
+ * Walk every item of the chunk tree and update the fsid of each dev item
+ * (type BTRFS_DEV_ITEM_KEY under BTRFS_DEV_ITEMS_OBJECTID).
+ *
+ * Returns 0 once the whole tree was walked, or a negative error.
+ */
+static int change_devices_uuid(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *root = fs_info->chunk_root;
+	struct btrfs_key key = {0, 0, 0};
+	struct btrfs_path *path = btrfs_alloc_path();
+	int ret;
+
+	if (!path)
+		return -ENOMEM;
+
+	/* As for the extent tree, no transaction is used here. */
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	for (;;) {
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.type == BTRFS_DEV_ITEM_KEY &&
+		    key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
+			ret = change_device_uuid(root, path->nodes[0],
+						 path->slots[0]);
+			if (ret < 0)
+				goto out;
+		}
+
+		ret = btrfs_next_item(root, path);
+		if (ret)
+			break;
+	}
+	/* A positive value from btrfs_next_item() ends the walk normally. */
+	if (ret > 0)
+		ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * First step of an fsid change: set BTRFS_SUPER_FLAG_CHANGING_FSID, store
+ * the new fsid in the superblock copy and write all superblocks, then store
+ * the new chunk tree uuid in the tree root node's header and write it out.
+ *
+ * Returns a negative value if writing the superblocks failed, otherwise
+ * the result of write_tree_block().
+ */
+static int change_fsid_prepare(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	int ret;
+
+	btrfs_set_super_flags(sb, btrfs_super_flags(sb) |
+				  BTRFS_SUPER_FLAG_CHANGING_FSID);
+	memcpy(sb->fsid, fs_info->new_fsid, BTRFS_FSID_SIZE);
+
+	ret = write_all_supers(tree_root);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Keep the new chunk tree uuid in the tree root header, where
+	 * check_unfinished_fsid_change() reads it back.
+	 */
+	write_extent_buffer(tree_root->node, fs_info->new_chunk_tree_uuid,
+			    btrfs_header_chunk_tree_uuid(tree_root->node),
+			    BTRFS_UUID_SIZE);
+	return write_tree_block(NULL, tree_root, tree_root->node);
+}
+
+/*
+ * Final step of an fsid change: clear BTRFS_SUPER_FLAG_CHANGING_FSID in the
+ * superblock copy and write all superblocks.
+ *
+ * Returns the result of write_all_supers().
+ */
+static int change_fsid_done(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	u64 cleared = btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_CHANGING_FSID;
+
+	btrfs_set_super_flags(sb, cleared);
+	return write_all_supers(fs_info->tree_root);
+}
+
+/*
+ * Detect an interrupted fsid change.
+ *
+ * Returns 0 when BTRFS_SUPER_FLAG_CHANGING_FSID is not set; the output
+ * buffers are left untouched in that case.
+ * Returns 1 when it is set, after copying the fsid from the superblock copy
+ * into @fsid_ret and the chunk tree uuid from the tree root node's header
+ * into @chunk_id_ret.
+ */
+static int check_unfinished_fsid_change(struct btrfs_fs_info *fs_info,
+					uuid_t fsid_ret, uuid_t chunk_id_ret)
+{
+	struct extent_buffer *root_node = fs_info->tree_root->node;
+
+	if (!(btrfs_super_flags(fs_info->super_copy) &
+	      BTRFS_SUPER_FLAG_CHANGING_FSID))
+		return 0;
+
+	memcpy(fsid_ret, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
+	read_extent_buffer(root_node, chunk_id_ret,
+			   btrfs_header_chunk_tree_uuid(root_node),
+			   BTRFS_UUID_SIZE);
+	return 1;
+}
+
+/*
+ * Change the fsid of a given fs.
+ *
+ * @fs_info:      filesystem to modify
+ * @new_fsid_str: textual UUID to switch to, or NULL to use a randomly
+ *                generated one
+ *
+ * If the superblock shows an unfinished fsid change, that change is resumed
+ * with the fsid/chunk tree uuid recorded on disk; a non-NULL @new_fsid_str
+ * must then match the recorded fsid.
+ *
+ * The steps are: mark the superblock (change_fsid_prepare), rewrite tree
+ * block headers, rewrite dev items, write the final superblocks, and clear
+ * the mark (change_fsid_done).
+ *
+ * fs_info->new_fsid and fs_info->new_chunk_tree_uuid point at local buffers
+ * while this function runs; they are reset to NULL on every exit path so
+ * that no pointer into this stack frame survives the call.
+ *
+ * Returns 0 on success, -EINVAL for an unparsable or mismatching
+ * @new_fsid_str, or the error of the failing step.
+ */
+static int change_uuid(struct btrfs_fs_info *fs_info, const char *new_fsid_str)
+{
+	uuid_t new_fsid;
+	uuid_t new_chunk_id;
+	uuid_t old_fsid;
+	char uuid_buf[BTRFS_UUID_UNPARSED_SIZE];
+	int ret = 0;
+
+	if (check_unfinished_fsid_change(fs_info, new_fsid, new_chunk_id)) {
+		if (new_fsid_str) {
+			uuid_t tmp;
+
+			/* Never compare against an unparsed buffer. */
+			if (uuid_parse(new_fsid_str, tmp) < 0) {
+				fprintf(stderr,
+					"ERROR: Could not parse UUID: %s\n",
+					new_fsid_str);
+				return -EINVAL;
+			}
+			if (memcmp(tmp, new_fsid, BTRFS_FSID_SIZE)) {
+				fprintf(stderr,
+		"ERROR: New fsid %s is not the same with unfinished fsid change\n",
+					new_fsid_str);
+				return -EINVAL;
+			}
+		}
+	} else {
+		if (new_fsid_str) {
+			/* Never write an unparsed buffer to disk. */
+			if (uuid_parse(new_fsid_str, new_fsid) < 0) {
+				fprintf(stderr,
+					"ERROR: Could not parse UUID: %s\n",
+					new_fsid_str);
+				return -EINVAL;
+			}
+		} else {
+			uuid_generate(new_fsid);
+		}
+
+		uuid_generate(new_chunk_id);
+	}
+	fs_info->new_fsid = new_fsid;
+	fs_info->new_chunk_tree_uuid = new_chunk_id;
+
+	memcpy(old_fsid, (const char*)fs_info->fsid, BTRFS_UUID_SIZE);
+	uuid_unparse(old_fsid, uuid_buf);
+	printf("Current fsid: %s\n", uuid_buf);
+
+	uuid_unparse(new_fsid, uuid_buf);
+	printf("New fsid: %s\n", uuid_buf);
+	/* Now we can begin fsid change */
+	printf("Set superblock flag CHANGING_FSID\n");
+	ret = change_fsid_prepare(fs_info);
+	if (ret < 0)
+		goto out;
+
+	/* Change extents first */
+	printf("Change fsid in extents\n");
+	ret = change_extents_uuid(fs_info);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to change UUID of metadata\n");
+		goto out;
+	}
+
+	/* Then devices */
+	printf("Change fsid on devices\n");
+	ret = change_devices_uuid(fs_info);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to change UUID of devices\n");
+		goto out;
+	}
+
+	/* Last, change fsid in super */
+	memcpy(fs_info->fs_devices->fsid, fs_info->new_fsid,
+	       BTRFS_FSID_SIZE);
+	memcpy(fs_info->super_copy->fsid, fs_info->new_fsid,
+	       BTRFS_FSID_SIZE);
+	ret = write_all_supers(fs_info->tree_root);
+	if (ret < 0)
+		goto out;
+
+	/* Now fsid change is done */
+	printf("Clear superblock flag CHANGING_FSID\n");
+	ret = change_fsid_done(fs_info);
+	/* Only claim completion if clearing the flag really succeeded. */
+	if (!ret)
+		printf("Fsid change finished\n");
+out:
+	/* The buffers are locals: drop the references before returning. */
+	fs_info->new_fsid = NULL;
+	fs_info->new_chunk_tree_uuid = NULL;
+	return ret;
+}
+
+/* Print the btrfstune usage text, one option per line, to stderr. */
+static void print_usage(void)
+{
+	static const char * const usage_lines[] = {
+		"usage: btrfstune [options] device\n",
+		"\t-S value\tpositive value will enable seeding, zero to disable, negative is not allowed\n",
+		"\t-r \t\tenable extended inode refs\n",
+		"\t-x \t\tenable skinny metadata extent refs\n",
+		"\t-n \t\tenable no-holes feature (more efficient sparse file representation)\n",
+		"\t-f \t\tforce to do dangerous operation, make sure that you are aware of the dangers\n",
+		"\t-u \t\tchange fsid, use a random one\n",
+		"\t-U UUID\t\tchange fsid to UUID\n",
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
+		fputs(usage_lines[i], stderr);
+}
+
+/*
+ * btrfstune entry point.
+ *
+ * Parses the options, refuses to work on a mounted device, opens the
+ * filesystem for writing and then applies each requested change in this
+ * order: seeding flag (-S), incompat feature flags (-r/-x/-n), fsid change
+ * (-u/-U).
+ *
+ * Returns 0 when every requested change succeeded (and for --help), 1 on a
+ * usage error, a failed or cancelled operation.
+ */
+int main(int argc, char *argv[])
+{
+	struct btrfs_root *root;
+	enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_WRITES;
+	int success = 0;
+	int total = 0;
+	int seeding_flag = 0;
+	u64 seeding_value = 0;
+	int random_fsid = 0;
+	char *new_fsid_str = NULL;
+	int ret;
+	u64 super_flags = 0;
+
+	optind = 1;
+	while(1) {
+		static const struct option long_options[] = {
+			{ "help", no_argument, NULL, GETOPT_VAL_HELP},
+			{ NULL, 0, NULL, 0 }
+		};
+		int c = getopt_long(argc, argv, "S:rxfuU:n", long_options, NULL);
+
+		if (c < 0)
+			break;
+		switch(c) {
+		case 'S':
+			seeding_flag = 1;
+			seeding_value = arg_strtou64(optarg);
+			break;
+		case 'r':
+			super_flags |= BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF;
+			break;
+		case 'x':
+			super_flags |= BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA;
+			break;
+		case 'n':
+			super_flags |= BTRFS_FEATURE_INCOMPAT_NO_HOLES;
+			break;
+		case 'f':
+			force = 1;
+			break;
+		case 'U':
+			ctree_flags |= OPEN_CTREE_IGNORE_FSID_MISMATCH;
+			new_fsid_str = optarg;
+			break;
+		case 'u':
+			ctree_flags |= OPEN_CTREE_IGNORE_FSID_MISMATCH;
+			random_fsid = 1;
+			break;
+		case GETOPT_VAL_HELP:
+		default:
+			print_usage();
+			/* --help is a success, any unknown option is not. */
+			return c != GETOPT_VAL_HELP;
+		}
+	}
+
+	set_argv0(argv);
+	argc = argc - optind;
+	device = argv[optind];
+	if (check_argc_exact(argc, 1)) {
+		print_usage();
+		return 1;
+	}
+
+	if (random_fsid && new_fsid_str) {
+		fprintf(stderr,
+			"ERROR: Random fsid can't be used with specified fsid\n");
+		return 1;
+	}
+	if (!super_flags && !seeding_flag && !(random_fsid || new_fsid_str)) {
+		fprintf(stderr,
+			"ERROR: At least one option should be assigned.\n");
+		print_usage();
+		return 1;
+	}
+
+	/* Validate a user supplied fsid before touching the filesystem. */
+	if (new_fsid_str) {
+		uuid_t tmp;
+
+		ret = uuid_parse(new_fsid_str, tmp);
+		if (ret < 0) {
+			fprintf(stderr,
+				"ERROR: Could not parse UUID: %s\n",
+				new_fsid_str);
+			return 1;
+		}
+		if (!test_uuid_unique(new_fsid_str)) {
+			fprintf(stderr,
+				"ERROR: Fsid %s is not unique\n",
+				new_fsid_str);
+			return 1;
+		}
+	}
+
+	ret = check_mounted(device);
+	if (ret < 0) {
+		fprintf(stderr, "Could not check mount status: %s\n",
+			strerror(-ret));
+		return 1;
+	} else if (ret) {
+		fprintf(stderr, "%s is mounted\n", device);
+		return 1;
+	}
+
+	root = open_ctree(device, 0, ctree_flags);
+
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		return 1;
+	}
+
+	if (seeding_flag) {
+		if (!seeding_value && !force) {
+			fprintf(stderr, "Warning: This is dangerous, clearing the seeding flag may cause the derived device not to be mountable!\n");
+			ret = ask_user("We are going to clear the seeding flag, are you sure?");
+			if (!ret) {
+				fprintf(stderr, "Clear seeding flag canceled\n");
+				ret = 1;
+				goto out;
+			}
+		}
+
+		/*
+		 * update_seeding_flag() takes an int.  Pass a normalized 0/1
+		 * so that a large u64 value (e.g. 1 << 32) cannot be narrowed
+		 * to 0 and turn into a "clear" request that skipped the
+		 * confirmation above.
+		 */
+		ret = update_seeding_flag(root, seeding_value != 0);
+		if (!ret)
+			success++;
+		total++;
+	}
+
+	if (super_flags) {
+		ret = set_super_incompat_flags(root, super_flags);
+		if (!ret)
+			success++;
+		total++;
+	}
+
+	if (random_fsid || new_fsid_str) {
+		if (!force) {
+			fprintf(stderr,
+				"Warning: It's highly recommended to run 'btrfs check' before this operation\n");
+			fprintf(stderr,
+				"Also canceling running UUID change progress may cause corruption\n");
+			ret = ask_user("We are going to change UUID, are you sure?");
+			if (!ret) {
+				fprintf(stderr, "UUID change canceled\n");
+				ret = 1;
+				goto out;
+			}
+		}
+		ret = change_uuid(root->fs_info, new_fsid_str);
+		if (!ret)
+			success++;
+		total++;
+	}
+
+	if (success == total) {
+		ret = 0;
+	} else {
+		/* Something failed: mark the fs read-only before closing it. */
+		root->fs_info->readonly = 1;
+		ret = 1;
+		fprintf(stderr, "btrfstune failed\n");
+	}
+out:
+	close_ctree(root);
+	btrfs_close_all_devices();
+
+	return ret;
+}
diff --git a/chunk-recover.c b/chunk-recover.c
new file mode 100644
index 00000000..b03330b4
--- /dev/null
+++ b/chunk-recover.c
@@ -0,0 +1,2407 @@
+/*
+ * Copyright (C) 2013 FUJITSU LIMITED. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "kerncompat.h"
+#include "androidcompat.h"
+
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+#include <pthread.h>
+
+#include "list.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "extent-cache.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "transaction.h"
+#include "crc32c.h"
+#include "utils.h"
+#include "btrfsck.h"
+#include "commands.h"
+
+struct recover_control { /* state shared by the scan, check and print steps of this file */
+ int verbose; /* non-zero enables print_scan_result()/print_check_result() output */
+ int yes; /* stored by init_recover_control(); presumably "assume yes" -- TODO confirm */
+
+ u16 csum_size;
+ u32 sectorsize; /* printed by print_scan_result() */
+ u32 leafsize; /* printed by print_scan_result() */
+ u64 generation; /* printed as "tree root generation" */
+ u64 chunk_root_generation; /* printed as "chunk root generation" */
+
+ struct btrfs_fs_devices *fs_devices; /* its ->devices list is dumped by print_all_devices() */
+
+ struct cache_tree chunk; /* chunk_record entries, filled by process_chunk_item() */
+ struct block_group_tree bg; /* filled by process_block_group_item() */
+ struct device_extent_tree devext; /* filled by process_device_extent_item() */
+ struct cache_tree eb_cache; /* extent_record entries, see process_extent_buffer() */
+
+ struct list_head good_chunks; /* reported under "Recoverable Chunks" */
+ struct list_head bad_chunks; /* reported under "Unrecoverable Chunks" */
+ struct list_head rebuild_chunks; /* reported under "Recoverable Chunks" */
+ struct list_head unrepaired_chunks; /* reported under "Recoverable Chunks" */
+ pthread_mutex_t rc_lock; /* held around the process_*_item() calls in extract_metadata_record() */
+};
+
+struct extent_record { /* one tree block seen during the scan, kept in an eb_cache tree */
+ struct cache_extent cache; /* start = header bytenr, size = eb->len (btrfs_new_extent_record) */
+ u64 generation; /* generation read from the block header */
+ u8 csum[BTRFS_CSUM_SIZE]; /* checksum bytes copied from the block header */
+ struct btrfs_device *devices[BTRFS_MAX_MIRRORS]; /* device of each recorded copy */
+ u64 offsets[BTRFS_MAX_MIRRORS]; /* offset recorded for each copy, parallel to devices[] */
+ int nmirrors; /* number of valid entries in devices[]/offsets[] */
+};
+
+struct device_scan { /* argument block handed to scan_one_device() */
+ struct recover_control *rc; /* shared recovery state */
+ struct btrfs_device *dev; /* device being scanned */
+ int fd; /* file descriptor scan_one_device() uses for that device */
+ u64 bytenr; /* NOTE(review): looks like the current scan position -- confirm */
+};
+
+/*
+ * Allocate a zeroed extent_record and fill it from the header of @eb:
+ * bytenr, length, generation and checksum.
+ *
+ * Never returns NULL: the process exits with status 1 when the allocation
+ * fails.  The caller owns the returned record.
+ */
+static struct extent_record *btrfs_new_extent_record(struct extent_buffer *eb)
+{
+	struct extent_record *rec = calloc(1, sizeof(*rec));
+
+	if (rec == NULL) {
+		fprintf(stderr, "Fail to allocate memory for extent record.\n");
+		exit(1);
+	}
+
+	read_extent_buffer(eb, rec->csum, (unsigned long)btrfs_header_csum(eb),
+			   BTRFS_CSUM_SIZE);
+	rec->generation = btrfs_header_generation(eb);
+	rec->cache.size = eb->len;
+	rec->cache.start = btrfs_header_bytenr(eb);
+	return rec;
+}
+
+/*
+ * Record tree block @eb, found on @device at @offset, in @eb_cache.
+ *
+ * Cached records overlapping the new one are handled by generation:
+ *  - a newer cached record wins and the new one is dropped;
+ *  - an older cached record is removed and the lookup is repeated;
+ *  - for an equal generation, an identical record (same range and
+ *    checksum) gets @device/@offset appended as another mirror, anything
+ *    else is a conflict.
+ *
+ * Returns 0 on success (including "dropped") and -EEXIST on a conflict.
+ */
+static int process_extent_buffer(struct cache_tree *eb_cache,
+				 struct extent_buffer *eb,
+				 struct btrfs_device *device, u64 offset)
+{
+	struct extent_record *rec = btrfs_new_extent_record(eb);
+	struct extent_record *exist;
+	struct cache_extent *cache;
+	int ret = 0;
+
+	if (rec->cache.size == 0)
+		goto discard;
+
+	for (;;) {
+		cache = lookup_cache_extent(eb_cache, rec->cache.start,
+					    rec->cache.size);
+		if (!cache)
+			break;
+		exist = container_of(cache, struct extent_record, cache);
+
+		if (exist->generation > rec->generation)
+			goto discard;
+		if (exist->generation == rec->generation) {
+			int identical =
+				exist->cache.start == rec->cache.start &&
+				exist->cache.size == rec->cache.size &&
+				!memcmp(exist->csum, rec->csum,
+					BTRFS_CSUM_SIZE);
+
+			if (identical) {
+				BUG_ON(exist->nmirrors >= BTRFS_MAX_MIRRORS);
+				exist->devices[exist->nmirrors] = device;
+				exist->offsets[exist->nmirrors] = offset;
+				exist->nmirrors++;
+			} else {
+				ret = -EEXIST;
+			}
+			goto discard;
+		}
+
+		/* The cached record is older: drop it and look again. */
+		remove_cache_extent(eb_cache, cache);
+		free(exist);
+	}
+
+	rec->devices[0] = device;
+	rec->offsets[0] = offset;
+	rec->nmirrors++;
+	ret = insert_cache_extent(eb_cache, &rec->cache);
+	BUG_ON(ret);
+	return ret;
+
+discard:
+	free(rec);
+	return ret;
+}
+
+/* Free the extent_record that embeds @cache. */
+static void free_extent_record(struct cache_extent *cache)
+{
+	free(container_of(cache, struct extent_record, cache));
+}
+
+FREE_EXTENT_CACHE_BASED_TREE(extent_record, free_extent_record);
+
+/*
+ * Build a btrfs_chunk item, including its stripes, from chunk record
+ * @record.
+ *
+ * Returns a malloc()ed item owned by the caller, or NULL when @record is
+ * NULL, has no stripes, or the allocation fails.
+ */
+static struct btrfs_chunk *create_chunk_item(struct chunk_record *record)
+{
+	struct btrfs_chunk *item;
+	int i;
+
+	if (!record)
+		return NULL;
+	if (record->num_stripes == 0)
+		return NULL;
+
+	item = malloc(btrfs_chunk_item_size(record->num_stripes));
+	if (!item)
+		return NULL;
+
+	btrfs_set_stack_chunk_length(item, record->length);
+	btrfs_set_stack_chunk_owner(item, record->owner);
+	btrfs_set_stack_chunk_stripe_len(item, record->stripe_len);
+	btrfs_set_stack_chunk_type(item, record->type_flags);
+	btrfs_set_stack_chunk_io_align(item, record->io_align);
+	btrfs_set_stack_chunk_io_width(item, record->io_width);
+	btrfs_set_stack_chunk_sector_size(item, record->sector_size);
+	btrfs_set_stack_chunk_num_stripes(item, record->num_stripes);
+	btrfs_set_stack_chunk_sub_stripes(item, record->sub_stripes);
+
+	for (i = 0; i < record->num_stripes; i++) {
+		/* Stripes are laid out back to back after the first one. */
+		struct btrfs_stripe *dst = &item->stripe + i;
+
+		btrfs_set_stack_stripe_devid(dst, record->stripes[i].devid);
+		btrfs_set_stack_stripe_offset(dst, record->stripes[i].offset);
+		memcpy(dst->dev_uuid, record->stripes[i].dev_uuid,
+		       BTRFS_UUID_SIZE);
+	}
+	return item;
+}
+
+/*
+ * Bring @rc into its initial state: zero it, initialise its trees, chunk
+ * lists and mutex, and store the @verbose and @yes settings.
+ */
+static void init_recover_control(struct recover_control *rc, int verbose,
+				 int yes)
+{
+	/* Must come first: everything below writes into the zeroed struct. */
+	memset(rc, 0, sizeof(*rc));
+
+	rc->yes = yes;
+	rc->verbose = verbose;
+
+	cache_tree_init(&rc->chunk);
+	cache_tree_init(&rc->eb_cache);
+	block_group_tree_init(&rc->bg);
+	device_extent_tree_init(&rc->devext);
+
+	INIT_LIST_HEAD(&rc->good_chunks);
+	INIT_LIST_HEAD(&rc->bad_chunks);
+	INIT_LIST_HEAD(&rc->rebuild_chunks);
+	INIT_LIST_HEAD(&rc->unrepaired_chunks);
+
+	pthread_mutex_init(&rc->rc_lock, NULL);
+}
+
+/*
+ * Release what @rc holds: the four record trees, then the mutex.  @rc
+ * itself is not freed.
+ */
+static void free_recover_control(struct recover_control *rc)
+{
+	/* The order of the tree teardown is kept as is. */
+	free_block_group_tree(&rc->bg);
+	free_chunk_cache_tree(&rc->chunk);
+	free_device_extent_tree(&rc->devext);
+	free_extent_record_tree(&rc->eb_cache);
+
+	pthread_mutex_destroy(&rc->rc_lock);
+}
+
+/*
+ * Turn the block group item at @slot of @leaf into a record and add it to
+ * @bg_cache.
+ *
+ * Overlapping cached records are resolved by generation: a newer cached
+ * record wins, an older one is removed (and the lookup repeated, because
+ * the new record may overlap several old ones), and an equal generation is
+ * only accepted when everything from the 'generation' member onwards is
+ * identical.
+ *
+ * Returns 0 on success (including "dropped") and -EEXIST on a conflict.
+ */
+static int process_block_group_item(struct block_group_tree *bg_cache,
+				    struct extent_buffer *leaf,
+				    struct btrfs_key *key, int slot)
+{
+	struct block_group_record *rec;
+	struct block_group_record *exist;
+	struct cache_extent *cache;
+	int ret = 0;
+
+	rec = btrfs_new_block_group_record(leaf, key, slot);
+	if (rec->cache.size == 0)
+		goto discard;
+
+	for (;;) {
+		cache = lookup_cache_extent(&bg_cache->tree, rec->cache.start,
+					    rec->cache.size);
+		if (!cache)
+			break;
+		exist = container_of(cache, struct block_group_record, cache);
+
+		if (exist->generation > rec->generation)
+			goto discard;
+		if (exist->generation == rec->generation) {
+			size_t skip = offsetof(struct block_group_record,
+					       generation);
+
+			/*
+			 * Two different block groups of the same generation
+			 * should not exist; report it as a conflict.
+			 */
+			if (memcmp((char *)exist + skip, (char *)rec + skip,
+				   sizeof(*rec) - skip))
+				ret = -EEXIST;
+			goto discard;
+		}
+
+		/*
+		 * Older record: remove it, then search again, e.g. for
+		 *   /--old bg 1--//--old bg 2--/
+		 *        /--new bg--/
+		 */
+		remove_cache_extent(&bg_cache->tree, cache);
+		list_del_init(&exist->list);
+		free(exist);
+	}
+
+	ret = insert_block_group_record(bg_cache, rec);
+	BUG_ON(ret);
+	return ret;
+
+discard:
+	free(rec);
+	return ret;
+}
+
+/*
+ * Turn the chunk item at @slot of @leaf into a record and add it to
+ * @chunk_cache.
+ *
+ * Overlapping cached records are resolved by generation: a newer cached
+ * record wins, an older one is removed and the lookup repeated, and an
+ * equal generation is only accepted when the stripe count and everything
+ * from the 'generation' member onwards are identical.
+ *
+ * Returns 0 on success (including "dropped") and -EEXIST on a conflict.
+ */
+static int process_chunk_item(struct cache_tree *chunk_cache,
+			      struct extent_buffer *leaf, struct btrfs_key *key,
+			      int slot)
+{
+	struct chunk_record *rec;
+	struct chunk_record *exist;
+	struct cache_extent *cache;
+	int ret = 0;
+
+	rec = btrfs_new_chunk_record(leaf, key, slot);
+	if (rec->cache.size == 0)
+		goto discard;
+
+	for (;;) {
+		cache = lookup_cache_extent(chunk_cache, rec->offset,
+					    rec->length);
+		if (!cache)
+			break;
+		exist = container_of(cache, struct chunk_record, cache);
+
+		if (exist->generation > rec->generation)
+			goto discard;
+		if (exist->generation == rec->generation) {
+			int total = btrfs_chunk_record_size(rec->num_stripes);
+			int skip = offsetof(struct chunk_record, generation);
+
+			if (exist->num_stripes != rec->num_stripes ||
+			    memcmp((char *)exist + skip, (char *)rec + skip,
+				   total - skip))
+				ret = -EEXIST;
+			goto discard;
+		}
+
+		/* The cached record is older: drop it and look again. */
+		remove_cache_extent(chunk_cache, cache);
+		free(exist);
+	}
+
+	ret = insert_cache_extent(chunk_cache, &rec->cache);
+	BUG_ON(ret);
+	return ret;
+
+discard:
+	free(rec);
+	return ret;
+}
+
+/*
+ * Turn the device extent item at @slot of @leaf into a record and add it to
+ * @devext_cache.
+ *
+ * Overlapping cached records (same objectid and range) are resolved by
+ * generation: a newer cached record wins, an older one is removed and the
+ * lookup repeated, and an equal generation is only accepted when
+ * everything from the 'generation' member onwards is identical.
+ *
+ * Returns 0 on success (including "dropped") and -EEXIST on a conflict.
+ */
+static int process_device_extent_item(struct device_extent_tree *devext_cache,
+				      struct extent_buffer *leaf,
+				      struct btrfs_key *key, int slot)
+{
+	struct device_extent_record *rec;
+	struct device_extent_record *exist;
+	struct cache_extent *cache;
+	int ret = 0;
+
+	rec = btrfs_new_device_extent_record(leaf, key, slot);
+	if (rec->cache.size == 0)
+		goto discard;
+
+	for (;;) {
+		cache = lookup_cache_extent2(&devext_cache->tree,
+					     rec->cache.objectid,
+					     rec->cache.start,
+					     rec->cache.size);
+		if (!cache)
+			break;
+		exist = container_of(cache, struct device_extent_record, cache);
+
+		if (exist->generation > rec->generation)
+			goto discard;
+		if (exist->generation == rec->generation) {
+			size_t skip = offsetof(struct device_extent_record,
+					       generation);
+
+			if (memcmp((char *)exist + skip, (char *)rec + skip,
+				   sizeof(*rec) - skip))
+				ret = -EEXIST;
+			goto discard;
+		}
+
+		/* The cached record is older: unlink it and look again. */
+		remove_cache_extent(&devext_cache->tree, cache);
+		list_del_init(&exist->chunk_list);
+		list_del_init(&exist->device_list);
+		free(exist);
+	}
+
+	ret = insert_device_extent_record(devext_cache, rec);
+	BUG_ON(ret);
+	return ret;
+
+discard:
+	free(rec);
+	return ret;
+}
+
+/* Print one block group record on stdout, preceded by @prefix if non-NULL. */
+static void print_block_group_info(struct block_group_record *rec, char *prefix)
+{
+	if (prefix != NULL)
+		fputs(prefix, stdout);
+	printf("Block Group: start = %llu, len = %llu, flag = %llx\n",
+	       rec->objectid, rec->offset, rec->flags);
+}
+
+/* Print every block group record stored in @tree, one per line. */
+static void print_block_group_tree(struct block_group_tree *tree)
+{
+	struct cache_extent *node = first_cache_extent(&tree->tree);
+
+	printf("All Block Groups:\n");
+	while (node) {
+		print_block_group_info(container_of(node,
+						    struct block_group_record,
+						    cache), "\t");
+		node = next_cache_extent(node);
+	}
+	printf("\n");
+}
+
+/*
+ * Print stripe @data with its position @index on stdout, preceded by
+ * @prefix1 and then @prefix2 (each skipped when NULL).
+ */
+static void print_stripe_info(struct stripe *data, char *prefix1, char *prefix2,
+			      int index)
+{
+	if (prefix1 != NULL)
+		fputs(prefix1, stdout);
+	if (prefix2 != NULL)
+		fputs(prefix2, stdout);
+	printf("[%2d] Stripe: devid = %llu, offset = %llu\n",
+	       index, data->devid, data->offset);
+}
+
+/*
+ * Print chunk record @rec followed by its stripe list; every output line
+ * starts with @prefix when it is non-NULL.
+ */
+static void print_chunk_self_info(struct chunk_record *rec, char *prefix)
+{
+	int idx = 0;
+
+	if (prefix != NULL)
+		fputs(prefix, stdout);
+	printf("Chunk: start = %llu, len = %llu, type = %llx, num_stripes = %u\n",
+	       rec->offset, rec->length, rec->type_flags, rec->num_stripes);
+
+	if (prefix != NULL)
+		fputs(prefix, stdout);
+	printf(" Stripes list:\n");
+
+	while (idx < rec->num_stripes) {
+		print_stripe_info(&rec->stripes[idx], prefix, " ", idx);
+		idx++;
+	}
+}
+
+/* Print every chunk record stored in @tree, each with its stripes. */
+static void print_chunk_tree(struct cache_tree *tree)
+{
+	struct cache_extent *node = first_cache_extent(tree);
+
+	printf("All Chunks:\n");
+	while (node) {
+		print_chunk_self_info(container_of(node, struct chunk_record,
+						   cache), "\t");
+		node = next_cache_extent(node);
+	}
+	printf("\n");
+}
+
+/* Print one device extent record on stdout, preceded by @prefix if non-NULL. */
+static void print_device_extent_info(struct device_extent_record *rec,
+				     char *prefix)
+{
+	if (prefix != NULL)
+		fputs(prefix, stdout);
+	printf("Device extent: devid = %llu, start = %llu, len = %llu, chunk offset = %llu\n",
+	       rec->objectid, rec->offset, rec->length, rec->chunk_offset);
+}
+
+/* Print every device extent record stored in @tree, one per line. */
+static void print_device_extent_tree(struct device_extent_tree *tree)
+{
+	struct cache_extent *node = first_cache_extent(&tree->tree);
+
+	printf("All Device Extents:\n");
+	while (node) {
+		print_device_extent_info(container_of(node,
+						struct device_extent_record,
+						cache), "\t");
+		node = next_cache_extent(node);
+	}
+	printf("\n");
+}
+
+/* Print the id and name of @device on stdout, preceded by @prefix if non-NULL. */
+static void print_device_info(struct btrfs_device *device, char *prefix)
+{
+	if (prefix != NULL)
+		fputs(prefix, stdout);
+	printf("Device: id = %llu, name = %s\n",
+	       device->devid, device->name);
+}
+
+/* Print every btrfs_device linked on the @devices list, one per line. */
+static void print_all_devices(struct list_head *devices)
+{
+	struct btrfs_device *cur;
+
+	printf("All Devices:\n");
+	list_for_each_entry(cur, devices, dev_list) {
+		print_device_info(cur, "\t");
+	}
+	printf("\n");
+}
+
+/*
+ * Dump everything collected by the device scan: basic filesystem
+ * parameters, then all devices, block groups, chunks and device extents.
+ *
+ * Does nothing unless rc->verbose is set.
+ */
+static void print_scan_result(struct recover_control *rc)
+{
+	if (!rc->verbose)
+		return;
+
+	printf("DEVICE SCAN RESULT:\n");
+	printf("Filesystem Information:\n");
+	/* sectorsize and leafsize are u32, so they need an unsigned format. */
+	printf("\tsectorsize: %u\n", rc->sectorsize);
+	printf("\tleafsize: %u\n", rc->leafsize);
+	printf("\ttree root generation: %llu\n", rc->generation);
+	printf("\tchunk root generation: %llu\n", rc->chunk_root_generation);
+	printf("\n");
+
+	print_all_devices(&rc->fs_devices->devices);
+	print_block_group_tree(&rc->bg);
+	print_chunk_tree(&rc->chunk);
+	print_device_extent_tree(&rc->devext);
+}
+
+/*
+ * Print chunk record @chunk together with what is attached to it: its
+ * stripes, its block group (if any) and its device extents (if any).
+ * @prefix, when non-NULL, is printed in front of each part.
+ */
+static void print_chunk_info(struct chunk_record *chunk, char *prefix)
+{
+	struct device_extent_record *devext;
+	int idx = 0;
+
+	print_chunk_self_info(chunk, prefix);
+
+	if (prefix != NULL)
+		fputs(prefix, stdout);
+	if (!chunk->bg_rec)
+		printf(" No block group.\n");
+	else
+		print_block_group_info(chunk->bg_rec, " ");
+
+	if (prefix != NULL)
+		fputs(prefix, stdout);
+	if (list_empty(&chunk->dextents)) {
+		printf(" No device extent.\n");
+		return;
+	}
+
+	printf(" Device extent list:\n");
+	list_for_each_entry(devext, &chunk->dextents, chunk_list) {
+		if (prefix != NULL)
+			fputs(prefix, stdout);
+		printf("%s[%2d]", " ", idx);
+		print_device_extent_info(devext, NULL);
+		idx++;
+	}
+}
+
+/*
+ * Print the outcome of the chunk check: the chunks on the good, rebuild and
+ * unrepaired lists under "Recoverable", those on the bad list under
+ * "Unrecoverable", the totals, and finally the block groups and device
+ * extents still sitting on the orphan lists.
+ *
+ * Does nothing unless rc->verbose is set.
+ */
+static void print_check_result(struct recover_control *rc)
+{
+	struct list_head *recoverable[3];
+	struct chunk_record *chunk;
+	struct block_group_record *bg;
+	struct device_extent_record *devext;
+	int good = 0;
+	int bad = 0;
+	int i;
+
+	if (!rc->verbose)
+		return;
+
+	recoverable[0] = &rc->good_chunks;
+	recoverable[1] = &rc->rebuild_chunks;
+	recoverable[2] = &rc->unrepaired_chunks;
+
+	printf("CHECK RESULT:\n");
+	printf("Recoverable Chunks:\n");
+	for (i = 0; i < 3; i++) {
+		list_for_each_entry(chunk, recoverable[i], list) {
+			print_chunk_info(chunk, " ");
+			good++;
+		}
+	}
+	printf("Unrecoverable Chunks:\n");
+	list_for_each_entry(chunk, &rc->bad_chunks, list) {
+		print_chunk_info(chunk, " ");
+		bad++;
+	}
+	printf("\n");
+	printf("Total Chunks:\t\t%d\n", good + bad);
+	printf(" Recoverable:\t\t%d\n", good);
+	printf(" Unrecoverable:\t%d\n", bad);
+
+	printf("\n");
+	printf("Orphan Block Groups:\n");
+	list_for_each_entry(bg, &rc->bg.block_groups, list) {
+		print_block_group_info(bg, " ");
+	}
+
+	printf("\n");
+	printf("Orphan Device Extents:\n");
+	list_for_each_entry(devext, &rc->devext.no_chunk_orphans, chunk_list) {
+		print_device_extent_info(devext, " ");
+	}
+	printf("\n");
+}
+
+/*
+ * Cross-check chunk record @chunk against the on-disk trees of @root.
+ *
+ * Unless @bg_only is set, every stripe must have a device extent item in
+ * the device tree whose chunk offset equals the chunk's offset.  In all
+ * cases the extent tree must hold a block group item for the chunk's
+ * [offset, length] whose flags equal the chunk's type flags.
+ *
+ * Returns 0 when everything matches, -ENOENT when an item is missing or
+ * does not match, or the negative error of a failed tree search.
+ */
+static int check_chunk_by_metadata(struct recover_control *rc,
+				   struct btrfs_root *root,
+				   struct chunk_record *chunk, int bg_only)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_block_group_item *bg_item;
+	struct extent_buffer *leaf;
+	int ret = 0;
+	int i;
+
+	btrfs_init_path(&path);
+
+	/* Step 1 (skipped for bg_only): one device extent per stripe. */
+	for (i = 0; !bg_only && i < chunk->num_stripes; i++) {
+		struct stripe *stripe = &chunk->stripes[i];
+		struct btrfs_dev_extent *dev_extent;
+
+		key.objectid = stripe->devid;
+		key.type = BTRFS_DEV_EXTENT_KEY;
+		key.offset = stripe->offset;
+
+		ret = btrfs_search_slot(NULL, root->fs_info->dev_root, &key,
+					&path, 0, 0);
+		if (ret < 0) {
+			fprintf(stderr, "Search device extent failed(%d)\n",
+				ret);
+			goto out;
+		}
+		if (ret > 0) {
+			if (rc->verbose)
+				fprintf(stderr,
+					"No device extent[%llu, %llu]\n",
+					stripe->devid, stripe->offset);
+			ret = -ENOENT;
+			goto out;
+		}
+
+		leaf = path.nodes[0];
+		dev_extent = btrfs_item_ptr(leaf, path.slots[0],
+					    struct btrfs_dev_extent);
+		if (chunk->offset !=
+		    btrfs_dev_extent_chunk_offset(leaf, dev_extent)) {
+			if (rc->verbose)
+				fprintf(stderr,
+					"Device tree unmatch with chunks dev_extent[%llu, %llu], chunk[%llu, %llu]\n",
+					btrfs_dev_extent_chunk_offset(leaf,
+								dev_extent),
+					btrfs_dev_extent_length(leaf, dev_extent),
+					chunk->offset, chunk->length);
+			ret = -ENOENT;
+			goto out;
+		}
+		btrfs_release_path(&path);
+	}
+
+	/* Step 2: the matching block group item. */
+	key.objectid = chunk->offset;
+	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	key.offset = chunk->length;
+
+	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
+				0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Search block group failed(%d)\n", ret);
+		goto out;
+	}
+	if (ret > 0) {
+		if (rc->verbose)
+			fprintf(stderr, "No block group[%llu, %llu]\n",
+				key.objectid, key.offset);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	leaf = path.nodes[0];
+	bg_item = btrfs_item_ptr(leaf, path.slots[0],
+				 struct btrfs_block_group_item);
+	if (chunk->type_flags != btrfs_disk_block_group_flags(leaf, bg_item)) {
+		if (rc->verbose)
+			fprintf(stderr,
+				"Chunk[%llu, %llu]'s type(%llu) is differemt with Block Group's type(%llu)\n",
+				chunk->offset, chunk->length, chunk->type_flags,
+				btrfs_disk_block_group_flags(leaf, bg_item));
+		ret = -ENOENT;
+	}
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Run check_chunk_by_metadata() over the classified chunk lists of @rc.
+ *
+ * Good chunks get the full check, unrepaired chunks the block-group-only
+ * check; those failing with -ENOENT are moved to the bad list.  Chunks
+ * already on the bad list are expected to fail with -ENOENT: any other
+ * result is an error, and an unexpected success is reported as -EINVAL.
+ *
+ * Returns 0, or the first error encountered other than -ENOENT.
+ */
+static int check_all_chunks_by_metadata(struct recover_control *rc,
+					struct btrfs_root *root)
+{
+	struct list_head *checked[2];
+	struct chunk_record *chunk;
+	struct chunk_record *next;
+	LIST_HEAD(orphan_chunks);
+	int ret = 0;
+	int err;
+	int i;
+
+	checked[0] = &rc->good_chunks;		/* full check, bg_only = 0 */
+	checked[1] = &rc->unrepaired_chunks;	/* bg_only = 1 */
+
+	for (i = 0; i < 2; i++) {
+		list_for_each_entry_safe(chunk, next, checked[i], list) {
+			err = check_chunk_by_metadata(rc, root, chunk, i);
+			if (err == -ENOENT)
+				list_move_tail(&chunk->list, &orphan_chunks);
+			else if (err && !ret)
+				ret = err;
+		}
+	}
+
+	list_for_each_entry(chunk, &rc->bad_chunks, list) {
+		err = check_chunk_by_metadata(rc, root, chunk, 1);
+		if (err == -ENOENT || ret)
+			continue;
+		ret = err ? err : -EINVAL;
+	}
+
+	/* Chunks that lost their metadata join the bad list only now. */
+	list_splice(&orphan_chunks, &rc->bad_chunks);
+	return ret;
+}
+
+ /*
+  * Walk all items of @leaf and hand block group, chunk and device extent
+  * items to their respective process_*() helper.  Each helper call is made
+  * with rc->rc_lock held.  Items of any other type are ignored.
+  *
+  * Stops at, and returns, the first non-zero helper result; returns 0 when
+  * every item was processed.
+  */
+ static int extract_metadata_record(struct recover_control *rc,
+ 				   struct extent_buffer *leaf)
+ {
+ 	struct btrfs_key item_key;
+ 	u32 count = btrfs_header_nritems(leaf);
+ 	int slot;
+ 	int err = 0;
+
+ 	for (slot = 0; slot < count && !err; slot++) {
+ 		btrfs_item_key_to_cpu(leaf, &item_key, slot);
+
+ 		if (item_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+ 			pthread_mutex_lock(&rc->rc_lock);
+ 			err = process_block_group_item(&rc->bg, leaf,
+ 						       &item_key, slot);
+ 			pthread_mutex_unlock(&rc->rc_lock);
+ 		} else if (item_key.type == BTRFS_CHUNK_ITEM_KEY) {
+ 			pthread_mutex_lock(&rc->rc_lock);
+ 			err = process_chunk_item(&rc->chunk, leaf,
+ 						 &item_key, slot);
+ 			pthread_mutex_unlock(&rc->rc_lock);
+ 		} else if (item_key.type == BTRFS_DEV_EXTENT_KEY) {
+ 			pthread_mutex_lock(&rc->rc_lock);
+ 			err = process_device_extent_item(&rc->devext, leaf,
+ 							 &item_key, slot);
+ 			pthread_mutex_unlock(&rc->rc_lock);
+ 		}
+ 	}
+ 	return err;
+ }
+
+ /*
+  * Return 1 if @offset equals btrfs_sb_offset() of any of the
+  * BTRFS_SUPER_MIRROR_MAX super block mirrors, 0 otherwise.
+  */
+ static inline int is_super_block_address(u64 offset)
+ {
+ 	int mirror = 0;
+
+ 	while (mirror < BTRFS_SUPER_MIRROR_MAX) {
+ 		if (btrfs_sb_offset(mirror) == offset)
+ 			return 1;
+ 		mirror++;
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Thread body: scan one device for tree blocks belonging to this filesystem.
+  *
+  * @dev_scan_struct points to a struct device_scan carrying the recover
+  * control, the device and an already opened file descriptor.  The descriptor
+  * is closed here on every exit path, including the early failures.
+  *
+  * The device is read in rc->leafsize units.  Blocks whose fsid or checksum
+  * does not match are skipped by advancing one sector; matching blocks are
+  * recorded in rc->eb_cache, and leaves of the extent, device and chunk trees
+  * that are not newer than the super block generations are passed to
+  * extract_metadata_record().  The current position is published through
+  * dev_scan->bytenr.
+  *
+  * Returns 0 when the scan ran to the first short read, 1 if the cancel type
+  * could not be set, -ENOMEM on allocation failure, or the error reported by
+  * process_extent_buffer() / extract_metadata_record().
+  */
+ static int scan_one_device(void *dev_scan_struct)
+ {
+ 	struct extent_buffer *buf = NULL;
+ 	u64 bytenr;
+ 	int ret = 0;
+ 	struct device_scan *dev_scan = (struct device_scan *)dev_scan_struct;
+ 	struct recover_control *rc = dev_scan->rc;
+ 	struct btrfs_device *device = dev_scan->dev;
+ 	int fd = dev_scan->fd;
+ 	int oldtype;
+
+ 	ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &oldtype);
+ 	if (ret) {
+ 		/* leave through the common exit so that fd is not leaked */
+ 		ret = 1;
+ 		goto out;
+ 	}
+
+ 	buf = malloc(sizeof(*buf) + rc->leafsize);
+ 	if (!buf) {
+ 		ret = -ENOMEM;
+ 		goto out;
+ 	}
+ 	buf->len = rc->leafsize;
+
+ 	bytenr = 0;
+ 	while (1) {
+ 		dev_scan->bytenr = bytenr;
+
+ 		if (is_super_block_address(bytenr))
+ 			bytenr += rc->sectorsize;
+
+ 		/* a short read or a read error ends the scan */
+ 		if (pread64(fd, buf->data, rc->leafsize, bytenr) <
+ 		    rc->leafsize)
+ 			break;
+
+ 		if (memcmp_extent_buffer(buf, rc->fs_devices->fsid,
+ 					 btrfs_header_fsid(),
+ 					 BTRFS_FSID_SIZE)) {
+ 			bytenr += rc->sectorsize;
+ 			continue;
+ 		}
+
+ 		if (verify_tree_block_csum_silent(buf, rc->csum_size)) {
+ 			bytenr += rc->sectorsize;
+ 			continue;
+ 		}
+
+ 		pthread_mutex_lock(&rc->rc_lock);
+ 		ret = process_extent_buffer(&rc->eb_cache, buf, device, bytenr);
+ 		pthread_mutex_unlock(&rc->rc_lock);
+ 		if (ret)
+ 			goto out;
+
+ 		/* only leaves carry the items we are interested in */
+ 		if (btrfs_header_level(buf) != 0)
+ 			goto next_node;
+
+ 		switch (btrfs_header_owner(buf)) {
+ 		case BTRFS_EXTENT_TREE_OBJECTID:
+ 		case BTRFS_DEV_TREE_OBJECTID:
+ 			/* different tree use different generation */
+ 			if (btrfs_header_generation(buf) > rc->generation)
+ 				break;
+ 			ret = extract_metadata_record(rc, buf);
+ 			if (ret)
+ 				goto out;
+ 			break;
+ 		case BTRFS_CHUNK_TREE_OBJECTID:
+ 			if (btrfs_header_generation(buf) >
+ 			    rc->chunk_root_generation)
+ 				break;
+ 			ret = extract_metadata_record(rc, buf);
+ 			if (ret)
+ 				goto out;
+ 			break;
+ 		}
+ next_node:
+ 		bytenr += rc->leafsize;
+ 	}
+ out:
+ 	close(fd);
+ 	free(buf);
+ 	return ret;
+ }
+
+ /*
+  * Scan all devices of rc->fs_devices in parallel, one thread per device
+  * running scan_one_device(), and print a progress line once per second
+  * until every thread has been joined.
+  *
+  * A file descriptor is opened for each device and handed to its thread,
+  * which closes it.  Descriptors that were opened but never handed to a
+  * thread (a later open() or pthread_create() failed) are closed here.
+  *
+  * dev_scans[i].bytenr == -1 marks a slot whose thread is not running
+  * (not started yet, or already joined).
+  *
+  * Returns 0 on success, -ENOMEM if the bookkeeping arrays cannot be
+  * allocated, and 1 on any other failure.
+  */
+ static int scan_devices(struct recover_control *rc)
+ {
+ 	int ret = 0;
+ 	int fd;
+ 	struct btrfs_device *dev;
+ 	struct device_scan *dev_scans;
+ 	pthread_t *t_scans;
+ 	long *t_rets;
+ 	int devnr = 0;
+ 	int devidx = 0;
+ 	int started = 0;
+ 	int i;
+ 	int all_done;
+
+ 	list_for_each_entry(dev, &rc->fs_devices->devices, dev_list)
+ 		devnr++;
+ 	dev_scans = (struct device_scan *)malloc(sizeof(struct device_scan)
+ 						 * devnr);
+ 	if (!dev_scans)
+ 		return -ENOMEM;
+ 	t_scans = (pthread_t *)malloc(sizeof(pthread_t) * devnr);
+ 	if (!t_scans) {
+ 		free(dev_scans);
+ 		return -ENOMEM;
+ 	}
+ 	t_rets = (long *)malloc(sizeof(long) * devnr);
+ 	if (!t_rets) {
+ 		free(dev_scans);
+ 		free(t_scans);
+ 		return -ENOMEM;
+ 	}
+
+ 	list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
+ 		fd = open(dev->name, O_RDONLY);
+ 		if (fd < 0) {
+ 			fprintf(stderr, "Failed to open device %s\n",
+ 				dev->name);
+ 			ret = 1;
+ 			goto out2;
+ 		}
+ 		dev_scans[devidx].rc = rc;
+ 		dev_scans[devidx].dev = dev;
+ 		dev_scans[devidx].fd = fd;
+ 		dev_scans[devidx].bytenr = -1;
+ 		devidx++;
+ 	}
+
+ 	for (i = 0; i < devidx; i++) {
+ 		ret = pthread_create(&t_scans[i], NULL,
+ 				     (void *)scan_one_device,
+ 				     (void *)&dev_scans[i]);
+ 		if (ret)
+ 			goto out1;
+
+ 		dev_scans[i].bytenr = 0;
+ 		/* from here on the thread owns dev_scans[i].fd */
+ 		started++;
+ 	}
+
+ 	while (1) {
+ 		all_done = 1;
+ 		for (i = 0; i < devidx; i++) {
+ 			if (dev_scans[i].bytenr == -1)
+ 				continue;
+ 			ret = pthread_tryjoin_np(t_scans[i],
+ 						 (void **)&t_rets[i]);
+ 			if (ret == EBUSY) {
+ 				all_done = 0;
+ 				continue;
+ 			}
+ 			if (ret || t_rets[i]) {
+ 				ret = 1;
+ 				goto out1;
+ 			}
+ 			dev_scans[i].bytenr = -1;
+ 		}
+
+ 		printf("\rScanning: ");
+ 		for (i = 0; i < devidx; i++) {
+ 			if (dev_scans[i].bytenr == -1)
+ 				printf("%sDONE in dev%d",
+ 				       i ? ", " : "", i);
+ 			else
+ 				printf("%s%llu in dev%d",
+ 				       i ? ", " : "", dev_scans[i].bytenr, i);
+ 		}
+ 		/* clear chars if exist in tail */
+ 		printf(" ");
+ 		printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
+ 		fflush(stdout);
+
+ 		if (all_done) {
+ 			printf("\n");
+ 			break;
+ 		}
+
+ 		sleep(1);
+ 	}
+ out1:
+ 	/* stop every scanner that is still running */
+ 	for (i = 0; i < devidx; i++) {
+ 		if (dev_scans[i].bytenr == -1)
+ 			continue;
+ 		pthread_cancel(t_scans[i]);
+ 	}
+ out2:
+ 	/* close descriptors that were never handed over to a thread */
+ 	for (i = started; i < devidx; i++)
+ 		close(dev_scans[i].fd);
+ 	free(dev_scans);
+ 	free(t_scans);
+ 	free(t_rets);
+ 	return !!ret;
+ }
+
+ /*
+  * Build a map_lookup from @chunk and insert it into the mapping tree of
+  * root->fs_info.
+  *
+  * Every stripe's device is resolved with btrfs_find_device() using the
+  * devid and device uuid stored in the chunk record.
+  *
+  * Returns 0 on success, -ENOMEM if the map cannot be allocated, -EIO if a
+  * stripe's device is not found, or the error from insert_cache_extent().
+  * The map is freed again on every failure path.
+  */
+ static int build_device_map_by_chunk_record(struct btrfs_root *root,
+ 					    struct chunk_record *chunk)
+ {
+ 	int ret = 0;
+ 	int i;
+ 	u64 devid;
+ 	u8 uuid[BTRFS_UUID_SIZE];
+ 	u16 num_stripes;
+ 	struct btrfs_mapping_tree *map_tree;
+ 	struct map_lookup *map;
+ 	struct stripe *stripe;
+
+ 	map_tree = &root->fs_info->mapping_tree;
+ 	num_stripes = chunk->num_stripes;
+ 	map = malloc(btrfs_map_lookup_size(num_stripes));
+ 	if (!map)
+ 		return -ENOMEM;
+ 	map->ce.start = chunk->offset;
+ 	map->ce.size = chunk->length;
+ 	map->num_stripes = num_stripes;
+ 	map->io_width = chunk->io_width;
+ 	map->io_align = chunk->io_align;
+ 	map->sector_size = chunk->sector_size;
+ 	map->stripe_len = chunk->stripe_len;
+ 	map->type = chunk->type_flags;
+ 	map->sub_stripes = chunk->sub_stripes;
+
+ 	for (i = 0, stripe = chunk->stripes; i < num_stripes; i++, stripe++) {
+ 		devid = stripe->devid;
+ 		memcpy(uuid, stripe->dev_uuid, BTRFS_UUID_SIZE);
+ 		map->stripes[i].physical = stripe->offset;
+ 		map->stripes[i].dev = btrfs_find_device(root, devid,
+ 							uuid, NULL);
+ 		if (!map->stripes[i].dev) {
+ 			/* allocated with malloc(), so release with free() */
+ 			free(map);
+ 			return -EIO;
+ 		}
+ 	}
+
+ 	ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
+ 	if (ret)
+ 		/* the tree did not take the extent; do not leak the map */
+ 		free(map);
+ 	return ret;
+ }
+
+ /*
+  * Insert a device map for every chunk on rc->good_chunks and then for every
+  * chunk on rc->rebuild_chunks.  Stops at the first failure and returns its
+  * error code; returns 0 when all maps were built.
+  */
+ static int build_device_maps_by_chunk_records(struct recover_control *rc,
+ 					      struct btrfs_root *root)
+ {
+ 	struct list_head *sources[2];
+ 	struct chunk_record *rec;
+ 	int which;
+ 	int err;
+
+ 	sources[0] = &rc->good_chunks;
+ 	sources[1] = &rc->rebuild_chunks;
+
+ 	for (which = 0; which < 2; which++) {
+ 		list_for_each_entry(rec, sources[which], list) {
+ 			err = build_device_map_by_chunk_record(root, rec);
+ 			if (err)
+ 				return err;
+ 		}
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Delete from the extent tree every item whose key objectid lies inside the
+  * range [bg->objectid, bg->objectid + bg->offset), except for block group
+  * items, which are left in place.
+  *
+  * The tree is walked leaf by leaf: a run of consecutive deletable items is
+  * collected (del_s = first slot, del_nr = count), removed with
+  * btrfs_del_items(), and the search is restarted from the last key seen.
+  * For every extent / metadata item in a run the super block's bytes_used
+  * counter is rewritten.
+  *
+  * Returns 0 when the range has been processed (or the tree is empty), or the
+  * error reported by btrfs_search_slot(), btrfs_next_leaf() or
+  * btrfs_del_items().
+  */
+ static int block_group_remove_all_extent_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct block_group_record *bg)
+ {
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_key key;
+ struct btrfs_path path;
+ struct extent_buffer *leaf;
+ u64 start = bg->objectid;
+ u64 end = bg->objectid + bg->offset;
+ u64 old_val;
+ int nitems;
+ int ret;
+ int i;
+ int del_s, del_nr;
+
+ btrfs_init_path(&path);
+ /* all work below is done on the extent tree, whatever root was passed */
+ root = root->fs_info->extent_root;
+
+ key.objectid = start;
+ key.offset = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ again:
+ ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
+ if (ret < 0)
+ goto err;
+ else if (ret > 0)
+ ret = 0;
+
+ leaf = path.nodes[0];
+ nitems = btrfs_header_nritems(leaf);
+ if (!nitems) {
+ /* The tree is empty. */
+ ret = 0;
+ goto err;
+ }
+
+ /*
+  * The search ended past the last item of this leaf: step to the next
+  * leaf and restart the search from its first key, unless that key is
+  * already outside the block group.
+  */
+ if (path.slots[0] >= nitems) {
+ ret = btrfs_next_leaf(root, &path);
+ if (ret < 0)
+ goto err;
+ if (ret > 0) {
+ ret = 0;
+ goto err;
+ }
+ leaf = path.nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, 0);
+ if (key.objectid >= end)
+ goto err;
+ btrfs_release_path(&path);
+ goto again;
+ }
+
+ del_nr = 0;
+ del_s = -1;
+ for (i = path.slots[0]; i < nitems; i++) {
+ btrfs_item_key_to_cpu(leaf, &key, i);
+ if (key.objectid >= end)
+ break;
+
+ /*
+  * Block group items are kept: skip one that precedes the run, and end
+  * the run at one that follows it so the deleted slots stay contiguous.
+  */
+ if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+ if (del_nr == 0)
+ continue;
+ else
+ break;
+ }
+
+ if (del_s == -1)
+ del_s = i;
+ del_nr++;
+ /*
+  * NOTE(review): the items are being deleted, yet bytes_used is
+  * increased here (by leafsize for metadata items, by key.offset for
+  * extent items) -- confirm that this is the intended accounting.
+  */
+ if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY) {
+ old_val = btrfs_super_bytes_used(fs_info->super_copy);
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ old_val += root->leafsize;
+ else
+ old_val += key.offset;
+ btrfs_set_super_bytes_used(fs_info->super_copy,
+ old_val);
+ }
+ }
+
+ if (del_nr) {
+ ret = btrfs_del_items(trans, root, &path, del_s, del_nr);
+ if (ret)
+ goto err;
+ }
+
+ /*
+  * Still inside the block group: search again from the last key seen.
+  * If that key was a block group item, move the search key forward by
+  * one sector so the same item is not found again.
+  */
+ if (key.objectid < end) {
+ if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+ key.objectid += root->sectorsize;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = 0;
+ }
+ btrfs_release_path(&path);
+ goto again;
+ }
+ err:
+ btrfs_release_path(&path);
+ return ret;
+ }
+
+ /*
+  * Mark the whole range of the block group cache found for bg->objectid as
+  * dirty in info->block_group_cache and info->free_space_cache, and reset
+  * the cached block group item's used counter to 0.
+  *
+  * Returns 0 on success or -ENOENT if no block group cache is found.
+  */
+ static int block_group_free_all_extent(struct btrfs_root *root,
+ 				       struct block_group_record *bg)
+ {
+ 	struct btrfs_fs_info *fs_info = root->fs_info;
+ 	struct btrfs_block_group_cache *bg_cache;
+ 	u64 first;
+ 	u64 last;
+
+ 	bg_cache = btrfs_lookup_block_group(fs_info, bg->objectid);
+ 	if (!bg_cache)
+ 		return -ENOENT;
+
+ 	/* inclusive byte range covered by the cached block group */
+ 	first = bg_cache->key.objectid;
+ 	last = first + bg_cache->key.offset - 1;
+
+ 	set_extent_bits(&fs_info->block_group_cache, first, last,
+ 			BLOCK_GROUP_DIRTY, GFP_NOFS);
+ 	set_extent_dirty(&fs_info->free_space_cache, first, last, GFP_NOFS);
+
+ 	btrfs_set_block_group_used(&bg_cache->item, 0);
+
+ 	return 0;
+ }
+
+ /*
+  * For every SYSTEM chunk on rc->good_chunks: remove all extent items of its
+  * block group and then mark the block group's space as free.  Chunks of
+  * other types are skipped.
+  *
+  * Returns 0 on success or the first error from either step.
+  */
+ static int remove_chunk_extent_item(struct btrfs_trans_handle *trans,
+ 				    struct recover_control *rc,
+ 				    struct btrfs_root *root)
+ {
+ 	struct chunk_record *rec;
+ 	int err;
+
+ 	list_for_each_entry(rec, &rc->good_chunks, list) {
+ 		if (rec->type_flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+ 			err = block_group_remove_all_extent_items(trans, root,
+ 								  rec->bg_rec);
+ 			if (!err)
+ 				err = block_group_free_all_extent(root,
+ 								  rec->bg_rec);
+ 			if (err)
+ 				return err;
+ 		}
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Allocate a fresh, empty leaf and install it as the node of @root (the
+  * chunk root).
+  *
+  * The first key passed to the allocator is the device item of the smallest
+  * devid found in rc->fs_devices.  The new block's header is initialised
+  * (bytenr, generation, nritems = 0, level = 0, backref revision, owner, fsid
+  * and chunk tree uuid) and the buffer is marked dirty.
+  *
+  * Returns 0 on success or a negative errno if the block cannot be allocated.
+  */
+ static int __rebuild_chunk_root(struct btrfs_trans_handle *trans,
+ 				struct recover_control *rc,
+ 				struct btrfs_root *root)
+ {
+ 	u64 min_devid = -1;
+ 	struct btrfs_device *dev;
+ 	struct extent_buffer *cow;
+ 	struct btrfs_disk_key disk_key;
+
+ 	list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
+ 		if (min_devid > dev->devid)
+ 			min_devid = dev->devid;
+ 	}
+ 	btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID);
+ 	btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY);
+ 	btrfs_set_disk_key_offset(&disk_key, min_devid);
+
+ 	cow = btrfs_alloc_free_block(trans, root, root->nodesize,
+ 				     BTRFS_CHUNK_TREE_OBJECTID,
+ 				     &disk_key, 0, 0, 0);
+ 	/* the allocator can fail; never touch an invalid buffer */
+ 	if (!cow)
+ 		return -ENOMEM;
+ 	if (IS_ERR(cow))
+ 		return PTR_ERR(cow);
+
+ 	btrfs_set_header_bytenr(cow, cow->start);
+ 	btrfs_set_header_generation(cow, trans->transid);
+ 	btrfs_set_header_nritems(cow, 0);
+ 	btrfs_set_header_level(cow, 0);
+ 	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+ 	btrfs_set_header_owner(cow, BTRFS_CHUNK_TREE_OBJECTID);
+ 	write_extent_buffer(cow, root->fs_info->fsid,
+ 			    btrfs_header_fsid(), BTRFS_FSID_SIZE);
+
+ 	write_extent_buffer(cow, root->fs_info->chunk_tree_uuid,
+ 			    btrfs_header_chunk_tree_uuid(cow),
+ 			    BTRFS_UUID_SIZE);
+
+ 	root->node = cow;
+ 	btrfs_mark_buffer_dirty(cow);
+
+ 	return 0;
+ }
+
+ /*
+  * Insert one device item into @root for every device of rc->fs_devices.
+  *
+  * The item is assembled on the stack from the in-memory btrfs_device.  It is
+  * zeroed first so that fields which are not set explicitly below do not
+  * carry stack garbage to disk.
+  *
+  * Returns 0 on success, or the error of the first failing
+  * btrfs_insert_item(); no further devices are processed after a failure.
+  */
+ static int __rebuild_device_items(struct btrfs_trans_handle *trans,
+ 				  struct recover_control *rc,
+ 				  struct btrfs_root *root)
+ {
+ 	struct btrfs_device *dev;
+ 	struct btrfs_key key;
+ 	struct btrfs_dev_item dev_item_tmp;
+ 	struct btrfs_dev_item *dev_item = &dev_item_tmp;
+ 	int ret = 0;
+
+ 	list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
+ 		memset(dev_item, 0, sizeof(*dev_item));
+
+ 		key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+ 		key.type = BTRFS_DEV_ITEM_KEY;
+ 		key.offset = dev->devid;
+
+ 		btrfs_set_stack_device_generation(dev_item, 0);
+ 		btrfs_set_stack_device_type(dev_item, dev->type);
+ 		btrfs_set_stack_device_id(dev_item, dev->devid);
+ 		btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
+ 		btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
+ 		btrfs_set_stack_device_io_align(dev_item, dev->io_align);
+ 		btrfs_set_stack_device_io_width(dev_item, dev->io_width);
+ 		btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
+ 		memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
+ 		memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
+
+ 		ret = btrfs_insert_item(trans, root, &key,
+ 					dev_item, sizeof(*dev_item));
+ 		/* do not let a later success hide this failure */
+ 		if (ret)
+ 			break;
+ 	}
+
+ 	return ret;
+ }
+
+ /*
+  * Create an on-disk chunk item from @chunk_rec and insert it into
+  * @chunk_root under the key
+  * (BTRFS_FIRST_CHUNK_TREE_OBJECTID, BTRFS_CHUNK_ITEM_KEY, chunk_rec->offset).
+  *
+  * Returns -ENOMEM if the item cannot be created, otherwise the result of
+  * btrfs_insert_item().  The temporary item is freed before returning.
+  */
+ static int __insert_chunk_item(struct btrfs_trans_handle *trans,
+ 			       struct chunk_record *chunk_rec,
+ 			       struct btrfs_root *chunk_root)
+ {
+ 	struct btrfs_chunk *item = create_chunk_item(chunk_rec);
+ 	struct btrfs_key item_key;
+ 	int err;
+
+ 	if (!item)
+ 		return -ENOMEM;
+
+ 	item_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+ 	item_key.type = BTRFS_CHUNK_ITEM_KEY;
+ 	item_key.offset = chunk_rec->offset;
+
+ 	err = btrfs_insert_item(trans, chunk_root, &item_key, item,
+ 				btrfs_chunk_item_size(chunk_rec->num_stripes));
+ 	free(item);
+ 	return err;
+ }
+
+ /*
+  * Insert a chunk item into the chunk root for every record on
+  * rc->good_chunks, followed by every record on rc->rebuild_chunks.
+  *
+  * Returns 0 on success or the first insertion error.
+  */
+ static int __rebuild_chunk_items(struct btrfs_trans_handle *trans,
+ 				 struct recover_control *rc,
+ 				 struct btrfs_root *root)
+ {
+ 	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
+ 	struct list_head *sources[2];
+ 	struct chunk_record *rec;
+ 	int which;
+ 	int err;
+
+ 	sources[0] = &rc->good_chunks;
+ 	sources[1] = &rc->rebuild_chunks;
+
+ 	for (which = 0; which < 2; which++) {
+ 		list_for_each_entry(rec, sources[which], list) {
+ 			err = __insert_chunk_item(trans, rec, chunk_root);
+ 			if (err)
+ 				return err;
+ 		}
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Rebuild the chunk tree in three steps, all operating on
+  * root->fs_info->chunk_root: create a new empty root leaf, insert the
+  * device items, then insert the chunk items.
+  *
+  * Returns 0 on success or the error of the first step that fails.
+  */
+ static int rebuild_chunk_tree(struct btrfs_trans_handle *trans,
+ 			      struct recover_control *rc,
+ 			      struct btrfs_root *root)
+ {
+ 	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
+ 	int err;
+
+ 	err = __rebuild_chunk_root(trans, rc, chunk_root);
+ 	if (!err)
+ 		err = __rebuild_device_items(trans, rc, chunk_root);
+ 	if (!err)
+ 		err = __rebuild_chunk_items(trans, rc, chunk_root);
+
+ 	return err;
+ }
+
+ /*
+  * Reset the super block's sys_array size to 0 and re-add every SYSTEM chunk
+  * found on rc->good_chunks via btrfs_add_system_chunk().
+  *
+  * Returns 0 on success, -ENOMEM if a chunk item cannot be created, or the
+  * error from btrfs_add_system_chunk().
+  */
+ static int rebuild_sys_array(struct recover_control *rc,
+ 			     struct btrfs_root *root)
+ {
+ 	struct chunk_record *rec;
+ 	struct btrfs_chunk *item;
+ 	struct btrfs_key item_key;
+ 	u16 stripes;
+ 	int err = 0;
+
+ 	btrfs_set_super_sys_array_size(root->fs_info->super_copy, 0);
+
+ 	list_for_each_entry(rec, &rc->good_chunks, list) {
+ 		if ((rec->type_flags & BTRFS_BLOCK_GROUP_SYSTEM) == 0)
+ 			continue;
+
+ 		stripes = rec->num_stripes;
+ 		item = create_chunk_item(rec);
+ 		if (!item)
+ 			return -ENOMEM;
+
+ 		item_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+ 		item_key.type = BTRFS_CHUNK_ITEM_KEY;
+ 		item_key.offset = rec->offset;
+
+ 		err = btrfs_add_system_chunk(NULL, root, &item_key, item,
+ 					     btrfs_chunk_item_size(stripes));
+ 		free(item);
+ 		if (err)
+ 			return err;
+ 	}
+ 	return err;
+ }
+
+ /*
+  * Sum up the space used inside the chunk described by @chunk_rec by walking
+  * the extent tree forward from the position stored in @path.
+  *
+  * @path must have been positioned by a search in @extent_root; it is
+  * advanced item by item (path->slots[0] is updated, leaves are changed with
+  * btrfs_next_leaf()) until a key at or beyond the end of the chunk is found
+  * or the tree ends.  Metadata items account for one node, extent items for
+  * the length stored in their key offset; all other items are ignored.
+  *
+  * On success 0 is returned and the sum is stored in *used.  A negative
+  * error from btrfs_next_leaf() is returned as is and *used is left
+  * untouched.
+  */
+ static int calculate_bg_used(struct btrfs_root *extent_root,
+ 			     struct chunk_record *chunk_rec,
+ 			     struct btrfs_path *path,
+ 			     u64 *used)
+ {
+ 	struct extent_buffer *node;
+ 	struct btrfs_key found_key;
+ 	u64 chunk_end = chunk_rec->offset + chunk_rec->length;
+ 	u64 used_ret = 0;
+ 	int ret = 0;
+
+ 	while (1) {
+ 		node = path->nodes[0];
+
+ 		/*
+ 		 * The slot may point past the last item, either right after
+ 		 * the search or after consuming the leaf: move to the next
+ 		 * leaf before reading a key.
+ 		 */
+ 		if (path->slots[0] >= btrfs_header_nritems(node)) {
+ 			ret = btrfs_next_leaf(extent_root, path);
+ 			if (ret > 0) {
+ 				/* end of the tree is not an error */
+ 				ret = 0;
+ 				break;
+ 			}
+ 			if (ret < 0)
+ 				break;
+ 			continue;
+ 		}
+
+ 		btrfs_item_key_to_cpu(node, &found_key, path->slots[0]);
+ 		if (found_key.objectid >= chunk_end)
+ 			break;
+
+ 		if (found_key.type == BTRFS_METADATA_ITEM_KEY)
+ 			used_ret += extent_root->nodesize;
+ 		else if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
+ 			used_ret += found_key.offset;
+
+ 		/* advance the path itself, it is re-read on every pass */
+ 		path->slots[0]++;
+ 	}
+ 	if (!ret)
+ 		*used = used_ret;
+ 	return ret;
+ }
+
+ /*
+  * Insert a block group item for @chunk_rec into @extent_root.
+  *
+  * The item records @used as the number of used bytes, the chunk tree
+  * objectid BTRFS_FIRST_CHUNK_TREE_OBJECTID (the objectid under which this
+  * file keys its chunk items) and the chunk's type flags.  Its key is
+  * (chunk_rec->offset, BTRFS_BLOCK_GROUP_ITEM_KEY, chunk_rec->length).
+  *
+  * Returns the result of btrfs_insert_item().
+  */
+ static int __insert_block_group(struct btrfs_trans_handle *trans,
+ 				struct chunk_record *chunk_rec,
+ 				struct btrfs_root *extent_root,
+ 				u64 used)
+ {
+ 	struct btrfs_block_group_item bg_item;
+ 	struct btrfs_key key;
+ 	int ret = 0;
+
+ 	btrfs_set_block_group_used(&bg_item, used);
+ 	/* this field names the chunk tree object, it is not a byte count */
+ 	btrfs_set_block_group_chunk_objectid(&bg_item,
+ 					     BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ 	btrfs_set_block_group_flags(&bg_item, chunk_rec->type_flags);
+ 	key.objectid = chunk_rec->offset;
+ 	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ 	key.offset = chunk_rec->length;
+
+ 	ret = btrfs_insert_item(trans, extent_root, &key, &bg_item,
+ 				sizeof(bg_item));
+ 	return ret;
+ }
+
+/*
+ * Search through the extent tree to rebuild the 'used' member of the block
+ * group.
+ * However, since block group and extent item shares the extent tree,
+ * the extent item may also missing.
+ * In that case, we fill the 'used' with the length of the block group to
+ * ensure no write into the block group.
+ * Btrfsck will hate it but we will inform user to call '--init-extent-tree'
+ * if possible, or just salvage as much data as possible from the fs.
+ */
+static int rebuild_block_group(struct btrfs_trans_handle *trans,
+ struct recover_control *rc,
+ struct btrfs_root *root)
+{
+ struct chunk_record *chunk_rec;
+ struct btrfs_key search_key;
+ struct btrfs_path *path;
+ u64 used = 0;
+ int ret = 0;
+
+ if (list_empty(&rc->rebuild_chunks))
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ list_for_each_entry(chunk_rec, &rc->rebuild_chunks, list) {
+ search_key.objectid = chunk_rec->offset;
+ search_key.type = BTRFS_EXTENT_ITEM_KEY;
+ search_key.offset = 0;
+ ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
+ &search_key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ ret = calculate_bg_used(root->fs_info->extent_root,
+ chunk_rec, path, &used);
+ /*
+ * Extent tree is damaged, better to rebuild the whole extent
+ * tree. Currently, change the used to chunk's len to prevent
+ * write/block reserve happening in that block group.
+ */
+ if (ret < 0) {
+ fprintf(stderr,
+ "Fail to search extent tree for block group: [%llu,%llu]\n",
+ chunk_rec->offset,
+ chunk_rec->offset + chunk_rec->length);
+ fprintf(stderr,
+ "Mark the block group full to prevent block rsv problems\n");
+ used = chunk_rec->length;
+ }
+ btrfs_release_path(path);
+ ret = __insert_block_group(trans, chunk_rec,
+ root->fs_info->extent_root,
+ used);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+ /*
+  * Open the filesystem without reading the on-disk chunk tree.
+  *
+  * A new fs_info is set up on top of rc->fs_devices (opened O_RDWR), the
+  * super block is read from the latest device, and the logical-to-physical
+  * mapping is populated from the chunk records collected in @rc instead of
+  * from the chunk tree.  All other roots are then set up through
+  * btrfs_setup_all_roots().
+  *
+  * Returns fs_info->fs_root on success, or an ERR_PTR() carrying the error
+  * code after undoing the steps that had already succeeded.
+  */
+ static struct btrfs_root *
+ open_ctree_with_broken_chunk(struct recover_control *rc)
+ {
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_super_block *disk_super;
+ struct extent_buffer *eb;
+ u32 sectorsize;
+ u32 nodesize;
+ u32 leafsize;
+ u32 stripesize;
+ int ret;
+
+ fs_info = btrfs_new_fs_info(1, BTRFS_SUPER_INFO_OFFSET);
+ if (!fs_info) {
+ fprintf(stderr, "Failed to allocate memory for fs_info\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ fs_info->is_chunk_recover = 1;
+
+ fs_info->fs_devices = rc->fs_devices;
+ ret = btrfs_open_devices(fs_info->fs_devices, O_RDWR);
+ if (ret)
+ goto out;
+
+ disk_super = fs_info->super_copy;
+ ret = btrfs_read_dev_super(fs_info->fs_devices->latest_bdev,
+ disk_super, fs_info->super_bytenr, 1);
+ if (ret) {
+ fprintf(stderr, "No valid btrfs found\n");
+ goto out_devices;
+ }
+
+ memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE);
+
+ ret = btrfs_check_fs_compatibility(disk_super, 1);
+ if (ret)
+ goto out_devices;
+
+ nodesize = btrfs_super_nodesize(disk_super);
+ leafsize = btrfs_super_leafsize(disk_super);
+ sectorsize = btrfs_super_sectorsize(disk_super);
+ stripesize = btrfs_super_stripesize(disk_super);
+
+ /* only the root structure is initialised here, no tree block is read */
+ __setup_root(nodesize, leafsize, sectorsize, stripesize,
+ fs_info->chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+
+ /* fill the mapping tree from the recovered chunk records */
+ ret = build_device_maps_by_chunk_records(rc, fs_info->chunk_root);
+ if (ret)
+ goto out_cleanup;
+
+ ret = btrfs_setup_all_roots(fs_info, 0, 0);
+ if (ret)
+ goto out_failed;
+
+ /* take the chunk tree uuid from the header of the tree root's node */
+ eb = fs_info->tree_root->node;
+ read_extent_buffer(eb, fs_info->chunk_tree_uuid,
+ btrfs_header_chunk_tree_uuid(eb),
+ BTRFS_UUID_SIZE);
+
+ return fs_info->fs_root;
+ /* error unwinding: each label undoes one more setup step */
+ out_failed:
+ btrfs_release_all_roots(fs_info);
+ out_cleanup:
+ btrfs_cleanup_all_caches(fs_info);
+ out_devices:
+ btrfs_close_devices(fs_info->fs_devices);
+ out:
+ btrfs_free_fs_info(fs_info);
+ return ERR_PTR(ret);
+ }
+
+ /*
+  * Read the super block of the device at @path, copy the geometry and
+  * generation values needed for the scan into @rc, and collect the devices
+  * of the filesystem into rc->fs_devices.
+  *
+  * Seed devices are rejected.  The device list is printed when rc->verbose
+  * is set.
+  *
+  * Returns 0 on success, -1 if the device cannot be opened or is a seed
+  * device, or the error from btrfs_read_dev_super() /
+  * btrfs_scan_fs_devices().
+  */
+ static int recover_prepare(struct recover_control *rc, char *path)
+ {
+ 	char raw_super[BTRFS_SUPER_INFO_SIZE];
+ 	struct btrfs_super_block *super;
+ 	struct btrfs_fs_devices *devices;
+ 	int dev_fd;
+ 	int err;
+
+ 	dev_fd = open(path, O_RDONLY);
+ 	if (dev_fd < 0) {
+ 		fprintf(stderr, "open %s\n error.\n", path);
+ 		return -1;
+ 	}
+
+ 	super = (struct btrfs_super_block*)raw_super;
+ 	err = btrfs_read_dev_super(dev_fd, super, BTRFS_SUPER_INFO_OFFSET, 1);
+ 	if (err) {
+ 		fprintf(stderr, "read super block error\n");
+ 		goto close_fd;
+ 	}
+
+ 	rc->sectorsize = btrfs_super_sectorsize(super);
+ 	rc->leafsize = btrfs_super_leafsize(super);
+ 	rc->generation = btrfs_super_generation(super);
+ 	rc->chunk_root_generation = btrfs_super_chunk_root_generation(super);
+ 	rc->csum_size = btrfs_super_csum_size(super);
+
+ 	/* if seed, the result of scanning below will be partial */
+ 	if (btrfs_super_flags(super) & BTRFS_SUPER_FLAG_SEEDING) {
+ 		fprintf(stderr, "this device is seed device\n");
+ 		err = -1;
+ 		goto close_fd;
+ 	}
+
+ 	err = btrfs_scan_fs_devices(dev_fd, path, &devices, 0, 1, 0);
+ 	if (!err) {
+ 		rc->fs_devices = devices;
+
+ 		if (rc->verbose)
+ 			print_all_devices(&rc->fs_devices->devices);
+ 	}
+
+ close_fd:
+ 	close(dev_fd);
+ 	return err;
+ }
+
+ /*
+  * Move every device extent on @orphan_devexts whose chunk_offset equals
+  * @chunk_object to the tail of @ret_list.
+  *
+  * Returns the number of device extents moved.
+  */
+ static int btrfs_get_device_extents(u64 chunk_object,
+ 				    struct list_head *orphan_devexts,
+ 				    struct list_head *ret_list)
+ {
+ 	struct device_extent_record *cur;
+ 	struct device_extent_record *tmp;
+ 	int moved = 0;
+
+ 	list_for_each_entry_safe(cur, tmp, orphan_devexts, chunk_list) {
+ 		if (cur->chunk_offset != chunk_object)
+ 			continue;
+ 		list_move_tail(&cur->chunk_list, ret_list);
+ 		moved++;
+ 	}
+ 	return moved;
+ }
+
+ /*
+  * Return the stripe count implied by the block group profile in @type:
+  * 0 for RAID0/RAID10/RAID5/RAID6, 2 for RAID1 and DUP, and 1 for
+  * everything else.
+  */
+ static int calc_num_stripes(u64 type)
+ {
+ 	const u64 striped = BTRFS_BLOCK_GROUP_RAID0 |
+ 			    BTRFS_BLOCK_GROUP_RAID10 |
+ 			    BTRFS_BLOCK_GROUP_RAID5 |
+ 			    BTRFS_BLOCK_GROUP_RAID6;
+ 	const u64 mirrored = BTRFS_BLOCK_GROUP_RAID1 |
+ 			     BTRFS_BLOCK_GROUP_DUP;
+
+ 	if (type & striped)
+ 		return 0;
+ 	if (type & mirrored)
+ 		return 2;
+ 	return 1;
+ }
+
+ /* Return 2 if @type has the RAID10 profile bit set, 1 otherwise. */
+ static inline int calc_sub_nstripes(u64 type)
+ {
+ 	return (type & BTRFS_BLOCK_GROUP_RAID10) ? 2 : 1;
+ }
+
+ /*
+  * Check that the @ndevexts device extents on @devexts are plausible for
+  * block group @bg: the count must match the profile's stripe count when
+  * calc_num_stripes() reports one, it must pass check_num_stripes(), and
+  * every device extent must have the stripe length computed by
+  * calc_stripe_length().
+  *
+  * Returns 0 if all checks pass, 1 otherwise.
+  */
+ static int btrfs_verify_device_extents(struct block_group_record *bg,
+ 				       struct list_head *devexts, int ndevexts)
+ {
+ 	struct device_extent_record *cur;
+ 	u64 want_len;
+ 	int fixed_count = calc_num_stripes(bg->flags);
+
+ 	if (fixed_count != 0 && ndevexts != fixed_count)
+ 		return 1;
+
+ 	if (check_num_stripes(bg->flags, ndevexts) < 0)
+ 		return 1;
+
+ 	want_len = calc_stripe_length(bg->flags, bg->offset, ndevexts);
+ 	list_for_each_entry(cur, devexts, chunk_list) {
+ 		if (cur->length != want_len)
+ 			return 1;
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Fill chunk->stripes[] from the device extents on chunk->dextents in list
+  * order: stripe i gets the devid and offset of the i-th device extent and
+  * the uuid of the device found for that devid.
+  *
+  * Returns 0 on success or -ENOENT if a device cannot be found.  BUG_ON()
+  * fires if the lookup with a last argument of 1 also finds a device.
+  */
+ static int btrfs_rebuild_unordered_chunk_stripes(struct recover_control *rc,
+ 						 struct chunk_record *chunk)
+ {
+ 	struct device_extent_record *cur;
+ 	struct btrfs_device *found;
+ 	int idx = 0;
+
+ 	cur = list_first_entry(&chunk->dextents, struct device_extent_record,
+ 			       chunk_list);
+ 	while (idx < chunk->num_stripes) {
+ 		chunk->stripes[idx].devid = cur->objectid;
+ 		chunk->stripes[idx].offset = cur->offset;
+
+ 		found = btrfs_find_device_by_devid(rc->fs_devices,
+ 						   cur->objectid,
+ 						   0);
+ 		if (!found)
+ 			return -ENOENT;
+ 		BUG_ON(btrfs_find_device_by_devid(rc->fs_devices,
+ 						  cur->objectid,
+ 						  1));
+
+ 		memcpy(chunk->stripes[idx].dev_uuid, found->uuid,
+ 		       BTRFS_UUID_SIZE);
+
+ 		cur = list_next_entry(cur, chunk_list);
+ 		idx++;
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Compute the index into chunk->stripes[] that holds the logical address
+  * @logical, for the RAID0, RAID10, RAID5 and RAID6 profiles.
+  *
+  * Returns the stripe index, or -1 for any other profile.
+  */
+ static int btrfs_calc_stripe_index(struct chunk_record *chunk, u64 logical)
+ {
+ 	u64 in_chunk = logical - chunk->offset;
+ 	int stripe_nr = in_chunk / chunk->stripe_len;
+ 	int data_stripes;
+ 	int idx;
+
+ 	if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID0)
+ 		return stripe_nr % chunk->num_stripes;
+
+ 	if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID10) {
+ 		idx = stripe_nr % (chunk->num_stripes / chunk->sub_stripes);
+ 		idx *= chunk->sub_stripes;
+ 		return idx;
+ 	}
+
+ 	if (chunk->type_flags & (BTRFS_BLOCK_GROUP_RAID5 |
+ 				 BTRFS_BLOCK_GROUP_RAID6)) {
+ 		/* one parity stripe for RAID5, two for RAID6 */
+ 		if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID5)
+ 			data_stripes = chunk->num_stripes - 1;
+ 		else
+ 			data_stripes = chunk->num_stripes - 2;
+
+ 		idx = stripe_nr % data_stripes;
+ 		stripe_nr /= data_stripes;
+ 		/* the data position rotates by one with every full stripe */
+ 		return (idx + stripe_nr) % chunk->num_stripes;
+ 	}
+
+ 	return -1;
+ }
+
+ /*
+  * Return the logical address at which the stripe following the one that
+  * contains @logical begins, i.e. @logical rounded down to a stripe_len
+  * boundary relative to chunk->offset, plus one stripe_len.
+  */
+ static inline u64 btrfs_next_stripe_logical_offset(struct chunk_record *chunk,
+ 						   u64 logical)
+ {
+ 	u64 stripe_nr = logical - chunk->offset;
+
+ 	stripe_nr /= chunk->stripe_len;
+
+ 	return (stripe_nr + 1) * chunk->stripe_len + chunk->offset;
+ }
+
+ /*
+  * Test whether one of the mirrors of extent record @er lies on the device
+  * of @dext and inside the range [dext->offset, dext->offset + dext->length).
+  *
+  * Returns 1 and stores the index of the first such mirror in *mirror, or
+  * returns 0 and leaves *mirror untouched.
+  */
+ static int is_extent_record_in_device_extent(struct extent_record *er,
+ 					     struct device_extent_record *dext,
+ 					     int *mirror)
+ {
+ 	int idx;
+
+ 	for (idx = 0; idx < er->nmirrors; idx++) {
+ 		if (er->devices[idx]->devid != dext->objectid)
+ 			continue;
+ 		if (er->offsets[idx] < dext->offset)
+ 			continue;
+ 		if (er->offsets[idx] >= dext->offset + dext->length)
+ 			continue;
+
+ 		*mirror = idx;
+ 		return 1;
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Rebuild chunk->stripes[] for a metadata chunk whose stripe order matters.
+  *
+  * The tree blocks found during the scan (rc->eb_cache) that fall inside the
+  * chunk are used as evidence: for each of them the stripe index is computed
+  * from its logical address, and the device extent that contains one of its
+  * mirrors is assigned to that index.  Device extents are moved from a
+  * private list back to chunk->dextents as they are assigned.
+  *
+  * Device extents that are still unassigned afterwards hold no known data:
+  * for RAID5/RAID6 this is reported as -EINVAL, for the other profiles they
+  * are handed out to the remaining empty stripes in list order.
+  *
+  * If the chunk contains no scanned tree block at all, the unordered rebuild
+  * is used instead.
+  *
+  * Returns 0 on success, -EINVAL on the failures described above or when a
+  * device cannot be found, or the result of the unordered rebuild.
+  */
+ static int
+ btrfs_rebuild_ordered_meta_chunk_stripes(struct recover_control *rc,
+ struct chunk_record *chunk)
+ {
+ u64 start = chunk->offset;
+ u64 end = chunk->offset + chunk->length;
+ struct cache_extent *cache;
+ struct extent_record *er;
+ struct device_extent_record *devext;
+ struct device_extent_record *next;
+ struct btrfs_device *device;
+ LIST_HEAD(devexts);
+ int index;
+ int mirror;
+ int ret;
+
+ cache = lookup_cache_extent(&rc->eb_cache,
+ start, chunk->length);
+ if (!cache) {
+ /* No used space, we can reorder the stripes freely. */
+ ret = btrfs_rebuild_unordered_chunk_stripes(rc, chunk);
+ return ret;
+ }
+
+ /* work on a private list; assigned extents go back to chunk->dextents */
+ list_splice_init(&chunk->dextents, &devexts);
+ again:
+ er = container_of(cache, struct extent_record, cache);
+ index = btrfs_calc_stripe_index(chunk, er->cache.start);
+ BUG_ON(index == -1);
+ /* a non-zero devid means this stripe has already been assigned */
+ if (chunk->stripes[index].devid)
+ goto next;
+ /*
+  * NOTE(review): index is incremented for every matching device extent
+  * and is not checked against chunk->num_stripes inside this loop --
+  * confirm that it cannot run past the stripes array.
+  */
+ list_for_each_entry_safe(devext, next, &devexts, chunk_list) {
+ if (is_extent_record_in_device_extent(er, devext, &mirror)) {
+ chunk->stripes[index].devid = devext->objectid;
+ chunk->stripes[index].offset = devext->offset;
+ memcpy(chunk->stripes[index].dev_uuid,
+ er->devices[mirror]->uuid,
+ BTRFS_UUID_SIZE);
+ index++;
+ list_move(&devext->chunk_list, &chunk->dextents);
+ }
+ }
+ next:
+ /* continue with the first tree block at or after the next stripe start */
+ start = btrfs_next_stripe_logical_offset(chunk, er->cache.start);
+ if (start >= end)
+ goto no_extent_record;
+
+ cache = lookup_cache_extent(&rc->eb_cache, start, end - start);
+ if (cache)
+ goto again;
+ no_extent_record:
+ if (list_empty(&devexts))
+ return 0;
+
+ if (chunk->type_flags & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+ /* Fixme: try to recover the order by the parity block. */
+ list_splice_tail(&devexts, &chunk->dextents);
+ return -EINVAL;
+ }
+
+ /* There is no data on the lost stripes, we can reorder them freely. */
+ for (index = 0; index < chunk->num_stripes; index++) {
+ if (chunk->stripes[index].devid)
+ continue;
+
+ devext = list_first_entry(&devexts,
+ struct device_extent_record,
+ chunk_list);
+ list_move(&devext->chunk_list, &chunk->dextents);
+
+ chunk->stripes[index].devid = devext->objectid;
+ chunk->stripes[index].offset = devext->offset;
+ device = btrfs_find_device_by_devid(rc->fs_devices,
+ devext->objectid,
+ 0);
+ if (!device) {
+ /* hand the unassigned extents back before bailing out */
+ list_splice_tail(&devexts, &chunk->dextents);
+ return -EINVAL;
+ }
+ BUG_ON(btrfs_find_device_by_devid(rc->fs_devices,
+ devext->objectid,
+ 1));
+ memcpy(chunk->stripes[index].dev_uuid, device->uuid,
+ BTRFS_UUID_SIZE);
+ }
+ return 0;
+ }
+
+ /* Profiles for which the order of the stripes within a chunk matters. */
+ #define BTRFS_ORDERED_RAID (BTRFS_BLOCK_GROUP_RAID0 | \
+ 			    BTRFS_BLOCK_GROUP_RAID10 | \
+ 			    BTRFS_BLOCK_GROUP_RAID5 | \
+ 			    BTRFS_BLOCK_GROUP_RAID6)
+
+ /*
+  * Rebuild the stripe array of @chunk with the strategy that fits its type:
+  *
+  *  - METADATA chunk with an ordered profile: ordered rebuild based on the
+  *    scanned tree blocks;
+  *  - DATA chunk with an ordered profile: nothing is done here and 1 is
+  *    returned, the chunk is handled after the fs is opened;
+  *  - anything else: the stripes may be assigned in any order.
+  *
+  * All the data in the system metadata chunk will be dropped, so its
+  * contents need not be preserved and its stripes can be reordered freely.
+  *
+  * Returns the result of the chosen rebuild routine, or 1 as noted above.
+  */
+ static int btrfs_rebuild_chunk_stripes(struct recover_control *rc,
+ 				       struct chunk_record *chunk)
+ {
+ 	const int ordered = (chunk->type_flags & BTRFS_ORDERED_RAID) != 0;
+
+ 	if (ordered && (chunk->type_flags & BTRFS_BLOCK_GROUP_METADATA))
+ 		return btrfs_rebuild_ordered_meta_chunk_stripes(rc, chunk);
+
+ 	if (ordered && (chunk->type_flags & BTRFS_BLOCK_GROUP_DATA))
+ 		return 1; /* Be handled after the fs is opened. */
+
+ 	return btrfs_rebuild_unordered_chunk_stripes(rc, chunk);
+ }
+
+ /*
+  * Fetch the checksum at position *csum_offset of the csum item at
+  * (*leaf, *slot) into *tree_csum.
+  *
+  * When *csum_offset is past the last checksum of the current item, the
+  * cursor first moves to the next item (and to the next leaf of the csum
+  * tree if necessary), *csum_offset is reset to 0 and *key is reloaded.
+  *
+  * Returns 0 when a checksum was read, 2 when the data offset covered by the
+  * cursor is at or beyond @end, 1 when the csum tree has no further leaf, and
+  * -1 when moving to the next leaf failed.
+  */
+ static int next_csum(struct btrfs_root *root,
+ 		     struct extent_buffer **leaf,
+ 		     struct btrfs_path *path,
+ 		     int *slot,
+ 		     u64 *csum_offset,
+ 		     u32 *tree_csum,
+ 		     u64 end,
+ 		     struct btrfs_key *key)
+ {
+ 	struct btrfs_root *csum_root = root->fs_info->csum_root;
+ 	u32 blocksize = root->sectorsize;
+ 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ 	int per_item = btrfs_item_size_nr(*leaf, *slot) / csum_size;
+ 	unsigned char *item_base;
+ 	int err;
+
+ 	if (*csum_offset >= per_item) {
+ 		/* the current item is exhausted, step to the next one */
+ 		*slot += 1;
+ 		*csum_offset = 0;
+ 		if (*slot >= btrfs_header_nritems(*leaf)) {
+ 			err = btrfs_next_leaf(csum_root, path);
+ 			if (err < 0)
+ 				return -1;
+ 			if (err > 0)
+ 				return 1;
+ 			*leaf = path->nodes[0];
+ 			*slot = path->slots[0];
+ 		}
+ 		btrfs_item_key_to_cpu(*leaf, key, *slot);
+ 	}
+
+ 	if (key->offset + (*csum_offset) * blocksize >= end)
+ 		return 2;
+
+ 	item_base = (unsigned char *)btrfs_item_ptr(*leaf, *slot,
+ 						    struct btrfs_csum_item);
+ 	read_extent_buffer(*leaf, tree_csum,
+ 			   (unsigned long)(item_base +
+ 					   (*csum_offset) * csum_size),
+ 			   csum_size);
+ 	return 0;
+ }
+
+ /*
+  * Translate the data block addressed by (@key->offset, @csum_offset) into a
+  * device offset, for a stripe that starts at @dev_offset on the device.
+  *
+  * The block's offset inside the chunk is split into a logical stripe
+  * number and an offset within that stripe; the logical stripe number is
+  * divided by the number of data stripes (num_stripes minus 1 for RAID5,
+  * minus 2 for RAID6) to get the stripe number on the device.
+  */
+ static u64 calc_data_offset(struct btrfs_key *key,
+ 			    struct chunk_record *chunk,
+ 			    u64 dev_offset,
+ 			    u64 csum_offset,
+ 			    u32 blocksize)
+ {
+ 	u64 in_chunk = key->offset + csum_offset * blocksize - chunk->offset;
+ 	int data_stripes = chunk->num_stripes;
+ 	int logical_nr;
+ 	int device_nr;
+
+ 	if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID5)
+ 		data_stripes -= 1;
+ 	else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID6)
+ 		data_stripes -= 2;
+
+ 	logical_nr = in_chunk / chunk->stripe_len;
+ 	device_nr = logical_nr / data_stripes;
+
+ 	/* keep the offset within the stripe, swap the stripe number */
+ 	in_chunk -= logical_nr * chunk->stripe_len;
+ 	in_chunk += device_nr * chunk->stripe_len;
+
+ 	return dev_offset + in_chunk;
+ }
+
+ /*
+  * Read @len bytes at offset @start from @fd, checksum them and compare the
+  * result with @tree_csum.
+  *
+  * Returns 0 if the checksums match, 1 if they differ, and -1 if the buffer
+  * cannot be allocated or fewer than @len bytes could be read.
+  */
+ static int check_one_csum(int fd, u64 start, u32 len, u32 tree_csum)
+ {
+ 	u32 csum = ~(u32)0;
+ 	char *block;
+ 	int nread;
+ 	int result;
+
+ 	block = malloc(len);
+ 	if (!block)
+ 		return -1;
+
+ 	nread = pread64(fd, block, len, start);
+ 	if (nread < 0 || nread != len) {
+ 		result = -1;
+ 	} else {
+ 		csum = btrfs_csum_data(NULL, block, csum, len);
+ 		btrfs_csum_final(csum, (char *)&csum);
+ 		result = (csum != tree_csum) ? 1 : 0;
+ 	}
+
+ 	free(block);
+ 	return result;
+ }
+
+ /*
+  * Return the data offset just past the range covered by the csum item at
+  * (@leaf, @slot): the number of checksums stored in the item times the
+  * sector size, added to @key->offset.
+  */
+ static u64 item_end_offset(struct btrfs_root *root, struct btrfs_key *key,
+ 			   struct extent_buffer *leaf, int slot) {
+ 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ 	u32 blocksize = root->sectorsize;
+ 	u64 nr_csums = btrfs_item_size_nr(leaf, slot);
+
+ 	nr_csums /= csum_size;
+
+ 	return nr_csums * blocksize + key->offset;
+ }
+
+ /*
+  * Assign the first device extent on @devexts to chunk->stripes[@index] and
+  * move it onto chunk->dextents.
+  *
+  * Returns 0 on success or 1 if no device with the extent's devid is found;
+  * in that case nothing is modified.  BUG_ON() fires if the lookup with a
+  * last argument of 1 also finds a device.
+  */
+ static int insert_stripe(struct list_head *devexts,
+ 			 struct recover_control *rc,
+ 			 struct chunk_record *chunk,
+ 			 int index) {
+ 	struct device_extent_record *first;
+ 	struct btrfs_device *found;
+
+ 	first = list_first_entry(devexts, struct device_extent_record,
+ 				 chunk_list);
+
+ 	found = btrfs_find_device_by_devid(rc->fs_devices, first->objectid,
+ 					   0);
+ 	if (!found)
+ 		return 1;
+ 	BUG_ON(btrfs_find_device_by_devid(rc->fs_devices, first->objectid,
+ 					  1));
+
+ 	chunk->stripes[index].devid = first->objectid;
+ 	chunk->stripes[index].offset = first->offset;
+ 	memcpy(chunk->stripes[index].dev_uuid, found->uuid, BTRFS_UUID_SIZE);
+
+ 	list_move(&first->chunk_list, &chunk->dextents);
+
+ 	return 0;
+ }
+
+/* Number of entries linked on @record_list. */
+static inline int count_devext_records(struct list_head *record_list)
+{
+	struct list_head *pos;
+	int count = 0;
+
+	for (pos = record_list->next; pos != record_list; pos = pos->next)
+		count++;
+
+	return count;
+}
+
+/*
+ * Hand device extents from @devexts to every stripe of @chunk that has no
+ * devid yet, in stripe order.  Stops at, and returns, the first non-zero
+ * result of insert_stripe(); returns 0 otherwise.
+ */
+static int fill_chunk_up(struct chunk_record *chunk, struct list_head *devexts,
+			 struct recover_control *rc)
+{
+	int idx;
+	int err;
+
+	for (idx = 0; idx < chunk->num_stripes; idx++) {
+		if (chunk->stripes[idx].devid)
+			continue;
+
+		err = insert_stripe(devexts, rc, chunk, idx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+#define EQUAL_STRIPE (1 << 0)
+
+/*
+ * Work out which device extent backs which stripe of a data chunk by
+ * checking on-disk data against the checksum tree.
+ *
+ * All device extents of the chunk start out on the local "candidates" list.
+ * The chunk is walked one stripe_len-sized logical window at a time.  For a
+ * window a checksum is fetched from the csum tree and one block is read from
+ * every candidate at the offset calc_data_offset() yields; candidates whose
+ * data does not match are parked on "unordered".  When exactly one candidate
+ * is left it is assigned to the stripe index btrfs_calc_stripe_index()
+ * reports for that logical address.
+ *
+ * @rc:    recovery state; rc->fs_devices is used to resolve devids
+ * @root:  gives access to the csum root and the sector size
+ * @chunk: chunk whose stripes[] get filled in; its dextents list is emptied
+ *         on entry and refilled before returning
+ * @flags: EQUAL_STRIPE is OR-ed in when next_csum() returns 1
+ *
+ * Returns 0 when every device extent was placed, 1 on any error or when
+ * device extents are left over.
+ */
+static int rebuild_raid_data_chunk_stripes(struct recover_control *rc,
+					   struct btrfs_root *root,
+					   struct chunk_record *chunk,
+					   u8 *flags)
+{
+	int i;
+	int ret = 0;
+	int slot;
+	struct btrfs_path path;
+	struct btrfs_key prev_key;
+	struct btrfs_key key;
+	struct btrfs_root *csum_root;
+	struct extent_buffer *leaf;
+	struct device_extent_record *devext;
+	struct device_extent_record *next;
+	struct btrfs_device *dev;
+	u64 start = chunk->offset;
+	u64 end = start + chunk->stripe_len;
+	u64 chunk_end = chunk->offset + chunk->length;
+	u64 csum_offset = 0;
+	u64 data_offset;
+	u32 blocksize = root->sectorsize;
+	u32 tree_csum;
+	int index = 0;
+	int num_unordered = 0;
+	LIST_HEAD(unordered);
+	LIST_HEAD(candidates);
+
+	csum_root = root->fs_info->csum_root;
+	btrfs_init_path(&path);
+	list_splice_init(&chunk->dextents, &candidates);
+again:
+	/* a single remaining candidate needs no further checksum probing */
+	if (list_is_last(candidates.next, &candidates))
+		goto out;
+
+	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+	key.type = BTRFS_EXTENT_CSUM_KEY;
+	key.offset = start;
+
+	ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Search csum failed(%d)\n", ret);
+		goto fail_out;
+	}
+	leaf = path.nodes[0];
+	slot = path.slots[0];
+	if (ret > 0) {
+		/* no exact match: look at the neighbouring csum items */
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(csum_root, &path);
+			if (ret < 0) {
+				fprintf(stderr,
+					"Walk tree failed(%d)\n", ret);
+				goto fail_out;
+			} else if (ret > 0) {
+				/* no next leaf: fall back to the last item */
+				slot = btrfs_header_nritems(leaf) - 1;
+				btrfs_item_key_to_cpu(leaf, &key, slot);
+				if (item_end_offset(root, &key, leaf, slot)
+								> start) {
+					csum_offset = start - key.offset;
+					csum_offset /= blocksize;
+					goto next_csum;
+				}
+				goto next_stripe;
+			}
+			leaf = path.nodes[0];
+			slot = path.slots[0];
+		}
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		ret = btrfs_previous_item(csum_root, &path, 0,
+					  BTRFS_EXTENT_CSUM_KEY);
+		if (ret < 0)
+			goto fail_out;
+		else if (ret > 0) {
+			if (key.offset >= end)
+				goto next_stripe;
+			else
+				goto next_csum;
+		}
+		leaf = path.nodes[0];
+		slot = path.slots[0];
+
+		btrfs_item_key_to_cpu(leaf, &prev_key, slot);
+		if (item_end_offset(root, &prev_key, leaf, slot) > start) {
+			/* the previous item already covers 'start' */
+			csum_offset = start - prev_key.offset;
+			csum_offset /= blocksize;
+			btrfs_item_key_to_cpu(leaf, &key, slot);
+		} else {
+			if (key.offset >= end)
+				goto next_stripe;
+		}
+
+		if (key.offset + csum_offset * blocksize > chunk_end)
+			goto out;
+	}
+next_csum:
+	ret = next_csum(root, &leaf, &path, &slot, &csum_offset, &tree_csum,
+			end, &key);
+	if (ret < 0) {
+		fprintf(stderr, "Fetch csum failed\n");
+		goto fail_out;
+	} else if (ret == 1) {
+		if (!(*flags & EQUAL_STRIPE))
+			*flags |= EQUAL_STRIPE;
+		goto out;
+	} else if (ret == 2)
+		goto next_stripe;
+
+	/* move every candidate whose block does not match to 'unordered' */
+	list_for_each_entry_safe(devext, next, &candidates, chunk_list) {
+		data_offset = calc_data_offset(&key, chunk, devext->offset,
+					       csum_offset, blocksize);
+		dev = btrfs_find_device_by_devid(rc->fs_devices,
+						 devext->objectid, 0);
+		if (!dev) {
+			ret = 1;
+			goto fail_out;
+		}
+		BUG_ON(btrfs_find_device_by_devid(rc->fs_devices,
+						  devext->objectid, 1));
+
+		ret = check_one_csum(dev->fd, data_offset, blocksize,
+				     tree_csum);
+		if (ret < 0)
+			goto fail_out;
+		else if (ret > 0)
+			list_move(&devext->chunk_list, &unordered);
+	}
+
+	if (list_empty(&candidates)) {
+		num_unordered = count_devext_records(&unordered);
+		if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID6
+					&& num_unordered == 2) {
+			ret = fill_chunk_up(chunk, &unordered, rc);
+			/*
+			 * Leave through fail_out rather than returning here:
+			 * if fill_chunk_up() stopped early, the remaining
+			 * records are still linked to the on-stack
+			 * 'unordered' head and must be spliced back onto
+			 * chunk->dextents.  fail_out also releases the path.
+			 */
+			goto fail_out;
+		}
+
+		goto next_stripe;
+	}
+
+	if (list_is_last(candidates.next, &candidates)) {
+		index = btrfs_calc_stripe_index(chunk,
+			key.offset + csum_offset * blocksize);
+		BUG_ON(index == -1);
+		if (chunk->stripes[index].devid)
+			goto next_stripe;
+		ret = insert_stripe(&candidates, rc, chunk, index);
+		if (ret)
+			goto fail_out;
+	} else {
+		/* still ambiguous: try the next checksum in this window */
+		csum_offset++;
+		goto next_csum;
+	}
+next_stripe:
+	start = btrfs_next_stripe_logical_offset(chunk, start);
+	end = min(start + chunk->stripe_len, chunk_end);
+	list_splice_init(&unordered, &candidates);
+	btrfs_release_path(&path);
+	csum_offset = 0;
+	if (end < chunk_end)
+		goto again;
+out:
+	ret = 0;
+	list_splice_init(&candidates, &unordered);
+	num_unordered = count_devext_records(&unordered);
+	if (num_unordered == 1) {
+		/* the last extent takes the first stripe that is still free */
+		for (i = 0; i < chunk->num_stripes; i++) {
+			if (!chunk->stripes[i].devid) {
+				index = i;
+				break;
+			}
+		}
+		ret = insert_stripe(&unordered, rc, chunk, index);
+		if (ret)
+			goto fail_out;
+	} else {
+		if ((num_unordered == 2 && chunk->type_flags
+			& BTRFS_BLOCK_GROUP_RAID5)
+		 || (num_unordered == 3 && chunk->type_flags
+			& BTRFS_BLOCK_GROUP_RAID6)) {
+			ret = fill_chunk_up(chunk, &unordered, rc);
+		}
+	}
+fail_out:
+	/* anything still unplaced counts as failure */
+	ret = !!ret || (list_empty(&unordered) ? 0 : 1);
+	list_splice_init(&candidates, &chunk->dextents);
+	list_splice_init(&unordered, &chunk->dextents);
+	btrfs_release_path(&path);
+
+	return ret;
+}
+
+/*
+ * Run rebuild_raid_data_chunk_stripes() on every data chunk with an ordered
+ * RAID profile that sits on rc->unrepaired_chunks.  Chunks that could be
+ * rebuilt move to rc->good_chunks, the others to rc->bad_chunks.
+ *
+ * Returns 0 if all such chunks were rebuilt, else the first error seen.
+ */
+static int btrfs_rebuild_ordered_data_chunk_stripes(struct recover_control *rc,
+						    struct btrfs_root *root)
+{
+	struct chunk_record *chunk;
+	struct chunk_record *next;
+	int first_err = 0;
+	int err;
+	u8 flags;
+
+	list_for_each_entry_safe(chunk, next, &rc->unrepaired_chunks, list) {
+		if (!(chunk->type_flags & BTRFS_BLOCK_GROUP_DATA) ||
+		    !(chunk->type_flags & BTRFS_ORDERED_RAID))
+			continue;
+
+		flags = 0;
+		err = rebuild_raid_data_chunk_stripes(rc, root, chunk,
+						      &flags);
+		if (!err) {
+			list_move(&chunk->list, &rc->good_chunks);
+			continue;
+		}
+
+		list_move(&chunk->list, &rc->bad_chunks);
+		if (flags & EQUAL_STRIPE)
+			fprintf(stderr,
+		"Failure: too many equal stripes in chunk[%llu %llu]\n",
+				chunk->offset, chunk->length);
+		if (!first_err)
+			first_err = err;
+	}
+
+	return first_err;
+}
+
+/*
+ * Create a chunk record for every block group on rc->bg.block_groups.
+ *
+ * Each new chunk is described from its block group (logical start, length,
+ * flags, generation), inserted into the rc->chunk cache tree and then filed
+ * on one of the result lists:
+ *   - rc->bad_chunks        no device extents were found, the device extents
+ *                           fail verification, or the stripe rebuild
+ *                           returned < 0
+ *   - rc->unrepaired_chunks the stripe rebuild returned > 0
+ *   - rc->good_chunks       the stripe rebuild returned 0
+ *
+ * Returns 0, or -ENOMEM when a chunk record cannot be allocated.
+ */
+static int btrfs_recover_chunks(struct recover_control *rc)
+{
+	struct chunk_record *chunk;
+	struct block_group_record *bg;
+	struct block_group_record *next;
+	LIST_HEAD(devexts);
+	int nstripes;
+	int ret;
+
+	/* create the chunk by block group */
+	list_for_each_entry_safe(bg, next, &rc->bg.block_groups, list) {
+		nstripes = btrfs_get_device_extents(bg->objectid,
+						&rc->devext.no_chunk_orphans,
+						&devexts);
+		chunk = calloc(1, btrfs_chunk_record_size(nstripes));
+		if (!chunk) {
+			/*
+			 * 'devexts' lives on the stack; hand back whatever
+			 * was collected so no record stays linked to it.
+			 * NOTE(review): assumes btrfs_get_device_extents()
+			 * moved these entries off no_chunk_orphans - confirm.
+			 */
+			list_splice_init(&devexts,
+					 &rc->devext.no_chunk_orphans);
+			return -ENOMEM;
+		}
+		INIT_LIST_HEAD(&chunk->dextents);
+		chunk->bg_rec = bg;
+		chunk->cache.start = bg->objectid;
+		chunk->cache.size = bg->offset;
+		chunk->objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+		chunk->type = BTRFS_CHUNK_ITEM_KEY;
+		chunk->offset = bg->objectid;
+		chunk->generation = bg->generation;
+		chunk->length = bg->offset;
+		chunk->owner = BTRFS_CHUNK_TREE_OBJECTID;
+		chunk->stripe_len = BTRFS_STRIPE_LEN;
+		chunk->type_flags = bg->flags;
+		chunk->io_width = BTRFS_STRIPE_LEN;
+		chunk->io_align = BTRFS_STRIPE_LEN;
+		chunk->sector_size = rc->sectorsize;
+		chunk->sub_stripes = calc_sub_nstripes(bg->flags);
+
+		ret = insert_cache_extent(&rc->chunk, &chunk->cache);
+		BUG_ON(ret);
+
+		list_del_init(&bg->list);
+		if (!nstripes) {
+			list_add_tail(&chunk->list, &rc->bad_chunks);
+			continue;
+		}
+
+		list_splice_init(&devexts, &chunk->dextents);
+
+		/*
+		 * The splice above emptied 'devexts', so the extents to
+		 * verify are the ones now hanging off the chunk.
+		 */
+		ret = btrfs_verify_device_extents(bg, &chunk->dextents,
+						  nstripes);
+		if (ret) {
+			list_add_tail(&chunk->list, &rc->bad_chunks);
+			continue;
+		}
+
+		chunk->num_stripes = nstripes;
+		ret = btrfs_rebuild_chunk_stripes(rc, chunk);
+		if (ret > 0)
+			list_add_tail(&chunk->list, &rc->unrepaired_chunks);
+		else if (ret < 0)
+			list_add_tail(&chunk->list, &rc->bad_chunks);
+		else
+			list_add_tail(&chunk->list, &rc->good_chunks);
+	}
+	/*
+	 * Don't worry about the lost orphan device extents, they don't
+	 * have its chunk and block group, they must be the old ones that
+	 * we have dropped.
+	 */
+	return 0;
+}
+
+/*
+ * Returns 1 when the logical ranges [offset, offset + length) of the two
+ * chunks intersect, 0 otherwise.
+ */
+static inline int is_chunk_overlap(struct chunk_record *chunk1,
+				   struct chunk_record *chunk2)
+{
+	return chunk1->offset < chunk2->offset + chunk2->length &&
+	       chunk1->offset + chunk1->length > chunk2->offset;
+}
+
+/*
+ * Any chunk on rc->rebuild_chunks that overlaps a chunk on rc->good_chunks
+ * is invalid; move it to the tail of rc->bad_chunks.
+ */
+static void validate_rebuild_chunks(struct recover_control *rc)
+{
+	struct chunk_record *candidate;
+	struct chunk_record *next;
+	struct chunk_record *good;
+	int overlaps;
+
+	list_for_each_entry_safe(candidate, next, &rc->rebuild_chunks, list) {
+		overlaps = 0;
+		list_for_each_entry(good, &rc->good_chunks, list) {
+			if (is_chunk_overlap(candidate, good)) {
+				overlaps = 1;
+				break;
+			}
+		}
+
+		if (overlaps)
+			list_move_tail(&candidate->list, &rc->bad_chunks);
+	}
+}
+
+/*
+ * Rebuild the chunk tree of the filesystem on @path.
+ *
+ * The devices are scanned, the chunks found are checked against block groups
+ * and device extents, missing chunks are recreated from orphan block groups,
+ * and - after confirmation unless @yes is set - the chunk tree, the system
+ * chunk array and the block group items are rewritten in one transaction.
+ *
+ * @path:    device or image to recover
+ * @verbose: passed on to init_recover_control()
+ * @yes:     non-zero skips the interactive confirmation
+ *
+ * Return 0 when successful, < 0 on error and > 0 if aborted by user
+ */
+int btrfs_recover_chunk_tree(char *path, int verbose, int yes)
+{
+	int ret = 0;
+	int err;
+	struct btrfs_root *root = NULL;
+	struct btrfs_trans_handle *trans;
+	struct recover_control rc;
+
+	init_recover_control(&rc, verbose, yes);
+
+	ret = recover_prepare(&rc, path);
+	if (ret) {
+		fprintf(stderr, "recover prepare error\n");
+		return ret;
+	}
+
+	ret = scan_devices(&rc);
+	if (ret) {
+		fprintf(stderr, "scan chunk headers error\n");
+		goto fail_rc;
+	}
+
+	if (cache_tree_empty(&rc.chunk) &&
+	    cache_tree_empty(&rc.bg.tree) &&
+	    cache_tree_empty(&rc.devext.tree)) {
+		fprintf(stderr, "no recoverable chunk\n");
+		goto fail_rc;
+	}
+
+	print_scan_result(&rc);
+
+	ret = check_chunks(&rc.chunk, &rc.bg, &rc.devext, &rc.good_chunks,
+			   &rc.bad_chunks, &rc.rebuild_chunks, 1);
+	if (ret) {
+		/* try to recreate chunks from what was left without one */
+		if (!list_empty(&rc.bg.block_groups) ||
+		    !list_empty(&rc.devext.no_chunk_orphans)) {
+			ret = btrfs_recover_chunks(&rc);
+			if (ret)
+				goto fail_rc;
+		}
+	} else {
+		print_check_result(&rc);
+		printf("Check chunks successfully with no orphans\n");
+		goto fail_rc;
+	}
+	validate_rebuild_chunks(&rc);
+	print_check_result(&rc);
+
+	root = open_ctree_with_broken_chunk(&rc);
+	if (IS_ERR(root)) {
+		fprintf(stderr, "open with broken chunk error\n");
+		ret = PTR_ERR(root);
+		goto fail_rc;
+	}
+
+	ret = check_all_chunks_by_metadata(&rc, root);
+	if (ret) {
+		fprintf(stderr, "The chunks in memory can not match the metadata of the fs. Repair failed.\n");
+		goto fail_close_ctree;
+	}
+
+	ret = btrfs_rebuild_ordered_data_chunk_stripes(&rc, root);
+	if (ret) {
+		fprintf(stderr, "Failed to rebuild ordered chunk stripes.\n");
+		goto fail_close_ctree;
+	}
+
+	if (!rc.yes) {
+		ret = ask_user("We are going to rebuild the chunk tree on disk, it might destroy the old metadata on the disk, Are you sure?");
+		if (!ret) {
+			/* declined: report "aborted by user" */
+			ret = 1;
+			goto fail_close_ctree;
+		}
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+	ret = remove_chunk_extent_item(trans, &rc, root);
+	BUG_ON(ret);
+
+	ret = rebuild_chunk_tree(trans, &rc, root);
+	BUG_ON(ret);
+
+	ret = rebuild_sys_array(&rc, root);
+	BUG_ON(ret);
+
+	ret = rebuild_block_group(trans, &rc, root);
+	if (ret) {
+		printf("Fail to rebuild block groups.\n");
+		printf("Recommend to run 'btrfs check --init-extent-tree <dev>' after recovery\n");
+	}
+
+	/*
+	 * A failed commit means the rebuilt tree never reached the disk, so
+	 * it must not be reported as success.  An earlier block group error
+	 * keeps precedence as the return value.
+	 */
+	err = btrfs_commit_transaction(trans, root);
+	if (err) {
+		fprintf(stderr, "Failed to commit the rebuilt chunk tree\n");
+		if (!ret)
+			ret = err;
+	}
+fail_close_ctree:
+	close_ctree(root);
+fail_rc:
+	free_recover_control(&rc);
+	return ret;
+}
diff --git a/cmds-balance.c b/cmds-balance.c
new file mode 100644
index 00000000..e0d10aae
--- /dev/null
+++ b/cmds-balance.c
@@ -0,0 +1,857 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "volumes.h"
+
+#include "commands.h"
+#include "utils.h"
+
+static const char * const balance_cmd_group_usage[] = { /* synopsis lines for the balance command group; NULL-terminated */
+ "btrfs balance <command> [options] <path>",
+ "btrfs balance <path>",
+ NULL
+};
+
+/*
+ * Translate one profile name ("raid0", "raid1", "raid10", "raid5", "raid6",
+ * "dup" or "single") into its bit and OR it into *flags.
+ *
+ * Returns 0 on success; prints an error and returns 1 for an unknown name.
+ */
+static int parse_one_profile(const char *profile, u64 *flags)
+{
+	static const struct {
+		const char *name;
+		u64 bit;
+	} known[] = {
+		{ "raid0",	BTRFS_BLOCK_GROUP_RAID0 },
+		{ "raid1",	BTRFS_BLOCK_GROUP_RAID1 },
+		{ "raid10",	BTRFS_BLOCK_GROUP_RAID10 },
+		{ "raid5",	BTRFS_BLOCK_GROUP_RAID5 },
+		{ "raid6",	BTRFS_BLOCK_GROUP_RAID6 },
+		{ "dup",	BTRFS_BLOCK_GROUP_DUP },
+		{ "single",	BTRFS_AVAIL_ALLOC_BIT_SINGLE },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		if (strcmp(profile, known[i].name) == 0) {
+			*flags |= known[i].bit;
+			return 0;
+		}
+	}
+
+	error("unknown profile: %s", profile);
+	return 1;
+}
+
+/*
+ * Parse a '|'-separated list of profile names, OR-ing each one into *flags.
+ * @profiles is modified in place by the tokenizer.
+ *
+ * Returns 0 when every name was recognized, 1 at the first one that is not.
+ */
+static int parse_profiles(char *profiles, u64 *flags)
+{
+	char *save_ptr = NULL; /* Satisfy static checkers */
+	char *name = strtok_r(profiles, "|", &save_ptr);
+
+	while (name != NULL) {
+		if (parse_one_profile(name, flags))
+			return 1;
+		name = strtok_r(NULL, "|", &save_ptr);
+	}
+
+	return 0;
+}
+
+/*
+ * Parse @str as a non-negative base-10 integer.
+ *
+ * The whole string has to be consumed.  An empty string, a value with a
+ * minus sign and a value that does not fit are all rejected.
+ *
+ * Returns 0 and stores the value in *result on success, 1 on failure
+ * (*result is left untouched).
+ */
+static int parse_u64(const char *str, u64 *result)
+{
+	char *endptr;
+	unsigned long long val;
+
+	/* strtoull() accepts "-N" and returns the negated value */
+	if (strchr(str, '-'))
+		return 1;
+
+	errno = 0;
+	val = strtoull(str, &endptr, 10);
+	/* endptr == str: nothing was converted (e.g. empty input) */
+	if (endptr == str || *endptr || errno == ERANGE)
+		return 1;
+
+	*result = val;
+	return 0;
+}
+
+/*
+ * Parse range that's missing some part that can be implicit:
+ * a..b - exact range, a can be equal to b
+ * a.. - implicitly unbounded maximum (end == (u64)-1)
+ * ..b - implicitly starting at 0
+ * a - invalid; unclear semantics, use parse_u64 instead
+ *
+ * Returned values are u64, value validation and interpretation should be done
+ * by the caller.
+ *
+ * Returns 0 on success and 1 when the string is not a range, either number
+ * is malformed, start > end, or both ends are missing ("..").
+ */
+static int parse_range(const char *range, u64 *start, u64 *end)
+{
+	char *dots;
+	char *endptr;
+	const char *rest;
+	int skipped = 0;
+
+	dots = strstr(range, "..");
+	if (!dots)
+		return 1;
+
+	rest = dots + 2;
+
+	if (!*rest) {
+		*end = (u64)-1;
+		skipped++;
+	} else {
+		*end = strtoull(rest, &endptr, 10);
+		if (*endptr)
+			return 1;
+	}
+	if (dots == range) {
+		*start = 0;
+		skipped++;
+	} else {
+		*start = strtoull(range, &endptr, 10);
+		/*
+		 * The number has to run right up to the separator.  Merely
+		 * stopping at some '.' would let ".5..10" (no digits at all)
+		 * and "5.3..10" through.
+		 */
+		if (endptr != dots)
+			return 1;
+	}
+
+	if (*start > *end) {
+		error("range %llu..%llu doesn't make sense",
+			(unsigned long long)*start,
+			(unsigned long long)*end);
+		return 1;
+	}
+
+	/* ".." with neither bound carries no information */
+	if (skipped <= 1)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Like parse_range(), but additionally insists on start < end.
+ * Returns 0 on success, 1 otherwise.
+ */
+static int parse_range_strict(const char *range, u64 *start, u64 *end)
+{
+	if (parse_range(range, start, end) != 0)
+		return 1;
+
+	if (*start < *end)
+		return 0;
+
+	error("range %llu..%llu not allowed",
+		(unsigned long long)*start,
+		(unsigned long long)*end);
+	return 1;
+}
+
+/*
+ * Convert a 64bit range to 32bit with boundary checks.
+ *
+ * An @end of (u64)-1 is accepted and becomes (u32)-1.  Returns 0 on
+ * success, 1 when either bound does not fit (outputs are not written).
+ */
+static int range_to_u32(u64 start, u64 end, u32 *start32, u32 *end32)
+{
+	const u64 limit = (u32)-1;
+
+	if (start > limit)
+		return 1;
+	if (end > limit && end != (u64)-1)
+		return 1;
+
+	*start32 = (u32)start;
+	*end32 = (u32)end;
+
+	return 0;
+}
+
+/*
+ * parse_range() for 32bit bounds: parse as u64, then narrow with
+ * range_to_u32().  Returns 0 on success, 1 on any failure.
+ */
+__attribute__ ((unused))
+static int parse_range_u32(const char *range, u32 *start, u32 *end)
+{
+	u64 lo;
+	u64 hi;
+
+	if (parse_range(range, &lo, &hi) != 0)
+		return 1;
+
+	return range_to_u32(lo, hi, start, end) ? 1 : 0;
+}
+
+/*
+ * Print "start..end" to stdout, leaving out a start of 0 and an end of
+ * (u64)-1.
+ */
+__attribute__ ((unused))
+static void print_range(u64 start, u64 end)
+{
+	const int bounded = (end != (u64)-1);
+
+	if (start != 0)
+		printf("%llu", (unsigned long long)start);
+	fputs("..", stdout);
+	if (bounded)
+		printf("%llu", (unsigned long long)end);
+}
+
+/*
+ * Print "start..end" to stdout, leaving out a start of 0 and an end of
+ * (u32)-1.
+ */
+__attribute__ ((unused))
+static void print_range_u32(u32 start, u32 end)
+{
+	const int bounded = (end != (u32)-1);
+
+	if (start != 0)
+		printf("%u", start);
+	fputs("..", stdout);
+	if (bounded)
+		printf("%u", end);
+}
+
+/*
+ * Parse a comma-separated list of balance filters into @args.
+ *
+ * Each element is "name" or "name=value".  Recognized names are profiles,
+ * usage, devid, drange, vrange, convert, soft, limit and stripes.  For every
+ * accepted filter the matching BTRFS_BALANCE_ARGS_* bit is set in
+ * args->flags.  usage and limit take either a single number or a range; the
+ * single-value and range flag bits of each are kept mutually exclusive.
+ *
+ * @filters: list to parse, modified in place; NULL means "no filters"
+ * @args:    filter arguments to fill in
+ *
+ * Returns 0 on success; prints an error and returns 1 on the first invalid
+ * filter.
+ */
+static int parse_filters(char *filters, struct btrfs_balance_args *args)
+{
+	char *this_char;
+	char *value;
+	char *save_ptr = NULL; /* Satisfy static checkers */
+
+	if (!filters)
+		return 0;
+
+	for (this_char = strtok_r(filters, ",", &save_ptr);
+	     this_char != NULL;
+	     this_char = strtok_r(NULL, ",", &save_ptr)) {
+		/* split "name=value" in place */
+		if ((value = strchr(this_char, '=')) != NULL)
+			*value++ = 0;
+		if (!strcmp(this_char, "profiles")) {
+			if (!value || !*value) {
+				error("the profiles filter requires an argument");
+				return 1;
+			}
+			if (parse_profiles(value, &args->profiles)) {
+				error("invalid profiles argument");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_PROFILES;
+		} else if (!strcmp(this_char, "usage")) {
+			if (!value || !*value) {
+				error("the usage filter requires an argument");
+				return 1;
+			}
+			if (parse_u64(value, &args->usage)) {
+				/* not a plain number, try a range */
+				if (parse_range_u32(value, &args->usage_min,
+							&args->usage_max)) {
+					error("invalid usage argument: %s",
+						value);
+					return 1;
+				}
+				/*
+				 * Usage is a percentage.  (u32)-1 is what an
+				 * open-ended "N.." range yields and stays
+				 * valid.
+				 */
+				if (args->usage_max > 100 &&
+				    args->usage_max != (u32)-1) {
+					error("invalid usage argument: %s",
+						value);
+					return 1;
+				}
+				args->flags &= ~BTRFS_BALANCE_ARGS_USAGE;
+				args->flags |= BTRFS_BALANCE_ARGS_USAGE_RANGE;
+			} else {
+				if (args->usage > 100) {
+					error("invalid usage argument: %s",
+						value);
+					return 1;
+				}
+				args->flags &= ~BTRFS_BALANCE_ARGS_USAGE_RANGE;
+				args->flags |= BTRFS_BALANCE_ARGS_USAGE;
+			}
+			/*
+			 * Do not set BTRFS_BALANCE_ARGS_USAGE again here: the
+			 * branches above already picked exactly one of the
+			 * two usage flags.
+			 */
+		} else if (!strcmp(this_char, "devid")) {
+			if (!value || !*value) {
+				error("the devid filter requires an argument");
+				return 1;
+			}
+			if (parse_u64(value, &args->devid) || args->devid == 0) {
+				error("invalid devid argument: %s", value);
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_DEVID;
+		} else if (!strcmp(this_char, "drange")) {
+			if (!value || !*value) {
+				error("the drange filter requires an argument");
+				return 1;
+			}
+			if (parse_range_strict(value, &args->pstart, &args->pend)) {
+				error("invalid drange argument");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_DRANGE;
+		} else if (!strcmp(this_char, "vrange")) {
+			if (!value || !*value) {
+				error("the vrange filter requires an argument");
+				return 1;
+			}
+			if (parse_range_strict(value, &args->vstart, &args->vend)) {
+				error("invalid vrange argument");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_VRANGE;
+		} else if (!strcmp(this_char, "convert")) {
+			if (!value || !*value) {
+				error("the convert option requires an argument");
+				return 1;
+			}
+			if (parse_one_profile(value, &args->target)) {
+				error("invalid convert argument");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_CONVERT;
+		} else if (!strcmp(this_char, "soft")) {
+			args->flags |= BTRFS_BALANCE_ARGS_SOFT;
+		} else if (!strcmp(this_char, "limit")) {
+			if (!value || !*value) {
+				error("the limit filter requires an argument");
+				return 1;
+			}
+			if (parse_u64(value, &args->limit)) {
+				/* not a plain number, try a range */
+				if (parse_range_u32(value, &args->limit_min,
+							&args->limit_max)) {
+					error("Invalid limit argument: %s",
+					       value);
+					return 1;
+				}
+				args->flags &= ~BTRFS_BALANCE_ARGS_LIMIT;
+				args->flags |= BTRFS_BALANCE_ARGS_LIMIT_RANGE;
+			} else {
+				args->flags &= ~BTRFS_BALANCE_ARGS_LIMIT_RANGE;
+				args->flags |= BTRFS_BALANCE_ARGS_LIMIT;
+			}
+		} else if (!strcmp(this_char, "stripes")) {
+			if (!value || !*value) {
+				error("the stripes filter requires an argument");
+				return 1;
+			}
+			if (parse_range_u32(value, &args->stripes_min,
+					    &args->stripes_max)) {
+				error("invalid stripes argument");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_STRIPES_RANGE;
+		} else {
+			error("unrecognized balance option: %s", this_char);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Print one line to stdout describing the filters enabled in @args: first
+ * whether this is a plain balance or a conversion, then every filter whose
+ * flag bit is set, then a newline.
+ */
+static void dump_balance_args(struct btrfs_balance_args *args)
+{
+	if (!(args->flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+		printf("balancing");
+	} else {
+		const char *soft;
+
+		soft = (args->flags & BTRFS_BALANCE_ARGS_SOFT) ? "on" : "off";
+		printf("converting, target=%llu, soft is %s",
+		       (unsigned long long)args->target, soft);
+	}
+
+	if (args->flags & BTRFS_BALANCE_ARGS_PROFILES) {
+		printf(", profiles=%llu",
+		       (unsigned long long)args->profiles);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_USAGE) {
+		printf(", usage=%llu", (unsigned long long)args->usage);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) {
+		printf(", usage=");
+		print_range_u32(args->usage_min, args->usage_max);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_DEVID) {
+		printf(", devid=%llu", (unsigned long long)args->devid);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_DRANGE) {
+		printf(", drange=%llu..%llu",
+		       (unsigned long long)args->pstart,
+		       (unsigned long long)args->pend);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_VRANGE) {
+		printf(", vrange=%llu..%llu",
+		       (unsigned long long)args->vstart,
+		       (unsigned long long)args->vend);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_LIMIT) {
+		printf(", limit=%llu", (unsigned long long)args->limit);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE) {
+		printf(", limit=");
+		print_range_u32(args->limit_min, args->limit_max);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) {
+		printf(", stripes=");
+		print_range_u32(args->stripes_min, args->stripes_max);
+	}
+
+	printf("\n");
+}
+
+/*
+ * Print the top-level flags and state of @args, followed by one
+ * dump_balance_args() line for each chunk type (data, metadata, system)
+ * selected in args->flags.
+ */
+static void dump_ioctl_balance_args(struct btrfs_ioctl_balance_args *args)
+{
+	const unsigned long long top = args->flags;
+	const char *force = (top & BTRFS_BALANCE_FORCE) ? "on" : "off";
+
+	printf("Dumping filters: flags 0x%llx, state 0x%llx, force is %s\n",
+	       top, (unsigned long long)args->state, force);
+
+	if (top & BTRFS_BALANCE_DATA) {
+		printf(" DATA (flags 0x%llx): ",
+		       (unsigned long long)args->data.flags);
+		dump_balance_args(&args->data);
+	}
+
+	if (top & BTRFS_BALANCE_METADATA) {
+		printf(" METADATA (flags 0x%llx): ",
+		       (unsigned long long)args->meta.flags);
+		dump_balance_args(&args->meta);
+	}
+
+	if (top & BTRFS_BALANCE_SYSTEM) {
+		printf(" SYSTEM (flags 0x%llx): ",
+		       (unsigned long long)args->sys.flags);
+		dump_balance_args(&args->sys);
+	}
+}
+
+/*
+ * Issue the old BTRFS_IOC_BALANCE ioctl on @fd with zeroed arguments and
+ * return its result.
+ */
+static int do_balance_v1(int fd)
+{
+	struct btrfs_ioctl_vol_args vol_args;
+
+	memset(&vol_args, 0, sizeof(vol_args));
+
+	return ioctl(fd, BTRFS_IOC_BALANCE, &vol_args);
+}
+
+/*
+ * Start a balance on the filesystem at @path.
+ *
+ * BTRFS_IOC_BALANCE_V2 is tried first.  When it fails with ENOTTY and
+ * @nofilters is set, the old ioctl is tried via do_balance_v1().
+ *
+ * Returns 0 when the balance finished or was paused/canceled (ECANCELED),
+ * 1 when the directory cannot be opened or the balance failed.
+ */
+static int do_balance(const char *path, struct btrfs_ioctl_balance_args *args,
+		      int nofilters)
+{
+	DIR *dirstream = NULL;
+	int saved_errno;
+	int status;
+	int fd;
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	status = ioctl(fd, BTRFS_IOC_BALANCE_V2, args);
+	saved_errno = errno;
+
+	if (status >= 0) {
+		printf("Done, had to relocate %llu out of %llu chunks\n",
+		       (unsigned long long)args->stat.completed,
+		       (unsigned long long)args->stat.considered);
+		status = 0;
+		goto out;
+	}
+
+	/*
+	 * older kernels don't have the new balance ioctl, try the
+	 * old one.  But, the old one doesn't know any filters, so
+	 * don't fall back if they tried to use the fancy new things
+	 */
+	if (saved_errno == ENOTTY && nofilters) {
+		status = do_balance_v1(fd);
+		if (status == 0)
+			goto out;
+		saved_errno = errno;
+	}
+
+	if (saved_errno == ECANCELED) {
+		if (args->state & BTRFS_BALANCE_STATE_PAUSE_REQ)
+			fprintf(stderr, "balance paused by user\n");
+		if (args->state & BTRFS_BALANCE_STATE_CANCEL_REQ)
+			fprintf(stderr, "balance canceled by user\n");
+		status = 0;
+	} else {
+		error("error during balancing '%s': %s", path,
+		      strerror(saved_errno));
+		if (saved_errno != EINPROGRESS)
+			fprintf(stderr,
+		"There may be more info in syslog - try dmesg | tail\n");
+		status = 1;
+	}
+
+out:
+	close_file_or_dir(fd, dirstream);
+	return status;
+}
+
+static const char * const cmd_balance_start_usage[] = { /* help text for "balance start", passed to usage(); NULL-terminated */
+ "btrfs balance start [options] <path>",
+ "Balance chunks across the devices",
+ "Balance and/or convert (change allocation profile of) chunks that",
+ "passed all filters in a comma-separated list of filters for a",
+ "particular chunk type. If filter list is not given balance all",
+ "chunks of that type. In case none of the -d, -m or -s options is",
+ "given balance all chunks in a filesystem.",
+ "",
+ "-d[filters] act on data chunks",
+ "-m[filters] act on metadata chunks",
+ "-s[filters] act on system chunks (only under -f)",
+ "-v be verbose",
+ "-f force reducing of metadata integrity",
+ NULL
+};
+
+/*
+ * "btrfs balance start [options] <path>"
+ *
+ * Parses -d/-m/-s (each with an optional filter list), -f and -v, validates
+ * the filter combination and hands the result to do_balance().
+ *
+ * Returns 1 on invalid arguments, otherwise whatever do_balance() returns.
+ */
+static int cmd_balance_start(int argc, char **argv)
+{
+	struct btrfs_ioctl_balance_args args;
+	struct btrfs_balance_args *ptrs[] = { &args.data, &args.sys,
+						&args.meta, NULL };
+	int force = 0;
+	int verbose = 0;
+	int nofilters = 1;
+	int i;
+
+	memset(&args, 0, sizeof(args));
+
+	optind = 1;
+	for (;;) {
+		static const struct option longopts[] = {
+			{ "data", optional_argument, NULL, 'd'},
+			{ "metadata", optional_argument, NULL, 'm' },
+			{ "system", optional_argument, NULL, 's' },
+			{ "force", no_argument, NULL, 'f' },
+			{ "verbose", no_argument, NULL, 'v' },
+			{ NULL, 0, NULL, 0 }
+		};
+		struct btrfs_balance_args *target = NULL;
+		unsigned long long type_bit = 0;
+		int opt;
+
+		opt = getopt_long(argc, argv, "d::s::m::fv", longopts, NULL);
+		if (opt < 0)
+			break;
+
+		switch (opt) {
+		case 'd':
+			target = &args.data;
+			type_bit = BTRFS_BALANCE_DATA;
+			break;
+		case 's':
+			target = &args.sys;
+			type_bit = BTRFS_BALANCE_SYSTEM;
+			break;
+		case 'm':
+			target = &args.meta;
+			type_bit = BTRFS_BALANCE_METADATA;
+			break;
+		case 'f':
+			force = 1;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			usage(cmd_balance_start_usage);
+		}
+
+		/* common handling of the three chunk type options */
+		if (target) {
+			nofilters = 0;
+			args.flags |= type_bit;
+
+			if (parse_filters(optarg, target))
+				return 1;
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_balance_start_usage);
+
+	/*
+	 * allow -s only under --force, otherwise do with system chunks
+	 * the same thing we were ordered to do with meta chunks
+	 */
+	if (args.flags & BTRFS_BALANCE_SYSTEM) {
+		if (!force) {
+			error(
+			    "Refusing to explicitly operate on system chunks.\n"
+			    "Pass --force if you really want to do that.");
+			return 1;
+		}
+	} else if (args.flags & BTRFS_BALANCE_METADATA) {
+		args.flags |= BTRFS_BALANCE_SYSTEM;
+		memcpy(&args.sys, &args.meta,
+			sizeof(struct btrfs_balance_args));
+	}
+
+	/* relocate everything - no filters */
+	if (nofilters)
+		args.flags |= BTRFS_BALANCE_TYPE_MASK;
+
+	/* drange makes sense only when devid is set */
+	i = 0;
+	while (ptrs[i]) {
+		if ((ptrs[i]->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
+		    !(ptrs[i]->flags & BTRFS_BALANCE_ARGS_DEVID)) {
+			error("drange filter must be used with devid filter");
+			return 1;
+		}
+		i++;
+	}
+
+	/* soft makes sense only when convert for corresponding type is set */
+	i = 0;
+	while (ptrs[i]) {
+		if ((ptrs[i]->flags & BTRFS_BALANCE_ARGS_SOFT) &&
+		    !(ptrs[i]->flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+			error("'soft' option can be used only when converting profiles");
+			return 1;
+		}
+		i++;
+	}
+
+	if (force)
+		args.flags |= BTRFS_BALANCE_FORCE;
+	if (verbose)
+		dump_ioctl_balance_args(&args);
+
+	return do_balance(argv[optind], &args, nofilters);
+}
+
+static const char * const cmd_balance_pause_usage[] = { /* help text for "balance pause", passed to usage(); NULL-terminated */
+ "btrfs balance pause <path>",
+ "Pause running balance",
+ NULL
+};
+
+/*
+ * "btrfs balance pause <path>"
+ *
+ * Returns 0 on success, 2 when no balance is running (ENOTCONN) and 1 on
+ * any other failure.
+ */
+static int cmd_balance_pause(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	const char *path;
+	int saved_errno;
+	int status;
+	int fd;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_balance_pause_usage);
+
+	path = argv[1];
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	status = ioctl(fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_PAUSE);
+	/* grab errno before closing, which may overwrite it */
+	saved_errno = errno;
+	close_file_or_dir(fd, dirstream);
+
+	if (status >= 0)
+		return 0;
+
+	error("balance pause on '%s' failed: %s", path,
+	      (saved_errno == ENOTCONN) ? "Not running" : strerror(saved_errno));
+
+	return (saved_errno == ENOTCONN) ? 2 : 1;
+}
+
+static const char * const cmd_balance_cancel_usage[] = { /* help text for "balance cancel", passed to usage(); NULL-terminated */
+ "btrfs balance cancel <path>",
+ "Cancel running or paused balance",
+ NULL
+};
+
+/*
+ * "btrfs balance cancel <path>"
+ *
+ * Returns 0 on success, 2 when no balance is in progress (ENOTCONN) and 1
+ * on any other failure.
+ */
+static int cmd_balance_cancel(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	const char *path;
+	int saved_errno;
+	int status;
+	int fd;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_balance_cancel_usage);
+
+	path = argv[1];
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	status = ioctl(fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_CANCEL);
+	/* grab errno before closing, which may overwrite it */
+	saved_errno = errno;
+	close_file_or_dir(fd, dirstream);
+
+	if (status >= 0)
+		return 0;
+
+	error("balance cancel on '%s' failed: %s", path,
+	      (saved_errno == ENOTCONN) ? "Not in progress" : strerror(saved_errno));
+
+	return (saved_errno == ENOTCONN) ? 2 : 1;
+}
+
+static const char * const cmd_balance_resume_usage[] = { /* help text for "balance resume", passed to usage(); NULL-terminated */
+ "btrfs balance resume <path>",
+ "Resume interrupted balance",
+ NULL
+};
+
+/*
+ * "btrfs balance resume <path>"
+ *
+ * Issues BTRFS_IOC_BALANCE_V2 with only BTRFS_BALANCE_RESUME set.
+ *
+ * Returns 0 when the balance completed or was paused/canceled again, 2 when
+ * there is nothing to resume (ENOTCONN) and 1 on any other failure.
+ */
+static int cmd_balance_resume(int argc, char **argv)
+{
+	struct btrfs_ioctl_balance_args args;
+	DIR *dirstream = NULL;
+	const char *path;
+	int saved_errno;
+	int status;
+	int fd;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_balance_resume_usage);
+
+	path = argv[1];
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	memset(&args, 0, sizeof(args));
+	args.flags |= BTRFS_BALANCE_RESUME;
+
+	status = ioctl(fd, BTRFS_IOC_BALANCE_V2, &args);
+	saved_errno = errno;
+	close_file_or_dir(fd, dirstream);
+
+	if (status >= 0) {
+		printf("Done, had to relocate %llu out of %llu chunks\n",
+		       (unsigned long long)args.stat.completed,
+		       (unsigned long long)args.stat.considered);
+		return 0;
+	}
+
+	if (saved_errno == ECANCELED) {
+		if (args.state & BTRFS_BALANCE_STATE_PAUSE_REQ)
+			fprintf(stderr, "balance paused by user\n");
+		if (args.state & BTRFS_BALANCE_STATE_CANCEL_REQ)
+			fprintf(stderr, "balance canceled by user\n");
+		return 0;
+	}
+
+	if (saved_errno == ENOTCONN || saved_errno == EINPROGRESS) {
+		error("balance resume on '%s' failed: %s", path,
+		      (saved_errno == ENOTCONN) ? "Not in progress" :
+						  "Already running");
+		return (saved_errno == ENOTCONN) ? 2 : 1;
+	}
+
+	error("error during balancing '%s': %s\n"
+	      "There may be more info in syslog - try dmesg | tail",
+	      path, strerror(saved_errno));
+	return 1;
+}
+
+static const char * const cmd_balance_status_usage[] = { /* help text for "balance status", passed to usage(); NULL-terminated */
+ "btrfs balance status [-v] <path>",
+ "Show status of running or paused balance",
+ "",
+ "-v be verbose",
+ NULL
+};
+
+/*
+ * "btrfs balance status [-v] <path>"
+ *
+ * Queries BTRFS_IOC_BALANCE_PROGRESS and prints whether a balance is
+ * running or paused together with its progress counters.
+ *
+ * Return codes:
+ *   2 : the balance state could not be determined
+ *   1 : a balance is pending and its status was printed
+ *   0 : there is no pending balance (or it has completed)
+ *
+ * NOTE(review): the "left" percentage divides by args.stat.expected without
+ * checking it for zero - confirm the kernel never reports 0 here.
+ */
+static int cmd_balance_status(int argc, char **argv)
+{
+	struct btrfs_ioctl_balance_args args;
+	DIR *dirstream = NULL;
+	const char *path;
+	const char *suffix;
+	int verbose = 0;
+	int saved_errno;
+	int status;
+	int fd;
+
+	optind = 1;
+	for (;;) {
+		static const struct option longopts[] = {
+			{ "verbose", no_argument, NULL, 'v' },
+			{ NULL, 0, NULL, 0 }
+		};
+		int opt = getopt_long(argc, argv, "v", longopts, NULL);
+
+		if (opt < 0)
+			break;
+
+		if (opt == 'v')
+			verbose = 1;
+		else
+			usage(cmd_balance_status_usage);
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_balance_status_usage);
+
+	path = argv[optind];
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 2;
+
+	status = ioctl(fd, BTRFS_IOC_BALANCE_PROGRESS, &args);
+	saved_errno = errno;
+	close_file_or_dir(fd, dirstream);
+
+	if (status < 0) {
+		if (saved_errno != ENOTCONN) {
+			error("balance status on '%s' failed: %s", path,
+			      strerror(saved_errno));
+			return 2;
+		}
+		printf("No balance found on '%s'\n", path);
+		return 0;
+	}
+
+	if (!(args.state & BTRFS_BALANCE_STATE_RUNNING)) {
+		printf("Balance on '%s' is paused\n", path);
+	} else {
+		/* a cancel request takes precedence over a pause request */
+		if (args.state & BTRFS_BALANCE_STATE_CANCEL_REQ)
+			suffix = ", cancel requested\n";
+		else if (args.state & BTRFS_BALANCE_STATE_PAUSE_REQ)
+			suffix = ", pause requested\n";
+		else
+			suffix = "\n";
+
+		printf("Balance on '%s' is running", path);
+		fputs(suffix, stdout);
+	}
+
+	printf("%llu out of about %llu chunks balanced (%llu considered), "
+	       "%3.f%% left\n", (unsigned long long)args.stat.completed,
+	       (unsigned long long)args.stat.expected,
+	       (unsigned long long)args.stat.considered,
+	       100 * (1 - (float)args.stat.completed/args.stat.expected));
+
+	if (verbose)
+		dump_ioctl_balance_args(&args);
+
+	return 1;
+}
+
+/* One-line description of the balance command group. */
+static const char balance_cmd_group_info[] =
+"balance data across devices, or change block groups using filters";
+
+const struct cmd_group balance_cmd_group = { /* subcommand table handed to handle_command_group() by cmd_balance() */
+ balance_cmd_group_usage, balance_cmd_group_info, {
+ { "start", cmd_balance_start, cmd_balance_start_usage, NULL, 0 },
+ { "pause", cmd_balance_pause, cmd_balance_pause_usage, NULL, 0 },
+ { "cancel", cmd_balance_cancel, cmd_balance_cancel_usage, NULL, 0 },
+ { "resume", cmd_balance_resume, cmd_balance_resume_usage, NULL, 0 },
+ { "status", cmd_balance_status, cmd_balance_status_usage, NULL, 0 },
+ NULL_CMD_STRUCT
+ }
+};
+
+/*
+ * Entry point of "btrfs balance".
+ *
+ * With exactly one argument the old 'btrfs filesystem balance <path>'
+ * syntax is assumed and an unfiltered balance of all chunk types is
+ * started; everything else is dispatched through balance_cmd_group.
+ */
+int cmd_balance(int argc, char **argv)
+{
+	struct btrfs_ioctl_balance_args args;
+
+	if (argc != 2)
+		return handle_command_group(&balance_cmd_group, argc, argv);
+
+	/* old 'btrfs filesystem balance <path>' syntax */
+	memset(&args, 0, sizeof(args));
+	args.flags |= BTRFS_BALANCE_TYPE_MASK;
+
+	return do_balance(argv[1], &args, 1);
+}
diff --git a/cmds-check.c b/cmds-check.c
new file mode 100644
index 00000000..0165fba4
--- /dev/null
+++ b/cmds-check.c
@@ -0,0 +1,9844 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <uuid/uuid.h>
+#include "ctree.h"
+#include "volumes.h"
+#include "repair.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "task-utils.h"
+#include "transaction.h"
+#include "utils.h"
+#include "commands.h"
+#include "free-space-cache.h"
+#include "free-space-tree.h"
+#include "btrfsck.h"
+#include "qgroup-verify.h"
+#include "rbtree-utils.h"
+#include "backref.h"
+#include "ulist.h"
+
+/*
+ * Phase of the check. print_status_check() uses the value as an index into
+ * its table of status strings, which has no entry for TASK_NOTHING.
+ */
+enum task_position {
+ TASK_EXTENTS,
+ TASK_FREE_SPACE,
+ TASK_FS_ROOTS,
+ TASK_NOTHING, /* must be the last element */
+};
+
+/* Context handed to print_status_check() through its void * argument. */
+struct task_ctx {
+ int progress_enabled;
+ enum task_position tp; /* selects the status string that is printed */
+
+ struct task_info *info; /* given to task_period_start()/task_period_wait() */
+};
+
+/*
+ * File-scope state. Everything here is static, i.e. private to this
+ * translation unit, and starts out zeroed or as an empty list.
+ */
+static u64 bytes_used = 0;
+static u64 total_csum_bytes = 0;
+static u64 total_btree_bytes = 0;
+static u64 total_fs_tree_bytes = 0;
+static u64 total_extent_tree_bytes = 0;
+static u64 btree_space_waste = 0;
+static u64 data_bytes_allocated = 0;
+static u64 data_bytes_referenced = 0;
+static int found_old_backref = 0;
+static LIST_HEAD(duplicate_extents);
+static LIST_HEAD(delete_items);
+static int repair = 0;
+static int no_holes = 0;
+static int init_extent_tree = 0;
+static int check_data_csum = 0;
+static struct btrfs_fs_info *global_info;
+static struct task_ctx ctx = { 0 };
+
+/*
+ * Progress reporter.
+ *
+ * @p: pointer to the struct task_ctx describing the current phase
+ *
+ * Starts the period timer on priv->info and returns NULL right away when
+ * the phase is TASK_NOTHING. Otherwise it loops without end: print the
+ * phase text followed by a rotating indicator character and '\r', flush
+ * stdout, then wait for the next period.
+ */
+static void *print_status_check(void *p)
+{
+	static char *task_position_string[] = {
+		"checking extents",
+		"checking free space cache",
+		"checking fs roots",
+	};
+	const char work_indicator[] = { '.', 'o', 'O', 'o' };
+	struct task_ctx *priv = p;
+	uint32_t count;
+
+	task_period_start(priv->info, 1000 /* 1s */);
+
+	if (priv->tp == TASK_NOTHING)
+		return NULL;
+
+	for (count = 0; ; count++) {
+		printf("%s [%c]\r", task_position_string[priv->tp],
+		       work_indicator[count % 4]);
+		fflush(stdout);
+		task_period_wait(priv->info);
+	}
+	return NULL;
+}
+
+/*
+ * Prints a newline and flushes stdout, which moves the cursor off the
+ * '\r'-terminated status line written by print_status_check().
+ * @p is unused. Always returns 0.
+ */
+static int print_status_return(void *p)
+{
+ printf("\n");
+ fflush(stdout);
+
+ return 0;
+}
+
+/*
+ * Common part of a back reference. It is embedded as the member 'node' at
+ * the start of both struct data_backref and struct tree_backref.
+ * NOTE(review): is_data presumably tells the two containers apart - confirm.
+ */
+struct extent_backref {
+ struct list_head list;
+ unsigned int is_data:1;
+ unsigned int found_extent_tree:1;
+ unsigned int full_backref:1;
+ unsigned int found_ref:1;
+ unsigned int broken:1;
+};
+
+/*
+ * Back reference record for a data extent. 'parent' and 'root' share
+ * storage through the anonymous union, so only one of them is meaningful
+ * for a given record.
+ */
+struct data_backref {
+ struct extent_backref node;
+ union {
+ u64 parent;
+ u64 root;
+ };
+ u64 owner;
+ u64 offset;
+ u64 disk_bytenr;
+ u64 bytes;
+ u64 ram_bytes;
+ u32 num_refs;
+ u32 found_ref;
+};
+
+/*
+ * Much like data_backref, but without the members that cannot be
+ * determined, and linked through a plain list_head.
+ * During extent scan, it is stored in root->orphan_data_extent.
+ * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
+ */
+struct orphan_data_extent {
+ struct list_head list;
+ u64 root;
+ u64 objectid;
+ u64 offset;
+ u64 disk_bytenr;
+ u64 disk_len;
+};
+
+/*
+ * Back reference record for a tree block. 'parent' and 'root' share
+ * storage through the anonymous union.
+ */
+struct tree_backref {
+ struct extent_backref node;
+ union {
+ u64 parent;
+ u64 root;
+ };
+};
+
+/*
+ * Bookkeeping for one extent. The record carries three list heads
+ * (backrefs, dups, list) and an embedded cache_extent so that it can be
+ * kept in a cache tree; the one-bit flags record individual findings.
+ * NOTE(review): per-field semantics are inferred from names - confirm
+ * against the code that fills the record.
+ */
+struct extent_record {
+ struct list_head backrefs;
+ struct list_head dups;
+ struct list_head list;
+ struct cache_extent cache;
+ struct btrfs_disk_key parent_key;
+ u64 start;
+ u64 max_size;
+ u64 nr;
+ u64 refs;
+ u64 extent_item_refs;
+ u64 generation;
+ u64 parent_generation;
+ u64 info_objectid;
+ u32 num_duplicates;
+ u8 info_level;
+ int flag_block_full_backref;
+ unsigned int found_rec:1;
+ unsigned int content_checked:1;
+ unsigned int owner_ref_checked:1;
+ unsigned int is_root:1;
+ unsigned int metadata:1;
+ unsigned int bad_full_backref:1;
+ unsigned int crossing_stripes:1;
+ unsigned int wrong_chunk_type:1;
+};
+
+/*
+ * One (directory, name) reference to an inode, kept on
+ * inode_record.backrefs. add_inode_backref() sets the found_* bits as the
+ * dir item, dir index and inode ref for the name are encountered, and
+ * collects REF_ERR_* bits in 'errors'.
+ *
+ * 'name' holds namelen bytes followed by a NUL; the storage is allocated
+ * together with the structure in get_inode_backref().
+ */
+struct inode_backref {
+ struct list_head list;
+ unsigned int found_dir_item:1;
+ unsigned int found_dir_index:1;
+ unsigned int found_inode_ref:1;
+ unsigned int filetype:8;
+ int errors;
+ unsigned int ref_type;
+ u64 dir;
+ u64 index;
+ u16 namelen;
+ char name[0];
+};
+
+/*
+ * List-linked record describing one root item.
+ * NOTE(review): field semantics are inferred from names - confirm against
+ * the code that fills the record.
+ */
+struct root_item_record {
+ struct list_head list;
+ u64 objectid;
+ u64 bytenr;
+ u64 last_snapshot;
+ u8 level;
+ u8 drop_level;
+ int level_size;
+ struct btrfs_key drop_key;
+};
+
+/*
+ * Error bits for a single reference. They are OR-ed into
+ * inode_backref.errors by add_inode_backref() and maybe_free_inode_rec()
+ * and are decoded into text by print_ref_error().
+ */
+#define REF_ERR_NO_DIR_ITEM (1 << 0)
+#define REF_ERR_NO_DIR_INDEX (1 << 1)
+#define REF_ERR_NO_INODE_REF (1 << 2)
+#define REF_ERR_DUP_DIR_ITEM (1 << 3)
+#define REF_ERR_DUP_DIR_INDEX (1 << 4)
+#define REF_ERR_DUP_INODE_REF (1 << 5)
+#define REF_ERR_INDEX_UNMATCH (1 << 6)
+#define REF_ERR_FILETYPE_UNMATCH (1 << 7)
+#define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
+#define REF_ERR_NO_ROOT_REF (1 << 9)
+#define REF_ERR_NO_ROOT_BACKREF (1 << 10)
+#define REF_ERR_DUP_ROOT_REF (1 << 11)
+#define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
+
+/*
+ * One hole covering [start, start + len), stored as a node of an rb-tree
+ * (see add_file_extent_hole() and del_file_extent_hole()).
+ */
+struct file_extent_hole {
+ struct rb_node node;
+ u64 start;
+ u64 len;
+};
+
+/*
+ * Compatibility helper that lets older code keep working with the hole
+ * tree: returns the start of the entry rb_first() yields, or (u64)-1 when
+ * the tree is empty.
+ */
+static u64 first_extent_gap(struct rb_root *holes)
+{
+	struct rb_node *first;
+
+	if (RB_EMPTY_ROOT(holes))
+		return (u64)-1;
+
+	first = rb_first(holes);
+	return rb_entry(first, struct file_extent_hole, node)->start;
+}
+
+/*
+ * Ordering callback used when inserting a hole with rb_insert().
+ *
+ * Holes are ordered by start. For equal starts the result is decided by
+ * length so that the callback never returns 0: a hole that is at least as
+ * long as the other one becomes the merge centre, and identical holes are
+ * merged afterwards by add_file_extent_hole().
+ */
+static int compare_hole(struct rb_node *node1, struct rb_node *node2)
+{
+	struct file_extent_hole *a;
+	struct file_extent_hole *b;
+
+	a = rb_entry(node1, struct file_extent_hole, node);
+	b = rb_entry(node2, struct file_extent_hole, node);
+
+	if (a->start != b->start)
+		return a->start > b->start ? -1 : 1;
+
+	/* Same start: the longer (or equal) hole is the merge centre */
+	return a->len >= b->len ? -1 : 1;
+}
+
+/*
+ * Add a hole to the record
+ *
+ * Inserts [start, start + len) into @holes and then coalesces it with the
+ * previous entry and with any following entries that it touches or
+ * overlaps. copy_file_extent_holes() relies on this merging so that no
+ * contiguous holes are left behind.
+ *
+ * Returns 0 on success and -ENOMEM when the new node cannot be allocated.
+ */
+static int add_file_extent_hole(struct rb_root *holes,
+ u64 start, u64 len)
+{
+ struct file_extent_hole *hole;
+ struct file_extent_hole *prev = NULL;
+ struct file_extent_hole *next = NULL;
+
+ hole = malloc(sizeof(*hole));
+ if (!hole)
+ return -ENOMEM;
+ hole->start = start;
+ hole->len = len;
+ /* Since compare will not return 0, no -EEXIST will happen */
+ rb_insert(holes, &hole->node, compare_hole);
+
+ /* simple merge with previous hole */
+ if (rb_prev(&hole->node))
+ prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
+ node);
+ if (prev && prev->start + prev->len >= hole->start) {
+ /* the new hole absorbs prev: keep the new end, take over prev's start */
+ hole->len = hole->start + hole->len - prev->start;
+ hole->start = prev->start;
+ rb_erase(&prev->node, holes);
+ free(prev);
+ prev = NULL;
+ }
+
+ /* iterate merge with next holes */
+ while (1) {
+ if (!rb_next(&hole->node))
+ break;
+ next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
+ node);
+ if (hole->start + hole->len >= next->start) {
+ /* grow only if next ends beyond us; otherwise it is fully covered */
+ if (hole->start + hole->len <= next->start + next->len)
+ hole->len = next->start + next->len -
+ hole->start;
+ rb_erase(&next->node, holes);
+ free(next);
+ next = NULL;
+ } else
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Search callback for rb_search(): @data is a struct file_extent_hole of
+ * which only 'start' is used as the key.
+ *
+ * Returns 0 when the key lies inside the hole at @node, -1 when it lies
+ * before the hole and 1 when it lies at or beyond the hole's end.
+ */
+static int compare_hole_range(struct rb_node *node, void *data)
+{
+	u64 start = ((struct file_extent_hole *)data)->start;
+	struct file_extent_hole *hole;
+
+	hole = rb_entry(node, struct file_extent_hole, node);
+
+	if (start < hole->start)
+		return -1;
+	/* start >= hole->start holds from here on */
+	if (start < hole->start + hole->len)
+		return 0;
+	return 1;
+}
+
+/*
+ * Delete a hole in the record
+ *
+ * This is much stricter than add_file_extent_hole(): the range
+ * [start, start + len) must lie completely inside one existing hole. That
+ * hole is removed and whatever is left of it before and after the range is
+ * added back, i.e. the hole is split.
+ *
+ * Returns 0 on success, -EEXIST when no hole contains @start or the range
+ * runs past the end of that hole, or the error of add_file_extent_hole()
+ * when re-adding a remainder fails.
+ */
+static int del_file_extent_hole(struct rb_root *holes,
+ u64 start, u64 len)
+{
+ struct file_extent_hole *hole;
+ struct file_extent_hole tmp;
+ u64 prev_start = 0;
+ u64 prev_len = 0;
+ u64 next_start = 0;
+ u64 next_len = 0;
+ struct rb_node *node;
+ int have_prev = 0;
+ int have_next = 0;
+ int ret = 0;
+
+ /* tmp only serves as the search key for compare_hole_range() */
+ tmp.start = start;
+ tmp.len = len;
+ node = rb_search(holes, &tmp, compare_hole_range, NULL);
+ if (!node)
+ return -EEXIST;
+ hole = rb_entry(node, struct file_extent_hole, node);
+ if (start + len > hole->start + hole->len)
+ return -EEXIST;
+
+ /*
+ * Now there will be no overlap, delete the hole and re-add the
+ * split(s) if they exist.
+ */
+ if (start > hole->start) {
+ prev_start = hole->start;
+ prev_len = start - hole->start;
+ have_prev = 1;
+ }
+ if (hole->start + hole->len > start + len) {
+ next_start = start + len;
+ next_len = hole->start + hole->len - start - len;
+ have_next = 1;
+ }
+ rb_erase(node, holes);
+ free(hole);
+ if (have_prev) {
+ ret = add_file_extent_hole(holes, prev_start, prev_len);
+ if (ret < 0)
+ return ret;
+ }
+ if (have_next) {
+ ret = add_file_extent_hole(holes, next_start, next_len);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Add every hole of @src to @dst through add_file_extent_hole(), so the
+ * copies are merged with whatever @dst already contains. @src is left
+ * untouched.
+ *
+ * Returns 0 on success, or the first non-zero result of
+ * add_file_extent_hole(); holes copied before the failure stay in @dst.
+ */
+static int copy_file_extent_holes(struct rb_root *dst,
+				  struct rb_root *src)
+{
+	struct rb_node *node;
+
+	for (node = rb_first(src); node; node = rb_next(node)) {
+		struct file_extent_hole *hole;
+		int ret;
+
+		hole = rb_entry(node, struct file_extent_hole, node);
+		ret = add_file_extent_hole(dst, hole->start, hole->len);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/* Erase and free every hole in @holes, leaving the tree empty. */
+static void free_file_extent_holes(struct rb_root *holes)
+{
+	struct rb_node *node;
+
+	while ((node = rb_first(holes)) != NULL) {
+		struct file_extent_hole *hole;
+
+		hole = rb_entry(node, struct file_extent_hole, node);
+		rb_erase(node, holes);
+		free(hole);
+	}
+}
+
+/*
+ * Everything collected about one inode while a tree is walked.
+ *
+ * nlink, imode, isize and nbytes are copied from the inode item by
+ * process_inode_item(); the found_* members are what the scan itself has
+ * observed and are compared against them in maybe_free_inode_rec().
+ * 'errors' collects I_ERR_* bits. 'refs' counts how many ptr_node entries
+ * point at the record: get_inode_rec() clones a record before modifying it
+ * when refs > 1, and free_inode_rec() frees it when refs drops to zero.
+ */
+struct inode_record {
+ struct list_head backrefs;
+ unsigned int checked:1;
+ unsigned int merging:1;
+ unsigned int found_inode_item:1;
+ unsigned int found_dir_item:1;
+ unsigned int found_file_extent:1;
+ unsigned int found_csum_item:1;
+ unsigned int some_csum_missing:1;
+ unsigned int nodatasum:1;
+ int errors;
+
+ u64 ino;
+ u32 nlink;
+ u32 imode;
+ u64 isize;
+ u64 nbytes;
+
+ u32 found_link;
+ u64 found_size;
+ u64 extent_start;
+ u64 extent_end;
+ struct rb_root holes;
+ struct list_head orphan_extents;
+
+ u32 refs;
+};
+
+/*
+ * Error bits stored in inode_record.errors; print_inode_error() decodes
+ * them into text.
+ */
+#define I_ERR_NO_INODE_ITEM (1 << 0)
+#define I_ERR_NO_ORPHAN_ITEM (1 << 1)
+#define I_ERR_DUP_INODE_ITEM (1 << 2)
+#define I_ERR_DUP_DIR_INDEX (1 << 3)
+#define I_ERR_ODD_DIR_ITEM (1 << 4)
+#define I_ERR_ODD_FILE_EXTENT (1 << 5)
+#define I_ERR_BAD_FILE_EXTENT (1 << 6)
+#define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
+#define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
+#define I_ERR_DIR_ISIZE_WRONG (1 << 9)
+#define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
+#define I_ERR_ODD_CSUM_ITEM (1 << 11)
+#define I_ERR_SOME_CSUM_MISSING (1 << 12)
+#define I_ERR_LINK_COUNT_WRONG (1 << 13)
+#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
+
+/*
+ * One reference to a root, kept on root_record.backrefs. The layout
+ * parallels struct inode_backref: found_* bits, an error mask and a
+ * trailing name of namelen bytes.
+ * NOTE(review): 'errors' presumably holds REF_ERR_* bits - confirm.
+ */
+struct root_backref {
+ struct list_head list;
+ unsigned int found_dir_item:1;
+ unsigned int found_dir_index:1;
+ unsigned int found_back_ref:1;
+ unsigned int found_forward_ref:1;
+ unsigned int reachable:1;
+ int errors;
+ u64 ref_root;
+ u64 dir;
+ u64 index;
+ u16 namelen;
+ char name[0];
+};
+
+/*
+ * Per-root record with a list of root_backref entries; the embedded
+ * cache_extent allows it to be stored in a cache tree.
+ */
+struct root_record {
+ struct list_head backrefs;
+ struct cache_extent cache;
+ unsigned int found_root_item:1;
+ u64 objectid;
+ u32 found_ref;
+};
+
+/*
+ * Cache tree entry that carries a pointer. In the inode caches 'data'
+ * points at a struct inode_record and the entry is keyed by inode number
+ * with size 1 (see get_inode_rec()).
+ */
+struct ptr_node {
+ struct cache_extent cache;
+ void *data;
+};
+
+/*
+ * State attached to one tree block, keyed by its bytenr in
+ * walk_control.shared. root_cache and inode_cache hold ptr_node entries
+ * pointing at inode records; 'current' is the record being processed.
+ * 'refs' is set from the caller-supplied count in add_shared_node() and
+ * decremented in splice_shared_node(), enter_shared_node() and
+ * leave_shared_node().
+ */
+struct shared_node {
+ struct cache_extent cache;
+ struct cache_tree root_cache;
+ struct cache_tree inode_cache;
+ struct inode_record *current;
+ u32 refs;
+};
+
+/* A (start, size) pair describing one block. */
+struct block_info {
+ u64 start;
+ u32 size;
+};
+
+/*
+ * State of a tree walk. 'shared' maps bytenr to shared_node; nodes[] is
+ * indexed by tree level and active_node is the level whose entry is
+ * currently in use (see enter_shared_node()/leave_shared_node()).
+ */
+struct walk_control {
+ struct cache_tree shared;
+ struct shared_node *nodes[BTRFS_MAX_LEVEL];
+ int active_node;
+ int root_level;
+};
+
+/* List entry identifying an item by its key and the id of its root. */
+struct bad_item {
+ struct btrfs_key key;
+ u64 root_id;
+ struct list_head list;
+};
+
+static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
+
+/*
+ * Attach @root to transaction @trans the first time it is seen there.
+ *
+ * Nothing happens if root->last_trans already equals the transaction id.
+ * Otherwise the root is flagged with track_dirty, stamped with the
+ * transaction id, and its current node becomes the commit root, with
+ * extent_buffer_get() called on that node.
+ */
+static void record_root_in_trans(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root)
+{
+	if (root->last_trans == trans->transid)
+		return;
+
+	root->track_dirty = 1;
+	root->last_trans = trans->transid;
+	root->commit_root = root->node;
+	extent_buffer_get(root->node);
+}
+
+/*
+ * Translate the file type bits of an inode mode into a BTRFS_FT_* value.
+ *
+ * @imode: mode as read from the inode item
+ *
+ * Returns the BTRFS_FT_* constant for the type, or 0 for a type value that
+ * has no table entry.
+ */
+static u8 imode_to_type(u32 imode)
+{
+#define S_SHIFT 12
+	/*
+	 * (imode & S_IFMT) >> S_SHIFT ranges from 0 up to and including
+	 * S_IFMT >> S_SHIFT, so the table needs one slot more than
+	 * S_IFMT >> S_SHIFT. Without it a mode with all type bits set, which
+	 * a damaged filesystem can contain, reads one element past the end.
+	 */
+	static const unsigned char btrfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
+		[S_IFREG >> S_SHIFT]	= BTRFS_FT_REG_FILE,
+		[S_IFDIR >> S_SHIFT]	= BTRFS_FT_DIR,
+		[S_IFCHR >> S_SHIFT]	= BTRFS_FT_CHRDEV,
+		[S_IFBLK >> S_SHIFT]	= BTRFS_FT_BLKDEV,
+		[S_IFIFO >> S_SHIFT]	= BTRFS_FT_FIFO,
+		[S_IFSOCK >> S_SHIFT]	= BTRFS_FT_SOCK,
+		[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
+	};
+
+	return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
+#undef S_SHIFT
+}
+
+/*
+ * rb-tree ordering callback for device records, keyed by devid.
+ *
+ * Returns 0 for equal ids, -1 when the first record has the larger id and
+ * 1 when it has the smaller one.
+ */
+static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
+{
+	struct device_record *a = rb_entry(node1, struct device_record, node);
+	struct device_record *b = rb_entry(node2, struct device_record, node);
+
+	if (a->devid == b->devid)
+		return 0;
+	return a->devid > b->devid ? -1 : 1;
+}
+
+/*
+ * Make a private deep copy of @orig_rec.
+ *
+ * The scalar members are copied verbatim; the backref list, the orphan
+ * extent list and the hole tree are duplicated element by element so that
+ * the clone shares nothing with the original. The clone starts with
+ * refs == 1.
+ *
+ * Returns the new record, or ERR_PTR(-ENOMEM) (respectively the error of
+ * copy_file_extent_holes()) with everything allocated so far released.
+ */
+static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
+{
+	struct inode_record *rec;
+	struct inode_backref *backref;
+	struct inode_backref *orig;
+	struct inode_backref *tmp;
+	struct orphan_data_extent *src_orphan;
+	struct orphan_data_extent *dst_orphan;
+	struct orphan_data_extent *tmp_orphan;
+	size_t size;
+	int ret;
+
+	rec = malloc(sizeof(*rec));
+	if (!rec)
+		return ERR_PTR(-ENOMEM);
+	memcpy(rec, orig_rec, sizeof(*rec));
+	rec->refs = 1;
+	/* The memcpy copied the original's links; start with empty containers */
+	INIT_LIST_HEAD(&rec->backrefs);
+	INIT_LIST_HEAD(&rec->orphan_extents);
+	rec->holes = RB_ROOT;
+
+	list_for_each_entry(orig, &orig_rec->backrefs, list) {
+		/* structure + name bytes + terminating NUL */
+		size = sizeof(*orig) + orig->namelen + 1;
+		backref = malloc(size);
+		if (!backref) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+		memcpy(backref, orig, size);
+		list_add_tail(&backref->list, &rec->backrefs);
+	}
+	list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
+		dst_orphan = malloc(sizeof(*dst_orphan));
+		if (!dst_orphan) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+		memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
+		list_add_tail(&dst_orphan->list, &rec->orphan_extents);
+	}
+	/* Report a failed hole copy to the caller instead of aborting */
+	ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
+	if (ret < 0)
+		goto cleanup;
+
+	return rec;
+
+cleanup:
+	/* A failed copy may have added some holes already */
+	free_file_extent_holes(&rec->holes);
+
+	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
+		list_del(&backref->list);
+		free(backref);
+	}
+
+	/* Walk the orphan list with its own element type */
+	list_for_each_entry_safe(dst_orphan, tmp_orphan, &rec->orphan_extents,
+				 list) {
+		list_del(&dst_orphan->list);
+		free(dst_orphan);
+	}
+
+	free(rec);
+
+	return ERR_PTR(ret);
+}
+
+/*
+ * Print every entry of @orphan_extents to stdout, preceded by a header
+ * line naming tree @objectid. Prints nothing for an empty list.
+ */
+static void print_orphan_data_extents(struct list_head *orphan_extents,
+				      u64 objectid)
+{
+	struct orphan_data_extent *orphan;
+
+	if (list_empty(orphan_extents))
+		return;
+	/*
+	 * %llu takes an unsigned long long; cast the u64 values explicitly,
+	 * as print_inode_error() does.
+	 */
+	printf("The following data extent is lost in tree %llu:\n",
+	       (unsigned long long)objectid);
+	list_for_each_entry(orphan, orphan_extents, list) {
+		printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
+		       (unsigned long long)orphan->objectid,
+		       (unsigned long long)orphan->offset,
+		       (unsigned long long)orphan->disk_bytenr,
+		       (unsigned long long)orphan->disk_len);
+	}
+}
+
+/*
+ * Report the errors recorded for @rec on stderr.
+ *
+ * Prints nothing when rec->errors is zero. Otherwise one line is written
+ * with the root id, the inode number, the raw error mask and one text
+ * fragment per I_ERR_* bit that is set. For a reloc root the line is
+ * prefixed with "reloc" and the id printed is the root key's offset.
+ * Orphan extents and file extent holes are listed afterwards when the
+ * corresponding bits are set.
+ */
+static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
+{
+	/* Text fragment for every error bit, in output order */
+	static const struct {
+		int flag;
+		const char *text;
+	} error_text[] = {
+		{ I_ERR_NO_INODE_ITEM,		", no inode item" },
+		{ I_ERR_NO_ORPHAN_ITEM,		", no orphan item" },
+		{ I_ERR_DUP_INODE_ITEM,		", dup inode item" },
+		{ I_ERR_DUP_DIR_INDEX,		", dup dir index" },
+		{ I_ERR_ODD_DIR_ITEM,		", odd dir item" },
+		{ I_ERR_ODD_FILE_EXTENT,	", odd file extent" },
+		{ I_ERR_BAD_FILE_EXTENT,	", bad file extent" },
+		{ I_ERR_FILE_EXTENT_OVERLAP,	", file extent overlap" },
+		{ I_ERR_FILE_EXTENT_DISCOUNT,	", file extent discount" },
+		{ I_ERR_DIR_ISIZE_WRONG,	", dir isize wrong" },
+		{ I_ERR_FILE_NBYTES_WRONG,	", nbytes wrong" },
+		{ I_ERR_ODD_CSUM_ITEM,		", odd csum item" },
+		{ I_ERR_SOME_CSUM_MISSING,	", some csum missing" },
+		{ I_ERR_LINK_COUNT_WRONG,	", link count wrong" },
+		{ I_ERR_FILE_EXTENT_ORPHAN,	", orphan file extent" },
+	};
+	u64 root_objectid = root->root_key.objectid;
+	int errors = rec->errors;
+	size_t i;
+
+	if (!errors)
+		return;
+	/* reloc root errors, we print its corresponding fs root objectid */
+	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		root_objectid = root->root_key.offset;
+		fprintf(stderr, "reloc");
+	}
+	fprintf(stderr, "root %llu inode %llu errors %x",
+		(unsigned long long) root_objectid,
+		(unsigned long long) rec->ino, rec->errors);
+
+	for (i = 0; i < sizeof(error_text) / sizeof(error_text[0]); i++) {
+		if (errors & error_text[i].flag)
+			fputs(error_text[i].text, stderr);
+	}
+	fprintf(stderr, "\n");
+
+	/* Print the orphan extents if needed */
+	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
+		print_orphan_data_extents(&rec->orphan_extents, root->objectid);
+
+	/* Print the holes if needed */
+	if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
+		struct file_extent_hole *hole;
+		struct rb_node *node;
+		int found = 0;
+
+		node = rb_first(&rec->holes);
+		fprintf(stderr, "Found file extent holes:\n");
+		while (node) {
+			found = 1;
+			hole = rb_entry(node, struct file_extent_hole, node);
+			/* %llu takes an unsigned long long: cast the u64 values */
+			fprintf(stderr, "\tstart: %llu, len: %llu\n",
+				(unsigned long long)hole->start,
+				(unsigned long long)hole->len);
+			node = rb_next(node);
+		}
+		/* No hole recorded: report the whole file, rounded to sectors */
+		if (!found)
+			fprintf(stderr, "\tstart: 0, len: %llu\n",
+				(unsigned long long)round_up(rec->isize,
+							     root->sectorsize));
+	}
+}
+
+/*
+ * Write one text fragment to stderr for every REF_ERR_* bit set in
+ * @errors, in ascending bit order, followed by a newline. The newline is
+ * written even when no bit is set.
+ */
+static void print_ref_error(int errors)
+{
+	static const struct {
+		int flag;
+		const char *text;
+	} ref_error_text[] = {
+		{ REF_ERR_NO_DIR_ITEM,		", no dir item" },
+		{ REF_ERR_NO_DIR_INDEX,		", no dir index" },
+		{ REF_ERR_NO_INODE_REF,		", no inode ref" },
+		{ REF_ERR_DUP_DIR_ITEM,		", dup dir item" },
+		{ REF_ERR_DUP_DIR_INDEX,	", dup dir index" },
+		{ REF_ERR_DUP_INODE_REF,	", dup inode ref" },
+		{ REF_ERR_INDEX_UNMATCH,	", index unmatch" },
+		{ REF_ERR_FILETYPE_UNMATCH,	", filetype unmatch" },
+		{ REF_ERR_NAME_TOO_LONG,	", name too long" },
+		{ REF_ERR_NO_ROOT_REF,		", no root ref" },
+		{ REF_ERR_NO_ROOT_BACKREF,	", no root backref" },
+		{ REF_ERR_DUP_ROOT_REF,		", dup root ref" },
+		{ REF_ERR_DUP_ROOT_BACKREF,	", dup root backref" },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(ref_error_text) / sizeof(ref_error_text[0]); i++) {
+		if (errors & ref_error_text[i].flag)
+			fputs(ref_error_text[i].text, stderr);
+	}
+	fputs("\n", stderr);
+}
+
+/*
+ * Look up the record for inode @ino in @inode_cache.
+ *
+ * @mod: non-zero when the caller intends to modify the record
+ *
+ * With @mod set, a record that is shared (refs > 1) is replaced in the
+ * cache by a private clone, and a missing record is created and inserted.
+ * Without @mod the existing record, if any, is returned unchanged.
+ *
+ * Returns the record, NULL when it does not exist and @mod is 0, or an
+ * ERR_PTR() on allocation or insertion failure. On failure the cache is
+ * left as it was.
+ */
+static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
+					  u64 ino, int mod)
+{
+	struct ptr_node *node;
+	struct cache_extent *cache;
+	struct inode_record *rec = NULL;
+	struct inode_record *clone;
+	int ret;
+
+	cache = lookup_cache_extent(inode_cache, ino, 1);
+	if (cache) {
+		node = container_of(cache, struct ptr_node, cache);
+		rec = node->data;
+		if (mod && rec->refs > 1) {
+			/*
+			 * Only publish the clone once it is known to be valid,
+			 * so the cache never holds an error pointer.
+			 */
+			clone = clone_inode_rec(rec);
+			if (IS_ERR(clone))
+				return clone;
+			node->data = clone;
+			rec->refs--;
+			rec = clone;
+		}
+	} else if (mod) {
+		rec = calloc(1, sizeof(*rec));
+		if (!rec)
+			return ERR_PTR(-ENOMEM);
+		rec->ino = ino;
+		rec->extent_start = (u64)-1;
+		rec->refs = 1;
+		INIT_LIST_HEAD(&rec->backrefs);
+		INIT_LIST_HEAD(&rec->orphan_extents);
+		rec->holes = RB_ROOT;
+
+		node = malloc(sizeof(*node));
+		if (!node) {
+			free(rec);
+			return ERR_PTR(-ENOMEM);
+		}
+		node->cache.start = ino;
+		node->cache.size = 1;
+		node->data = rec;
+
+		if (ino == BTRFS_FREE_INO_OBJECTID)
+			rec->found_link = 1;
+
+		ret = insert_cache_extent(inode_cache, &node->cache);
+		if (ret) {
+			/* Not inserted, so nobody else owns these */
+			free(node);
+			free(rec);
+			return ERR_PTR(-EEXIST);
+		}
+	}
+	return rec;
+}
+
+/* Unlink and free every entry of @orphan_extents, leaving the list empty. */
+static void free_orphan_data_extents(struct list_head *orphan_extents)
+{
+	struct orphan_data_extent *orphan;
+	struct orphan_data_extent *next;
+
+	list_for_each_entry_safe(orphan, next, orphan_extents, list) {
+		list_del(&orphan->list);
+		free(orphan);
+	}
+}
+
+/*
+ * Drop one reference to @rec. When it was the last one, free the record
+ * together with its backrefs, orphan extents and holes.
+ */
+static void free_inode_rec(struct inode_record *rec)
+{
+	struct inode_backref *backref;
+	struct inode_backref *next;
+
+	if (--rec->refs > 0)
+		return;
+
+	list_for_each_entry_safe(backref, next, &rec->backrefs, list) {
+		list_del(&backref->list);
+		free(backref);
+	}
+	free_orphan_data_extents(&rec->orphan_extents);
+	free_file_extent_holes(&rec->holes);
+	free(rec);
+}
+
+/*
+ * Returns 1 when nothing remains to be reported for @rec: no errors, the
+ * record is checked, its inode item was found, the link counts agree and
+ * no backref is left. Returns 0 otherwise.
+ */
+static int can_free_inode_rec(struct inode_record *rec)
+{
+	if (rec->errors || !rec->checked || !rec->found_inode_item)
+		return 0;
+	if (rec->nlink != rec->found_link)
+		return 0;
+	if (!list_empty(&rec->backrefs))
+		return 0;
+	return 1;
+}
+
+/*
+ * Finish validating @rec and drop it from @inode_cache when nothing is
+ * left to report.
+ *
+ * Nothing is done before the inode item has been seen. Backrefs for which
+ * dir item, dir index and inode ref were all found, that carry no error
+ * and whose record already has matching link counts are freed. Once the
+ * record is marked checked and is not in the middle of a merge, the
+ * per-type consistency checks run and set I_ERR_* bits; a record that then
+ * passes can_free_inode_rec() is removed from the cache and freed.
+ */
+static void maybe_free_inode_rec(struct cache_tree *inode_cache,
+ struct inode_record *rec)
+{
+ struct cache_extent *cache;
+ struct inode_backref *tmp, *backref;
+ struct ptr_node *node;
+ unsigned char filetype;
+
+ if (!rec->found_inode_item)
+ return;
+
+ filetype = imode_to_type(rec->imode);
+ list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
+ if (backref->found_dir_item && backref->found_dir_index) {
+ if (backref->filetype != filetype)
+ backref->errors |= REF_ERR_FILETYPE_UNMATCH;
+ if (!backref->errors && backref->found_inode_ref &&
+ rec->nlink == rec->found_link) {
+ list_del(&backref->list);
+ free(backref);
+ }
+ }
+ }
+
+ if (!rec->checked || rec->merging)
+ return;
+
+ if (S_ISDIR(rec->imode)) {
+ if (rec->found_size != rec->isize)
+ rec->errors |= I_ERR_DIR_ISIZE_WRONG;
+ if (rec->found_file_extent)
+ rec->errors |= I_ERR_ODD_FILE_EXTENT;
+ } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
+ if (rec->found_dir_item)
+ rec->errors |= I_ERR_ODD_DIR_ITEM;
+ if (rec->found_size != rec->nbytes)
+ rec->errors |= I_ERR_FILE_NBYTES_WRONG;
+ /* a gap only counts when the file is linked and no_holes is off */
+ if (rec->nlink > 0 && !no_holes &&
+ (rec->extent_end < rec->isize ||
+ first_extent_gap(&rec->holes) < rec->isize))
+ rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
+ }
+
+ if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
+ if (rec->found_csum_item && rec->nodatasum)
+ rec->errors |= I_ERR_ODD_CSUM_ITEM;
+ if (rec->some_csum_missing && !rec->nodatasum)
+ rec->errors |= I_ERR_SOME_CSUM_MISSING;
+ }
+
+ /* a record reaching this point must not be shared */
+ BUG_ON(rec->refs != 1);
+ if (can_free_inode_rec(rec)) {
+ cache = lookup_cache_extent(inode_cache, rec->ino, 1);
+ node = container_of(cache, struct ptr_node, cache);
+ BUG_ON(node->data != rec);
+ remove_cache_extent(inode_cache, &node->cache);
+ free(node);
+ free_inode_rec(rec);
+ }
+}
+
+/*
+ * Search @root for the orphan item of inode @ino, i.e. the key
+ * (BTRFS_ORPHAN_OBJECTID, BTRFS_ORPHAN_ITEM_KEY, ino).
+ *
+ * Returns 0 when the search finds the key, -ENOENT when it reports the key
+ * as absent, or the negative error of btrfs_search_slot().
+ */
+static int check_orphan_item(struct btrfs_root *root, u64 ino)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = BTRFS_ORPHAN_OBJECTID;
+	key.type = BTRFS_ORPHAN_ITEM_KEY;
+	key.offset = ino;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	btrfs_release_path(&path);
+
+	return ret > 0 ? -ENOENT : ret;
+}
+
+/*
+ * Copy the inode item at @slot of @eb into the record that is current in
+ * @active_node.
+ *
+ * The current record must belong to key->objectid and must not be shared;
+ * both are enforced with BUG_ON. A second inode item for the same record
+ * sets I_ERR_DUP_INODE_ITEM and returns 1. Otherwise nlink, size, nbytes,
+ * mode and the NODATASUM flag are recorded, a zero link count sets
+ * I_ERR_NO_ORPHAN_ITEM, maybe_free_inode_rec() is run and 0 is returned.
+ */
+static int process_inode_item(struct extent_buffer *eb,
+			      int slot, struct btrfs_key *key,
+			      struct shared_node *active_node)
+{
+	struct inode_record *rec = active_node->current;
+	struct btrfs_inode_item *inode_item;
+
+	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
+
+	if (rec->found_inode_item) {
+		rec->errors |= I_ERR_DUP_INODE_ITEM;
+		return 1;
+	}
+
+	inode_item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
+	rec->nlink = btrfs_inode_nlink(eb, inode_item);
+	rec->isize = btrfs_inode_size(eb, inode_item);
+	rec->nbytes = btrfs_inode_nbytes(eb, inode_item);
+	rec->imode = btrfs_inode_mode(eb, inode_item);
+	if (btrfs_inode_flags(eb, inode_item) & BTRFS_INODE_NODATASUM)
+		rec->nodatasum = 1;
+	rec->found_inode_item = 1;
+
+	if (!rec->nlink)
+		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
+
+	maybe_free_inode_rec(&active_node->inode_cache, rec);
+	return 0;
+}
+
+/*
+ * Find the backref of @rec for (@dir, @name), creating it when missing.
+ *
+ * For the BTRFS_MULTIPLE_OBJECTIDS inode no existing entry is ever reused
+ * as long as the list is non-empty; a new entry is appended instead. A new
+ * backref is zeroed, gets @dir, @namelen and a NUL-terminated copy of
+ * @name, and is added to the tail of rec->backrefs.
+ *
+ * Returns the backref, or NULL when the allocation fails.
+ */
+static struct inode_backref *get_inode_backref(struct inode_record *rec,
+						const char *name,
+						int namelen, u64 dir)
+{
+	struct inode_backref *backref;
+
+	list_for_each_entry(backref, &rec->backrefs, list) {
+		if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
+			break;
+		if (backref->dir == dir && backref->namelen == namelen &&
+		    !memcmp(name, backref->name, namelen))
+			return backref;
+	}
+
+	/* header + name bytes + terminating NUL, all zeroed */
+	backref = calloc(1, sizeof(*backref) + namelen + 1);
+	if (!backref)
+		return NULL;
+	backref->dir = dir;
+	backref->namelen = namelen;
+	memcpy(backref->name, name, namelen);
+	backref->name[namelen] = '\0';
+	list_add_tail(&backref->list, &rec->backrefs);
+	return backref;
+}
+
+/*
+ * Record that a dir index, dir item or inode (ext)ref naming inode @ino
+ * under directory @dir was seen.
+ *
+ * @itemtype selects which of the three was found; any other value hits
+ * BUG_ON. The backref for (@dir, @name) is looked up or created, @errors is
+ * OR-ed into it, the matching found_* flag is set and REF_ERR_* bits are
+ * added for duplicates and for an index or filetype that disagrees with
+ * what was recorded before. A dir item also increments rec->found_link.
+ * maybe_free_inode_rec() is run on the record afterwards.
+ *
+ * Allocation failures are fatal (BUG_ON). Always returns 0.
+ */
+static int add_inode_backref(struct cache_tree *inode_cache,
+ u64 ino, u64 dir, u64 index,
+ const char *name, int namelen,
+ int filetype, int itemtype, int errors)
+{
+ struct inode_record *rec;
+ struct inode_backref *backref;
+
+ rec = get_inode_rec(inode_cache, ino, 1);
+ BUG_ON(IS_ERR(rec));
+ backref = get_inode_backref(rec, name, namelen, dir);
+ BUG_ON(!backref);
+ if (errors)
+ backref->errors |= errors;
+ if (itemtype == BTRFS_DIR_INDEX_KEY) {
+ if (backref->found_dir_index)
+ backref->errors |= REF_ERR_DUP_DIR_INDEX;
+ if (backref->found_inode_ref && backref->index != index)
+ backref->errors |= REF_ERR_INDEX_UNMATCH;
+ if (backref->found_dir_item && backref->filetype != filetype)
+ backref->errors |= REF_ERR_FILETYPE_UNMATCH;
+
+ backref->index = index;
+ backref->filetype = filetype;
+ backref->found_dir_index = 1;
+ } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
+ rec->found_link++;
+ if (backref->found_dir_item)
+ backref->errors |= REF_ERR_DUP_DIR_ITEM;
+ if (backref->found_dir_index && backref->filetype != filetype)
+ backref->errors |= REF_ERR_FILETYPE_UNMATCH;
+
+ backref->filetype = filetype;
+ backref->found_dir_item = 1;
+ } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
+ (itemtype == BTRFS_INODE_EXTREF_KEY)) {
+ if (backref->found_inode_ref)
+ backref->errors |= REF_ERR_DUP_INODE_REF;
+ /* the index is kept from the dir index if one was seen and differs */
+ if (backref->found_dir_index && backref->index != index)
+ backref->errors |= REF_ERR_INDEX_UNMATCH;
+ else
+ backref->index = index;
+
+ backref->ref_type = itemtype;
+ backref->found_inode_ref = 1;
+ } else {
+ BUG_ON(1);
+ }
+
+ maybe_free_inode_rec(inode_cache, rec);
+ return 0;
+}
+
+/*
+ * Fold everything recorded in @src into @dst, which lives in @dst_cache.
+ *
+ * Backrefs are replayed through add_inode_backref(); flags, sizes, link
+ * counts, extent range, holes and errors are combined. While the merge
+ * runs dst->merging is set so that the add_inode_backref() calls do not
+ * trigger the final checks in maybe_free_inode_rec(); the flag is cleared
+ * again on every exit path.
+ *
+ * Returns 0 on success or a negative error when a hole could not be
+ * recorded. If copying the holes fails the merge stops at that point; if
+ * only the gap between the two extent ranges could not be recorded the
+ * rest of the merge is still carried out before the error is returned.
+ */
+static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
+			    struct cache_tree *dst_cache)
+{
+	struct inode_backref *backref;
+	u32 dir_count = 0;
+	int ret = 0;
+
+	dst->merging = 1;
+	list_for_each_entry(backref, &src->backrefs, list) {
+		if (backref->found_dir_index) {
+			add_inode_backref(dst_cache, dst->ino, backref->dir,
+					backref->index, backref->name,
+					backref->namelen, backref->filetype,
+					BTRFS_DIR_INDEX_KEY, backref->errors);
+		}
+		if (backref->found_dir_item) {
+			dir_count++;
+			add_inode_backref(dst_cache, dst->ino,
+					backref->dir, 0, backref->name,
+					backref->namelen, backref->filetype,
+					BTRFS_DIR_ITEM_KEY, backref->errors);
+		}
+		if (backref->found_inode_ref) {
+			add_inode_backref(dst_cache, dst->ino,
+					backref->dir, backref->index,
+					backref->name, backref->namelen, 0,
+					backref->ref_type, backref->errors);
+		}
+	}
+
+	if (src->found_dir_item)
+		dst->found_dir_item = 1;
+	if (src->found_file_extent)
+		dst->found_file_extent = 1;
+	if (src->found_csum_item)
+		dst->found_csum_item = 1;
+	if (src->some_csum_missing)
+		dst->some_csum_missing = 1;
+	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
+		ret = copy_file_extent_holes(&dst->holes, &src->holes);
+		if (ret < 0)
+			goto out;
+	}
+
+	/*
+	 * The dir items replayed above already bumped dst->found_link, so
+	 * only add the links of src that did not come from them.
+	 */
+	BUG_ON(src->found_link < dir_count);
+	dst->found_link += src->found_link - dir_count;
+	dst->found_size += src->found_size;
+	if (src->extent_start != (u64)-1) {
+		if (dst->extent_start == (u64)-1) {
+			dst->extent_start = src->extent_start;
+			dst->extent_end = src->extent_end;
+		} else {
+			if (dst->extent_end > src->extent_start)
+				dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
+			else if (dst->extent_end < src->extent_start) {
+				/* keep the result: it is returned below */
+				ret = add_file_extent_hole(&dst->holes,
+					dst->extent_end,
+					src->extent_start - dst->extent_end);
+			}
+			if (dst->extent_end < src->extent_end)
+				dst->extent_end = src->extent_end;
+		}
+	}
+
+	dst->errors |= src->errors;
+	if (src->found_inode_item) {
+		if (!dst->found_inode_item) {
+			dst->nlink = src->nlink;
+			dst->isize = src->isize;
+			dst->nbytes = src->nbytes;
+			dst->imode = src->imode;
+			dst->nodatasum = src->nodatasum;
+			dst->found_inode_item = 1;
+		} else {
+			dst->errors |= I_ERR_DUP_INODE_ITEM;
+		}
+	}
+out:
+	/* Never leave the record stuck in the merging state */
+	dst->merging = 0;
+
+	return ret;
+}
+
+/*
+ * Move or share the inode records of @src_node into @dst_node.
+ *
+ * src_node->refs is decremented first. If it reaches zero the ptr_node
+ * entries themselves are moved over; otherwise new ptr_node entries are
+ * allocated that point at the same records, and each record's refs is
+ * incremented. root_cache is processed first, then inode_cache. When the
+ * destination already holds an entry for the same key (-EEXIST), the
+ * source record is merged into it with merge_inode_recs() and the
+ * reference to the source record is dropped.
+ *
+ * Finally dst_node->current is switched to the inode that was current in
+ * @src_node if the destination has none or a smaller one.
+ *
+ * Always returns 0; allocation and insertion failures hit BUG_ON.
+ */
+static int splice_shared_node(struct shared_node *src_node,
+ struct shared_node *dst_node)
+{
+ struct cache_extent *cache;
+ struct ptr_node *node, *ins;
+ struct cache_tree *src, *dst;
+ struct inode_record *rec, *conflict;
+ u64 current_ino = 0;
+ int splice = 0;
+ int ret;
+
+ if (--src_node->refs == 0)
+ splice = 1;
+ if (src_node->current)
+ current_ino = src_node->current->ino;
+
+ src = &src_node->root_cache;
+ dst = &dst_node->root_cache;
+again:
+ cache = search_cache_extent(src, 0);
+ while (cache) {
+ node = container_of(cache, struct ptr_node, cache);
+ rec = node->data;
+ /* advance before the node may be removed from src below */
+ cache = next_cache_extent(cache);
+
+ if (splice) {
+ remove_cache_extent(src, &node->cache);
+ ins = node;
+ } else {
+ ins = malloc(sizeof(*ins));
+ BUG_ON(!ins);
+ ins->cache.start = node->cache.start;
+ ins->cache.size = node->cache.size;
+ ins->data = rec;
+ rec->refs++;
+ }
+ ret = insert_cache_extent(dst, &ins->cache);
+ if (ret == -EEXIST) {
+ conflict = get_inode_rec(dst, rec->ino, 1);
+ BUG_ON(IS_ERR(conflict));
+ merge_inode_recs(rec, conflict, dst);
+ if (rec->checked) {
+ conflict->checked = 1;
+ if (dst_node->current == conflict)
+ dst_node->current = NULL;
+ }
+ maybe_free_inode_rec(dst, conflict);
+ free_inode_rec(rec);
+ free(ins);
+ } else {
+ BUG_ON(ret);
+ }
+ }
+
+ /* second pass: repeat the loop for the inode caches */
+ if (src == &src_node->root_cache) {
+ src = &src_node->inode_cache;
+ dst = &dst_node->inode_cache;
+ goto again;
+ }
+
+ /* dst points at dst_node->inode_cache from here on */
+ if (current_ino > 0 && (!dst_node->current ||
+ current_ino > dst_node->current->ino)) {
+ if (dst_node->current) {
+ dst_node->current->checked = 1;
+ maybe_free_inode_rec(dst, dst_node->current);
+ }
+ dst_node->current = get_inode_rec(dst, current_ino, 1);
+ BUG_ON(IS_ERR(dst_node->current));
+ }
+ return 0;
+}
+
+/*
+ * Destructor for one inode cache entry: drops the reference the ptr_node
+ * holds on its inode record and frees the ptr_node itself.
+ */
+static void free_inode_ptr(struct cache_extent *cache)
+{
+	struct ptr_node *node = container_of(cache, struct ptr_node, cache);
+
+	free_inode_rec(node->data);
+	free(node);
+}
+
+/*
+ * NOTE(review): presumably expands to free_inode_recs_tree(), which is
+ * called in enter_shared_node(), applying free_inode_ptr() to every entry
+ * of a cache tree - confirm against the macro definition.
+ */
+FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
+
+/*
+ * Look up the shared_node recorded for the block at @bytenr.
+ * Returns NULL when @shared has no entry for it.
+ */
+static struct shared_node *find_shared_node(struct cache_tree *shared,
+					     u64 bytenr)
+{
+	struct cache_extent *cache = lookup_cache_extent(shared, bytenr, 1);
+
+	if (!cache)
+		return NULL;
+	return container_of(cache, struct shared_node, cache);
+}
+
+/*
+ * Create a shared_node for the block at @bytenr with an initial reference
+ * count of @refs and insert it into @shared.
+ *
+ * Returns 0 on success, -ENOMEM when the node cannot be allocated, or the
+ * error of insert_cache_extent(). On failure nothing stays allocated.
+ */
+static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
+{
+	int ret;
+	struct shared_node *node;
+
+	node = calloc(1, sizeof(*node));
+	if (!node)
+		return -ENOMEM;
+	node->cache.start = bytenr;
+	node->cache.size = 1;
+	cache_tree_init(&node->root_cache);
+	cache_tree_init(&node->inode_cache);
+	node->refs = refs;
+
+	ret = insert_cache_extent(shared, &node->cache);
+	/* The tree did not take the node, so release it here */
+	if (ret)
+		free(node);
+
+	return ret;
+}
+
+/*
+ * Called when the walk reaches the block at @bytenr on @level, which has
+ * @refs references.
+ *
+ * Returns 0 when @level is already the active level, or when the block is
+ * seen for the first time: a shared_node is then created for it and made
+ * the active node for @level.
+ *
+ * Returns 1 when the block was recorded before. Its records are spliced
+ * into the currently active node, unless the walk is at the root level of
+ * a root whose root item has zero refs, in which case only one reference
+ * is dropped. The shared_node is freed once its refs reach zero.
+ * NOTE(review): 1 presumably tells the caller not to descend again -
+ * confirm against the caller.
+ */
+static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
+ struct walk_control *wc, int level)
+{
+ struct shared_node *node;
+ struct shared_node *dest;
+ int ret;
+
+ if (level == wc->active_node)
+ return 0;
+
+ BUG_ON(wc->active_node <= level);
+ node = find_shared_node(&wc->shared, bytenr);
+ if (!node) {
+ ret = add_shared_node(&wc->shared, bytenr, refs);
+ BUG_ON(ret);
+ node = find_shared_node(&wc->shared, bytenr);
+ wc->nodes[level] = node;
+ wc->active_node = level;
+ return 0;
+ }
+
+ if (wc->root_level == wc->active_node &&
+ btrfs_root_refs(&root->root_item) == 0) {
+ if (--node->refs == 0) {
+ free_inode_recs_tree(&node->root_cache);
+ free_inode_recs_tree(&node->inode_cache);
+ remove_cache_extent(&wc->shared, &node->cache);
+ free(node);
+ }
+ return 1;
+ }
+
+ dest = wc->nodes[wc->active_node];
+ /* splice_shared_node() decrements node->refs */
+ splice_shared_node(node, dest);
+ if (node->refs == 0) {
+ remove_cache_extent(&wc->shared, &node->cache);
+ free(node);
+ }
+ return 1;
+}
+
+/*
+ * Called when the walk moves up out of the active shared node at @level.
+ *
+ * The nearest populated slot of wc->nodes above @level becomes the new
+ * active node.  The records of the node being left are spliced into it,
+ * unless the new active node is the root level of a root with zero root
+ * refs, in which case only the reference count of the old node is dropped.
+ * Always returns 0.
+ */
+static int leave_shared_node(struct btrfs_root *root,
+			     struct walk_control *wc, int level)
+{
+	struct shared_node *leaving;
+	int upper;
+
+	if (level == wc->root_level)
+		return 0;
+
+	upper = level + 1;
+	while (upper < BTRFS_MAX_LEVEL && !wc->nodes[upper])
+		upper++;
+	BUG_ON(upper >= BTRFS_MAX_LEVEL);
+
+	leaving = wc->nodes[wc->active_node];
+	wc->nodes[wc->active_node] = NULL;
+	wc->active_node = upper;
+
+	if (wc->active_node < wc->root_level ||
+	    btrfs_root_refs(&root->root_item) > 0) {
+		BUG_ON(leaving->refs <= 1);
+		splice_shared_node(leaving, wc->nodes[wc->active_node]);
+	} else {
+		BUG_ON(leaving->refs < 2);
+		leaving->refs--;
+	}
+	return 0;
+}
+
+/*
+ * Work out how root @child_root_id relates to root @parent_root_id by
+ * searching the tree root, first for the exact ROOT_REF item and then through
+ * all ROOT_BACKREF items of the child.
+ *
+ * Returns:
+ * < 0 - on error
+ *   1 - if the root with id child_root_id is a child of root parent_root_id
+ *   0 - if the root child_root_id isn't a child of the root parent_root_id but
+ *       has other root(s) as parent(s)
+ *   2 - if the root child_root_id doesn't have any parent roots
+ *
+ * The path is released on every exit, including a failed first search.
+ */
+static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
+			 u64 child_root_id)
+{
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	int has_parent = 0;
+	int ret;
+
+	btrfs_init_path(&path);
+
+	/* Direct hit: (parent, ROOT_REF, child) exists. */
+	key.objectid = parent_root_id;
+	key.type = BTRFS_ROOT_REF_KEY;
+	key.offset = child_root_id;
+	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+	/* Release before looking at ret so the error path drops the path too. */
+	btrfs_release_path(&path);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return 1;
+
+	/* Otherwise scan every (child, ROOT_BACKREF, *) item. */
+	key.objectid = child_root_id;
+	key.type = BTRFS_ROOT_BACKREF_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	while (1) {
+		leaf = path.nodes[0];
+		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(tree_root, &path);
+			if (ret)
+				break;
+			leaf = path.nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+		if (key.objectid != child_root_id ||
+		    key.type != BTRFS_ROOT_BACKREF_KEY)
+			break;
+
+		has_parent = 1;
+
+		if (key.offset == parent_root_id) {
+			btrfs_release_path(&path);
+			return 1;
+		}
+
+		path.slots[0]++;
+	}
+out:
+	btrfs_release_path(&path);
+	if (ret < 0)
+		return ret;
+	return has_parent ? 0 : 2;
+}
+
+/*
+ * Record every entry packed into the DIR_ITEM / DIR_INDEX item at @slot.
+ *
+ * Each entry adds its name length to the directory's found_size and is
+ * registered as a backref: in the inode cache when it points at an inode
+ * item, in the root cache when it points at a root item, and under
+ * BTRFS_MULTIPLE_OBJECTIDS in the inode cache (with a warning) for any other
+ * location type.  Names longer than BTRFS_NAME_LEN are truncated and tagged
+ * with REF_ERR_NAME_TOO_LONG.  A DIR_INDEX item holding more than one entry
+ * flags the directory with I_ERR_DUP_DIR_INDEX.  Always returns 0.
+ */
+static int process_dir_item(struct btrfs_root *root,
+			    struct extent_buffer *eb,
+			    int slot, struct btrfs_key *key,
+			    struct shared_node *active_node)
+{
+	struct cache_tree *root_cache = &active_node->root_cache;
+	struct cache_tree *inode_cache = &active_node->inode_cache;
+	struct inode_record *rec = active_node->current;
+	struct btrfs_dir_item *di;
+	struct btrfs_key location;
+	char namebuf[BTRFS_NAME_LEN];
+	u32 total;
+	u32 cur = 0;
+	int nritems = 0;
+
+	rec->found_dir_item = 1;
+
+	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
+	total = btrfs_item_size_nr(eb, slot);
+	while (cur < total) {
+		struct cache_tree *target;
+		u64 target_ino;
+		u32 name_len;
+		u32 data_len;
+		u32 len;
+		int filetype;
+		int error = 0;
+
+		nritems++;
+		btrfs_dir_item_key_to_cpu(eb, di, &location);
+		name_len = btrfs_dir_name_len(eb, di);
+		data_len = btrfs_dir_data_len(eb, di);
+		filetype = btrfs_dir_type(eb, di);
+
+		rec->found_size += name_len;
+
+		/* Never copy more than the name buffer can hold. */
+		len = name_len;
+		if (name_len > BTRFS_NAME_LEN) {
+			len = BTRFS_NAME_LEN;
+			error = REF_ERR_NAME_TOO_LONG;
+		}
+		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
+
+		switch (location.type) {
+		case BTRFS_INODE_ITEM_KEY:
+			target = inode_cache;
+			target_ino = location.objectid;
+			break;
+		case BTRFS_ROOT_ITEM_KEY:
+			target = root_cache;
+			target_ino = location.objectid;
+			break;
+		default:
+			fprintf(stderr, "invalid location in dir item %u\n",
+				location.type);
+			target = inode_cache;
+			target_ino = BTRFS_MULTIPLE_OBJECTIDS;
+			break;
+		}
+		add_inode_backref(target, target_ino, key->objectid,
+				  key->offset, namebuf, len, filetype,
+				  key->type, error);
+
+		/* Advance to the next entry packed in the same item. */
+		len = sizeof(*di) + name_len + data_len;
+		di = (struct btrfs_dir_item *)((char *)di + len);
+		cur += len;
+	}
+
+	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
+		rec->errors |= I_ERR_DUP_DIR_INDEX;
+
+	return 0;
+}
+
+/*
+ * Register one backref in the inode cache for every name packed into the
+ * INODE_REF item at @slot.  key->objectid is the inode and key->offset is
+ * passed as the directory.  Over-long names are truncated to BTRFS_NAME_LEN
+ * and tagged with REF_ERR_NAME_TOO_LONG.  Always returns 0.
+ */
+static int process_inode_ref(struct extent_buffer *eb,
+			     int slot, struct btrfs_key *key,
+			     struct shared_node *active_node)
+{
+	struct cache_tree *inode_cache = &active_node->inode_cache;
+	struct btrfs_inode_ref *ref;
+	char namebuf[BTRFS_NAME_LEN];
+	u32 total;
+	u32 cur;
+	u32 step;
+
+	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
+	total = btrfs_item_size_nr(eb, slot);
+	for (cur = 0; cur < total; cur += step) {
+		u32 name_len = btrfs_inode_ref_name_len(eb, ref);
+		u64 index = btrfs_inode_ref_index(eb, ref);
+		u32 len = name_len;
+		int error = 0;
+
+		if (name_len > BTRFS_NAME_LEN) {
+			len = BTRFS_NAME_LEN;
+			error = REF_ERR_NAME_TOO_LONG;
+		}
+		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
+		add_inode_backref(inode_cache, key->objectid, key->offset,
+				  index, namebuf, len, 0, key->type, error);
+
+		/* Header plus the full on-disk name, even when truncated. */
+		step = sizeof(*ref) + name_len;
+		ref = (struct btrfs_inode_ref *)((char *)ref + step);
+	}
+	return 0;
+}
+
+/*
+ * Register one backref in the inode cache for every name packed into the
+ * INODE_EXTREF item at @slot.  Unlike process_inode_ref() the parent
+ * directory is read from each entry instead of from the key offset.
+ * Over-long names are truncated to BTRFS_NAME_LEN and tagged with
+ * REF_ERR_NAME_TOO_LONG.  Always returns 0.
+ */
+static int process_inode_extref(struct extent_buffer *eb,
+				int slot, struct btrfs_key *key,
+				struct shared_node *active_node)
+{
+	struct cache_tree *inode_cache = &active_node->inode_cache;
+	struct btrfs_inode_extref *extref;
+	char namebuf[BTRFS_NAME_LEN];
+	u32 total;
+	u32 cur;
+	u32 step;
+
+	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
+	total = btrfs_item_size_nr(eb, slot);
+	for (cur = 0; cur < total; cur += step) {
+		u32 name_len = btrfs_inode_extref_name_len(eb, extref);
+		u64 index = btrfs_inode_extref_index(eb, extref);
+		u64 parent = btrfs_inode_extref_parent(eb, extref);
+		u32 len = name_len;
+		int error = 0;
+
+		if (name_len > BTRFS_NAME_LEN) {
+			len = BTRFS_NAME_LEN;
+			error = REF_ERR_NAME_TOO_LONG;
+		}
+		read_extent_buffer(eb, namebuf,
+				   (unsigned long)(extref + 1), len);
+		add_inode_backref(inode_cache, key->objectid, parent,
+				  index, namebuf, len, 0, key->type, error);
+
+		/* Header plus the full on-disk name, even when truncated. */
+		step = sizeof(*extref) + name_len;
+		extref = (struct btrfs_inode_extref *)((char *)extref + step);
+	}
+	return 0;
+}
+
+/*
+ * Count how many bytes of the range [@start, @start + @len) are covered by
+ * checksum items in the csum tree and store the total in @found.
+ *
+ * Returns 0 on success (including when nothing is covered) or a negative
+ * error from the tree search.
+ */
+static int count_csum_range(struct btrfs_root *root, u64 start,
+			    u64 len, u64 *found)
+{
+	struct btrfs_root *csum_root = root->fs_info->csum_root;
+	struct btrfs_key key;
+	struct btrfs_path path;
+	struct extent_buffer *leaf;
+	u64 csum_end;
+	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+	size_t size;
+	int ret;
+
+	*found = 0;
+	btrfs_init_path(&path);
+
+	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+	key.offset = start;
+	key.type = BTRFS_EXTENT_CSUM_KEY;
+
+	ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	/* No exact match: the previous csum item may still overlap @start. */
+	if (ret > 0 && path.slots[0] > 0) {
+		leaf = path.nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
+		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
+		    key.type == BTRFS_EXTENT_CSUM_KEY)
+			path.slots[0]--;
+	}
+
+	while (len > 0) {
+		leaf = path.nodes[0];
+		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(csum_root, &path);
+			if (ret > 0)
+				break;
+			if (ret < 0)
+				goto out;
+			leaf = path.nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+		    key.type != BTRFS_EXTENT_CSUM_KEY)
+			break;
+		if (key.offset >= start + len)
+			break;
+
+		/* Skip the uncovered gap in front of this csum item. */
+		if (key.offset > start)
+			start = key.offset;
+
+		size = btrfs_item_size_nr(leaf, path.slots[0]);
+		csum_end = key.offset + (size / csum_size) * root->sectorsize;
+		if (csum_end > start) {
+			size = min(csum_end - start, len);
+			len -= size;
+			start += size;
+			*found += size;
+		}
+
+		path.slots[0]++;
+	}
+out:
+	btrfs_release_path(&path);
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Account for the EXTENT_DATA item at @slot in the inode record currently
+ * tracked by @active_node.
+ *
+ * Updates the record's extent_start/extent_end and found_size, records a hole
+ * when the item starts past the previous extent end, flags overlaps and
+ * malformed extents in rec->errors, and checks csum coverage for extents
+ * that have a disk bytenr.
+ *
+ * Returns 0 on success or a negative error from add_file_extent_hole() or
+ * count_csum_range().
+ */
+static int process_file_extent(struct btrfs_root *root,
+ struct extent_buffer *eb,
+ int slot, struct btrfs_key *key,
+ struct shared_node *active_node)
+{
+ struct inode_record *rec;
+ struct btrfs_file_extent_item *fi;
+ u64 num_bytes = 0;
+ u64 disk_bytenr = 0;
+ u64 extent_offset = 0;
+ u64 mask = root->sectorsize - 1;
+ int extent_type;
+ int ret;
+
+ rec = active_node->current;
+ BUG_ON(rec->ino != key->objectid || rec->refs > 1);
+ rec->found_file_extent = 1;
+
+ /* (u64)-1 is treated as "no extent seen yet" for this inode. */
+ if (rec->extent_start == (u64)-1) {
+ rec->extent_start = key->offset;
+ rec->extent_end = key->offset;
+ }
+
+ /* Compare this item's file offset with the end of the previous extent. */
+ if (rec->extent_end > key->offset)
+ rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
+ else if (rec->extent_end < key->offset) {
+ ret = add_file_extent_hole(&rec->holes, rec->extent_end,
+ key->offset - rec->extent_end);
+ if (ret < 0)
+ return ret;
+ }
+
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(eb, fi);
+
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
+ if (num_bytes == 0)
+ rec->errors |= I_ERR_BAD_FILE_EXTENT;
+ rec->found_size += num_bytes;
+ /* Round up to a sector multiple; only extent_end uses the rounded value. */
+ num_bytes = (num_bytes + mask) & ~mask;
+ } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
+ extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ num_bytes = btrfs_file_extent_num_bytes(eb, fi);
+ disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+ extent_offset = btrfs_file_extent_offset(eb, fi);
+ /* Length must be non-zero and sector aligned. */
+ if (num_bytes == 0 || (num_bytes & mask))
+ rec->errors |= I_ERR_BAD_FILE_EXTENT;
+ if (num_bytes + extent_offset >
+ btrfs_file_extent_ram_bytes(eb, fi))
+ rec->errors |= I_ERR_BAD_FILE_EXTENT;
+ /* A preallocated extent must not carry any encoding. */
+ if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
+ (btrfs_file_extent_compression(eb, fi) ||
+ btrfs_file_extent_encryption(eb, fi) ||
+ btrfs_file_extent_other_encoding(eb, fi)))
+ rec->errors |= I_ERR_BAD_FILE_EXTENT;
+ /* Extents with disk_bytenr == 0 do not count towards found_size. */
+ if (disk_bytenr > 0)
+ rec->found_size += num_bytes;
+ } else {
+ rec->errors |= I_ERR_BAD_FILE_EXTENT;
+ }
+ rec->extent_end = key->offset + num_bytes;
+
+ /*
+ * The data reloc tree will copy full extents into its inode and then
+ * copy the corresponding csums. Because the extent it copied could be
+ * a preallocated extent that hasn't been written to yet there may be no
+ * csums to copy, ergo we won't have csums for our file extent. This is
+ * ok so just don't bother checking csums if the inode belongs to the
+ * data reloc tree.
+ */
+ if (disk_bytenr > 0 &&
+ btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ u64 found;
+ /*
+ * Compressed: check the whole on-disk extent. Otherwise check
+ * only the referenced part, starting at extent_offset.
+ */
+ if (btrfs_file_extent_compression(eb, fi))
+ num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+ else
+ disk_bytenr += extent_offset;
+
+ ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
+ if (ret < 0)
+ return ret;
+ if (extent_type == BTRFS_FILE_EXTENT_REG) {
+ if (found > 0)
+ rec->found_csum_item = 1;
+ if (found < num_bytes)
+ rec->some_csum_missing = 1;
+ } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ /* Any csum found for a preallocated extent is flagged. */
+ if (found > 0)
+ rec->errors |= I_ERR_ODD_CSUM_ITEM;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Feed every item of leaf @eb into the inode records of the active shared
+ * node.
+ *
+ * Free-space objects and orphan items are skipped.  When the key's objectid
+ * moves past the inode being tracked, that inode is marked checked, possibly
+ * freed, and a record for the new objectid becomes current.  Nothing is done
+ * when the root level is the active node and the root has zero root refs.
+ *
+ * Returns the result of the last per-item handler that ran (0 if none did).
+ */
+static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
+			    struct walk_control *wc)
+{
+	struct shared_node *active_node;
+	struct cache_tree *inode_cache;
+	struct btrfs_key key;
+	u32 nritems;
+	int ret = 0;
+	int i;
+
+	if (wc->root_level == wc->active_node &&
+	    btrfs_root_refs(&root->root_item) == 0)
+		return 0;
+
+	active_node = wc->nodes[wc->active_node];
+	inode_cache = &active_node->inode_cache;
+	nritems = btrfs_header_nritems(eb);
+
+	for (i = 0; i < nritems; i++) {
+		btrfs_item_key_to_cpu(eb, &key, i);
+
+		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID ||
+		    key.type == BTRFS_ORPHAN_ITEM_KEY)
+			continue;
+
+		/* Switch to a new inode record once the objectid advances. */
+		if (!active_node->current ||
+		    active_node->current->ino < key.objectid) {
+			if (active_node->current) {
+				active_node->current->checked = 1;
+				maybe_free_inode_rec(inode_cache,
+						     active_node->current);
+			}
+			active_node->current = get_inode_rec(inode_cache,
+							     key.objectid, 1);
+			BUG_ON(IS_ERR(active_node->current));
+		}
+
+		switch (key.type) {
+		case BTRFS_DIR_ITEM_KEY:
+		case BTRFS_DIR_INDEX_KEY:
+			ret = process_dir_item(root, eb, i, &key, active_node);
+			break;
+		case BTRFS_INODE_REF_KEY:
+			ret = process_inode_ref(eb, i, &key, active_node);
+			break;
+		case BTRFS_INODE_EXTREF_KEY:
+			ret = process_inode_extref(eb, i, &key, active_node);
+			break;
+		case BTRFS_INODE_ITEM_KEY:
+			ret = process_inode_item(eb, i, &key, active_node);
+			break;
+		case BTRFS_EXTENT_DATA_KEY:
+			ret = process_file_extent(root, eb, i, &key,
+						  active_node);
+			break;
+		default:
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Issue readahead for every child block of @node from @slot onwards.
+ * Only level-1 nodes are handled; any other level is a no-op.
+ */
+static void reada_walk_down(struct btrfs_root *root,
+			    struct extent_buffer *node, int slot)
+{
+	int level = btrfs_header_level(node);
+	u32 nritems;
+	u32 blocksize;
+	int i;
+
+	if (level != 1)
+		return;
+
+	nritems = btrfs_header_nritems(node);
+	blocksize = btrfs_level_size(root, level - 1);
+	for (i = slot; i < nritems; i++) {
+		u64 bytenr = btrfs_node_blockptr(node, i);
+		u64 ptr_gen = btrfs_node_ptr_generation(node, i);
+
+		readahead_tree_block(root, bytenr, blocksize, ptr_gen);
+	}
+}
+
+/*
+ * Check the child node/leaf by the following condition:
+ * 1. the first item key of the node/leaf should be the same with the one
+ *    in parent.
+ * 2. block in parent node should match the child node/leaf.
+ * 3. generation of parent node and child's header should be consistent.
+ *
+ * Or the child node/leaf pointed by the key in parent is not valid.
+ *
+ * We hope to check leaf owner too, but since subvol may share leaves,
+ * which makes leaf owner check not so strong, key check should be
+ * sufficient enough for that case.
+ *
+ * All three checks always run so that every mismatch is reported.  In each
+ * message "wanted" is the value recorded in @parent at @slot and "have" is
+ * the value found in @child.
+ *
+ * Returns 0 when the child is consistent with the parent, -EINVAL otherwise.
+ */
+static int check_child_node(struct btrfs_root *root,
+			    struct extent_buffer *parent, int slot,
+			    struct extent_buffer *child)
+{
+	struct btrfs_key parent_key;
+	struct btrfs_key child_key;
+	int ret = 0;
+
+	btrfs_node_key_to_cpu(parent, &parent_key, slot);
+	if (btrfs_header_level(child) == 0)
+		btrfs_item_key_to_cpu(child, &child_key, 0);
+	else
+		btrfs_node_key_to_cpu(child, &child_key, 0);
+
+	if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
+		ret = -EINVAL;
+		fprintf(stderr,
+			"Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
+			parent_key.objectid, parent_key.type, parent_key.offset,
+			child_key.objectid, child_key.type, child_key.offset);
+	}
+	if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
+		ret = -EINVAL;
+		fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
+			btrfs_node_blockptr(parent, slot),
+			btrfs_header_bytenr(child));
+	}
+	if (btrfs_node_ptr_generation(parent, slot) !=
+	    btrfs_header_generation(child)) {
+		ret = -EINVAL;
+		/* wanted = parent's pointer generation, have = child header. */
+		fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
+			btrfs_node_ptr_generation(parent, slot),
+			btrfs_header_generation(child));
+	}
+	return ret;
+}
+
+/*
+ * Descend from path->nodes[*level] towards the leaves, processing the leaf
+ * that is reached and validating every child block on the way.
+ *
+ * Blocks with more than one reference go through enter_shared_node(); ones
+ * that were already seen are skipped.  Each child read is verified with
+ * check_child_node() and btrfs_check_leaf()/btrfs_check_node() before the
+ * path steps into it.
+ *
+ * On return the slot at *level is set past the last item so that
+ * walk_up_tree() moves on.  Returns 0 on success, a negative error on
+ * failure, or the positive value from enter_shared_node() when the starting
+ * block itself was an already-visited shared node.
+ */
+static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
+			  struct walk_control *wc, int *level)
+{
+	enum btrfs_tree_block_status status;
+	u64 bytenr;
+	u64 ptr_gen;
+	struct extent_buffer *next;
+	struct extent_buffer *cur;
+	u32 blocksize;
+	int ret, err = 0;
+	u64 refs;
+
+	WARN_ON(*level < 0);
+	WARN_ON(*level >= BTRFS_MAX_LEVEL);
+	ret = btrfs_lookup_extent_info(NULL, root,
+				       path->nodes[*level]->start,
+				       *level, 1, &refs, NULL);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+
+	if (refs > 1) {
+		ret = enter_shared_node(root, path->nodes[*level]->start,
+					refs, wc, *level);
+		if (ret > 0) {
+			err = ret;
+			goto out;
+		}
+	}
+
+	while (*level >= 0) {
+		WARN_ON(*level < 0);
+		WARN_ON(*level >= BTRFS_MAX_LEVEL);
+		cur = path->nodes[*level];
+
+		if (btrfs_header_level(cur) != *level)
+			WARN_ON(1);
+
+		if (path->slots[*level] >= btrfs_header_nritems(cur))
+			break;
+		if (*level == 0) {
+			ret = process_one_leaf(root, cur, wc);
+			if (ret < 0)
+				err = ret;
+			break;
+		}
+		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
+		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+		blocksize = btrfs_level_size(root, *level - 1);
+		ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
+					       1, &refs, NULL);
+		/* A failed lookup is handled like an unshared block. */
+		if (ret < 0)
+			refs = 0;
+
+		if (refs > 1) {
+			ret = enter_shared_node(root, bytenr, refs,
+						wc, *level - 1);
+			if (ret > 0) {
+				/* Already visited: skip this child. */
+				path->slots[*level]++;
+				continue;
+			}
+		}
+
+		next = btrfs_find_tree_block(root, bytenr, blocksize);
+		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
+			free_extent_buffer(next);
+			reada_walk_down(root, cur, path->slots[*level]);
+			next = read_tree_block(root, bytenr, blocksize,
+					       ptr_gen);
+			if (!extent_buffer_uptodate(next)) {
+				struct btrfs_key node_key;
+
+				btrfs_node_key_to_cpu(path->nodes[*level],
+						      &node_key,
+						      path->slots[*level]);
+				btrfs_add_corrupt_extent_record(root->fs_info,
+						&node_key,
+						path->nodes[*level]->start,
+						root->leafsize, *level);
+				err = -EIO;
+				goto out;
+			}
+		}
+
+		ret = check_child_node(root, cur, path->slots[*level], next);
+		if (ret) {
+			/* Drop our reference, as the next failure path does. */
+			free_extent_buffer(next);
+			err = ret;
+			goto out;
+		}
+
+		if (btrfs_is_leaf(next))
+			status = btrfs_check_leaf(root, NULL, next);
+		else
+			status = btrfs_check_node(root, NULL, next);
+		if (status != BTRFS_TREE_BLOCK_CLEAN) {
+			free_extent_buffer(next);
+			err = -EIO;
+			goto out;
+		}
+
+		*level = *level - 1;
+		free_extent_buffer(path->nodes[*level]);
+		path->nodes[*level] = next;
+		path->slots[*level] = 0;
+	}
+out:
+	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
+	return err;
+}
+
+/*
+ * Move the path upwards from *level until a node with an unvisited slot is
+ * found.
+ *
+ * Returns 0 after advancing that node's slot and storing its level in
+ * *level. Returns 1 when the loop runs out of levels without finding one.
+ * Every exhausted block is released from the path on the way up.
+ */
+static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
+ struct walk_control *wc, int *level)
+{
+ int i;
+ struct extent_buffer *leaf;
+
+ for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
+ /* Despite the name this may be a node at any level. */
+ leaf = path->nodes[i];
+ if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
+ path->slots[i]++;
+ *level = i;
+ return 0;
+ } else {
+ /* *level equals i here: it starts as i and is reset to i + 1 below. */
+ free_extent_buffer(path->nodes[*level]);
+ path->nodes[*level] = NULL;
+ BUG_ON(*level > wc->active_node);
+ /* Leaving the active shared node: hand off to the one above. */
+ if (*level == wc->active_node)
+ leave_shared_node(root, wc, *level);
+ *level = i + 1;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Validate the record of a root directory inode.
+ *
+ * Returns 0 when the inode item exists without errors, nlink is 1 with no
+ * links found, and the first backref is an inode ref named ".." with index 0
+ * and no dir item or dir index.  Returns -1 otherwise.
+ */
+static int check_root_dir(struct inode_record *rec)
+{
+	struct inode_backref *first;
+
+	if (!rec->found_inode_item || rec->errors)
+		return -1;
+	if (rec->nlink != 1 || rec->found_link != 0)
+		return -1;
+	if (list_empty(&rec->backrefs))
+		return -1;
+
+	first = list_entry(rec->backrefs.next, struct inode_backref, list);
+	if (!first->found_inode_ref)
+		return -1;
+	if (first->index != 0 || first->namelen != 2 ||
+	    memcmp(first->name, "..", 2))
+		return -1;
+	if (first->found_dir_index || first->found_dir_item)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Rewrite the size of the inode item of rec->ino with rec->found_size and
+ * clear I_ERR_DIR_ISIZE_WRONG.
+ *
+ * The search uses offset (u64)-1 and then steps back one slot.  Returns 0
+ * on success, -ENOENT when no item of that inode precedes the search
+ * position, or a negative error from the search.  The path is always
+ * released.
+ */
+static int repair_inode_isize(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, struct btrfs_path *path,
+			      struct inode_record *rec)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_inode_item *ei;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = rec->ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		if (path->slots[0] == 0) {
+			ret = -ENOENT;
+			goto out;
+		}
+		path->slots[0]--;
+		ret = 0;
+	}
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	if (key.objectid != rec->ino) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
+	btrfs_set_inode_size(leaf, ei, rec->found_size);
+	btrfs_mark_buffer_dirty(leaf);
+	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
+	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
+	       root->root_key.objectid);
+out:
+	btrfs_release_path(path);
+	return ret;
+}
+
+/*
+ * Add an orphan item for rec->ino.  I_ERR_NO_ORPHAN_ITEM is cleared only
+ * when the insertion succeeds.  The path is released either way and the
+ * result of btrfs_add_orphan_item() is returned.
+ */
+static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root,
+				    struct btrfs_path *path,
+				    struct inode_record *rec)
+{
+	int ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
+
+	btrfs_release_path(path);
+	if (ret)
+		return ret;
+
+	rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
+	return 0;
+}
+
+/*
+ * Rewrite nbytes of the inode item of rec->ino with rec->found_size and
+ * clear I_ERR_FILE_NBYTES_WRONG.
+ *
+ * Returns 0 on success, -ENOENT when the inode item does not exist, or a
+ * negative error from the search.  The path is always released.
+ */
+static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct inode_record *rec)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_inode_item *ei;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = rec->ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret)
+		goto out;
+
+	/* Exact match, so the slot is the inode item itself. */
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
+	btrfs_set_inode_nbytes(leaf, ei, rec->found_size);
+	btrfs_mark_buffer_dirty(leaf);
+	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
+	printf("reset nbytes for ino %llu root %llu\n",
+	       rec->ino, root->root_key.objectid);
+out:
+	btrfs_release_path(path);
+	return ret;
+}
+
+/*
+ * Insert the DIR_INDEX item that @backref lacks, in its own transaction, and
+ * update the in-memory bookkeeping.
+ *
+ * The backref is marked as having a dir index.  If the parent directory has
+ * a record in @inode_cache, its found_size grows by the name length and its
+ * I_ERR_DIR_ISIZE_WRONG flag is recomputed against isize.
+ *
+ * Returns 0 on success, -ENOMEM if no path can be allocated, or the error
+ * from starting the transaction.  A failed item insertion hits BUG_ON.
+ */
+static int add_missing_dir_index(struct btrfs_root *root,
+				 struct cache_tree *inode_cache,
+				 struct inode_record *rec,
+				 struct inode_backref *backref)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct btrfs_dir_item *dir_item;
+	struct btrfs_disk_key disk_key;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct inode_record *dir_rec;
+	u32 data_size = sizeof(*dir_item) + backref->namelen;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
+		(unsigned long long)rec->ino);
+
+	key.objectid = backref->dir;
+	key.type = BTRFS_DIR_INDEX_KEY;
+	key.offset = backref->index;
+	ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+
+	/* The new entry points at the inode item of rec->ino. */
+	disk_key.objectid = cpu_to_le64(rec->ino);
+	disk_key.type = BTRFS_INODE_ITEM_KEY;
+	disk_key.offset = 0;
+
+	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
+	btrfs_set_dir_data_len(leaf, dir_item, 0);
+	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
+	write_extent_buffer(leaf, backref->name,
+			    (unsigned long)(dir_item + 1), backref->namelen);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_free_path(path);
+	btrfs_commit_transaction(trans, root);
+
+	backref->found_dir_index = 1;
+
+	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
+	BUG_ON(IS_ERR(dir_rec));
+	if (!dir_rec)
+		return 0;
+
+	dir_rec->found_size += backref->namelen;
+	if (dir_rec->found_size == dir_rec->isize)
+		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
+	else
+		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
+
+	return 0;
+}
+
+/*
+ * Delete the DIR_INDEX entry described by @backref in its own transaction.
+ *
+ * An -ENOENT from the lookup is treated as success.  Returns 0 on success,
+ * -ENOMEM if no path can be allocated, or the error from starting the
+ * transaction or from the lookup.  A failed deletion hits BUG_ON.
+ */
+static int delete_dir_index(struct btrfs_root *root,
+			    struct cache_tree *inode_cache,
+			    struct inode_record *rec,
+			    struct inode_backref *backref)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct btrfs_dir_item *di;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
+		(unsigned long long)backref->dir,
+		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
+		(unsigned long long)root->objectid);
+
+	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
+				    backref->name, backref->namelen,
+				    backref->index, -1);
+	if (IS_ERR(di)) {
+		ret = PTR_ERR(di);
+		if (ret == -ENOENT)
+			ret = 0;
+	} else {
+		/* A NULL result means: delete the item the path points at. */
+		if (di)
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		else
+			ret = btrfs_del_item(trans, root, path);
+		BUG_ON(ret);
+	}
+
+	btrfs_free_path(path);
+	btrfs_commit_transaction(trans, root);
+	return ret;
+}
+
+/*
+ * Recreate a missing inode item for rec->ino in its own transaction, from
+ * what the check has gathered so far.
+ *
+ * nlink is 1 for a root directory (@root_dir set), otherwise the number of
+ * links found.  The inode becomes a directory (mode S_IFDIR | 0755, size =
+ * found_size) if any dir item was seen, else a regular file (mode
+ * S_IFREG | 0755, size = extent_end).  atime/ctime/mtime are set to the
+ * current time and otime to zero.
+ *
+ * Returns 0 on success or the error from starting the transaction.  A
+ * failed insertion hits BUG_ON.
+ */
+static int create_inode_item(struct btrfs_root *root,
+			     struct inode_record *rec,
+			     struct inode_backref *backref, int root_dir)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_inode_item inode_item;
+	time_t now = time(NULL);
+	int ret;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
+		"be incomplete, please check permissions and content after "
+		"the fsck completes.\n", (unsigned long long)root->objectid,
+		(unsigned long long)rec->ino);
+
+	memset(&inode_item, 0, sizeof(inode_item));
+	btrfs_set_stack_inode_generation(&inode_item, trans->transid);
+	btrfs_set_stack_inode_nlink(&inode_item,
+				    root_dir ? 1 : rec->found_link);
+	btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
+
+	if (rec->found_dir_item) {
+		if (rec->found_file_extent)
+			fprintf(stderr, "root %llu inode %llu has both a dir "
+				"item and extents, unsure if it is a dir or a "
+				"regular file so setting it as a directory\n",
+				(unsigned long long)root->objectid,
+				(unsigned long long)rec->ino);
+		btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
+		btrfs_set_stack_inode_size(&inode_item, rec->found_size);
+	} else {
+		btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
+		btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
+	}
+
+	btrfs_set_stack_timespec_sec(&inode_item.atime, now);
+	btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
+	btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
+	btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
+	btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
+	btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
+	btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
+	btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
+
+	ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
+	BUG_ON(ret);
+	btrfs_commit_transaction(trans, root);
+	return 0;
+}
+
+/*
+ * Repair the backrefs of @rec.
+ *
+ * With @delete set, dir index items that have no matching inode ref, or
+ * whose index does not match it, are deleted.  With @delete clear, missing
+ * pieces are added instead: a missing root directory inode item, a missing
+ * dir index, a missing dir index/item pair, or a missing inode item for an
+ * otherwise complete backref.
+ *
+ * A backref freed during an iteration is never touched again: the loop moves
+ * straight on to the next entry.
+ *
+ * Returns a negative error on failure, otherwise the number of repairs made.
+ */
+static int repair_inode_backrefs(struct btrfs_root *root,
+				 struct inode_record *rec,
+				 struct cache_tree *inode_cache,
+				 int delete)
+{
+	struct inode_backref *tmp, *backref;
+	u64 root_dirid = btrfs_root_dirid(&root->root_item);
+	int ret = 0;
+	int repaired = 0;
+
+	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
+		if (!delete && rec->ino == root_dirid) {
+			if (!rec->found_inode_item) {
+				ret = create_inode_item(root, rec, backref, 1);
+				if (ret)
+					break;
+				repaired++;
+			}
+		}
+
+		/* Index 0 for root dir's are special, don't mess with it */
+		if (rec->ino == root_dirid && backref->index == 0)
+			continue;
+
+		if (delete &&
+		    ((backref->found_dir_index && !backref->found_inode_ref) ||
+		     (backref->found_dir_index && backref->found_inode_ref &&
+		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
+			ret = delete_dir_index(root, inode_cache, rec, backref);
+			if (ret)
+				break;
+			repaired++;
+			list_del(&backref->list);
+			free(backref);
+			/* backref is gone, nothing below may look at it */
+			continue;
+		}
+
+		if (!delete && !backref->found_dir_index &&
+		    backref->found_dir_item && backref->found_inode_ref) {
+			ret = add_missing_dir_index(root, inode_cache, rec,
+						    backref);
+			if (ret)
+				break;
+			repaired++;
+			if (backref->found_dir_item &&
+			    backref->found_dir_index) {
+				if (!backref->errors &&
+				    backref->found_inode_ref) {
+					list_del(&backref->list);
+					free(backref);
+					/*
+					 * The checks below would read the
+					 * freed backref, so skip them.
+					 */
+					continue;
+				}
+			}
+		}
+
+		if (!delete && (!backref->found_dir_index &&
+				!backref->found_dir_item &&
+				backref->found_inode_ref)) {
+			struct btrfs_trans_handle *trans;
+			struct btrfs_key location;
+
+			ret = check_dir_conflict(root, backref->name,
+						 backref->namelen,
+						 backref->dir,
+						 backref->index);
+			if (ret) {
+				/*
+				 * let nlink fixing routine to handle it,
+				 * which can do it better.
+				 */
+				ret = 0;
+				break;
+			}
+			location.objectid = rec->ino;
+			location.type = BTRFS_INODE_ITEM_KEY;
+			location.offset = 0;
+
+			trans = btrfs_start_transaction(root, 1);
+			if (IS_ERR(trans)) {
+				ret = PTR_ERR(trans);
+				break;
+			}
+			fprintf(stderr, "adding missing dir index/item pair "
+				"for inode %llu\n",
+				(unsigned long long)rec->ino);
+			ret = btrfs_insert_dir_item(trans, root, backref->name,
+						    backref->namelen,
+						    backref->dir, &location,
+						    imode_to_type(rec->imode),
+						    backref->index);
+			BUG_ON(ret);
+			btrfs_commit_transaction(trans, root);
+			repaired++;
+		}
+
+		if (!delete && (backref->found_inode_ref &&
+				backref->found_dir_index &&
+				backref->found_dir_item &&
+				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
+				!rec->found_inode_item)) {
+			ret = create_inode_item(root, rec, backref, 0);
+			if (ret)
+				break;
+			repaired++;
+		}
+
+	}
+	return ret ? ret : repaired;
+}
+
+/*
+ * To determine the file type for nlink/inode_item repair
+ *
+ * The inode item's mode is used when the item exists; otherwise the type
+ * comes from the first backref that has a dir index or dir item.
+ *
+ * Return 0 if file type is found and BTRFS_FT_* is stored into type.
+ * Return -ENOENT if file type is not found.
+ */
+static int find_file_type(struct inode_record *rec, u8 *type)
+{
+	struct inode_backref *backref;
+
+	/* For inode item recovered case */
+	if (rec->found_inode_item) {
+		*type = imode_to_type(rec->imode);
+		return 0;
+	}
+
+	list_for_each_entry(backref, &rec->backrefs, list) {
+		if (!backref->found_dir_index && !backref->found_dir_item)
+			continue;
+		*type = backref->filetype;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+/*
+ * To determine the file name for nlink repair
+ *
+ * The name is copied from the first backref that has a dir index, a dir
+ * item or an inode ref.  The copy is not NUL terminated; @name must have
+ * room for backref->namelen bytes.
+ *
+ * Return 0 if file name is found, set name and namelen.
+ * Return -ENOENT if file name is not found.
+ */
+static int find_file_name(struct inode_record *rec,
+			  char *name, int *namelen)
+{
+	struct inode_backref *backref;
+
+	list_for_each_entry(backref, &rec->backrefs, list) {
+		if (!backref->found_dir_index && !backref->found_dir_item &&
+		    !backref->found_inode_ref)
+			continue;
+		memcpy(name, backref->name, backref->namelen);
+		*namelen = backref->namelen;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+/*
+ * Reset the nlink of the inode to the correct one
+ *
+ * Every backref is unlinked, the inode item's nlink is set to 0, and the
+ * backrefs that had a dir index, a dir item and an inode ref are linked back
+ * in. rec->found_link ends up as the number of backrefs that were kept.
+ *
+ * Returns a negative error from btrfs_unlink(), btrfs_search_slot() or
+ * btrfs_add_link(), -ENOENT when the inode item is not found, and otherwise
+ * the non-negative result of the last call made. The path is always
+ * released.
+ */
+static int reset_nlink(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct inode_record *rec)
+{
+ struct inode_backref *backref;
+ struct inode_backref *tmp;
+ struct btrfs_key key;
+ struct btrfs_inode_item *inode_item;
+ int ret = 0;
+
+ /* We don't believe this either, reset it and iterate backref */
+ rec->found_link = 0;
+
+ /* Remove all backref including the valid ones */
+ list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
+ ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
+ backref->index, backref->name,
+ backref->namelen, 0);
+ if (ret < 0)
+ goto out;
+
+ /* remove invalid backref, so it won't be added back */
+ if (!(backref->found_dir_index &&
+ backref->found_dir_item &&
+ backref->found_inode_ref)) {
+ list_del(&backref->list);
+ free(backref);
+ } else {
+ /* Complete backref: kept in the list and re-added below. */
+ rec->found_link++;
+ }
+ }
+
+ /* Set nlink to 0 */
+ key.objectid = rec->ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+ inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_item);
+ btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+ btrfs_release_path(path);
+
+ /*
+ * Add back valid inode_ref/dir_item/dir_index,
+ * add_link() will handle the nlink inc, so new nlink must be correct
+ */
+ list_for_each_entry(backref, &rec->backrefs, list) {
+ ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
+ backref->name, backref->namelen,
+ backref->filetype, &backref->index, 1);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
+ * Fix the link count of the inode described by @rec.
+ *
+ * The file name and type are looked up before reset_nlink() runs, falling
+ * back to the inode number as name and to a regular file as type.  If
+ * rec->found_link is still 0 after reset_nlink(), the inode is linked into a
+ * "lost+found" directory obtained through btrfs_mkdir() with
+ * BTRFS_FIRST_FREE_OBJECTID as parent.  On a name collision ".INO" is
+ * appended until the link succeeds or the name would exceed BTRFS_NAME_LEN.
+ *
+ * I_ERR_LINK_COUNT_WRONG is always cleared from @rec and @path is always
+ * released, whether the repair worked or not.
+ *
+ * Returns a negative errno on failure.
+ */
+static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct inode_record *rec)
+{
+	char *dir_name = "lost+found";
+	/*
+	 * One byte more than the longest valid name, so that snprintf() can
+	 * always NUL terminate without cutting off the last character.
+	 */
+	char namebuf[BTRFS_NAME_LEN + 1] = {0};
+	u64 lost_found_ino;
+	u32 mode = 0700;
+	u8 type = 0;
+	int namelen = 0;
+	int name_recovered = 0;
+	int type_recovered = 0;
+	int ret = 0;
+
+	/*
+	 * Get file name and type first, before the invalid inode refs are
+	 * dropped by reset_nlink().
+	 */
+	name_recovered = !find_file_name(rec, namebuf, &namelen);
+	type_recovered = !find_file_type(rec, &type);
+
+	if (!name_recovered) {
+		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
+		       rec->ino, rec->ino);
+		namelen = count_digits(rec->ino);
+		snprintf(namebuf, sizeof(namebuf), "%llu", rec->ino);
+	}
+	if (!type_recovered) {
+		printf("Can't get file type for inode %llu, using FILE as fallback\n",
+		       rec->ino);
+		type = BTRFS_FT_REG_FILE;
+	}
+
+	ret = reset_nlink(trans, root, path, rec);
+	if (ret < 0) {
+		fprintf(stderr,
+			"Failed to reset nlink for inode %llu: %s\n",
+			rec->ino, strerror(-ret));
+		goto out;
+	}
+
+	if (rec->found_link == 0) {
+		/* Pick an inode number above everything seen in this root */
+		lost_found_ino = root->highest_inode;
+		if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
+			ret = -EOVERFLOW;
+			goto out;
+		}
+		lost_found_ino++;
+		ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
+				  BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
+				  mode);
+		if (ret < 0) {
+			fprintf(stderr, "Failed to create '%s' dir: %s\n",
+				dir_name, strerror(-ret));
+			goto out;
+		}
+		ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
+				     namebuf, namelen, type, NULL, 1);
+		/*
+		 * Add ".INO" suffix several times to handle the case where
+		 * "FILENAME.INO" is already taken by another file.
+		 */
+		while (ret == -EEXIST) {
+			/* +1 for the '.' separator */
+			int suffix_len = count_digits(rec->ino) + 1;
+
+			if (namelen + suffix_len > BTRFS_NAME_LEN) {
+				ret = -EFBIG;
+				goto out;
+			}
+			snprintf(namebuf + namelen, sizeof(namebuf) - namelen,
+				 ".%llu", rec->ino);
+			namelen += suffix_len;
+			ret = btrfs_add_link(trans, root, rec->ino,
+					     lost_found_ino, namebuf,
+					     namelen, type, NULL, 1);
+		}
+		if (ret < 0) {
+			fprintf(stderr,
+				"Failed to link the inode %llu to %s dir: %s\n",
+				rec->ino, dir_name, strerror(-ret));
+			goto out;
+		}
+		/*
+		 * Just increase the found_link, don't actually add the
+		 * backref. This will make things easier and this inode
+		 * record will be freed after the repair is done.
+		 * So fsck will not report problem about this inode.
+		 */
+		rec->found_link++;
+		printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
+		       namelen, namebuf, dir_name);
+	}
+	printf("Fixed the nlink of inode %llu\n", rec->ino);
+out:
+	/*
+	 * Clear the flag anyway, or we will loop forever for the same inode
+	 * as it will not be removed from the bad inode list and the dead loop
+	 * happens.
+	 */
+	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
+	btrfs_release_path(path);
+	return ret;
+}
+
+/*
+ * Check if there is any normal (regular or prealloc) file extent for the
+ * given ino.
+ * This is used to determine the file type when neither its dir_index/item nor
+ * its inode_item exists.
+ *
+ * Returns 1 if an EXTENT_DATA item of @ino with a type other than
+ * BTRFS_FILE_EXTENT_INLINE is found, 0 otherwise.
+ *
+ * This will *NOT* report errors: if anything fails, the inode is simply
+ * considered to have no normal file extent.
+ */
+static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_file_extent_item *fi;
+	u8 type;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return 0;
+	key.objectid = ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		ret = 0;
+		goto out;
+	}
+	/*
+	 * A positive search result only means "no item at offset 0"; from here
+	 * on ret reports nothing but whether a normal extent was found.
+	 */
+	ret = 0;
+	while (1) {
+		/* Ran off the end of this leaf, continue in the next one */
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			if (btrfs_next_leaf(root, path))
+				break;
+		}
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+				      path->slots[0]);
+		if (found_key.objectid != ino ||
+		    found_key.type != BTRFS_EXTENT_DATA_KEY)
+			break;
+		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_file_extent_item);
+		type = btrfs_file_extent_type(path->nodes[0], fi);
+		if (type != BTRFS_FILE_EXTENT_INLINE) {
+			ret = 1;
+			break;
+		}
+		/* Inline extent: keep looking at the following items */
+		path->slots[0]++;
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
+ * type bits.
+ *
+ * Returns 0 for types without an entry in the table, including values that
+ * lie beyond it (the type may come from a corrupted item).
+ */
+static u32 btrfs_type_to_imode(u8 type)
+{
+	static const u32 imode_by_btrfs_type[] = {
+		[BTRFS_FT_REG_FILE]	= S_IFREG,
+		[BTRFS_FT_DIR]		= S_IFDIR,
+		[BTRFS_FT_CHRDEV]	= S_IFCHR,
+		[BTRFS_FT_BLKDEV]	= S_IFBLK,
+		[BTRFS_FT_FIFO]		= S_IFIFO,
+		[BTRFS_FT_SOCK]		= S_IFSOCK,
+		[BTRFS_FT_SYMLINK]	= S_IFLNK,
+	};
+
+	/* Never index past the table with an unknown type */
+	if (type >= sizeof(imode_by_btrfs_type) /
+		    sizeof(imode_by_btrfs_type[0]))
+		return 0;
+
+	return imode_by_btrfs_type[type];
+}
+
+/*
+ * Recreate the missing inode item of @rec.
+ *
+ * Only the inode item is rebuilt here (mode 0700 plus the guessed file type).
+ * The link count is left at 0 and I_ERR_LINK_COUNT_WRONG is set so that the
+ * nlink repair runs afterwards.
+ *
+ * Returns the result of btrfs_new_inode().
+ */
+static int repair_inode_no_item(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_path *path,
+				struct inode_record *rec)
+{
+	u8 filetype;
+	u32 mode = 0700;
+	u32 imode;
+	int ret;
+
+	printf("Trying to rebuild inode:%llu\n", rec->ino);
+
+	/*
+	 * Try to determine inode type if type not found.
+	 *
+	 * For found regular file extent, it must be FILE.
+	 * For found dir_item/index, it must be DIR.
+	 *
+	 * For undetermined one, use FILE as fallback.
+	 *
+	 * TODO:
+	 * 1. If found backref(inode_index/item is already handled) to it,
+	 *    it must be DIR.
+	 *    Need new inode-inode ref structure to allow search for that.
+	 */
+	if (find_file_type(rec, &filetype)) {
+		if (rec->found_file_extent &&
+		    find_normal_file_extent(root, rec->ino)) {
+			filetype = BTRFS_FT_REG_FILE;
+		} else if (rec->found_dir_item) {
+			filetype = BTRFS_FT_DIR;
+		} else {
+			/* Orphan extents also imply a regular file, silently */
+			if (list_empty(&rec->orphan_extents))
+				printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
+				       rec->ino);
+			filetype = BTRFS_FT_REG_FILE;
+		}
+	}
+
+	imode = mode | btrfs_type_to_imode(filetype);
+	ret = btrfs_new_inode(trans, root, rec->ino, imode);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * The inode item exists again; linking it somewhere (e.g. into
+	 * lost+found) is the job of the nlink repair, so just update the
+	 * record and make sure that repair gets called.
+	 *
+	 * NOTE(review): found_dir_item is set here although an inode item was
+	 * created - confirm this is intended.
+	 */
+	rec->found_dir_item = 1;
+	rec->imode = imode;
+	rec->nlink = 0;
+	rec->errors &= ~I_ERR_NO_INODE_ITEM;
+	rec->errors |= I_ERR_LINK_COUNT_WRONG;
+	return ret;
+}
+
+/*
+ * Re-attach the orphan data extents recorded in @rec to their inode.
+ *
+ * Each orphan gets a file extent item inserted for it, is accounted in
+ * rec->found_size, is removed from the hole tracking of @rec and is then
+ * freed.  I_ERR_FILE_EXTENT_ORPHAN is cleared once the list is empty.
+ *
+ * Returns a negative value from the first failing step; the orphans not
+ * yet handled stay on the list in that case.
+ */
+static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      struct inode_record *rec)
+{
+	struct orphan_data_extent *cur;
+	int ret = 0;
+
+	/* Every successful round removes the head of the list */
+	while (!list_empty(&rec->orphan_extents)) {
+		cur = list_entry(rec->orphan_extents.next,
+				 struct orphan_data_extent, list);
+
+		/*
+		 * Look for a conflicting file extent.  Whether the extent is
+		 * compressed is unknown here, so it is assumed to be
+		 * uncompressed without data offset and disk_len is used as
+		 * the extent length.
+		 */
+		ret = btrfs_get_extent(NULL, root, path, cur->objectid,
+				       cur->offset, cur->disk_len, 0);
+		btrfs_release_path(path);
+		if (ret < 0)
+			return ret;
+		if (ret == 0) {
+			fprintf(stderr,
+				"orphan extent (%llu, %llu) conflicts, delete the orphan\n",
+				cur->disk_bytenr, cur->disk_len);
+			ret = btrfs_free_extent(trans,
+					root->fs_info->extent_root,
+					cur->disk_bytenr, cur->disk_len,
+					0, root->objectid, cur->objectid,
+					cur->offset);
+			if (ret < 0)
+				return ret;
+		}
+		/*
+		 * NOTE(review): the file extent is inserted even after a
+		 * conflict was reported and the extent freed above - confirm
+		 * that this is intended.
+		 */
+		ret = btrfs_insert_file_extent(trans, root, cur->objectid,
+				cur->offset, cur->disk_bytenr,
+				cur->disk_len, cur->disk_len);
+		if (ret < 0)
+			return ret;
+
+		/* Account the recovered bytes in the file size info */
+		rec->found_size += cur->disk_len;
+		if (rec->found_size == rec->nbytes)
+			rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
+
+		/* The range is no longer a hole */
+		ret = del_file_extent_hole(&rec->holes, cur->offset,
+					   cur->disk_len);
+		if (ret < 0)
+			return ret;
+		if (RB_EMPTY_ROOT(&rec->holes))
+			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
+
+		list_del(&cur->list);
+		free(cur);
+	}
+	rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
+	return ret;
+}
+
+/*
+ * Fill the recorded file extent holes of @rec by punching explicit holes.
+ *
+ * If no hole is recorded at all, one hole covering the whole file (isize
+ * rounded up to the sector size) is punched instead.
+ *
+ * Returns a negative value if punching or hole bookkeeping fails.
+ */
+static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_path *path,
+					struct inode_record *rec)
+{
+	struct file_extent_hole *hole;
+	struct rb_node *first;
+	int had_holes = 0;
+	int ret = 0;
+
+	/* Always take the first hole; each round removes it from the tree */
+	for (first = rb_first(&rec->holes); first;
+	     first = rb_first(&rec->holes)) {
+		had_holes = 1;
+		hole = rb_entry(first, struct file_extent_hole, node);
+
+		ret = btrfs_punch_hole(trans, root, rec->ino,
+				       hole->start, hole->len);
+		if (ret < 0)
+			return ret;
+
+		ret = del_file_extent_hole(&rec->holes, hole->start,
+					   hole->len);
+		if (ret < 0)
+			return ret;
+
+		if (RB_EMPTY_ROOT(&rec->holes))
+			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
+	}
+
+	/* special case for a file losing all its file extents */
+	if (!had_holes) {
+		ret = btrfs_punch_hole(trans, root, rec->ino, 0,
+				       round_up(rec->isize, root->sectorsize));
+		if (ret < 0)
+			return ret;
+	}
+
+	printf("Fixed discount file extents for inode: %llu in root: %llu\n",
+	       rec->ino, root->objectid);
+	return ret;
+}
+
+/*
+ * Run the repair helpers that match the error bits set in @rec inside one
+ * transaction.
+ *
+ * The helpers run in a fixed order and the chain stops at the first one
+ * that returns non-zero; the transaction is committed either way.
+ *
+ * Returns rec->errors unchanged if none of the repairable bits is set,
+ * -ENOMEM or the transaction start error, otherwise the result of the last
+ * helper that ran.
+ */
+static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	int ret = 0;
+
+	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
+			     I_ERR_NO_ORPHAN_ITEM |
+			     I_ERR_LINK_COUNT_WRONG |
+			     I_ERR_NO_INODE_ITEM |
+			     I_ERR_FILE_EXTENT_ORPHAN |
+			     I_ERR_FILE_EXTENT_DISCOUNT|
+			     I_ERR_FILE_NBYTES_WRONG)))
+		return rec->errors;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * The nlink repair may create a dir and add a link, which needs:
+	 * 2 for parent(256)'s dir_index and dir_item
+	 * 2 for lost+found dir's inode_item and inode_ref
+	 * 1 for the new inode_ref of the file
+	 * 2 for lost+found dir's dir_index and dir_item for the file
+	 */
+	trans = btrfs_start_transaction(root, 7);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	/* Earlier helpers may set or clear bits tested by the later ones */
+	if (rec->errors & I_ERR_NO_INODE_ITEM) {
+		ret = repair_inode_no_item(trans, root, path, rec);
+		if (ret)
+			goto commit;
+	}
+	if (rec->errors & I_ERR_FILE_EXTENT_ORPHAN) {
+		ret = repair_inode_orphan_extent(trans, root, path, rec);
+		if (ret)
+			goto commit;
+	}
+	if (rec->errors & I_ERR_FILE_EXTENT_DISCOUNT) {
+		ret = repair_inode_discount_extent(trans, root, path, rec);
+		if (ret)
+			goto commit;
+	}
+	if (rec->errors & I_ERR_DIR_ISIZE_WRONG) {
+		ret = repair_inode_isize(trans, root, path, rec);
+		if (ret)
+			goto commit;
+	}
+	if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
+		ret = repair_inode_orphan_item(trans, root, path, rec);
+		if (ret)
+			goto commit;
+	}
+	if (rec->errors & I_ERR_LINK_COUNT_WRONG) {
+		ret = repair_inode_nlinks(trans, root, path, rec);
+		if (ret)
+			goto commit;
+	}
+	if (rec->errors & I_ERR_FILE_NBYTES_WRONG)
+		ret = repair_inode_nbytes(trans, root, path, rec);
+commit:
+	btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int check_inode_recs(struct btrfs_root *root,
+ struct cache_tree *inode_cache)
+{ /*
+ * Verify the inode records collected in @inode_cache for @root and,
+ * when the file-level 'repair' flag is set, try to fix them.
+ *
+ * Returns 0 if no error was counted, -1 if at least one was, -EAGAIN
+ * when a repair changed things so that the records have to be
+ * collected again, or a negative value from repair_inode_backrefs().
+ * The final loop removes every record from the cache and frees it;
+ * the early returns for a root without refs and for a recreated root
+ * dir do not.
+ */
+ struct cache_extent *cache;
+ struct ptr_node *node;
+ struct inode_record *rec;
+ struct inode_backref *backref;
+ int stage = 0; /* 1 and 2: backref repair passes, 3: drop all records
+ * after a pass reported a problem */
+ int ret = 0;
+ int err = 0;
+ u64 error = 0; /* number of inodes reported as bad */
+ u64 root_dirid = btrfs_root_dirid(&root->root_item);
+
+ if (btrfs_root_refs(&root->root_item) == 0) {
+ if (!cache_tree_empty(inode_cache))
+ fprintf(stderr, "warning line %d\n", __LINE__);
+ return 0;
+ }
+
+ /*
+ * We need to record the highest inode number for later 'lost+found'
+ * dir creation.
+ * We must select an ino not used/referred to by any existing inode, or
+ * 'lost+found' ino may be a missing ino in a corrupted leaf,
+ * this may cause 'lost+found' dir to have wrong nlinks.
+ */
+ cache = last_cache_extent(inode_cache);
+ if (cache) {
+ node = container_of(cache, struct ptr_node, cache);
+ rec = node->data;
+ if (rec->ino > root->highest_inode)
+ root->highest_inode = rec->ino;
+ }
+
+ /*
+ * We need to repair backrefs first because we could change some of the
+ * errors in the inode recs.
+ *
+ * We also need to go through and delete invalid backrefs first and then
+ * add the correct ones second. We do this because we may get EEXIST
+ * when adding back the correct index because we hadn't yet deleted the
+ * invalid index.
+ *
+ * For example, if we were missing a dir index then the directory's
+ * isize would be wrong, so if we fixed the isize to what we thought it
+ * would be and then fixed the backref we'd still have an invalid fs, so
+ * we need to add back the dir index and then check to see if the isize
+ * is still wrong.
+ *
+ * The inner loop only runs when 'repair' is set. Stage 3 is entered
+ * only if an earlier stage set err.
+ */
+ while (stage < 3) {
+ stage++;
+ if (stage == 3 && !err)
+ break;
+
+ cache = search_cache_extent(inode_cache, 0);
+ while (repair && cache) {
+ node = container_of(cache, struct ptr_node, cache);
+ rec = node->data;
+ cache = next_cache_extent(cache); /* advance first, the node may be freed below */
+
+ /* Need to free everything up and rescan */
+ if (stage == 3) {
+ remove_cache_extent(inode_cache, &node->cache);
+ free(node);
+ free_inode_rec(rec);
+ continue;
+ }
+
+ if (list_empty(&rec->backrefs))
+ continue;
+
+ ret = repair_inode_backrefs(root, rec, inode_cache,
+ stage == 1);
+ if (ret < 0) {
+ err = ret;
+ stage = 2; /* the next outer round becomes stage 3 */
+ break;
+ } if (ret > 0) {
+ err = -EAGAIN;
+ }
+ }
+ }
+ if (err)
+ return err;
+
+ rec = get_inode_rec(inode_cache, root_dirid, 0);
+ BUG_ON(IS_ERR(rec));
+ if (rec) {
+ ret = check_root_dir(rec);
+ if (ret) {
+ fprintf(stderr, "root %llu root dir %llu error\n",
+ (unsigned long long)root->root_key.objectid,
+ (unsigned long long)root_dirid);
+ print_inode_error(root, rec);
+ error++;
+ }
+ } else {
+ if (repair) {
+ struct btrfs_trans_handle *trans;
+
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ return err;
+ }
+
+ fprintf(stderr,
+ "root %llu missing its root dir, recreating\n",
+ (unsigned long long)root->objectid);
+
+ ret = btrfs_make_root_dir(trans, root, root_dirid);
+ BUG_ON(ret);
+
+ btrfs_commit_transaction(trans, root);
+ return -EAGAIN;
+ }
+
+ fprintf(stderr, "root %llu root dir %llu not found\n",
+ (unsigned long long)root->root_key.objectid,
+ (unsigned long long)root_dirid);
+ }
+
+ while (1) { /* drain the cache: every record is removed, handled and freed */
+ cache = search_cache_extent(inode_cache, 0);
+ if (!cache)
+ break;
+ node = container_of(cache, struct ptr_node, cache);
+ rec = node->data;
+ remove_cache_extent(inode_cache, &node->cache);
+ free(node);
+ if (rec->ino == root_dirid ||
+ rec->ino == BTRFS_ORPHAN_OBJECTID) {
+ free_inode_rec(rec);
+ continue;
+ }
+
+ if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
+ ret = check_orphan_item(root, rec->ino);
+ if (ret == 0)
+ rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
+ if (can_free_inode_rec(rec)) {
+ free_inode_rec(rec);
+ continue;
+ }
+ }
+
+ if (!rec->found_inode_item)
+ rec->errors |= I_ERR_NO_INODE_ITEM;
+ if (rec->found_link != rec->nlink)
+ rec->errors |= I_ERR_LINK_COUNT_WRONG;
+ if (repair) {
+ ret = try_repair_inode(root, rec);
+ if (ret == 0 && can_free_inode_rec(rec)) {
+ free_inode_rec(rec);
+ continue;
+ }
+ ret = 0; /* NOTE(review): with this reset the test below never
+ * counts an error while 'repair' is set, even if
+ * try_repair_inode() failed - confirm intended */
+ }
+
+ if (!(repair && ret == 0))
+ error++;
+ print_inode_error(root, rec);
+ list_for_each_entry(backref, &rec->backrefs, list) {
+ if (!backref->found_dir_item)
+ backref->errors |= REF_ERR_NO_DIR_ITEM;
+ if (!backref->found_dir_index)
+ backref->errors |= REF_ERR_NO_DIR_INDEX;
+ if (!backref->found_inode_ref)
+ backref->errors |= REF_ERR_NO_INODE_REF;
+ fprintf(stderr, "\tunresolved ref dir %llu index %llu"
+ " namelen %u name %s filetype %d errors %x",
+ (unsigned long long)backref->dir,
+ (unsigned long long)backref->index,
+ backref->namelen, backref->name,
+ backref->filetype, backref->errors);
+ print_ref_error(backref->errors);
+ }
+ free_inode_rec(rec);
+ }
+ return (error > 0) ? -1 : 0;
+}
+
+/*
+ * Look up the root record for @objectid in @root_cache, creating and
+ * inserting a zeroed one if it does not exist yet.
+ *
+ * Returns the record, ERR_PTR(-ENOMEM) if the allocation fails or
+ * ERR_PTR(-EEXIST) if the new record cannot be inserted.
+ */
+static struct root_record *get_root_rec(struct cache_tree *root_cache,
+					u64 objectid)
+{
+	struct cache_extent *cache;
+	struct root_record *rec = NULL;
+	int ret;
+
+	cache = lookup_cache_extent(root_cache, objectid, 1);
+	if (cache) {
+		rec = container_of(cache, struct root_record, cache);
+	} else {
+		rec = calloc(1, sizeof(*rec));
+		if (!rec)
+			return ERR_PTR(-ENOMEM);
+		rec->objectid = objectid;
+		INIT_LIST_HEAD(&rec->backrefs);
+		rec->cache.start = objectid;
+		rec->cache.size = 1;
+
+		ret = insert_cache_extent(root_cache, &rec->cache);
+		if (ret) {
+			/* Nobody else references the new record, don't leak it */
+			free(rec);
+			return ERR_PTR(-EEXIST);
+		}
+	}
+	return rec;
+}
+
+/*
+ * Find the backref of @rec matching (@ref_root, @dir, @name); if there is
+ * none, allocate one with the given values and append it to rec->backrefs.
+ *
+ * @index is only stored in a newly created backref, it is not part of the
+ * lookup.  Returns NULL if the allocation fails.
+ */
+static struct root_backref *get_root_backref(struct root_record *rec,
+					     u64 ref_root, u64 dir, u64 index,
+					     const char *name, int namelen)
+{
+	struct root_backref *entry;
+
+	list_for_each_entry(entry, &rec->backrefs, list) {
+		if (entry->ref_root == ref_root && entry->dir == dir &&
+		    entry->namelen == namelen &&
+		    !memcmp(name, entry->name, namelen))
+			return entry;
+	}
+
+	/* Not known yet: the name is stored right behind the struct */
+	entry = calloc(1, sizeof(*entry) + namelen + 1);
+	if (!entry)
+		return NULL;
+
+	memcpy(entry->name, name, namelen);
+	entry->name[namelen] = '\0';
+	entry->namelen = namelen;
+	entry->index = index;
+	entry->dir = dir;
+	entry->ref_root = ref_root;
+	list_add_tail(&entry->list, &rec->backrefs);
+	return entry;
+}
+
+/*
+ * Free the root record embedding @cache together with all its backrefs.
+ * Passed to FREE_EXTENT_CACHE_BASED_TREE() below as the per-entry free
+ * function.
+ */
+static void free_root_record(struct cache_extent *cache)
+{
+	struct root_record *rec;
+	struct root_backref *backref;
+	struct root_backref *tmp;
+
+	rec = container_of(cache, struct root_record, cache);
+	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
+		list_del(&backref->list);
+		free(backref);
+	}
+
+	/* Allocated with calloc() in get_root_rec(), so release with free() */
+	free(rec);
+}
+
+FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
+
+/*
+ * Record that an item of @item_type referring to root @root_id was found in
+ * root @ref_root.
+ *
+ * The matching root record and backref are created on demand.  @errors is
+ * OR-ed into the backref, index mismatches and duplicated root refs/backrefs
+ * are flagged, and rec->found_ref is bumped once both the dir item and the
+ * forward root ref of a backref have been seen.
+ *
+ * Always returns 0; allocation failures and unknown item types hit BUG_ON().
+ */
+static int add_root_backref(struct cache_tree *root_cache,
+			    u64 root_id, u64 ref_root, u64 dir, u64 index,
+			    const char *name, int namelen,
+			    int item_type, int errors)
+{
+	struct root_record *rec;
+	struct root_backref *backref;
+
+	rec = get_root_rec(root_cache, root_id);
+	BUG_ON(IS_ERR(rec));
+	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
+	BUG_ON(!backref);
+
+	backref->errors |= errors;
+
+	/* Everything except a dir item carries an index to cross-check */
+	if (item_type != BTRFS_DIR_ITEM_KEY) {
+		int index_known = backref->found_dir_index ||
+				  backref->found_back_ref ||
+				  backref->found_forward_ref;
+
+		if (!index_known)
+			backref->index = index;
+		else if (backref->index != index)
+			backref->errors |= REF_ERR_INDEX_UNMATCH;
+	}
+
+	switch (item_type) {
+	case BTRFS_DIR_ITEM_KEY:
+		if (backref->found_forward_ref)
+			rec->found_ref++;
+		backref->found_dir_item = 1;
+		break;
+	case BTRFS_DIR_INDEX_KEY:
+		backref->found_dir_index = 1;
+		break;
+	case BTRFS_ROOT_REF_KEY:
+		if (backref->found_forward_ref)
+			backref->errors |= REF_ERR_DUP_ROOT_REF;
+		else if (backref->found_dir_item)
+			rec->found_ref++;
+		backref->found_forward_ref = 1;
+		break;
+	case BTRFS_ROOT_BACKREF_KEY:
+		if (backref->found_back_ref)
+			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
+		backref->found_back_ref = 1;
+		break;
+	default:
+		BUG_ON(1);
+	}
+
+	if (backref->found_forward_ref && backref->found_dir_item)
+		backref->reachable = 1;
+	return 0;
+}
+
+/*
+ * Move the references to child roots found while walking @root from
+ * @src_cache into the root records of @dst_cache.
+ *
+ * @src_cache holds inode records.  For every record that is_child_root()
+ * accepts, its dir item / dir index backrefs are added to @dst_cache as root
+ * backrefs.  All records are removed from @src_cache and freed, also when
+ * an error cuts the merge short.  Nothing is merged for the tree reloc root.
+ *
+ * Returns 0 on success or the negative value returned by is_child_root().
+ */
+static int merge_root_recs(struct btrfs_root *root,
+			   struct cache_tree *src_cache,
+			   struct cache_tree *dst_cache)
+{
+	struct cache_extent *cache;
+	struct ptr_node *node;
+	struct inode_record *rec;
+	struct inode_backref *backref;
+	int ret = 0;
+
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		free_inode_recs_tree(src_cache);
+		return 0;
+	}
+
+	while (1) {
+		cache = search_cache_extent(src_cache, 0);
+		if (!cache)
+			break;
+		node = container_of(cache, struct ptr_node, cache);
+		rec = node->data;
+		remove_cache_extent(src_cache, &node->cache);
+		free(node);
+
+		ret = is_child_root(root, root->objectid, rec->ino);
+		if (ret > 0) {
+			list_for_each_entry(backref, &rec->backrefs, list) {
+				BUG_ON(backref->found_inode_ref);
+				if (backref->found_dir_item)
+					add_root_backref(dst_cache, rec->ino,
+						root->root_key.objectid,
+						backref->dir, backref->index,
+						backref->name, backref->namelen,
+						BTRFS_DIR_ITEM_KEY,
+						backref->errors);
+				if (backref->found_dir_index)
+					add_root_backref(dst_cache, rec->ino,
+						root->root_key.objectid,
+						backref->dir, backref->index,
+						backref->name, backref->namelen,
+						BTRFS_DIR_INDEX_KEY,
+						backref->errors);
+			}
+		}
+
+		/* The record is already off the tree: free it on every path */
+		free_inode_rec(rec);
+		if (ret < 0) {
+			/* Don't leave the unprocessed records behind either */
+			free_inode_recs_tree(src_cache);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static int check_root_refs(struct btrfs_root *root,
+ struct cache_tree *root_cache)
+{ /*
+ * Check that every root record in @root_cache is referenced and that
+ * its reachable backrefs are complete, printing what is wrong to
+ * stderr.
+ *
+ * Returns 1 if at least one problem was reported, 0 otherwise.
+ */
+ struct root_record *rec;
+ struct root_record *ref_root;
+ struct root_backref *backref;
+ struct cache_extent *cache;
+ int loop = 1; /* set again whenever a record loses its last reference */
+ int ret;
+ int error;
+ int errors = 0;
+
+ rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
+ BUG_ON(IS_ERR(rec));
+ rec->found_ref = 1; /* the record of BTRFS_FS_TREE_OBJECTID always counts as referenced */
+
+ /*
+ * Drop reachable backrefs whose referencing root has no reference
+ * itself, and repeat until no record loses its last reference.
+ *
+ * fixme: this can not detect circular references
+ */
+ while (loop) {
+ loop = 0;
+ cache = search_cache_extent(root_cache, 0);
+ while (1) {
+ if (!cache)
+ break;
+ rec = container_of(cache, struct root_record, cache);
+ cache = next_cache_extent(cache);
+
+ if (rec->found_ref == 0)
+ continue;
+
+ list_for_each_entry(backref, &rec->backrefs, list) {
+ if (!backref->reachable)
+ continue;
+
+ ref_root = get_root_rec(root_cache,
+ backref->ref_root);
+ BUG_ON(IS_ERR(ref_root));
+ if (ref_root->found_ref > 0)
+ continue;
+
+ backref->reachable = 0;
+ rec->found_ref--;
+ if (rec->found_ref == 0)
+ loop = 1;
+ }
+ }
+ }
+
+ cache = search_cache_extent(root_cache, 0);
+ while (1) { /* report pass over all root records */
+ if (!cache)
+ break;
+ rec = container_of(cache, struct root_record, cache);
+ cache = next_cache_extent(cache);
+
+ if (rec->found_ref == 0 &&
+ rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+ rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
+ ret = check_orphan_item(root->fs_info->tree_root,
+ rec->objectid);
+ if (ret == 0)
+ continue;
+
+ /*
+ * If we don't have a root item then we likely just have
+ * a dir item in a snapshot for this root but no actual
+ * ref key or anything so it's meaningless.
+ */
+ if (!rec->found_root_item)
+ continue;
+ errors++;
+ fprintf(stderr, "fs tree %llu not referenced\n",
+ (unsigned long long)rec->objectid);
+ }
+
+ error = 0;
+ if (rec->found_ref > 0 && !rec->found_root_item)
+ error = 1;
+ list_for_each_entry(backref, &rec->backrefs, list) {
+ if (!backref->found_dir_item)
+ backref->errors |= REF_ERR_NO_DIR_ITEM;
+ if (!backref->found_dir_index)
+ backref->errors |= REF_ERR_NO_DIR_INDEX;
+ if (!backref->found_back_ref)
+ backref->errors |= REF_ERR_NO_ROOT_BACKREF;
+ if (!backref->found_forward_ref)
+ backref->errors |= REF_ERR_NO_ROOT_REF;
+ if (backref->reachable && backref->errors)
+ error = 1; /* only reachable backrefs make the root count as bad */
+ }
+ if (!error)
+ continue;
+
+ errors++;
+ fprintf(stderr, "fs tree %llu refs %u %s\n",
+ (unsigned long long)rec->objectid, rec->found_ref,
+ rec->found_root_item ? "" : "not found");
+
+ list_for_each_entry(backref, &rec->backrefs, list) {
+ if (!backref->reachable)
+ continue;
+ if (!backref->errors && rec->found_root_item)
+ continue;
+ fprintf(stderr, "\tunresolved ref root %llu dir %llu"
+ " index %llu namelen %u name %s errors %x\n",
+ (unsigned long long)backref->ref_root,
+ (unsigned long long)backref->dir,
+ (unsigned long long)backref->index,
+ backref->namelen, backref->name,
+ backref->errors);
+ print_ref_error(backref->errors);
+ }
+ }
+ return errors > 0 ? 1 : 0;
+}
+
+/*
+ * Feed one ROOT_REF or ROOT_BACKREF item (slot @slot of @eb, key @key) into
+ * @root_cache.
+ *
+ * For a ROOT_REF key the referenced root is key->offset and the referencing
+ * root is key->objectid; for any other key type the two are swapped.  Names
+ * longer than BTRFS_NAME_LEN are truncated and flagged with
+ * REF_ERR_NAME_TOO_LONG.
+ *
+ * Always returns 0.
+ */
+static int process_root_ref(struct extent_buffer *eb, int slot,
+			    struct btrfs_key *key,
+			    struct cache_tree *root_cache)
+{
+	struct btrfs_root_ref *ref;
+	char namebuf[BTRFS_NAME_LEN];
+	u64 dirid;
+	u64 index;
+	u64 root_id;
+	u64 ref_root;
+	u32 name_len;
+	u32 len;
+	int error = 0;
+
+	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
+
+	dirid = btrfs_root_ref_dirid(eb, ref);
+	index = btrfs_root_ref_sequence(eb, ref);
+	name_len = btrfs_root_ref_name_len(eb, ref);
+
+	len = name_len;
+	if (name_len > BTRFS_NAME_LEN) {
+		len = BTRFS_NAME_LEN;
+		error = REF_ERR_NAME_TOO_LONG;
+	}
+	/* The name is read from directly behind the root_ref structure */
+	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
+
+	if (key->type == BTRFS_ROOT_REF_KEY) {
+		root_id = key->offset;
+		ref_root = key->objectid;
+	} else {
+		root_id = key->objectid;
+		ref_root = key->offset;
+	}
+	add_root_backref(root_cache, root_id, ref_root, dirid,
+			 index, namebuf, len, key->type, error);
+	return 0;
+}
+
+/*
+ * Free the btrfs_corrupt_block that embeds @cache.  Passed to
+ * FREE_EXTENT_CACHE_BASED_TREE() below as the per-entry free function.
+ */
+static void free_corrupt_block(struct cache_extent *cache)
+{
+	free(container_of(cache, struct btrfs_corrupt_block, cache));
+}
+
+FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
+
+/*
+ * Repair the btree of the given root.
+ *
+ * The fix is to remove the node pointers recorded in the corrupt_blocks
+ * cache_tree (first loop) and then to rebalance the tree (second loop).
+ * After the fix, the btree should be writeable.
+ *
+ * Returns 0 if @corrupt_blocks is empty or the repair went through, -ENOMEM
+ * if no path can be allocated, or the negative error of the failing
+ * transaction start, search or pointer removal. Once the transaction has
+ * been started it is committed on every exit path.
+ */
+static int repair_btree(struct btrfs_root *root,
+ struct cache_tree *corrupt_blocks)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_path *path;
+ struct btrfs_corrupt_block *corrupt;
+ struct cache_extent *cache;
+ struct btrfs_key key;
+ u64 offset; /* block pointer stored in the slot that gets removed */
+ int level;
+ int ret = 0;
+
+ if (cache_tree_empty(corrupt_blocks))
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ fprintf(stderr, "Error starting transaction: %s\n",
+ strerror(-ret));
+ goto out_free_path;
+ }
+ cache = first_cache_extent(corrupt_blocks);
+ while (cache) {
+ corrupt = container_of(cache, struct btrfs_corrupt_block,
+ cache);
+ level = corrupt->level;
+ path->lowest_level = level;
+ key.objectid = corrupt->key.objectid;
+ key.type = corrupt->key.type;
+ key.offset = corrupt->key.offset;
+
+ /*
+ * Here we don't want to do any tree balance, since it may
+ * cause a balance with a corrupted sibling leaf/node,
+ * so ins_len is set to 0 here.
+ * Balance will be done after all corrupt nodes/leaves are deleted.
+ */
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ if (ret < 0)
+ goto out;
+ offset = btrfs_node_blockptr(path->nodes[level],
+ path->slots[level]);
+
+ /* Remove the ptr */
+ ret = btrfs_del_ptr(trans, root, path, level,
+ path->slots[level]);
+ if (ret < 0)
+ goto out;
+ /*
+ * Remove the corresponding extent.
+ * The return value is not checked; the second loop below
+ * overwrites ret.
+ */
+ btrfs_release_path(path);
+ ret = btrfs_free_extent(trans, root, offset, root->nodesize,
+ 0, root->root_key.objectid,
+ level - 1, 0);
+ cache = next_cache_extent(cache);
+ }
+
+ /* Balance the btree using btrfs_search_slot() with ins_len == -1 */
+ cache = first_cache_extent(corrupt_blocks);
+ while (cache) {
+ corrupt = container_of(cache, struct btrfs_corrupt_block,
+ cache);
+ memcpy(&key, &corrupt->key, sizeof(key));
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+ /* the return value will always be > 0 since it won't find the item */
+ ret = 0;
+ btrfs_release_path(path);
+ cache = next_cache_extent(cache);
+ }
+out:
+ btrfs_commit_transaction(trans, root);
+out_free_path:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Check a single fs/subvolume tree.
+ *
+ * Walks @root with walk_down_tree()/walk_up_tree(), reports the corrupted
+ * tree blocks recorded during the walk (and tries to repair them when the
+ * file-level 'repair' flag is set), merges the collected child root
+ * references into @root_cache and finally runs check_inode_recs() on the
+ * collected inode records.
+ *
+ * fs_info->corrupt_blocks points at a tree on this function's stack while
+ * the check runs and is reset to NULL on every return path.
+ *
+ * Returns 0 if nothing was found wrong, -EIO if the root block itself fails
+ * btrfs_check_leaf()/btrfs_check_node(), otherwise an error collected from
+ * the walk, the btree repair, merge_root_recs() or check_inode_recs().
+ */
+static int check_fs_root(struct btrfs_root *root,
+			 struct cache_tree *root_cache,
+			 struct walk_control *wc)
+{
+	int ret = 0;
+	int err = 0;
+	int wret;
+	int level;
+	struct btrfs_path path;
+	struct shared_node root_node;
+	struct root_record *rec;
+	struct btrfs_root_item *root_item = &root->root_item;
+	struct cache_tree corrupt_blocks;
+	struct orphan_data_extent *orphan;
+	struct orphan_data_extent *tmp;
+	enum btrfs_tree_block_status status;
+
+	/*
+	 * Reuse the corrupt_block cache tree to record corrupted tree block
+	 *
+	 * Unlike the usage in extent tree check, here we do it in a per
+	 * fs/subvol tree base.
+	 */
+	cache_tree_init(&corrupt_blocks);
+	root->fs_info->corrupt_blocks = &corrupt_blocks;
+
+	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+		rec = get_root_rec(root_cache, root->root_key.objectid);
+		BUG_ON(IS_ERR(rec));
+		if (btrfs_root_refs(root_item) > 0)
+			rec->found_root_item = 1;
+	}
+
+	btrfs_init_path(&path);
+	memset(&root_node, 0, sizeof(root_node));
+	cache_tree_init(&root_node.root_cache);
+	cache_tree_init(&root_node.inode_cache);
+
+	/* Move the orphan extent record to corresponding inode_record */
+	list_for_each_entry_safe(orphan, tmp,
+				 &root->orphan_data_extents, list) {
+		struct inode_record *inode;
+
+		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
+				      1);
+		BUG_ON(IS_ERR(inode));
+		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
+		list_move(&orphan->list, &inode->orphan_extents);
+	}
+
+	level = btrfs_header_level(root->node);
+	memset(wc->nodes, 0, sizeof(wc->nodes));
+	wc->nodes[level] = &root_node;
+	wc->active_node = level;
+	wc->root_level = level;
+
+	/* We may not have checked the root block, lets do that now */
+	if (btrfs_is_leaf(root->node))
+		status = btrfs_check_leaf(root, NULL, root->node);
+	else
+		status = btrfs_check_node(root, NULL, root->node);
+	if (status != BTRFS_TREE_BLOCK_CLEAN) {
+		/*
+		 * Drop the inode records created for the orphan extents
+		 * above, then leave through the common cleanup so that
+		 * fs_info does not keep pointing at our on-stack tree.
+		 */
+		free_inode_recs_tree(&root_node.inode_cache);
+		ret = -EIO;
+		goto out;
+	}
+
+	if (btrfs_root_refs(root_item) > 0 ||
+	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+		/* Start the walk at the first slot of the root node */
+		path.nodes[level] = root->node;
+		extent_buffer_get(root->node);
+		path.slots[level] = 0;
+	} else {
+		/* Unreferenced root with drop progress: start from there */
+		struct btrfs_key key;
+		struct btrfs_disk_key found_key;
+
+		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+		level = root_item->drop_level;
+		path.lowest_level = level;
+		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+		if (wret < 0)
+			goto skip_walking;
+		btrfs_node_key(path.nodes[level], &found_key,
+			       path.slots[level]);
+		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
+			       sizeof(found_key)));
+	}
+
+	while (1) {
+		wret = walk_down_tree(root, &path, wc, &level);
+		if (wret < 0)
+			ret = wret;
+		if (wret != 0)
+			break;
+
+		wret = walk_up_tree(root, &path, wc, &level);
+		if (wret < 0)
+			ret = wret;
+		if (wret != 0)
+			break;
+	}
+skip_walking:
+	btrfs_release_path(&path);
+
+	if (!cache_tree_empty(&corrupt_blocks)) {
+		struct cache_extent *cache;
+		struct btrfs_corrupt_block *corrupt;
+
+		printf("The following tree block(s) is corrupted in tree %llu:\n",
+		       root->root_key.objectid);
+		cache = first_cache_extent(&corrupt_blocks);
+		while (cache) {
+			corrupt = container_of(cache,
+					       struct btrfs_corrupt_block,
+					       cache);
+			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
+			       cache->start, corrupt->level,
+			       corrupt->key.objectid, corrupt->key.type,
+			       corrupt->key.offset);
+			cache = next_cache_extent(cache);
+		}
+		if (repair) {
+			printf("Try to repair the btree for root %llu\n",
+			       root->root_key.objectid);
+			ret = repair_btree(root, &corrupt_blocks);
+			if (ret < 0)
+				fprintf(stderr, "Failed to repair btree: %s\n",
+					strerror(-ret));
+			if (!ret)
+				printf("Btree for root %llu is fixed\n",
+				       root->root_key.objectid);
+		}
+	}
+
+	err = merge_root_recs(root, &root_node.root_cache, root_cache);
+	if (err < 0)
+		ret = err;
+
+	if (root_node.current) {
+		root_node.current->checked = 1;
+		maybe_free_inode_rec(&root_node.inode_cache,
+				     root_node.current);
+	}
+
+	/* An earlier error takes precedence over the inode check result */
+	err = check_inode_recs(root, &root_node.inode_cache);
+	if (!ret)
+		ret = err;
+
+out:
+	free_corrupt_blocks_tree(&corrupt_blocks);
+	root->fs_info->corrupt_blocks = NULL;
+	free_orphan_data_extents(&root->orphan_data_extents);
+	return ret;
+}
+
+/*
+ * Tell whether @objectid names a root that check_fs_roots() should check:
+ * the tree reloc root, the data reloc tree, or whatever is_fstree() accepts.
+ */
+static int fs_root_objectid(u64 objectid)
+{
+	if (objectid != BTRFS_TREE_RELOC_OBJECTID &&
+	    objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+		return is_fstree(objectid);
+
+	return 1;
+}
+
+static int check_fs_roots(struct btrfs_root *root,
+ struct cache_tree *root_cache)
+{ /*
+ * Iterate over the items of the tree root: run check_fs_root() on
+ * every ROOT_ITEM accepted by fs_root_objectid() and feed every
+ * ROOT_REF/ROOT_BACKREF item to process_root_ref().
+ *
+ * The scan starts over from the beginning when check_fs_root()
+ * returns -EAGAIN or when the tree root node has changed.
+ *
+ * Returns 0 if everything checked out, 1 otherwise.
+ */
+ struct btrfs_path path;
+ struct btrfs_key key;
+ struct walk_control wc;
+ struct extent_buffer *leaf, *tree_node;
+ struct btrfs_root *tmp_root;
+ struct btrfs_root *tree_root = root->fs_info->tree_root;
+ int ret;
+ int err = 0;
+
+ if (ctx.progress_enabled) {
+ ctx.tp = TASK_FS_ROOTS;
+ task_start(ctx.info);
+ }
+
+ /*
+ * Just in case we made any changes to the extent tree that weren't
+ * reflected into the free space cache yet.
+ */
+ if (repair)
+ reset_cached_block_groups(root->fs_info);
+ memset(&wc, 0, sizeof(wc));
+ cache_tree_init(&wc.shared);
+ btrfs_init_path(&path);
+
+again: /* (re)start the scan at the smallest possible ROOT_ITEM key */
+ key.offset = 0;
+ key.objectid = 0;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+ if (ret < 0) {
+ err = 1;
+ goto out;
+ }
+ tree_node = tree_root->node; /* remembered to notice a changed tree root node */
+ while (1) {
+ if (tree_node != tree_root->node) {
+ free_root_recs_tree(root_cache);
+ btrfs_release_path(&path);
+ goto again;
+ }
+ leaf = path.nodes[0];
+ if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(tree_root, &path);
+ if (ret) {
+ if (ret < 0)
+ err = 1;
+ break;
+ }
+ leaf = path.nodes[0];
+ }
+ btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+ if (key.type == BTRFS_ROOT_ITEM_KEY &&
+ fs_root_objectid(key.objectid)) {
+ if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+ tmp_root = btrfs_read_fs_root_no_cache(
+ root->fs_info, &key);
+ } else {
+ key.offset = (u64)-1;
+ tmp_root = btrfs_read_fs_root(
+ root->fs_info, &key);
+ }
+ if (IS_ERR(tmp_root)) {
+ err = 1;
+ goto next;
+ }
+ ret = check_fs_root(tmp_root, root_cache, &wc);
+ if (ret == -EAGAIN) { /* throw the collected root records away and start over */
+ free_root_recs_tree(root_cache);
+ btrfs_release_path(&path);
+ goto again;
+ }
+ if (ret)
+ err = 1;
+ if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) /* read with the _no_cache variant above, so freed here */
+ btrfs_free_fs_root(tmp_root);
+ } else if (key.type == BTRFS_ROOT_REF_KEY ||
+ key.type == BTRFS_ROOT_BACKREF_KEY) {
+ process_root_ref(leaf, path.slots[0], &key,
+ root_cache);
+ }
+next:
+ path.slots[0]++;
+ }
+out:
+ btrfs_release_path(&path);
+ if (err)
+ free_extent_cache_tree(&wc.shared);
+ if (!cache_tree_empty(&wc.shared))
+ fprintf(stderr, "warning line %d\n", __LINE__);
+
+ task_stop(ctx.info);
+
+ return err;
+}
+
+ /*
+ * Validate every backref attached to @rec and the record's total ref count.
+ *
+ * Returns 0 when everything is consistent and 1 otherwise. When @print_errs
+ * is zero the function bails out at the first problem without printing
+ * anything; when it is non-zero each problem is reported on stderr and the
+ * scan continues so that all of them are listed.
+ */
+ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
+ {
+ struct list_head *cur = rec->backrefs.next;
+ struct extent_backref *back;
+ struct tree_backref *tback;
+ struct data_backref *dback;
+ u64 found = 0;
+ int err = 0;
+
+ while(cur != &rec->backrefs) {
+ back = list_entry(cur, struct extent_backref, list);
+ cur = cur->next;
+ /* Backref was never seen in the extent tree. */
+ if (!back->found_extent_tree) {
+ err = 1;
+ if (!print_errs)
+ goto out;
+ if (back->is_data) {
+ dback = (struct data_backref *)back;
+ fprintf(stderr, "Backref %llu %s %llu"
+ " owner %llu offset %llu num_refs %lu"
+ " not found in extent tree\n",
+ (unsigned long long)rec->start,
+ back->full_backref ?
+ "parent" : "root",
+ back->full_backref ?
+ (unsigned long long)dback->parent:
+ (unsigned long long)dback->root,
+ (unsigned long long)dback->owner,
+ (unsigned long long)dback->offset,
+ (unsigned long)dback->num_refs);
+ } else {
+ tback = (struct tree_backref *)back;
+ fprintf(stderr, "Backref %llu parent %llu"
+ " root %llu not found in extent tree\n",
+ (unsigned long long)rec->start,
+ (unsigned long long)tback->parent,
+ (unsigned long long)tback->root);
+ }
+ }
+ /* Tree backref whose found_ref flag was never set. */
+ if (!back->is_data && !back->found_ref) {
+ err = 1;
+ if (!print_errs)
+ goto out;
+ tback = (struct tree_backref *)back;
+ fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
+ (unsigned long long)rec->start,
+ back->full_backref ? "parent" : "root",
+ back->full_backref ?
+ (unsigned long long)tback->parent :
+ (unsigned long long)tback->root, back);
+ }
+ if (back->is_data) {
+ dback = (struct data_backref *)back;
+ /* Per-backref count: found_ref must equal num_refs. */
+ if (dback->found_ref != dback->num_refs) {
+ err = 1;
+ if (!print_errs)
+ goto out;
+ fprintf(stderr, "Incorrect local backref count"
+ " on %llu %s %llu owner %llu"
+ " offset %llu found %u wanted %u back %p\n",
+ (unsigned long long)rec->start,
+ back->full_backref ?
+ "parent" : "root",
+ back->full_backref ?
+ (unsigned long long)dback->parent:
+ (unsigned long long)dback->root,
+ (unsigned long long)dback->owner,
+ (unsigned long long)dback->offset,
+ dback->found_ref, dback->num_refs, back);
+ }
+ /* The backref must point at the start of this extent record. */
+ if (dback->disk_bytenr != rec->start) {
+ err = 1;
+ if (!print_errs)
+ goto out;
+ fprintf(stderr, "Backref disk bytenr does not"
+ " match extent record, bytenr=%llu, "
+ "ref bytenr=%llu\n",
+ (unsigned long long)rec->start,
+ (unsigned long long)dback->disk_bytenr);
+ }
+
+ /* The backref's length must equal the record's length. */
+ if (dback->bytes != rec->nr) {
+ err = 1;
+ if (!print_errs)
+ goto out;
+ fprintf(stderr, "Backref bytes do not match "
+ "extent backref, bytenr=%llu, ref "
+ "bytes=%llu, backref bytes=%llu\n",
+ (unsigned long long)rec->start,
+ (unsigned long long)rec->nr,
+ (unsigned long long)dback->bytes);
+ }
+ }
+ /* A tree backref counts once; a data backref counts found_ref times. */
+ if (!back->is_data) {
+ found += 1;
+ } else {
+ dback = (struct data_backref *)back;
+ found += dback->found_ref;
+ }
+ }
+ /* Sum over all backrefs must equal the record's ref count. */
+ if (found != rec->refs) {
+ err = 1;
+ if (!print_errs)
+ goto out;
+ fprintf(stderr, "Incorrect global backref count "
+ "on %llu found %llu wanted %llu\n",
+ (unsigned long long)rec->start,
+ (unsigned long long)found,
+ (unsigned long long)rec->refs);
+ }
+ out:
+ return err;
+ }
+
+ /*
+  * Unlink and free every backref hanging off @rec->backrefs.
+  * Always returns 0.
+  */
+ static int free_all_extent_backrefs(struct extent_record *rec)
+ {
+ 	struct list_head *head = &rec->backrefs;
+
+ 	for (;;) {
+ 		struct list_head *entry;
+ 		struct extent_backref *ref;
+
+ 		if (list_empty(head))
+ 			break;
+
+ 		/* Always take the first element; the list shrinks each pass. */
+ 		entry = head->next;
+ 		ref = list_entry(entry, struct extent_backref, list);
+ 		list_del(entry);
+ 		free(ref);
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Drain @extent_cache: remove each extent record from the tree, release its
+  * backrefs and free the record itself. @fs_info is not used.
+  */
+ static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
+ 				     struct cache_tree *extent_cache)
+ {
+ 	struct cache_extent *entry;
+
+ 	while ((entry = first_cache_extent(extent_cache)) != NULL) {
+ 		struct extent_record *victim;
+
+ 		victim = container_of(entry, struct extent_record, cache);
+ 		remove_cache_extent(extent_cache, entry);
+ 		free_all_extent_backrefs(victim);
+ 		free(victim);
+ 	}
+ }
+
+ /*
+  * Drop @rec from @extent_cache once nothing is left to verify on it.
+  *
+  * The record is released only when its content and owner ref were checked,
+  * the extent item ref count matches the non-zero counted refs, it has no
+  * duplicates, all_backpointers_checked() reports no problem, and none of the
+  * bad_full_backref / crossing_stripes / wrong_chunk_type flags is set.
+  * Always returns 0.
+  */
+ static int maybe_free_extent_rec(struct cache_tree *extent_cache,
+ 				 struct extent_record *rec)
+ {
+ 	if (!rec->content_checked || !rec->owner_ref_checked)
+ 		return 0;
+ 	if (rec->extent_item_refs != rec->refs || !(rec->refs > 0))
+ 		return 0;
+ 	if (rec->num_duplicates != 0)
+ 		return 0;
+ 	if (all_backpointers_checked(rec, 0))
+ 		return 0;
+ 	if (rec->bad_full_backref || rec->crossing_stripes ||
+ 	    rec->wrong_chunk_type)
+ 		return 0;
+
+ 	remove_cache_extent(extent_cache, &rec->cache);
+ 	free_all_extent_backrefs(rec);
+ 	list_del_init(&rec->list);
+ 	free(rec);
+ 	return 0;
+ }
+
+ /*
+  * Verify that the owner recorded in the header of @buf really references it.
+  *
+  * First look for a found, non-full tree backref on @rec whose root equals
+  * the header owner. Failing that, read the owner's root and search it down
+  * to the level above @buf to see whether the parent slot points at
+  * @buf->start.
+  *
+  * Returns 0 when the owner is confirmed (or the tree search itself failed),
+  * 1 when the owner root cannot be read or does not reference the block.
+  */
+ static int check_owner_ref(struct btrfs_root *root,
+ 			   struct extent_record *rec,
+ 			   struct extent_buffer *buf)
+ {
+ 	struct extent_backref *node;
+ 	struct tree_backref *back;
+ 	struct btrfs_root *ref_root;
+ 	struct btrfs_key key;
+ 	struct btrfs_path path;
+ 	struct extent_buffer *parent;
+ 	int level;
+ 	int found = 0;
+ 	int ret;
+
+ 	list_for_each_entry(node, &rec->backrefs, list) {
+ 		if (node->is_data)
+ 			continue;
+ 		if (!node->found_ref)
+ 			continue;
+ 		if (node->full_backref)
+ 			continue;
+ 		back = (struct tree_backref *)node;
+ 		if (btrfs_header_owner(buf) == back->root)
+ 			return 0;
+ 	}
+ 	BUG_ON(rec->is_root);
+
+ 	/* try to find the block by search corresponding fs tree */
+ 	key.objectid = btrfs_header_owner(buf);
+ 	key.type = BTRFS_ROOT_ITEM_KEY;
+ 	key.offset = (u64)-1;
+
+ 	ref_root = btrfs_read_fs_root(root->fs_info, &key);
+ 	if (IS_ERR(ref_root))
+ 		return 1;
+
+ 	/* Search by the first key stored in the block itself. */
+ 	level = btrfs_header_level(buf);
+ 	if (level == 0)
+ 		btrfs_item_key_to_cpu(buf, &key, 0);
+ 	else
+ 		btrfs_node_key_to_cpu(buf, &key, 0);
+
+ 	btrfs_init_path(&path);
+ 	/* Stop one level above @buf so path.nodes[level + 1] is its parent. */
+ 	path.lowest_level = level + 1;
+ 	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
+ 	if (ret < 0) {
+ 		/* Drop whatever the failed search still holds in the path. */
+ 		btrfs_release_path(&path);
+ 		return 0;
+ 	}
+
+ 	parent = path.nodes[level + 1];
+ 	if (parent && buf->start == btrfs_node_blockptr(parent,
+ 						       path.slots[level + 1]))
+ 		found = 1;
+
+ 	btrfs_release_path(&path);
+ 	return found ? 0 : 1;
+ }
+
+ /*
+  * Return 1 if @rec carries at least one backref rooted in the extent tree
+  * and none of its backrefs is a data backref or a full backref; return 0
+  * otherwise (including when the record has no backrefs at all).
+  */
+ static int is_extent_tree_record(struct extent_record *rec)
+ {
+ 	struct extent_backref *ref;
+ 	int seen_extent_root = 0;
+
+ 	list_for_each_entry(ref, &rec->backrefs, list) {
+ 		/* Any data or full backref disqualifies the record outright. */
+ 		if (ref->is_data || ref->full_backref)
+ 			return 0;
+
+ 		if (((struct tree_backref *)ref)->root ==
+ 		    BTRFS_EXTENT_TREE_OBJECTID)
+ 			seen_extent_root = 1;
+ 	}
+
+ 	return seen_extent_root;
+ }
+
+
+ /*
+  * Register the range [@start, @start + @len) as a corrupt extent, but only
+  * when @extent_cache holds a record for it that is_extent_tree_record()
+  * accepts. The record's parent key is used as the corrupt record's key.
+  *
+  * Returns 0 when nothing was recorded, otherwise the result of
+  * btrfs_add_corrupt_extent_record().
+  */
+ static int record_bad_block_io(struct btrfs_fs_info *info,
+ 			       struct cache_tree *extent_cache,
+ 			       u64 start, u64 len)
+ {
+ 	struct cache_extent *hit = lookup_cache_extent(extent_cache, start, len);
+
+ 	if (hit) {
+ 		struct extent_record *rec;
+
+ 		rec = container_of(hit, struct extent_record, cache);
+ 		if (is_extent_tree_record(rec)) {
+ 			struct btrfs_key key;
+
+ 			btrfs_disk_key_to_cpu(&key, &rec->parent_key);
+ 			return btrfs_add_corrupt_extent_record(info, &key,
+ 							       start, len, 0);
+ 		}
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+ * Exchange the entries at @slot and @slot + 1 of tree block @buf.
+ *
+ * For a node (non-zero header level) the two key pointers are swapped; if
+ * @slot is 0 the new first key is propagated upward with
+ * btrfs_fixup_low_keys(). For a leaf the item payloads, offsets, sizes and
+ * keys are exchanged.
+ *
+ * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
+ */
+ static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
+ struct extent_buffer *buf, int slot)
+ {
+ if (btrfs_header_level(buf)) {
+ struct btrfs_key_ptr ptr1, ptr2;
+
+ /* Read both key pointers, then write them back crossed over. */
+ read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
+ sizeof(struct btrfs_key_ptr));
+ read_extent_buffer(buf, &ptr2,
+ btrfs_node_key_ptr_offset(slot + 1),
+ sizeof(struct btrfs_key_ptr));
+ write_extent_buffer(buf, &ptr1,
+ btrfs_node_key_ptr_offset(slot + 1),
+ sizeof(struct btrfs_key_ptr));
+ write_extent_buffer(buf, &ptr2,
+ btrfs_node_key_ptr_offset(slot),
+ sizeof(struct btrfs_key_ptr));
+ if (slot == 0) {
+ struct btrfs_disk_key key;
+ btrfs_node_key(buf, &key, 0);
+ btrfs_fixup_low_keys(root, path, &key,
+ btrfs_header_level(buf) + 1);
+ }
+ } else {
+ struct btrfs_item *item1, *item2;
+ struct btrfs_key k1, k2;
+ char *item1_data, *item2_data;
+ u32 item1_offset, item2_offset, item1_size, item2_size;
+
+ item1 = btrfs_item_nr(slot);
+ item2 = btrfs_item_nr(slot + 1);
+ btrfs_item_key_to_cpu(buf, &k1, slot);
+ btrfs_item_key_to_cpu(buf, &k2, slot + 1);
+ item1_offset = btrfs_item_offset(buf, item1);
+ item2_offset = btrfs_item_offset(buf, item2);
+ item1_size = btrfs_item_size(buf, item1);
+ item2_size = btrfs_item_size(buf, item2);
+
+ item1_data = malloc(item1_size);
+ if (!item1_data)
+ return -ENOMEM;
+ item2_data = malloc(item2_size);
+ if (!item2_data) {
+ free(item1_data);
+ return -ENOMEM;
+ }
+
+ read_extent_buffer(buf, item1_data, item1_offset, item1_size);
+ read_extent_buffer(buf, item2_data, item2_offset, item2_size);
+
+ /*
+ * NOTE(review): item1_data holds item1_size bytes but is written
+ * with length item2_size (and vice versa). Assuming the last
+ * argument is a byte count, as in the reads above, this reads past
+ * the smaller buffer whenever the two sizes differ -- confirm.
+ */
+ write_extent_buffer(buf, item1_data, item2_offset, item2_size);
+ write_extent_buffer(buf, item2_data, item1_offset, item1_size);
+ free(item1_data);
+ free(item2_data);
+
+ btrfs_set_item_offset(buf, item1, item2_offset);
+ btrfs_set_item_offset(buf, item2, item1_offset);
+ btrfs_set_item_size(buf, item1, item2_size);
+ btrfs_set_item_size(buf, item2, item1_size);
+
+ /* Rewrite both keys through the path, one slot at a time. */
+ path->slots[0] = slot;
+ btrfs_set_item_key_unsafe(root, path, &k2);
+ path->slots[0] = slot + 1;
+ btrfs_set_item_key_unsafe(root, path, &k1);
+ }
+ return 0;
+ }
+
+ /*
+  * Sort the keys of the block at @path->nodes[@path->lowest_level] by
+  * repeatedly swapping adjacent out-of-order entries.
+  *
+  * After every swap the scan restarts from the first pair, because the swap
+  * may have created a new inversion with the preceding entry. Equal neighbours
+  * are left alone: swapping them cannot establish a strict order and would
+  * never terminate.
+  *
+  * @trans is unused.
+  *
+  * Returns 0 if at least one swap was made and all swaps succeeded, the
+  * error from swap_values() if one failed, and -EIO if nothing was swapped.
+  */
+ static int fix_key_order(struct btrfs_trans_handle *trans,
+ 			 struct btrfs_root *root,
+ 			 struct btrfs_path *path)
+ {
+ 	struct extent_buffer *buf;
+ 	struct btrfs_key k1, k2;
+ 	int i;
+ 	int nritems;
+ 	int level = path->lowest_level;
+ 	int ret = -EIO;
+
+ 	buf = path->nodes[level];
+ 	/* Signed copy: an empty block must yield zero iterations, not wrap. */
+ 	nritems = btrfs_header_nritems(buf);
+
+ 	i = 0;
+ 	while (i + 1 < nritems) {
+ 		if (level) {
+ 			btrfs_node_key_to_cpu(buf, &k1, i);
+ 			btrfs_node_key_to_cpu(buf, &k2, i + 1);
+ 		} else {
+ 			btrfs_item_key_to_cpu(buf, &k1, i);
+ 			btrfs_item_key_to_cpu(buf, &k2, i + 1);
+ 		}
+ 		if (btrfs_comp_cpu_keys(&k1, &k2) <= 0) {
+ 			i++;
+ 			continue;
+ 		}
+ 		ret = swap_values(root, path, buf, i);
+ 		if (ret)
+ 			break;
+ 		btrfs_mark_buffer_dirty(buf);
+ 		/* Restart at pair (0, 1); it must be re-examined as well. */
+ 		i = 0;
+ 	}
+ 	return ret;
+ }
+
+ /*
+  * Remove the item header at @slot from leaf @buf.
+  *
+  * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
+  * EXTENT_DATA_REF items are removed; any other key type is refused. The
+  * following item headers are shifted down by one, the item count is
+  * decremented, and if slot 0 was removed the new first key is propagated
+  * upward.
+  *
+  * @trans is unused.
+  *
+  * Returns 0 on success, -1 when the key type is not one we may delete.
+  */
+ static int delete_bogus_item(struct btrfs_trans_handle *trans,
+ 			     struct btrfs_root *root,
+ 			     struct btrfs_path *path,
+ 			     struct extent_buffer *buf, int slot)
+ {
+ 	struct btrfs_key key;
+ 	int nritems = btrfs_header_nritems(buf);
+
+ 	btrfs_item_key_to_cpu(buf, &key, slot);
+
+ 	/* These are all the keys we can deal with missing. */
+ 	if (key.type != BTRFS_DIR_INDEX_KEY &&
+ 	    key.type != BTRFS_EXTENT_ITEM_KEY &&
+ 	    key.type != BTRFS_METADATA_ITEM_KEY &&
+ 	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+ 	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
+ 		return -1;
+
+ 	/* Cast buf->start like every other %llu argument in this file. */
+ 	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
+ 	       (unsigned long long)key.objectid, key.type,
+ 	       (unsigned long long)key.offset, slot,
+ 	       (unsigned long long)buf->start);
+ 	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
+ 			      btrfs_item_nr_offset(slot + 1),
+ 			      sizeof(struct btrfs_item) *
+ 			      (nritems - slot - 1));
+ 	btrfs_set_header_nritems(buf, nritems - 1);
+ 	if (slot == 0) {
+ 		struct btrfs_disk_key disk_key;
+
+ 		btrfs_item_key(buf, &disk_key, 0);
+ 		btrfs_fixup_low_keys(root, path, &disk_key, 1);
+ 	}
+ 	btrfs_mark_buffer_dirty(buf);
+ 	return 0;
+ }
+
+ /*
+ * Repair item data offsets in the leaf at @path->nodes[0].
+ *
+ * Item 0 must end exactly at BTRFS_LEAF_DATA_SIZE(root) and every later item
+ * must end exactly where the previous item starts. An item that ends too far
+ * out is handed to delete_bogus_item() and the scan restarts; an item that
+ * leaves a gap has its data moved up by the size of the gap.
+ *
+ * Any failure trips the BUG_ON() at the end, so a non-zero value is never
+ * actually returned to the caller.
+ */
+ static int fix_item_offset(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path)
+ {
+ struct extent_buffer *buf;
+ int i;
+ int ret = 0;
+
+ /* We should only get this for leaves */
+ BUG_ON(path->lowest_level);
+ buf = path->nodes[0];
+ again:
+ for (i = 0; i < btrfs_header_nritems(buf); i++) {
+ unsigned int shift = 0, offset;
+
+ if (i == 0 && btrfs_item_end_nr(buf, i) !=
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ /* First item runs past the end of the leaf data area. */
+ if (btrfs_item_end_nr(buf, i) >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ ret = delete_bogus_item(trans, root, path,
+ buf, i);
+ if (!ret)
+ goto again;
+ fprintf(stderr, "item is off the end of the "
+ "leaf, can't fix\n");
+ ret = -EIO;
+ break;
+ }
+ shift = BTRFS_LEAF_DATA_SIZE(root) -
+ btrfs_item_end_nr(buf, i);
+ } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
+ btrfs_item_offset_nr(buf, i - 1)) {
+ /* Item ends inside the previous item's data: overlap. */
+ if (btrfs_item_end_nr(buf, i) >
+ btrfs_item_offset_nr(buf, i - 1)) {
+ ret = delete_bogus_item(trans, root, path,
+ buf, i);
+ if (!ret)
+ goto again;
+ fprintf(stderr, "items overlap, can't fix\n");
+ ret = -EIO;
+ break;
+ }
+ shift = btrfs_item_offset_nr(buf, i - 1) -
+ btrfs_item_end_nr(buf, i);
+ }
+ if (!shift)
+ continue;
+
+ /* Close the gap by moving this item's data up by shift bytes. */
+ printf("Shifting item nr %d by %u bytes in block %llu\n",
+ i, shift, (unsigned long long)buf->start);
+ offset = btrfs_item_offset_nr(buf, i);
+ memmove_extent_buffer(buf,
+ btrfs_leaf_data(buf) + offset + shift,
+ btrfs_leaf_data(buf) + offset,
+ btrfs_item_size_nr(buf, i));
+ btrfs_set_item_offset(buf, btrfs_item_nr(i),
+ offset + shift);
+ btrfs_mark_buffer_dirty(buf);
+ }
+
+ /*
+ * We may have moved things, in which case we want to exit so we don't
+ * write those changes out. Once we have proper abort functionality in
+ * progs this can be changed to something nicer.
+ */
+ BUG_ON(ret);
+ return ret;
+ }
+
+/*
+ * Attempt to fix basic block failures. If we can't fix it for whatever reason
+ * then just return -EIO.
+ */
+static int try_to_fix_bad_block(struct btrfs_root *root,
+ struct extent_buffer *buf,
+ enum btrfs_tree_block_status status)
+{
+ struct btrfs_trans_handle *trans;
+ struct ulist *roots;
+ struct ulist_node *node;
+ struct btrfs_root *search_root;
+ struct btrfs_path *path;
+ struct ulist_iterator iter;
+ struct btrfs_key root_key, key;
+ int ret;
+
+ if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
+ status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
+ return -EIO;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -EIO;
+
+ ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
+ 0, &roots);
+ if (ret) {
+ btrfs_free_path(path);
+ return -EIO;
+ }
+
+ ULIST_ITER_INIT(&iter);
+ while ((node = ulist_next(roots, &iter))) {
+ root_key.objectid = node->val;
+ root_key.type = BTRFS_ROOT_ITEM_KEY;
+ root_key.offset = (u64)-1;
+
+ search_root = btrfs_read_fs_root(root->fs_info, &root_key);
+ if (IS_ERR(root)) {
+ ret = -EIO;
+ break;
+ }
+
+
+ trans = btrfs_start_transaction(search_root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+
+ path->lowest_level = btrfs_header_level(buf);
+ path->skip_check_block = 1;
+ if (path->lowest_level)
+ btrfs_node_key_to_cpu(buf, &key, 0);
+ else
+ btrfs_item_key_to_cpu(buf, &key, 0);
+ ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
+ if (ret) {
+ ret = -EIO;
+ btrfs_commit_transaction(trans, search_root);
+ break;
+ }
+ if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
+ ret = fix_key_order(trans, search_root, path);
+ else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
+ ret = fix_item_offset(trans, search_root, path);
+ if (ret) {
+ btrfs_commit_transaction(trans, search_root);
+ break;
+ }
+ btrfs_release_path(path);
+ btrfs_commit_transaction(trans, search_root);
+ }
+ ulist_free(roots);
+ btrfs_free_path(path);
+ return ret;
+}
+
+ /*
+ * Check one tree block against its extent record in @extent_cache.
+ *
+ * Records the block's generation, level and (if it has items) first key
+ * objectid in the extent record, then runs btrfs_check_leaf() or
+ * btrfs_check_node(). A bad block is passed to try_to_fix_bad_block() when
+ * the global repair flag is set.
+ *
+ * Returns 1 when no extent record covers @buf, -EIO for a bad block that
+ * could not be fixed, -EAGAIN after a successful fix, and otherwise the
+ * result of the owner check (0 on success). On 0 the record may be freed by
+ * maybe_free_extent_rec().
+ */
+ static int check_block(struct btrfs_root *root,
+ struct cache_tree *extent_cache,
+ struct extent_buffer *buf, u64 flags)
+ {
+ struct extent_record *rec;
+ struct cache_extent *cache;
+ struct btrfs_key key;
+ enum btrfs_tree_block_status status;
+ int ret = 0;
+ int level;
+
+ cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
+ if (!cache)
+ return 1;
+ rec = container_of(cache, struct extent_record, cache);
+ rec->generation = btrfs_header_generation(buf);
+
+ level = btrfs_header_level(buf);
+ if (btrfs_header_nritems(buf) > 0) {
+
+ if (level == 0)
+ btrfs_item_key_to_cpu(buf, &key, 0);
+ else
+ btrfs_node_key_to_cpu(buf, &key, 0);
+
+ rec->info_objectid = key.objectid;
+ }
+ rec->info_level = level;
+
+ if (btrfs_is_leaf(buf))
+ status = btrfs_check_leaf(root, &rec->parent_key, buf);
+ else
+ status = btrfs_check_node(root, &rec->parent_key, buf);
+
+ if (status != BTRFS_TREE_BLOCK_CLEAN) {
+ /*
+ * NOTE(review): try_to_fix_bad_block() returns an int (0 or a
+ * negative errno) that is stored in the enum; the comparison
+ * below presumably relies on BTRFS_TREE_BLOCK_CLEAN being 0.
+ */
+ if (repair)
+ status = try_to_fix_bad_block(root, buf, status);
+ if (status != BTRFS_TREE_BLOCK_CLEAN) {
+ ret = -EIO;
+ fprintf(stderr, "bad block %llu\n",
+ (unsigned long long)buf->start);
+ } else {
+ /*
+ * Signal to callers we need to start the scan over
+ * again since we'll have cow'ed blocks.
+ */
+ ret = -EAGAIN;
+ }
+ } else {
+ rec->content_checked = 1;
+ /* With a full backref the owner check is skipped. */
+ if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ rec->owner_ref_checked = 1;
+ else {
+ ret = check_owner_ref(root, rec, buf);
+ if (!ret)
+ rec->owner_ref_checked = 1;
+ }
+ }
+ if (!ret)
+ maybe_free_extent_rec(extent_cache, rec);
+ return ret;
+ }
+
+ /*
+  * Look up a tree backref on @rec.
+  *
+  * A non-zero @parent selects among full backrefs by parent; a zero @parent
+  * selects among non-full backrefs by @root. Data backrefs are ignored.
+  * Returns the first match or NULL.
+  */
+ static struct tree_backref *find_tree_backref(struct extent_record *rec,
+ 					      u64 parent, u64 root)
+ {
+ 	struct extent_backref *entry;
+
+ 	list_for_each_entry(entry, &rec->backrefs, list) {
+ 		struct tree_backref *candidate;
+
+ 		if (entry->is_data)
+ 			continue;
+
+ 		candidate = (struct tree_backref *)entry;
+ 		if (parent > 0) {
+ 			if (entry->full_backref && candidate->parent == parent)
+ 				return candidate;
+ 		} else if (!entry->full_backref && candidate->root == root) {
+ 			return candidate;
+ 		}
+ 	}
+
+ 	return NULL;
+ }
+
+ /*
+  * Allocate a tree backref and append it to @rec->backrefs.
+  *
+  * A non-zero @parent creates a full backref keyed by parent; otherwise the
+  * backref is keyed by @root. The whole object is zeroed first so that
+  * whichever of parent/root is not assigned here still has a defined value
+  * when the diagnostics in all_backpointers_checked() print both.
+  *
+  * Returns the new backref, or NULL if allocation fails.
+  */
+ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
+ 					       u64 parent, u64 root)
+ {
+ 	struct tree_backref *ref = malloc(sizeof(*ref));
+
+ 	if (!ref)
+ 		return NULL;
+ 	memset(ref, 0, sizeof(*ref));
+ 	if (parent > 0) {
+ 		ref->parent = parent;
+ 		ref->node.full_backref = 1;
+ 	} else {
+ 		ref->root = root;
+ 		ref->node.full_backref = 0;
+ 	}
+ 	list_add_tail(&ref->node.list, &rec->backrefs);
+
+ 	return ref;
+ }
+
+ /*
+  * Look up a data backref on @rec.
+  *
+  * A non-zero @parent selects among full backrefs by parent. Otherwise the
+  * match is on (@root, @owner, @offset) among non-full backrefs; when both
+  * @found_ref and the candidate's own found_ref are set, the candidate must
+  * additionally agree on @bytes and @disk_bytenr or it is skipped.
+  * Tree backrefs are ignored. Returns the first match or NULL.
+  */
+ static struct data_backref *find_data_backref(struct extent_record *rec,
+ 						u64 parent, u64 root,
+ 						u64 owner, u64 offset,
+ 						int found_ref,
+ 						u64 disk_bytenr, u64 bytes)
+ {
+ 	struct extent_backref *entry;
+
+ 	list_for_each_entry(entry, &rec->backrefs, list) {
+ 		struct data_backref *candidate;
+
+ 		if (!entry->is_data)
+ 			continue;
+
+ 		candidate = (struct data_backref *)entry;
+ 		if (parent > 0) {
+ 			if (entry->full_backref && candidate->parent == parent)
+ 				return candidate;
+ 			continue;
+ 		}
+
+ 		if (entry->full_backref)
+ 			continue;
+ 		if (candidate->root != root || candidate->owner != owner ||
+ 		    candidate->offset != offset)
+ 			continue;
+
+ 		/* An already-found ref with a different extent is not ours. */
+ 		if (found_ref && entry->found_ref &&
+ 		    (candidate->bytes != bytes ||
+ 		     candidate->disk_bytenr != disk_bytenr))
+ 			continue;
+
+ 		return candidate;
+ 	}
+
+ 	return NULL;
+ }
+
+ /*
+  * Allocate a data backref and append it to @rec->backrefs.
+  *
+  * A non-zero @parent creates a full backref (owner and offset are zero);
+  * otherwise the backref is keyed by @root/@owner/@offset. @max_size becomes
+  * the backref's byte count and raises @rec->max_size if it is larger.
+  *
+  * The whole object is zeroed up front: disk_bytenr is not assigned here but
+  * is compared against the record start in all_backpointers_checked(), so it
+  * must not be left holding heap garbage.
+  *
+  * Returns the new backref, or NULL if allocation fails.
+  */
+ static struct data_backref *alloc_data_backref(struct extent_record *rec,
+ 						 u64 parent, u64 root,
+ 						 u64 owner, u64 offset,
+ 						 u64 max_size)
+ {
+ 	struct data_backref *ref = malloc(sizeof(*ref));
+
+ 	if (!ref)
+ 		return NULL;
+ 	memset(ref, 0, sizeof(*ref));
+ 	ref->node.is_data = 1;
+
+ 	if (parent > 0) {
+ 		ref->parent = parent;
+ 		ref->owner = 0;
+ 		ref->offset = 0;
+ 		ref->node.full_backref = 1;
+ 	} else {
+ 		ref->root = root;
+ 		ref->owner = owner;
+ 		ref->offset = offset;
+ 		ref->node.full_backref = 0;
+ 	}
+ 	ref->bytes = max_size;
+ 	ref->found_ref = 0;
+ 	ref->num_refs = 0;
+ 	list_add_tail(&ref->node.list, &rec->backrefs);
+ 	if (max_size > rec->max_size)
+ 		rec->max_size = max_size;
+ 	return ref;
+ }
+
+ /*
+ * Check if the type of extent matches with its chunk.
+ *
+ * Looks up the block group for @rec->start through global_info and sets
+ * @rec->wrong_chunk_type when the block group flags disagree with the kind
+ * of extent. Nothing is done if no block group is found.
+ */
+ static void check_extent_type(struct extent_record *rec)
+ {
+ struct btrfs_block_group_cache *bg_cache;
+
+ bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
+ if (!bg_cache)
+ return;
+
+ /* data extent, check chunk directly*/
+ if (!rec->metadata) {
+ if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
+ rec->wrong_chunk_type = 1;
+ return;
+ }
+
+ /* metadata extent, check the obvious case first */
+ if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
+ BTRFS_BLOCK_GROUP_METADATA))) {
+ rec->wrong_chunk_type = 1;
+ return;
+ }
+
+ /*
+ * Check SYSTEM extent, as it's also marked as metadata, we can only
+ * make sure it's a SYSTEM extent by its backref
+ */
+ if (!list_empty(&rec->backrefs)) {
+ struct extent_backref *node;
+ struct tree_backref *tback;
+ u64 bg_type;
+
+ /* Only the first backref on the list is consulted. */
+ node = list_entry(rec->backrefs.next, struct extent_backref,
+ list);
+ if (node->is_data) {
+ /* tree block shouldn't have data backref */
+ rec->wrong_chunk_type = 1;
+ return;
+ }
+ tback = container_of(node, struct tree_backref, node);
+
+ /* Chunk tree blocks belong in SYSTEM, all others in METADATA. */
+ if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
+ bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
+ else
+ bg_type = BTRFS_BLOCK_GROUP_METADATA;
+ if (!(bg_cache->flags & bg_type))
+ rec->wrong_chunk_type = 1;
+ }
+ }
+
+ /*
+  * Create or update the extent record covering [@start, @start + @nr).
+  *
+  * If a record already overlaps the range it is updated in place: the ref
+  * count is bumped when @inc_ref is set, a second extent item (or one with a
+  * different start) is queued on the record's dups list and the record is
+  * put on duplicate_extents, and flags/keys/sizes are merged. The record may
+  * then be released by maybe_free_extent_rec().
+  *
+  * Otherwise a new record is allocated, initialised from the arguments and
+  * inserted into @extent_cache; @nr is added to bytes_used.
+  *
+  * Both allocations are zero-filled so that fields not assigned here never
+  * hold indeterminate values.
+  *
+  * Returns 0 on success or -ENOMEM if an allocation fails.
+  */
+ static int add_extent_rec(struct cache_tree *extent_cache,
+ 			  struct btrfs_key *parent_key, u64 parent_gen,
+ 			  u64 start, u64 nr, u64 extent_item_refs,
+ 			  int is_root, int inc_ref, int set_checked,
+ 			  int metadata, int extent_rec, u64 max_size)
+ {
+ 	struct extent_record *rec;
+ 	struct cache_extent *cache;
+ 	int ret = 0;
+ 	int dup = 0;
+
+ 	cache = lookup_cache_extent(extent_cache, start, nr);
+ 	if (cache) {
+ 		rec = container_of(cache, struct extent_record, cache);
+ 		if (inc_ref)
+ 			rec->refs++;
+ 		/* nr == 1 is the placeholder length used by the backref adders. */
+ 		if (rec->nr == 1)
+ 			rec->nr = max(nr, max_size);
+
+ 		/*
+ 		 * We need to make sure to reset nr to whatever the extent
+ 		 * record says was the real size, this way we can compare it to
+ 		 * the backrefs.
+ 		 */
+ 		if (extent_rec) {
+ 			if (start != rec->start || rec->found_rec) {
+ 				struct extent_record *tmp;
+
+ 				dup = 1;
+ 				if (list_empty(&rec->list))
+ 					list_add_tail(&rec->list,
+ 						      &duplicate_extents);
+
+ 				/*
+ 				 * We have to do this song and dance in case we
+ 				 * find an extent record that falls inside of
+ 				 * our current extent record but does not have
+ 				 * the same objectid.
+ 				 */
+ 				tmp = calloc(1, sizeof(*tmp));
+ 				if (!tmp)
+ 					return -ENOMEM;
+ 				tmp->start = start;
+ 				tmp->max_size = max_size;
+ 				tmp->nr = nr;
+ 				tmp->found_rec = 1;
+ 				tmp->metadata = metadata;
+ 				tmp->extent_item_refs = extent_item_refs;
+ 				INIT_LIST_HEAD(&tmp->list);
+ 				list_add_tail(&tmp->list, &rec->dups);
+ 				rec->num_duplicates++;
+ 			} else {
+ 				rec->nr = nr;
+ 				rec->found_rec = 1;
+ 			}
+ 		}
+
+ 		if (extent_item_refs && !dup) {
+ 			if (rec->extent_item_refs) {
+ 				fprintf(stderr, "block %llu rec "
+ 					"extent_item_refs %llu, passed %llu\n",
+ 					(unsigned long long)start,
+ 					(unsigned long long)
+ 							rec->extent_item_refs,
+ 					(unsigned long long)extent_item_refs);
+ 			}
+ 			rec->extent_item_refs = extent_item_refs;
+ 		}
+ 		if (is_root)
+ 			rec->is_root = 1;
+ 		if (set_checked) {
+ 			rec->content_checked = 1;
+ 			rec->owner_ref_checked = 1;
+ 		}
+
+ 		if (parent_key)
+ 			btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
+ 		if (parent_gen)
+ 			rec->parent_generation = parent_gen;
+
+ 		if (rec->max_size < max_size)
+ 			rec->max_size = max_size;
+
+ 		/*
+ 		 * A metadata extent can't cross stripe_len boundary, otherwise
+ 		 * kernel scrub won't be able to handle it.
+ 		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
+ 		 * it.
+ 		 */
+ 		if (metadata && check_crossing_stripes(rec->start,
+ 						       rec->max_size))
+ 			rec->crossing_stripes = 1;
+ 		check_extent_type(rec);
+ 		maybe_free_extent_rec(extent_cache, rec);
+ 		return ret;
+ 	}
+ 	rec = calloc(1, sizeof(*rec));
+ 	if (!rec)
+ 		return -ENOMEM;
+ 	rec->start = start;
+ 	rec->max_size = max_size;
+ 	rec->nr = max(nr, max_size);
+ 	rec->found_rec = !!extent_rec;
+ 	rec->content_checked = 0;
+ 	rec->owner_ref_checked = 0;
+ 	rec->num_duplicates = 0;
+ 	rec->metadata = metadata;
+ 	rec->flag_block_full_backref = -1;
+ 	rec->bad_full_backref = 0;
+ 	rec->crossing_stripes = 0;
+ 	rec->wrong_chunk_type = 0;
+ 	INIT_LIST_HEAD(&rec->backrefs);
+ 	INIT_LIST_HEAD(&rec->dups);
+ 	INIT_LIST_HEAD(&rec->list);
+
+ 	if (is_root)
+ 		rec->is_root = 1;
+ 	else
+ 		rec->is_root = 0;
+
+ 	if (inc_ref)
+ 		rec->refs = 1;
+ 	else
+ 		rec->refs = 0;
+
+ 	if (extent_item_refs)
+ 		rec->extent_item_refs = extent_item_refs;
+ 	else
+ 		rec->extent_item_refs = 0;
+
+ 	/* Size the clear by the destination, not by the (NULL) source. */
+ 	if (parent_key)
+ 		btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
+ 	else
+ 		memset(&rec->parent_key, 0, sizeof(rec->parent_key));
+
+ 	if (parent_gen)
+ 		rec->parent_generation = parent_gen;
+ 	else
+ 		rec->parent_generation = 0;
+
+ 	rec->cache.start = start;
+ 	rec->cache.size = nr;
+ 	ret = insert_cache_extent(extent_cache, &rec->cache);
+ 	BUG_ON(ret);
+ 	bytes_used += nr;
+ 	if (set_checked) {
+ 		rec->content_checked = 1;
+ 		rec->owner_ref_checked = 1;
+ 	}
+
+ 	if (metadata)
+ 		if (check_crossing_stripes(rec->start, rec->max_size))
+ 			rec->crossing_stripes = 1;
+ 	check_extent_type(rec);
+ 	return ret;
+ }
+
+ /*
+ * Record a tree backref for the extent starting at @bytenr.
+ *
+ * The extent record is created on demand (as metadata, with a placeholder
+ * length of 1) and the matching backref is looked up or allocated. A
+ * non-zero @found_ref marks the backref's found_ref flag; zero marks
+ * found_extent_tree. Seeing the same kind twice is reported on stderr but is
+ * not fatal.
+ *
+ * Aborts if the record cannot be found after creation or does not start at
+ * @bytenr. Always returns 0 otherwise.
+ */
+ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
+ u64 parent, u64 root, int found_ref)
+ {
+ struct extent_record *rec;
+ struct tree_backref *back;
+ struct cache_extent *cache;
+
+ cache = lookup_cache_extent(extent_cache, bytenr, 1);
+ if (!cache) {
+ /* Return value is not checked; the second lookup catches failure. */
+ add_extent_rec(extent_cache, NULL, 0, bytenr,
+ 1, 0, 0, 0, 0, 1, 0, 0);
+ cache = lookup_cache_extent(extent_cache, bytenr, 1);
+ if (!cache)
+ abort();
+ }
+
+ rec = container_of(cache, struct extent_record, cache);
+ if (rec->start != bytenr) {
+ abort();
+ }
+
+ back = find_tree_backref(rec, parent, root);
+ if (!back) {
+ back = alloc_tree_backref(rec, parent, root);
+ BUG_ON(!back);
+ }
+
+ if (found_ref) {
+ if (back->node.found_ref) {
+ fprintf(stderr, "Extent back ref already exists "
+ "for %llu parent %llu root %llu \n",
+ (unsigned long long)bytenr,
+ (unsigned long long)parent,
+ (unsigned long long)root);
+ }
+ back->node.found_ref = 1;
+ } else {
+ if (back->node.found_extent_tree) {
+ fprintf(stderr, "Extent back ref already exists "
+ "for %llu parent %llu root %llu \n",
+ (unsigned long long)bytenr,
+ (unsigned long long)parent,
+ (unsigned long long)root);
+ }
+ back->node.found_extent_tree = 1;
+ }
+ check_extent_type(rec);
+ maybe_free_extent_rec(extent_cache, rec);
+ return 0;
+ }
+
+ /*
+ * Record a data backref for the extent starting at @bytenr.
+ *
+ * The extent record is created on demand and its max_size is raised to
+ * @max_size if needed. With a non-zero @found_ref the backref's found count
+ * and the record's ref count are incremented and the backref's bytes and
+ * disk_bytenr are set; @num_refs must then be 1. With a zero @found_ref the
+ * backref is marked as present in the extent tree with @num_refs references.
+ *
+ * Aborts if the record cannot be found after creation. Always returns 0
+ * otherwise.
+ */
+ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
+ u64 parent, u64 root, u64 owner, u64 offset,
+ u32 num_refs, int found_ref, u64 max_size)
+ {
+ struct extent_record *rec;
+ struct data_backref *back;
+ struct cache_extent *cache;
+
+ cache = lookup_cache_extent(extent_cache, bytenr, 1);
+ if (!cache) {
+ /* Return value is not checked; the second lookup catches failure. */
+ add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
+ 0, 0, max_size);
+ cache = lookup_cache_extent(extent_cache, bytenr, 1);
+ if (!cache)
+ abort();
+ }
+
+ rec = container_of(cache, struct extent_record, cache);
+ if (rec->max_size < max_size)
+ rec->max_size = max_size;
+
+ /*
+ * If found_ref is set then max_size is the real size and must match the
+ * existing refs. So if we have already found a ref then we need to
+ * make sure that this ref matches the existing one, otherwise we need
+ * to add a new backref so we can notice that the backrefs don't match
+ * and we need to figure out who is telling the truth. This is to
+ * account for that awful fsync bug I introduced where we'd end up with
+ * a btrfs_file_extent_item that would have its length include multiple
+ * prealloc extents or point inside of a prealloc extent.
+ */
+ back = find_data_backref(rec, parent, root, owner, offset, found_ref,
+ bytenr, max_size);
+ if (!back) {
+ back = alloc_data_backref(rec, parent, root, owner, offset,
+ max_size);
+ BUG_ON(!back);
+ }
+
+ if (found_ref) {
+ BUG_ON(num_refs != 1);
+ if (back->node.found_ref)
+ BUG_ON(back->bytes != max_size);
+ back->node.found_ref = 1;
+ back->found_ref += 1;
+ back->bytes = max_size;
+ back->disk_bytenr = bytenr;
+ rec->refs += 1;
+ /* A found data ref marks the record as content- and owner-checked. */
+ rec->content_checked = 1;
+ rec->owner_ref_checked = 1;
+ } else {
+ if (back->node.found_extent_tree) {
+ fprintf(stderr, "Extent back ref already exists "
+ "for %llu parent %llu root %llu "
+ "owner %llu offset %llu num_refs %lu\n",
+ (unsigned long long)bytenr,
+ (unsigned long long)parent,
+ (unsigned long long)root,
+ (unsigned long long)owner,
+ (unsigned long long)offset,
+ (unsigned long)num_refs);
+ }
+ back->num_refs = num_refs;
+ back->node.found_extent_tree = 1;
+ }
+ maybe_free_extent_rec(extent_cache, rec);
+ return 0;
+ }
+
+ /*
+  * Queue [@bytenr, @bytenr + @size) on @pending unless it was already seen.
+  *
+  * The range is first added to @seen; if that fails its error is returned
+  * and @pending is left untouched. The result of the insert into @pending is
+  * not examined. Returns 0 on success.
+  */
+ static int add_pending(struct cache_tree *pending,
+ 		       struct cache_tree *seen, u64 bytenr, u32 size)
+ {
+ 	int err = add_cache_extent(seen, bytenr, size);
+
+ 	if (!err)
+ 		add_cache_extent(pending, bytenr, size);
+
+ 	return err;
+ }
+
+ /*
+  * Choose the next batch of blocks to process and store them in @bits.
+  *
+  * Priority order:
+  *  1. the first extent in @reada -- exactly one entry is returned and
+  *     *@reada_bits is set to 1;
+  *  2. extents from @nodes, starting at most 32768 bytes before @last (or from
+  *     the beginning if nothing is found there), topped up with nearby
+  *     extents from @pending when more than 8 slots remain free;
+  *  3. extents from @pending when @nodes is empty.
+  *
+  * Returns the number of entries written to @bits (0 if everything is empty).
+  */
+ static int pick_next_pending(struct cache_tree *pending,
+ 			     struct cache_tree *reada,
+ 			     struct cache_tree *nodes,
+ 			     u64 last, struct block_info *bits, int bits_nr,
+ 			     int *reada_bits)
+ {
+ 	/* Same width as @last so large byte offsets are not truncated. */
+ 	u64 node_start = last;
+ 	struct cache_extent *cache;
+ 	int ret;
+
+ 	cache = search_cache_extent(reada, 0);
+ 	if (cache) {
+ 		bits[0].start = cache->start;
+ 		bits[0].size = cache->size;
+ 		*reada_bits = 1;
+ 		return 1;
+ 	}
+ 	*reada_bits = 0;
+ 	if (node_start > 32768)
+ 		node_start -= 32768;
+
+ 	cache = search_cache_extent(nodes, node_start);
+ 	if (!cache)
+ 		cache = search_cache_extent(nodes, 0);
+
+ 	if (!cache) {
+ 		cache = search_cache_extent(pending, 0);
+ 		if (!cache)
+ 			return 0;
+ 		ret = 0;
+ 		do {
+ 			bits[ret].start = cache->start;
+ 			bits[ret].size = cache->size;
+ 			cache = next_cache_extent(cache);
+ 			ret++;
+ 		} while (cache && ret < bits_nr);
+ 		return ret;
+ 	}
+
+ 	ret = 0;
+ 	do {
+ 		bits[ret].start = cache->start;
+ 		bits[ret].size = cache->size;
+ 		cache = next_cache_extent(cache);
+ 		ret++;
+ 	} while (cache && ret < bits_nr);
+
+ 	if (bits_nr - ret > 8) {
+ 		u64 lookup = bits[0].start + bits[0].size;
+ 		struct cache_extent *next;
+ 		next = search_cache_extent(pending, lookup);
+ 		while(next) {
+ 			/* Stop once the next pending extent is too far away. */
+ 			if (next->start - lookup > 32768)
+ 				break;
+ 			bits[ret].start = next->start;
+ 			bits[ret].size = next->size;
+ 			lookup = next->start + next->size;
+ 			ret++;
+ 			if (ret == bits_nr)
+ 				break;
+ 			next = next_cache_extent(next);
+ 			if (!next)
+ 				break;
+ 		}
+ 	}
+ 	return ret;
+ }
+
+ /*
+  * Destructor for one chunk record: detach it from both of its lists and
+  * free it. @cache is the cache_extent embedded in the record.
+  */
+ static void free_chunk_record(struct cache_extent *cache)
+ {
+ 	struct chunk_record *chunk =
+ 		container_of(cache, struct chunk_record, cache);
+
+ 	list_del_init(&chunk->list);
+ 	list_del_init(&chunk->dextents);
+ 	free(chunk);
+ }
+
+ /* Release every chunk record in @chunk_cache via free_chunk_record(). */
+ void free_chunk_cache_tree(struct cache_tree *chunk_cache)
+ {
+ cache_tree_free_extents(chunk_cache, free_chunk_record);
+ }
+
+ /*
+  * Destructor for one device record; @node is the rb_node embedded in it.
+  */
+ static void free_device_record(struct rb_node *node)
+ {
+ 	struct device_record *dev =
+ 		container_of(node, struct device_record, node);
+
+ 	free(dev);
+ }
+
+ /*
+ * Instantiate the FREE_RB_BASED_TREE macro (defined outside this chunk) for
+ * the device cache, with free_device_record() as the per-node destructor.
+ * Presumably this generates the device cache tree's free routine -- confirm
+ * against the macro definition.
+ */
+ FREE_RB_BASED_TREE(device_cache, free_device_record);
+
+ /*
+  * Insert @bg_rec into @tree's extent cache and, on success, append it to
+  * the tree's block_groups list.
+  *
+  * Returns 0 on success or the error from insert_cache_extent().
+  */
+ int insert_block_group_record(struct block_group_tree *tree,
+ 			      struct block_group_record *bg_rec)
+ {
+ 	int err = insert_cache_extent(&tree->tree, &bg_rec->cache);
+
+ 	if (!err)
+ 		list_add_tail(&bg_rec->list, &tree->block_groups);
+
+ 	return err;
+ }
+
+ /*
+  * Destructor for one block group record: unlink it from its list and free
+  * it. @cache is the cache_extent embedded in the record.
+  */
+ static void free_block_group_record(struct cache_extent *cache)
+ {
+ 	struct block_group_record *bg =
+ 		container_of(cache, struct block_group_record, cache);
+
+ 	list_del_init(&bg->list);
+ 	free(bg);
+ }
+
+ /* Release every block group record in @tree via free_block_group_record(). */
+ void free_block_group_tree(struct block_group_tree *tree)
+ {
+ cache_tree_free_extents(&tree->tree, free_block_group_record);
+ }
+
+ /*
+  * Insert @de_rec into @tree and, on success, put it on both orphan lists
+  * (no_chunk_orphans and no_device_orphans).
+  *
+  * Returns 0 on success or the error from insert_cache_extent2().
+  */
+ int insert_device_extent_record(struct device_extent_tree *tree,
+ 				struct device_extent_record *de_rec)
+ {
+ 	int err;
+
+ 	/*
+ 	 * Extents on different devices may share the same start and size,
+ 	 * so device extents go through the dedicated insert_cache_extent2()
+ 	 * variant rather than the plain extent cache insert.
+ 	 */
+ 	err = insert_cache_extent2(&tree->tree, &de_rec->cache);
+ 	if (!err) {
+ 		list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
+ 		list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
+ 	}
+
+ 	return err;
+ }
+
+ /*
+  * Destructor for one device extent record: take it off whichever of its two
+  * lists it is still linked on, then free it. @cache is the cache_extent
+  * embedded in the record.
+  */
+ static void free_device_extent_record(struct cache_extent *cache)
+ {
+ 	struct device_extent_record *dext =
+ 		container_of(cache, struct device_extent_record, cache);
+
+ 	if (!list_empty(&dext->chunk_list))
+ 		list_del_init(&dext->chunk_list);
+
+ 	if (!list_empty(&dext->device_list))
+ 		list_del_init(&dext->device_list);
+
+ 	free(dext);
+ }
+
+ /* Release every record in @tree via free_device_extent_record(). */
+ void free_device_extent_tree(struct device_extent_tree *tree)
+ {
+ cache_tree_free_extents(&tree->tree, free_device_extent_record);
+ }
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+/*
+ * Register the backref described by the v0 extent ref item at @slot of @leaf.
+ *
+ * If the ref's objectid is below BTRFS_FIRST_FREE_OBJECTID a tree backref is
+ * added, otherwise a data backref carrying the v0 ref count.  In both cases
+ * the item key's objectid and offset are passed as the first two
+ * identifying arguments.  Always returns 0; the results of the add_*_backref()
+ * calls are not checked.
+ */
+static int process_extent_ref_v0(struct cache_tree *extent_cache,
+ struct extent_buffer *leaf, int slot)
+{
+ struct btrfs_extent_ref_v0 *ref0;
+ struct btrfs_key key;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
+ if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
+ add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
+ } else {
+ add_data_backref(extent_cache, key.objectid, key.offset, 0,
+ 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
+ }
+ return 0;
+}
+#endif
+
+/*
+ * Allocate a chunk_record and fill it from the chunk item at @slot of @leaf.
+ *
+ * @leaf: extent buffer holding the item
+ * @key:  key of the item; its offset becomes the cache start and its
+ *        objectid/type/offset are copied into the record
+ * @slot: slot of the item inside @leaf
+ *
+ * The record is sized for the item's stripe count and every stripe's devid,
+ * offset and device UUID are copied.  The process exits with status -1 if
+ * the allocation fails, so the function never returns NULL.  The caller owns
+ * the returned record.
+ */
+struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
+ struct btrfs_key *key,
+ int slot)
+{
+ struct btrfs_chunk *ptr;
+ struct chunk_record *rec;
+ int num_stripes, i;
+
+ ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
+
+ /* calloc: all fields not set explicitly below start out zeroed */
+ rec = calloc(1, btrfs_chunk_record_size(num_stripes));
+ if (!rec) {
+ fprintf(stderr, "memory allocation failed\n");
+ exit(-1);
+ }
+
+ INIT_LIST_HEAD(&rec->list);
+ INIT_LIST_HEAD(&rec->dextents);
+ rec->bg_rec = NULL;
+
+ /* cache extent index: [key offset, key offset + chunk length) */
+ rec->cache.start = key->offset;
+ rec->cache.size = btrfs_chunk_length(leaf, ptr);
+
+ rec->generation = btrfs_header_generation(leaf);
+
+ rec->objectid = key->objectid;
+ rec->type = key->type;
+ rec->offset = key->offset;
+
+ rec->length = rec->cache.size;
+ rec->owner = btrfs_chunk_owner(leaf, ptr);
+ rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
+ rec->type_flags = btrfs_chunk_type(leaf, ptr);
+ rec->io_width = btrfs_chunk_io_width(leaf, ptr);
+ rec->io_align = btrfs_chunk_io_align(leaf, ptr);
+ rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
+ rec->num_stripes = num_stripes;
+ rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
+
+ for (i = 0; i < rec->num_stripes; ++i) {
+ rec->stripes[i].devid =
+ btrfs_stripe_devid_nr(leaf, ptr, i);
+ rec->stripes[i].offset =
+ btrfs_stripe_offset_nr(leaf, ptr, i);
+ read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
+ (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
+ BTRFS_UUID_SIZE);
+ }
+
+ return rec;
+}
+
+/*
+ * Build a chunk record for the chunk item at @slot of @eb and insert it into
+ * @chunk_cache.
+ *
+ * Returns 0 on success.  If insert_cache_extent() fails, the failure is
+ * reported as an already existing chunk, the new record is freed and the
+ * non-zero result is returned.
+ */
+static int process_chunk_item(struct cache_tree *chunk_cache,
+			      struct btrfs_key *key, struct extent_buffer *eb,
+			      int slot)
+{
+	struct chunk_record *chunk = btrfs_new_chunk_record(eb, key, slot);
+	int err = insert_cache_extent(chunk_cache, &chunk->cache);
+
+	if (!err)
+		return 0;
+
+	fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
+		chunk->offset, chunk->length);
+	free(chunk);
+	return err;
+}
+
+/*
+ * Build a device_record from the dev item at @slot of @eb and insert it into
+ * the @dev_cache rb-tree.
+ *
+ * @dev_cache: rb-tree of device records, ordered by device_record_compare
+ * @key:       key of the item; objectid/type/offset are copied to the record
+ * @eb:        extent buffer holding the item
+ * @slot:      slot of the item inside @eb
+ *
+ * Returns 0 on success, -ENOMEM if the record cannot be allocated, or the
+ * non-zero result of rb_insert() (reported as an already existing device),
+ * in which case the new record is freed.
+ */
+static int process_device_item(struct rb_root *dev_cache,
+		struct btrfs_key *key, struct extent_buffer *eb, int slot)
+{
+	struct btrfs_dev_item *ptr;
+	struct device_record *rec;
+	int ret = 0;
+
+	ptr = btrfs_item_ptr(eb,
+		slot, struct btrfs_dev_item);
+
+	rec = malloc(sizeof(*rec));
+	if (!rec) {
+		fprintf(stderr, "memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	rec->generation = btrfs_header_generation(eb);
+
+	rec->objectid = key->objectid;
+	rec->type = key->type;
+	rec->offset = key->offset;
+
+	/*
+	 * The device id is read from the item body.  (It used to be seeded
+	 * from key->offset first, but that value was always overwritten here
+	 * before being read.)
+	 */
+	rec->devid = btrfs_device_id(eb, ptr);
+	rec->total_byte = btrfs_device_total_bytes(eb, ptr);
+	rec->byte_used = btrfs_device_bytes_used(eb, ptr);
+
+	ret = rb_insert(dev_cache, &rec->node, device_record_compare);
+	if (ret) {
+		fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
+		free(rec);
+	}
+
+	return ret;
+}
+
+/*
+ * Allocate a block_group_record for the block group item at @slot of @leaf.
+ *
+ * The key's objectid and offset become the cache extent's start and size,
+ * the full key is copied into the record, and the flags are read from the
+ * item body.  Exits the process with status -1 when allocation fails, so the
+ * function never returns NULL.  The caller owns the returned record.
+ */
+struct block_group_record *
+btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
+			     int slot)
+{
+	struct btrfs_block_group_item *item;
+	struct block_group_record *bg;
+
+	bg = calloc(1, sizeof(*bg));
+	if (bg == NULL) {
+		fprintf(stderr, "memory allocation failed\n");
+		exit(-1);
+	}
+	INIT_LIST_HEAD(&bg->list);
+
+	/* Cache extent index. */
+	bg->cache.start = key->objectid;
+	bg->cache.size = key->offset;
+
+	/* Copy of the item key. */
+	bg->objectid = key->objectid;
+	bg->type = key->type;
+	bg->offset = key->offset;
+
+	bg->generation = btrfs_header_generation(leaf);
+
+	item = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
+	bg->flags = btrfs_disk_block_group_flags(leaf, item);
+
+	return bg;
+}
+
+/*
+ * Build a block group record for the item at @slot of @eb and insert it into
+ * @block_group_cache.
+ *
+ * Returns 0 on success.  If insert_block_group_record() fails, the failure
+ * is reported as an already existing block group, the new record is freed
+ * and the non-zero result is returned.
+ */
+static int process_block_group_item(struct block_group_tree *block_group_cache,
+				    struct btrfs_key *key,
+				    struct extent_buffer *eb, int slot)
+{
+	struct block_group_record *bg;
+	int err;
+
+	bg = btrfs_new_block_group_record(eb, key, slot);
+	err = insert_block_group_record(block_group_cache, bg);
+	if (!err)
+		return 0;
+
+	fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
+		bg->objectid, bg->offset);
+	free(bg);
+	return err;
+}
+
+/*
+ * Allocate a device_extent_record for the dev extent item at @slot of @leaf.
+ *
+ * The cache extent is keyed by the item key's objectid and offset (used as
+ * cache.objectid and cache.start), with the dev extent length as its size.
+ * The chunk objectid and chunk offset are read from the item body.  Both
+ * list heads are initialised empty.  Exits the process with status -1 when
+ * allocation fails, so the function never returns NULL.  The caller owns the
+ * returned record.
+ */
+struct device_extent_record *
+btrfs_new_device_extent_record(struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct device_extent_record *rec;
+ struct btrfs_dev_extent *ptr;
+
+ rec = calloc(1, sizeof(*rec));
+ if (!rec) {
+ fprintf(stderr, "memory allocation failed\n");
+ exit(-1);
+ }
+
+ rec->cache.objectid = key->objectid;
+ rec->cache.start = key->offset;
+
+ rec->generation = btrfs_header_generation(leaf);
+
+ rec->objectid = key->objectid;
+ rec->type = key->type;
+ rec->offset = key->offset;
+
+ ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
+ /* "chunk_objecteid" is the field's spelling in the struct declaration */
+ rec->chunk_objecteid =
+ btrfs_dev_extent_chunk_objectid(leaf, ptr);
+ rec->chunk_offset =
+ btrfs_dev_extent_chunk_offset(leaf, ptr);
+ rec->length = btrfs_dev_extent_length(leaf, ptr);
+ rec->cache.size = rec->length;
+
+ INIT_LIST_HEAD(&rec->chunk_list);
+ INIT_LIST_HEAD(&rec->device_list);
+
+ return rec;
+}
+
+/*
+ * Build a device extent record for the item at @slot of @eb and insert it
+ * into @dev_extent_cache.
+ *
+ * Returns 0 on success.  If insert_device_extent_record() fails, the failure
+ * is reported as an already existing device extent, the new record is freed
+ * and the non-zero result is returned.
+ */
+static int
+process_device_extent_item(struct device_extent_tree *dev_extent_cache,
+			   struct btrfs_key *key, struct extent_buffer *eb,
+			   int slot)
+{
+	struct device_extent_record *de;
+	int err;
+
+	de = btrfs_new_device_extent_record(eb, key, slot);
+	err = insert_device_extent_record(dev_extent_cache, de);
+	if (!err)
+		return 0;
+
+	fprintf(stderr,
+		"Device extent[%llu, %llu, %llu] existed.\n",
+		de->objectid, de->offset, de->length);
+	free(de);
+	return err;
+}
+
+/*
+ * Record the extent item (EXTENT_ITEM or METADATA_ITEM) at @slot of @eb in
+ * @extent_cache and register every inline backref it carries.
+ *
+ * For METADATA_ITEM keys the extent size is root->leafsize, otherwise it is
+ * the key offset.  Items smaller than struct btrfs_extent_item are treated
+ * as the v0 format (or hit BUG() when v0 support is compiled out) and only
+ * produce the extent record.
+ *
+ * Returns the result of add_extent_rec() for v0 items and 0 otherwise; the
+ * results of the other add_*() calls are not checked.  An unknown inline ref
+ * type is reported on stderr and ends the walk, still returning 0.
+ */
+static int process_extent_item(struct btrfs_root *root,
+ struct cache_tree *extent_cache,
+ struct extent_buffer *eb, int slot)
+{
+ struct btrfs_extent_item *ei;
+ struct btrfs_extent_inline_ref *iref;
+ struct btrfs_extent_data_ref *dref;
+ struct btrfs_shared_data_ref *sref;
+ struct btrfs_key key;
+ unsigned long end;
+ unsigned long ptr;
+ int type;
+ u32 item_size = btrfs_item_size_nr(eb, slot);
+ u64 refs = 0;
+ u64 offset;
+ u64 num_bytes;
+ int metadata = 0;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+
+ if (key.type == BTRFS_METADATA_ITEM_KEY) {
+ metadata = 1;
+ num_bytes = root->leafsize;
+ } else {
+ num_bytes = key.offset;
+ }
+
+ if (item_size < sizeof(*ei)) {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+ struct btrfs_extent_item_v0 *ei0;
+ BUG_ON(item_size != sizeof(*ei0));
+ ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
+ refs = btrfs_extent_refs_v0(eb, ei0);
+#else
+ BUG();
+#endif
+ return add_extent_rec(extent_cache, NULL, 0, key.objectid,
+ num_bytes, refs, 0, 0, 0, metadata, 1,
+ num_bytes);
+ }
+
+ ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
+ refs = btrfs_extent_refs(eb, ei);
+ /* the item's own flags override the guess made from the key type */
+ if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+ metadata = 1;
+ else
+ metadata = 0;
+
+ add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
+ refs, 0, 0, 0, metadata, 1, num_bytes);
+
+ /*
+ * Inline refs start right after the extent item; tree blocks keyed as
+ * EXTENT_ITEM additionally carry a btrfs_tree_block_info in between.
+ */
+ ptr = (unsigned long)(ei + 1);
+ if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
+ key.type == BTRFS_EXTENT_ITEM_KEY)
+ ptr += sizeof(struct btrfs_tree_block_info);
+
+ end = (unsigned long)ei + item_size;
+ while (ptr < end) {
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ type = btrfs_extent_inline_ref_type(eb, iref);
+ offset = btrfs_extent_inline_ref_offset(eb, iref);
+ switch (type) {
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ add_tree_backref(extent_cache, key.objectid,
+ 0, offset, 0);
+ break;
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ add_tree_backref(extent_cache, key.objectid,
+ offset, 0, 0);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY:
+ /* the data ref overlays the inline ref's offset field */
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ add_data_backref(extent_cache, key.objectid, 0,
+ btrfs_extent_data_ref_root(eb, dref),
+ btrfs_extent_data_ref_objectid(eb,
+ dref),
+ btrfs_extent_data_ref_offset(eb, dref),
+ btrfs_extent_data_ref_count(eb, dref),
+ 0, num_bytes);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY:
+ /* the shared data ref follows the inline ref header */
+ sref = (struct btrfs_shared_data_ref *)(iref + 1);
+ add_data_backref(extent_cache, key.objectid, offset,
+ 0, 0, 0,
+ btrfs_shared_data_ref_count(eb, sref),
+ 0, num_bytes);
+ break;
+ default:
+ fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
+ key.objectid, key.type, num_bytes);
+ goto out;
+ }
+ ptr += btrfs_extent_inline_ref_size(type);
+ }
+ /* a walk that overshoots the item end means a malformed ref chain */
+ WARN_ON(ptr > end);
+out:
+ return 0;
+}
+
+/*
+ * Confirm that the range [@offset, @offset + @bytes) of block group @cache
+ * is recorded as free space in cache->free_space_ctl.
+ *
+ * The range is first trimmed against every superblock mirror: for each
+ * mirror btrfs_rmap_block() yields @nr logical addresses, each covering
+ * @stripe_len bytes, and any overlap with them is cut out of the range
+ * (recursing on the left part when a stripe lies strictly inside it).  What
+ * remains must match exactly one entry found by btrfs_find_free_space();
+ * that entry is then unlinked and freed.
+ *
+ * Returns 0 on success (including when the stripes cover the whole range),
+ * the error from btrfs_rmap_block() or from the recursive call, or -EINVAL
+ * when the free space entry is missing or does not match.
+ */
+static int check_cache_range(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache,
+ u64 offset, u64 bytes)
+{
+ struct btrfs_free_space *entry;
+ u64 *logical;
+ u64 bytenr;
+ int stripe_len;
+ int i, nr, ret;
+
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+ cache->key.objectid, bytenr, 0,
+ &logical, &nr, &stripe_len);
+ if (ret)
+ return ret;
+
+ while (nr--) {
+ /* stripe entirely before or entirely after the range */
+ if (logical[nr] + stripe_len <= offset)
+ continue;
+ if (offset + bytes <= logical[nr])
+ continue;
+ if (logical[nr] == offset) {
+ /* stripe starts at the range start */
+ if (stripe_len >= bytes) {
+ kfree(logical);
+ return 0;
+ }
+ bytes -= stripe_len;
+ offset += stripe_len;
+ } else if (logical[nr] < offset) {
+ /* stripe overlaps the front of the range */
+ if (logical[nr] + stripe_len >=
+ offset + bytes) {
+ kfree(logical);
+ return 0;
+ }
+ bytes = (offset + bytes) -
+ (logical[nr] + stripe_len);
+ offset = logical[nr] + stripe_len;
+ } else {
+ /*
+ * Could be tricky, the super may land in the
+ * middle of the area we're checking. First
+ * check the easiest case, it's at the end.
+ */
+ if (logical[nr] + stripe_len >=
+ bytes + offset) {
+ bytes = logical[nr] - offset;
+ continue;
+ }
+
+ /* Check the left side */
+ ret = check_cache_range(root, cache,
+ offset,
+ logical[nr] - offset);
+ if (ret) {
+ kfree(logical);
+ return ret;
+ }
+
+ /* Now we continue with the right side */
+ bytes = (offset + bytes) -
+ (logical[nr] + stripe_len);
+ offset = logical[nr] + stripe_len;
+ }
+ }
+
+ kfree(logical);
+ }
+
+ entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
+ if (!entry) {
+ fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
+ offset, offset+bytes);
+ return -EINVAL;
+ }
+
+ if (entry->offset != offset) {
+ fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
+ entry->offset);
+ return -EINVAL;
+ }
+
+ if (entry->bytes != bytes) {
+ fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
+ bytes, entry->bytes, offset);
+ return -EINVAL;
+ }
+
+ /* consume the matched entry; verify_space_cache() checks for leftovers */
+ unlink_free_space(cache->free_space_ctl, entry);
+ free(entry);
+ return 0;
+}
+
+/*
+ * Cross-check the loaded free space entries of block group @cache against
+ * the extent tree.
+ *
+ * Walks the EXTENT_ITEM/METADATA_ITEM keys of the extent root that fall
+ * inside the block group and calls check_cache_range() for every gap between
+ * consecutive extents, plus the tail gap up to the end of the block group.
+ * Because check_cache_range() removes each entry it matches, any entry still
+ * present afterwards is reported as an error.
+ *
+ * Returns 0 if the cache matches, -ENOMEM if no path can be allocated, a
+ * negative error from the tree search/walk, the error from
+ * check_cache_range(), or -EINVAL when entries are left over.
+ */
+static int verify_space_cache(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u64 last;
+ int ret = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ root = root->fs_info->extent_root;
+
+ /* start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET */
+ last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+
+ key.objectid = last;
+ key.offset = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ ret = 0;
+ while (1) {
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ /* ran off the end of the tree */
+ ret = 0;
+ break;
+ }
+ }
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ /* past the end of this block group */
+ if (key.objectid >= cache->key.offset + cache->key.objectid)
+ break;
+ if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_METADATA_ITEM_KEY) {
+ path->slots[0]++;
+ continue;
+ }
+
+ /* extent starts exactly where the previous one ended: no gap */
+ if (last == key.objectid) {
+ if (key.type == BTRFS_EXTENT_ITEM_KEY)
+ last = key.objectid + key.offset;
+ else
+ last = key.objectid + root->leafsize;
+ path->slots[0]++;
+ continue;
+ }
+
+ /* gap [last, key.objectid) must be described by the cache */
+ ret = check_cache_range(root, cache, last,
+ key.objectid - last);
+ if (ret)
+ break;
+ if (key.type == BTRFS_EXTENT_ITEM_KEY)
+ last = key.objectid + key.offset;
+ else
+ last = key.objectid + root->leafsize;
+ path->slots[0]++;
+ }
+
+ /*
+ * Tail gap between the last extent and the end of the block group.
+ * NOTE(review): this runs even when the loop above broke out with a
+ * non-zero ret, overwriting that error -- confirm this is intended.
+ */
+ if (last < cache->key.objectid + cache->key.offset)
+ ret = check_cache_range(root, cache, last,
+ cache->key.objectid +
+ cache->key.offset - last);
+
+out:
+ btrfs_free_path(path);
+
+ if (!ret &&
+ !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
+ fprintf(stderr, "There are still entries left in the space "
+ "cache\n");
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * Validate the free space cache (or free space tree) of every block group.
+ *
+ * The check is skipped, returning 0, when the superblock's cache generation
+ * is set (not -1) but differs from the super generation.  Otherwise each
+ * block group found from BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE
+ * onwards gets a fresh free space ctl, has its free space loaded from the
+ * free space tree or the free space cache, and is then checked with
+ * verify_space_cache().
+ *
+ * Returns 0 when no problem was found, -EINVAL when at least one block group
+ * failed, or -ENOMEM when a free space ctl could not be allocated (this
+ * failure used to be dropped, making an aborted scan look like success).
+ */
+static int check_space_cache(struct btrfs_root *root)
+{
+	struct btrfs_block_group_cache *cache;
+	u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
+	int ret;
+	int error = 0;
+	int fatal = 0;
+
+	if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
+	    btrfs_super_generation(root->fs_info->super_copy) !=
+	    btrfs_super_cache_generation(root->fs_info->super_copy)) {
+		printf("cache and super generation don't match, space cache "
+		       "will be invalidated\n");
+		return 0;
+	}
+
+	if (ctx.progress_enabled) {
+		ctx.tp = TASK_FREE_SPACE;
+		task_start(ctx.info);
+	}
+
+	while (1) {
+		cache = btrfs_lookup_first_block_group(root->fs_info, start);
+		if (!cache)
+			break;
+
+		/* the next lookup continues after this block group */
+		start = cache->key.objectid + cache->key.offset;
+		if (!cache->free_space_ctl) {
+			if (btrfs_init_free_space_ctl(cache,
+						root->sectorsize)) {
+				/* remember the failure; the scan is incomplete */
+				fatal = -ENOMEM;
+				break;
+			}
+		} else {
+			btrfs_remove_free_space_cache(cache);
+		}
+
+		if (btrfs_fs_compat_ro(root->fs_info,
+				BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
+			ret = exclude_super_stripes(root, cache);
+			if (ret) {
+				fprintf(stderr, "could not exclude super stripes: %s\n",
+					strerror(-ret));
+				error++;
+				continue;
+			}
+			ret = load_free_space_tree(root->fs_info, cache);
+			free_excluded_extents(root, cache);
+			if (ret < 0) {
+				fprintf(stderr, "could not load free space tree: %s\n",
+					strerror(-ret));
+				error++;
+				continue;
+			}
+			/* a positive result is added to the error count */
+			error += ret;
+		} else {
+			ret = load_free_space_cache(root->fs_info, cache);
+			/* a zero result skips verification of this group */
+			if (!ret)
+				continue;
+		}
+
+		ret = verify_space_cache(root, cache);
+		if (ret) {
+			fprintf(stderr, "cache appears valid but isnt %Lu\n",
+				cache->key.objectid);
+			error++;
+		}
+	}
+
+	task_stop(ctx.info);
+
+	if (fatal)
+		return fatal;
+	return error ? -EINVAL : 0;
+}
+
+/*
+ * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
+ * checksum of every sectorsize-sized unit with the expected checksums stored
+ * in @eb starting at byte offset @leaf_offset.
+ *
+ * On a mismatch the details are printed and, while another mirror is
+ * available according to btrfs_num_copies(), the read is retried from the
+ * current offset with the next mirror.
+ *
+ * Returns -EINVAL if @num_bytes is not a multiple of the sector size,
+ * -ENOMEM if the data buffer cannot be allocated, the error from
+ * read_extent_data(), or 0 otherwise.
+ * NOTE(review): a mismatch that persists on the last mirror is only logged;
+ * the return value stays 0 -- confirm whether callers rely on that.
+ */
+static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, unsigned long leaf_offset,
+ struct extent_buffer *eb) {
+
+ u64 offset = 0;
+ u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ char *data;
+ unsigned long csum_offset;
+ u32 csum;
+ u32 csum_expected;
+ u64 read_len;
+ u64 data_checked = 0;
+ u64 tmp;
+ int ret = 0;
+ int mirror;
+ int num_copies;
+
+ if (num_bytes % root->sectorsize)
+ return -EINVAL;
+
+ data = malloc(num_bytes);
+ if (!data)
+ return -ENOMEM;
+
+ while (offset < num_bytes) {
+ mirror = 0;
+again:
+ read_len = num_bytes - offset;
+ /* read as much as possible in one go; read_len is updated by the call */
+ ret = read_extent_data(root, data + offset,
+ bytenr + offset, &read_len, mirror);
+ if (ret)
+ goto out;
+ data_checked = 0;
+ /* verify the checksum of each sectorsize-sized unit just read */
+ while (data_checked < read_len) {
+ csum = ~(u32)0;
+ tmp = offset + data_checked;
+
+ csum = btrfs_csum_data(NULL, (char *)data + tmp,
+ csum, root->sectorsize);
+ btrfs_csum_final(csum, (char *)&csum);
+
+ /* expected csums are packed one per sector in the leaf */
+ csum_offset = leaf_offset +
+ tmp / root->sectorsize * csum_size;
+ read_extent_buffer(eb, (char *)&csum_expected,
+ csum_offset, csum_size);
+ /* try another mirror */
+ if (csum != csum_expected) {
+ fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
+ mirror, bytenr + tmp,
+ csum, csum_expected);
+ num_copies = btrfs_num_copies(
+ &root->fs_info->mapping_tree,
+ bytenr, num_bytes);
+ if (mirror < num_copies - 1) {
+ mirror += 1;
+ goto again;
+ }
+ }
+ data_checked += root->sectorsize;
+ }
+ offset += read_len;
+ }
+out:
+ free(data);
+ return ret;
+}
+
+/*
+ * Check that the whole range [@bytenr, @bytenr + @num_bytes) is covered by
+ * EXTENT_ITEM records in the extent tree.
+ *
+ * The search starts at the last item at or before (@bytenr, EXTENT_ITEM, -1)
+ * and walks forward, shrinking the still-uncovered range as extents are
+ * found.  An extent that lies strictly inside the range splits it: the right
+ * part is checked recursively and the left part by restarting the search.
+ *
+ * Returns 0 when the range is fully covered, 1 when part of it has no extent
+ * record (a message is printed), -ENOMEM if no path can be allocated, or a
+ * negative error from the tree search/walk helpers.
+ */
+static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes)
+{
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ fprintf(stderr, "Error allocing path\n");
+ return -ENOMEM;
+ }
+
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+again:
+ ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
+ 0, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Error looking up extent record %d\n", ret);
+ btrfs_free_path(path);
+ return ret;
+ } else if (ret) {
+ /* no exact match: step back to the previous item */
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ } else {
+ ret = btrfs_prev_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ }
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ /*
+ * Block group items come before extent items if they have the same
+ * bytenr, so walk back one more just in case. Dear future traveler,
+ * first congrats on mastering time travel. Now if it's not too much
+ * trouble could you go back to 2006 and tell Chris to make the
+ * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
+ * EXTENT_ITEM_KEY please?
+ */
+ while (key.type > BTRFS_EXTENT_ITEM_KEY) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ } else {
+ ret = btrfs_prev_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ }
+
+ /* walk forward until the remaining range is covered or passed */
+ while (num_bytes) {
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ fprintf(stderr, "Error going to next leaf "
+ "%d\n", ret);
+ btrfs_free_path(path);
+ return ret;
+ } else if (ret) {
+ break;
+ }
+ }
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.type != BTRFS_EXTENT_ITEM_KEY) {
+ path->slots[0]++;
+ continue;
+ }
+ /* extent ends before the range starts */
+ if (key.objectid + key.offset < bytenr) {
+ path->slots[0]++;
+ continue;
+ }
+ /* extent starts after the range ends */
+ if (key.objectid > bytenr + num_bytes)
+ break;
+
+ if (key.objectid == bytenr) {
+ /* extent starts at the range start */
+ if (key.offset >= num_bytes) {
+ num_bytes = 0;
+ break;
+ }
+ num_bytes -= key.offset;
+ bytenr += key.offset;
+ } else if (key.objectid < bytenr) {
+ /* extent overlaps the front of the range */
+ if (key.objectid + key.offset >= bytenr + num_bytes) {
+ num_bytes = 0;
+ break;
+ }
+ num_bytes = (bytenr + num_bytes) -
+ (key.objectid + key.offset);
+ bytenr = key.objectid + key.offset;
+ } else {
+ if (key.objectid + key.offset < bytenr + num_bytes) {
+ u64 new_start = key.objectid + key.offset;
+ u64 new_bytes = bytenr + num_bytes - new_start;
+
+ /*
+ * Weird case, the extent is in the middle of
+ * our range, we'll have to search one side
+ * and then the other. Not sure if this happens
+ * in real life, but no harm in coding it up
+ * anyway just in case.
+ */
+ btrfs_release_path(path);
+ ret = check_extent_exists(root, new_start,
+ new_bytes);
+ if (ret) {
+ fprintf(stderr, "Right section didn't "
+ "have a record\n");
+ break;
+ }
+ num_bytes = key.objectid - bytenr;
+ goto again;
+ }
+ /* extent covers the tail of the range: keep the left part */
+ num_bytes = key.objectid - bytenr;
+ }
+ path->slots[0]++;
+ }
+ /*
+ * Any ret left over from the loop is discarded here; a non-zero
+ * num_bytes is what signals a failure below.
+ */
+ ret = 0;
+
+out:
+ if (num_bytes && !ret) {
+ fprintf(stderr, "There are no extents for csum range "
+ "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
+ ret = 1;
+ }
+
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Walk the csum tree and check that every contiguous run of checksummed
+ * bytes is backed by extent records; when check_data_csum is set, also
+ * verify the data checksums themselves via check_extent_csums().
+ *
+ * Returns -ENOENT if the csum root is not readable, -ENOMEM if no path can
+ * be allocated, a negative error if the initial search fails, and otherwise
+ * the number of problems found (0 means clean).  A failure while advancing
+ * to the next leaf or while verifying data checksums aborts the walk and is
+ * counted as a problem; previously such an aborted walk could still return
+ * 0.
+ *
+ * NOTE(review): the last accumulated run is never passed to
+ * check_extent_exists() once the walk ends -- confirm whether that is
+ * intended.
+ */
+static int check_csums(struct btrfs_root *root)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	u64 offset = 0, num_bytes = 0;
+	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+	int errors = 0;
+	int ret;
+	u64 data_len;
+	unsigned long leaf_offset;
+
+	root = root->fs_info->csum_root;
+	if (!extent_buffer_uptodate(root->node)) {
+		fprintf(stderr, "No valid csum tree found\n");
+		return -ENOENT;
+	}
+
+	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+	key.type = BTRFS_EXTENT_CSUM_KEY;
+	key.offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching csum tree %d\n", ret);
+		btrfs_free_path(path);
+		return ret;
+	}
+
+	if (ret > 0 && path->slots[0])
+		path->slots[0]--;
+	ret = 0;
+
+	while (1) {
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0) {
+				fprintf(stderr, "Error going to next leaf "
+					"%d\n", ret);
+				/* the walk is incomplete: do not report success */
+				errors++;
+				break;
+			}
+			if (ret)
+				break;
+		}
+		leaf = path->nodes[0];
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type != BTRFS_EXTENT_CSUM_KEY) {
+			path->slots[0]++;
+			continue;
+		}
+
+		/* one csum of csum_size bytes covers one sector of data */
+		data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
+			    csum_size) * root->sectorsize;
+		if (check_data_csum) {
+			leaf_offset = btrfs_item_ptr_offset(leaf,
+							    path->slots[0]);
+			ret = check_extent_csums(root, key.offset, data_len,
+						 leaf_offset, leaf);
+			if (ret) {
+				/* the walk is incomplete: do not report success */
+				errors++;
+				break;
+			}
+		}
+
+		if (!num_bytes) {
+			offset = key.offset;
+		} else if (key.offset != offset + num_bytes) {
+			/* the run ended: make sure extents back all of it */
+			ret = check_extent_exists(root, offset, num_bytes);
+			if (ret) {
+				fprintf(stderr, "Csum exists for %Lu-%Lu but "
+					"there is no extent record\n",
+					offset, offset+num_bytes);
+				errors++;
+			}
+			offset = key.offset;
+			num_bytes = 0;
+		}
+		num_bytes += data_len;
+		path->slots[0]++;
+	}
+
+	btrfs_free_path(path);
+	return errors;
+}
+
+/*
+ * Return 1 if @key sorts strictly before @drop_key when compared by
+ * objectid, then type, then offset; return 0 otherwise (including when the
+ * two keys are equal).
+ */
+static int is_dropped_key(struct btrfs_key *key,
+			  struct btrfs_key *drop_key)
+{
+	if (key->objectid != drop_key->objectid)
+		return key->objectid < drop_key->objectid;
+	if (key->type != drop_key->type)
+		return key->type < drop_key->type;
+	return key->offset < drop_key->offset;
+}
+
+/*
+ * Here are the rules for FULL_BACKREF.
+ *
+ * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
+ * 2) If btrfs_header_owner(buf) no longer points to buf then we have
+ * FULL_BACKREF set.
+ * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
+ * if it happened after the relocation occurred since we'll have dropped the
+ * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
+ * have no real way to know for sure.
+ *
+ * We process the blocks one root at a time, and we start from the lowest root
+ * objectid and go to the highest. So we can just lookup the owner backref for
+ * the record and if we don't find it then we know it doesn't exist and we have
+ * a FULL BACKREF.
+ *
+ * FIXME: if we ever start reclaiming root objectid's then we need to fix this
+ * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
+ * be set or not and then we can check later once we've gathered all the refs.
+ *
+ * calc_extent_flag() applies these rules to @buf, reached through the root
+ * described by @ri.  On return *@flags is 0 or has
+ * BTRFS_BLOCK_FLAG_FULL_BACKREF set, and the block's extent record is marked
+ * bad_full_backref when its previously recorded flag_block_full_backref
+ * (anything but -1) disagrees.  Always returns 0.  An extent record for
+ * buf->start must already be in @extent_cache (BUG_ON otherwise).
+ * NOTE(review): @ri is dereferenced unconditionally here, while
+ * run_next_block() tests ri != NULL elsewhere -- confirm it cannot be NULL.
+ */
+static int calc_extent_flag(struct btrfs_root *root,
+ struct cache_tree *extent_cache,
+ struct extent_buffer *buf,
+ struct root_item_record *ri,
+ u64 *flags)
+{
+ struct extent_record *rec;
+ struct cache_extent *cache;
+ struct tree_backref *tback;
+ u64 owner = 0;
+
+ cache = lookup_cache_extent(extent_cache, buf->start, 1);
+ /* we have added this extent before */
+ BUG_ON(!cache);
+ rec = container_of(cache, struct extent_record, cache);
+
+ /*
+ * Except file/reloc tree, we can not have
+ * FULL BACKREF MODE
+ */
+ if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
+ goto normal;
+ /*
+ * root node
+ */
+ if (buf->start == ri->bytenr)
+ goto normal;
+
+ if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
+ goto full_backref;
+
+ owner = btrfs_header_owner(buf);
+ if (owner == ri->objectid)
+ goto normal;
+
+ /* no backref from the header owner recorded: treat as full backref */
+ tback = find_tree_backref(rec, 0, owner);
+ if (!tback)
+ goto full_backref;
+normal:
+ *flags = 0;
+ if (rec->flag_block_full_backref != -1 &&
+ rec->flag_block_full_backref != 0)
+ rec->bad_full_backref = 1;
+ return 0;
+full_backref:
+ *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ if (rec->flag_block_full_backref != -1 &&
+ rec->flag_block_full_backref != 1)
+ rec->bad_full_backref = 1;
+ return 0;
+}
+
+/*
+ * Process the next queued tree block and feed its contents into the check
+ * caches.
+ *
+ * pick_next_pending() fills @bits with up to @bits_nr candidate blocks;
+ * bits[0] is the block handled by this call.  Unless reada_bits is set, each
+ * candidate is added to @reada and submitted for readahead.  The block is
+ * removed from @pending, @reada and @nodes, read from disk, its
+ * FULL_BACKREF state is determined, and check_block() is run on it.
+ *
+ * For a leaf every item is dispatched by key type to the matching
+ * process_*() / add_*_backref() helper; for a node every child pointer gets
+ * an extent record plus a tree backref and is queued on @nodes (level > 1)
+ * or @pending.  Global byte counters are updated at the end.
+ *
+ * Returns 1 when pick_next_pending() finds nothing left to do, the non-zero
+ * result of check_block() if that fails, and otherwise the current value of
+ * ret (0 on the normal path).
+ *
+ * NOTE(review): rec stays NULL when @extent_cache has no record for the
+ * block, yet rec is dereferenced below without a check -- confirm a record
+ * always exists at this point.
+ * NOTE(review): the results of the process_*() helpers called for leaf items
+ * are ignored.
+ */
+static int run_next_block(struct btrfs_root *root,
+ struct block_info *bits,
+ int bits_nr,
+ u64 *last,
+ struct cache_tree *pending,
+ struct cache_tree *seen,
+ struct cache_tree *reada,
+ struct cache_tree *nodes,
+ struct cache_tree *extent_cache,
+ struct cache_tree *chunk_cache,
+ struct rb_root *dev_cache,
+ struct block_group_tree *block_group_cache,
+ struct device_extent_tree *dev_extent_cache,
+ struct root_item_record *ri)
+{
+ struct extent_buffer *buf;
+ struct extent_record *rec = NULL;
+ u64 bytenr;
+ u32 size;
+ u64 parent;
+ u64 owner;
+ u64 flags;
+ u64 ptr;
+ u64 gen = 0;
+ int ret = 0;
+ int i;
+ int nritems;
+ struct btrfs_key key;
+ struct cache_extent *cache;
+ int reada_bits;
+
+ nritems = pick_next_pending(pending, reada, nodes, *last, bits,
+ bits_nr, &reada_bits);
+ if (nritems == 0)
+ return 1;
+
+ if (!reada_bits) {
+ for(i = 0; i < nritems; i++) {
+ ret = add_cache_extent(reada, bits[i].start,
+ bits[i].size);
+ /* already queued for readahead */
+ if (ret == -EEXIST)
+ continue;
+
+ /* fixme, get the parent transid */
+ readahead_tree_block(root, bits[i].start,
+ bits[i].size, 0);
+ }
+ }
+ *last = bits[0].start;
+ bytenr = bits[0].start;
+ size = bits[0].size;
+
+ /* the block is being processed now: drop it from all queues */
+ cache = lookup_cache_extent(pending, bytenr, size);
+ if (cache) {
+ remove_cache_extent(pending, cache);
+ free(cache);
+ }
+ cache = lookup_cache_extent(reada, bytenr, size);
+ if (cache) {
+ remove_cache_extent(reada, cache);
+ free(cache);
+ }
+ cache = lookup_cache_extent(nodes, bytenr, size);
+ if (cache) {
+ remove_cache_extent(nodes, cache);
+ free(cache);
+ }
+ cache = lookup_cache_extent(extent_cache, bytenr, size);
+ if (cache) {
+ rec = container_of(cache, struct extent_record, cache);
+ gen = rec->parent_generation;
+ }
+
+ /* fixme, get the real parent transid */
+ buf = read_tree_block(root, bytenr, size, gen);
+ if (!extent_buffer_uptodate(buf)) {
+ record_bad_block_io(root->fs_info,
+ extent_cache, bytenr, size);
+ goto out;
+ }
+
+ nritems = btrfs_header_nritems(buf);
+
+ flags = 0;
+ if (!init_extent_tree) {
+ /* prefer the flags stored in the extent tree, else derive them */
+ ret = btrfs_lookup_extent_info(NULL, root, bytenr,
+ btrfs_header_level(buf), 1, NULL,
+ &flags);
+ if (ret < 0) {
+ ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
+ if (ret < 0) {
+ fprintf(stderr, "Couldn't calc extent flags\n");
+ flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ }
+ }
+ } else {
+ flags = 0;
+ ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
+ if (ret < 0) {
+ fprintf(stderr, "Couldn't calc extent flags\n");
+ flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ }
+ }
+
+ if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+ if (ri != NULL &&
+ ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
+ ri->objectid == btrfs_header_owner(buf)) {
+ /*
+ * Ok we got to this block from its original owner and
+ * we have FULL_BACKREF set. Relocation can leave
+ * converted blocks over so this is altogether possible,
+ * however it's not possible if the generation > the
+ * last snapshot, so check for this case.
+ */
+ if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
+ btrfs_header_generation(buf) > ri->last_snapshot) {
+ flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ rec->bad_full_backref = 1;
+ }
+ }
+ } else {
+ /* reloc tree blocks and RELOC-flagged blocks must be FULL_BACKREF */
+ if (ri != NULL &&
+ (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
+ btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
+ flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ rec->bad_full_backref = 1;
+ }
+ }
+
+ /*
+ * With FULL_BACKREF, references made from this block are keyed by
+ * this block's bytenr as parent; otherwise by the owning root.
+ */
+ if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+ rec->flag_block_full_backref = 1;
+ parent = bytenr;
+ owner = 0;
+ } else {
+ rec->flag_block_full_backref = 0;
+ parent = 0;
+ owner = btrfs_header_owner(buf);
+ }
+
+ ret = check_block(root, extent_cache, buf, flags);
+ if (ret)
+ goto out;
+
+ if (btrfs_is_leaf(buf)) {
+ btree_space_waste += btrfs_leaf_free_space(root, buf);
+ for (i = 0; i < nritems; i++) {
+ struct btrfs_file_extent_item *fi;
+ btrfs_item_key_to_cpu(buf, &key, i);
+ if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+ process_extent_item(root, extent_cache, buf,
+ i);
+ continue;
+ }
+ if (key.type == BTRFS_METADATA_ITEM_KEY) {
+ process_extent_item(root, extent_cache, buf,
+ i);
+ continue;
+ }
+ if (key.type == BTRFS_EXTENT_CSUM_KEY) {
+ total_csum_bytes +=
+ btrfs_item_size_nr(buf, i);
+ continue;
+ }
+ if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+ process_chunk_item(chunk_cache, &key, buf, i);
+ continue;
+ }
+ if (key.type == BTRFS_DEV_ITEM_KEY) {
+ process_device_item(dev_cache, &key, buf, i);
+ continue;
+ }
+ if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+ process_block_group_item(block_group_cache,
+ &key, buf, i);
+ continue;
+ }
+ if (key.type == BTRFS_DEV_EXTENT_KEY) {
+ process_device_extent_item(dev_extent_cache,
+ &key, buf, i);
+ continue;
+
+ }
+ if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+ process_extent_ref_v0(extent_cache, buf, i);
+#else
+ BUG();
+#endif
+ continue;
+ }
+
+ if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
+ add_tree_backref(extent_cache, key.objectid, 0,
+ key.offset, 0);
+ continue;
+ }
+ if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
+ add_tree_backref(extent_cache, key.objectid,
+ key.offset, 0, 0);
+ continue;
+ }
+ if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+ struct btrfs_extent_data_ref *ref;
+ ref = btrfs_item_ptr(buf, i,
+ struct btrfs_extent_data_ref);
+ add_data_backref(extent_cache,
+ key.objectid, 0,
+ btrfs_extent_data_ref_root(buf, ref),
+ btrfs_extent_data_ref_objectid(buf,
+ ref),
+ btrfs_extent_data_ref_offset(buf, ref),
+ btrfs_extent_data_ref_count(buf, ref),
+ 0, root->sectorsize);
+ continue;
+ }
+ if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+ struct btrfs_shared_data_ref *ref;
+ ref = btrfs_item_ptr(buf, i,
+ struct btrfs_shared_data_ref);
+ add_data_backref(extent_cache,
+ key.objectid, key.offset, 0, 0, 0,
+ btrfs_shared_data_ref_count(buf, ref),
+ 0, root->sectorsize);
+ continue;
+ }
+ if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
+ struct bad_item *bad;
+
+ if (key.objectid == BTRFS_ORPHAN_OBJECTID)
+ continue;
+ if (!owner)
+ continue;
+ /* queue the item on delete_items; skipped on OOM */
+ bad = malloc(sizeof(struct bad_item));
+ if (!bad)
+ continue;
+ INIT_LIST_HEAD(&bad->list);
+ memcpy(&bad->key, &key,
+ sizeof(struct btrfs_key));
+ bad->root_id = owner;
+ list_add_tail(&bad->list, &delete_items);
+ continue;
+ }
+ /* everything below handles regular file extents only */
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+ fi = btrfs_item_ptr(buf, i,
+ struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(buf, fi) ==
+ BTRFS_FILE_EXTENT_INLINE)
+ continue;
+ /* a disk bytenr of 0 has no backing extent to record */
+ if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
+ continue;
+
+ data_bytes_allocated +=
+ btrfs_file_extent_disk_num_bytes(buf, fi);
+ if (data_bytes_allocated < root->sectorsize) {
+ abort();
+ }
+ data_bytes_referenced +=
+ btrfs_file_extent_num_bytes(buf, fi);
+ add_data_backref(extent_cache,
+ btrfs_file_extent_disk_bytenr(buf, fi),
+ parent, owner, key.objectid, key.offset -
+ btrfs_file_extent_offset(buf, fi), 1, 1,
+ btrfs_file_extent_disk_num_bytes(buf, fi));
+ }
+ } else {
+ int level;
+ struct btrfs_key first_key;
+
+ first_key.objectid = 0;
+
+ if (nritems > 0)
+ btrfs_item_key_to_cpu(buf, &first_key, 0);
+ level = btrfs_header_level(buf);
+ for (i = 0; i < nritems; i++) {
+ ptr = btrfs_node_blockptr(buf, i);
+ size = btrfs_level_size(root, level - 1);
+ btrfs_node_key_to_cpu(buf, &key, i);
+ /* skip children below the root's drop progress key */
+ if (ri != NULL) {
+ if ((level == ri->drop_level)
+ && is_dropped_key(&key, &ri->drop_key)) {
+ continue;
+ }
+ }
+ ret = add_extent_rec(extent_cache, &key,
+ btrfs_node_ptr_generation(buf, i),
+ ptr, size, 0, 0, 1, 0, 1, 0,
+ size);
+ BUG_ON(ret);
+
+ add_tree_backref(extent_cache, ptr, parent, owner, 1);
+
+ /* children of level-1 nodes are leaves: queue on pending */
+ if (level > 1) {
+ add_pending(nodes, seen, ptr, size);
+ } else {
+ add_pending(pending, seen, ptr, size);
+ }
+ }
+ btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
+ nritems) * sizeof(struct btrfs_key_ptr);
+ }
+ total_btree_bytes += buf->len;
+ if (fs_root_objectid(btrfs_header_owner(buf)))
+ total_fs_tree_bytes += buf->len;
+ if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
+ total_extent_tree_bytes += buf->len;
+ if (!found_old_backref &&
+ btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
+ btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
+ !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
+ found_old_backref = 1;
+out:
+ free_extent_buffer(buf);
+ return ret;
+}
+
+/*
+ * Register the root block @buf of the tree @objectid with the checker.
+ *
+ * The block is queued on @nodes when its header level is above zero and on
+ * @pending otherwise, an extent record for [buf->start, buf->len] is added to
+ * @extent_cache, and one tree backref is recorded for it.  For the relocation
+ * tree, or for a block whose backref revision is older than
+ * BTRFS_MIXED_BACKREF_REV, the backref is added with the block's own start
+ * as the parent argument and 0 as the root; in every other case the parent
+ * is 0 and the root is @objectid.
+ *
+ * Always returns 0.
+ */
+static int add_root_to_pending(struct extent_buffer *buf,
+			       struct cache_tree *extent_cache,
+			       struct cache_tree *pending,
+			       struct cache_tree *seen,
+			       struct cache_tree *nodes,
+			       u64 objectid)
+{
+	struct cache_tree *queue;
+	u64 ref_parent;
+	u64 ref_root;
+
+	queue = (btrfs_header_level(buf) > 0) ? nodes : pending;
+	add_pending(queue, seen, buf->start, buf->len);
+
+	add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
+		       0, 1, 1, 0, 1, 0, buf->len);
+
+	if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
+	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) {
+		ref_parent = buf->start;
+		ref_root = 0;
+	} else {
+		ref_parent = 0;
+		ref_root = objectid;
+	}
+	add_tree_backref(extent_cache, buf->start, ref_parent, ref_root, 1);
+
+	return 0;
+}
+
+/* as we fix the tree, we might be deleting blocks that
+ * we're tracking for repair. This hook makes sure we
+ * remove any backrefs for blocks as we are fixing them.
+ *
+ * The extent record is looked up in root->fs_info->fsck_extent_cache with
+ * (bytenr, num_bytes); if nothing is cached there the hook does nothing.
+ * An owner of BTRFS_FIRST_FREE_OBJECTID or above selects the data backref
+ * path, where the backref's found_ref/num_refs counters and the record's
+ * non-zero refs/extent_item_refs are reduced by refs_to_drop. Otherwise the
+ * tree backref path clears the found flags and decrements the record's
+ * non-zero counters by one. maybe_free_extent_rec() is then called on the
+ * record, unless no matching backref was found.
+ *
+ * trans is not used. Always returns 0.
+ *
+ * NOTE(review): in the tree branch found_ref has always been cleared by the
+ * time the final "!found_extent_tree && found_ref" test runs, so the
+ * list_del()/free() under it can never execute. "!found_ref" may have been
+ * intended in both branches - confirm before changing, because callers may
+ * still hold pointers into rec->backrefs while this hook runs.
+ */
+static int free_extent_hook(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 owner, u64 offset,
+ int refs_to_drop)
+{
+ struct extent_record *rec;
+ struct cache_extent *cache;
+ int is_data;
+ struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
+
+ is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
+ cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
+ if (!cache)
+ return 0;
+
+ rec = container_of(cache, struct extent_record, cache);
+ if (is_data) {
+ struct data_backref *back;
+ back = find_data_backref(rec, parent, root_objectid, owner,
+ offset, 1, bytenr, num_bytes);
+ if (!back)
+ goto out;
+ if (back->node.found_ref) {
+ back->found_ref -= refs_to_drop;
+ if (rec->refs)
+ rec->refs -= refs_to_drop;
+ }
+ if (back->node.found_extent_tree) {
+ back->num_refs -= refs_to_drop;
+ if (rec->extent_item_refs)
+ rec->extent_item_refs -= refs_to_drop;
+ }
+ if (back->found_ref == 0)
+ back->node.found_ref = 0;
+ if (back->num_refs == 0)
+ back->node.found_extent_tree = 0;
+
+ if (!back->node.found_extent_tree && back->node.found_ref) {
+ list_del(&back->node.list);
+ free(back);
+ }
+ } else {
+ struct tree_backref *back;
+ back = find_tree_backref(rec, parent, root_objectid);
+ if (!back)
+ goto out;
+ if (back->node.found_ref) {
+ if (rec->refs)
+ rec->refs--;
+ back->node.found_ref = 0;
+ }
+ if (back->node.found_extent_tree) {
+ if (rec->extent_item_refs)
+ rec->extent_item_refs--;
+ back->node.found_extent_tree = 0;
+ }
+ if (!back->node.found_extent_tree && back->node.found_ref) {
+ list_del(&back->node.list);
+ free(back);
+ }
+ }
+ maybe_free_extent_rec(extent_cache, rec);
+out:
+ return 0;
+}
+
+/*
+ * Delete every extent tree item that describes the extent starting at
+ * @bytenr.
+ *
+ * The extent tree is searched backwards from the highest possible key with
+ * objectid @bytenr.  EXTENT_ITEM, METADATA_ITEM and all backref item types
+ * are deleted; after an EXTENT_ITEM or METADATA_ITEM is removed
+ * btrfs_update_block_group() is called for the range (key offset for extent
+ * items, root->leafsize for metadata items).  Items of any other type with
+ * the same objectid are stepped over.
+ *
+ * @path is released before returning.  @new_len is currently unused.
+ *
+ * Returns 0 on success, or the error from the failing search, item deletion
+ * or block group update.
+ */
+static int delete_extent_records(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 bytenr, u64 new_len)
+{
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	int ret;
+	int slot;
+
+	key.objectid = bytenr;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	while (1) {
+		ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+					&key, path, 0, 1);
+		if (ret < 0)
+			break;
+
+		if (ret > 0) {
+			/* no exact match: step back to the preceding item */
+			ret = 0;
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+		}
+		ret = 0;
+
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		if (found_key.objectid != bytenr)
+			break;
+
+		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
+		    found_key.type != BTRFS_METADATA_ITEM_KEY &&
+		    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+		    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
+		    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
+		    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
+		    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
+			btrfs_release_path(path);
+			if (found_key.type == 0) {
+				/*
+				 * The type cannot go any lower, so walk the
+				 * offset down instead.  These values must not
+				 * be overwritten below: type - 1 would wrap
+				 * around and the search would land on this
+				 * same key again.
+				 */
+				if (found_key.offset == 0)
+					break;
+				key.offset = found_key.offset - 1;
+				key.type = found_key.type;
+			} else {
+				key.type = found_key.type - 1;
+				key.offset = (u64)-1;
+			}
+			continue;
+		}
+
+		fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
+			found_key.objectid, found_key.type, found_key.offset);
+
+		ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
+		if (ret)
+			break;
+		btrfs_release_path(path);
+
+		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
+		    found_key.type == BTRFS_METADATA_ITEM_KEY) {
+			u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
+				found_key.offset : root->leafsize;
+
+			ret = btrfs_update_block_group(trans, root, bytenr,
+						       bytes, 0, 0);
+			if (ret)
+				break;
+		}
+	}
+
+	btrfs_release_path(path);
+	return ret;
+}
+
+/*
+ * for a single backref, this will allocate a new extent
+ * and add the backref to it.
+ *
+ * When @allocated is zero an EXTENT_ITEM keyed (rec->start, rec->max_size)
+ * is first inserted into the extent tree with a ref count of 0 and
+ * rec->generation; tree blocks additionally get a btrfs_tree_block_info and
+ * BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags, data extents get
+ * BTRFS_EXTENT_FLAG_DATA.  btrfs_update_block_group() is then called for
+ * the range.
+ *
+ * Afterwards btrfs_inc_extent_ref() is called once for a tree backref, or
+ * once per found reference (dback->found_ref) for a data backref.
+ *
+ * @path is released before returning.  Returns 0 on success or the error of
+ * the first failing step.
+ */
+static int record_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_fs_info *info,
+			 struct btrfs_path *path,
+			 struct extent_record *rec,
+			 struct extent_backref *back,
+			 int allocated, u64 flags)
+{
+	/*
+	 * Start at 0: with @allocated set and a data backref whose found_ref
+	 * count is 0 nothing below assigns ret before it is returned.
+	 */
+	int ret = 0;
+	struct btrfs_root *extent_root = info->extent_root;
+	struct extent_buffer *leaf;
+	struct btrfs_key ins_key;
+	struct btrfs_extent_item *ei;
+	struct tree_backref *tback;
+	struct data_backref *dback;
+	struct btrfs_tree_block_info *bi;
+
+	if (!back->is_data)
+		rec->max_size = max_t(u64, rec->max_size,
+				      info->extent_root->leafsize);
+
+	if (!allocated) {
+		u32 item_size = sizeof(*ei);
+
+		if (!back->is_data)
+			item_size += sizeof(*bi);
+
+		ins_key.objectid = rec->start;
+		ins_key.offset = rec->max_size;
+		ins_key.type = BTRFS_EXTENT_ITEM_KEY;
+
+		ret = btrfs_insert_empty_item(trans, extent_root, path,
+					      &ins_key, item_size);
+		if (ret)
+			goto fail;
+
+		leaf = path->nodes[0];
+		ei = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_extent_item);
+
+		/* refs start at 0; btrfs_inc_extent_ref() below adds them */
+		btrfs_set_extent_refs(leaf, ei, 0);
+		btrfs_set_extent_generation(leaf, ei, rec->generation);
+
+		if (back->is_data) {
+			btrfs_set_extent_flags(leaf, ei,
+					       BTRFS_EXTENT_FLAG_DATA);
+		} else {
+			struct btrfs_disk_key copy_key;
+
+			tback = (struct tree_backref *)back;
+			/* the tree block info sits right behind the item */
+			bi = (struct btrfs_tree_block_info *)(ei + 1);
+			memset_extent_buffer(leaf, 0, (unsigned long)bi,
+					     sizeof(*bi));
+
+			btrfs_set_disk_key_objectid(&copy_key,
+						    rec->info_objectid);
+			btrfs_set_disk_key_type(&copy_key, 0);
+			btrfs_set_disk_key_offset(&copy_key, 0);
+
+			btrfs_set_tree_block_level(leaf, bi, rec->info_level);
+			btrfs_set_tree_block_key(leaf, bi, &copy_key);
+
+			btrfs_set_extent_flags(leaf, ei,
+					       BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
+		}
+
+		btrfs_mark_buffer_dirty(leaf);
+		ret = btrfs_update_block_group(trans, extent_root, rec->start,
+					       rec->max_size, 1, 0);
+		if (ret)
+			goto fail;
+		btrfs_release_path(path);
+	}
+
+	if (back->is_data) {
+		u64 parent;
+		int i;
+
+		dback = (struct data_backref *)back;
+		if (back->full_backref)
+			parent = dback->parent;
+		else
+			parent = 0;
+
+		for (i = 0; i < dback->found_ref; i++) {
+			/* if parent != 0, we're doing a full backref
+			 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
+			 * just makes the backref allocator create a data
+			 * backref
+			 */
+			ret = btrfs_inc_extent_ref(trans, info->extent_root,
+						   rec->start, rec->max_size,
+						   parent,
+						   dback->root,
+						   parent ?
+						   BTRFS_FIRST_FREE_OBJECTID :
+						   dback->owner,
+						   dback->offset);
+			if (ret)
+				break;
+		}
+		fprintf(stderr, "adding new data backref"
+				" on %llu %s %llu owner %llu"
+				" offset %llu found %d\n",
+				(unsigned long long)rec->start,
+				back->full_backref ?
+				"parent" : "root",
+				back->full_backref ?
+				(unsigned long long)parent :
+				(unsigned long long)dback->root,
+				(unsigned long long)dback->owner,
+				(unsigned long long)dback->offset,
+				dback->found_ref);
+	} else {
+		u64 parent;
+
+		tback = (struct tree_backref *)back;
+		if (back->full_backref)
+			parent = tback->parent;
+		else
+			parent = 0;
+
+		ret = btrfs_inc_extent_ref(trans, info->extent_root,
+					   rec->start, rec->max_size,
+					   parent, tback->root, 0, 0);
+		/* cast like the data branch so %llu matches on every arch */
+		fprintf(stderr, "adding new tree backref on "
+			"start %llu len %llu parent %llu root %llu\n",
+			(unsigned long long)rec->start,
+			(unsigned long long)rec->max_size,
+			(unsigned long long)parent,
+			(unsigned long long)tback->root);
+	}
+fail:
+	btrfs_release_path(path);
+	return ret;
+}
+/*
+ * Tally for one (bytenr, bytes) pair reported for an extent record;
+ * verify_backrefs() keeps these on a local list while deciding which pair
+ * to trust.
+ */
+struct extent_entry {
+ u64 bytenr; /* disk bytenr of this candidate */
+ u64 bytes; /* length of this candidate */
+ int count; /* votes: one per agreeing backref, plus the extent record when consulted */
+ int broken; /* how many of the voting backrefs were marked broken */
+ struct list_head list; /* link in the caller's entry list */
+};
+
+/*
+ * Look up the entry on @entries whose bytenr and bytes both equal the given
+ * values.  Returns the first such entry, or NULL when there is none.
+ */
+static struct extent_entry *find_entry(struct list_head *entries,
+				       u64 bytenr, u64 bytes)
+{
+	struct extent_entry *cur;
+
+	list_for_each_entry(cur, entries, list) {
+		if (cur->bytenr != bytenr)
+			continue;
+		if (cur->bytes != bytes)
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+/*
+ * Pick the entry on @entries that the votes favour.
+ *
+ * Entries are compared pairwise by their count. Returns the winning entry,
+ * or NULL when the list holds fewer than two entries or no clear winner
+ * emerges (for example when the counts tie).
+ *
+ * The first entry is taken as the initial comparison point without the
+ * broken == count check that later entries get.
+ */
+static struct extent_entry *find_most_right_entry(struct list_head *entries)
+{
+ struct extent_entry *entry, *best = NULL, *prev = NULL;
+
+ list_for_each_entry(entry, entries, list) {
+ if (!prev) {
+ prev = entry;
+ continue;
+ }
+
+ /*
+ * If there are as many broken entries as entries then we know
+ * not to trust this particular entry.
+ */
+ if (entry->broken == entry->count)
+ continue;
+
+ /*
+ * If our current entry == best then we can't be sure our best
+ * is really the best, so we need to keep searching.
+ */
+ if (best && best->count == entry->count) {
+ prev = entry;
+ best = NULL;
+ continue;
+ }
+
+ /* Prev == entry, not good enough, have to keep searching */
+ if (!prev->broken && prev->count == entry->count)
+ continue;
+
+ if (!best)
+ best = (prev->count > entry->count) ? prev : entry;
+ else if (best->count < entry->count)
+ best = entry;
+ prev = entry;
+ }
+
+ return best;
+}
+/*
+ * Rewrite the file extent item behind @dback so that it refers to the extent
+ * described by @entry.
+ *
+ * The fs root dback->root is read and, starting at key
+ * (dback->owner, EXTENT_DATA, dback->offset), items are walked forward until
+ * a file extent whose disk bytenr and disk num bytes equal
+ * dback->disk_bytenr and dback->bytes is found. That item is then looked up
+ * again inside a new transaction and its disk_bytenr, offset, disk_num_bytes
+ * and (only when uncompressed) ram_bytes are set from @entry.
+ *
+ * Once the transaction has been started it is committed on every exit path,
+ * including the error ones.
+ *
+ * Returns 0 on success, -EINVAL when the ref cannot be found or the case is
+ * not handled, or the error from the search, transaction start or commit.
+ */
+static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
+ struct data_backref *dback, struct extent_entry *entry)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root;
+ struct btrfs_file_extent_item *fi;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u64 bytenr, bytes;
+ int ret, err;
+
+ key.objectid = dback->root;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ root = btrfs_read_fs_root(info, &key);
+ if (IS_ERR(root)) {
+ fprintf(stderr, "Couldn't find root for our ref\n");
+ return -EINVAL;
+ }
+
+ /*
+ * The backref points to the original offset of the extent if it was
+ * split, so we need to search down to the offset we have and then walk
+ * forward until we find the backref we're looking for.
+ */
+ key.objectid = dback->owner;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = dback->offset;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Error looking up ref %d\n", ret);
+ return ret;
+ }
+
+ while (1) {
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret) {
+ fprintf(stderr, "Couldn't find our ref, next\n");
+ return -EINVAL;
+ }
+ }
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid != dback->owner ||
+ key.type != BTRFS_EXTENT_DATA_KEY) {
+ fprintf(stderr, "Couldn't find our ref, search\n");
+ return -EINVAL;
+ }
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+
+ if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
+ break;
+ path->slots[0]++;
+ }
+
+ btrfs_release_path(path);
+
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ /*
+ * Ok we have the key of the file extent we want to fix, now we can cow
+ * down to the thing and fix it.
+ */
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ if (ret < 0) {
+ fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
+ key.objectid, key.type, key.offset, ret);
+ goto out;
+ }
+ if (ret > 0) {
+ fprintf(stderr, "Well that's odd, we just found this key "
+ "[%Lu, %u, %Lu]\n", key.objectid, key.type,
+ key.offset);
+ ret = -EINVAL;
+ goto out;
+ }
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_compression(leaf, fi) &&
+ dback->disk_bytenr != entry->bytenr) {
+ fprintf(stderr, "Ref doesn't match the record start and is "
+ "compressed, please take a btrfs-image of this file "
+ "system and send it to a btrfs developer so they can "
+ "complete this functionality for bytenr %Lu\n",
+ dback->disk_bytenr);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
+ } else if (dback->disk_bytenr > entry->bytenr) {
+ u64 off_diff, offset;
+
+ off_diff = dback->disk_bytenr - entry->bytenr;
+ offset = btrfs_file_extent_offset(leaf, fi);
+ if (dback->disk_bytenr + offset +
+ btrfs_file_extent_num_bytes(leaf, fi) >
+ entry->bytenr + entry->bytes) {
+ fprintf(stderr, "Ref is past the entry end, please "
+ "take a btrfs-image of this file system and "
+ "send it to a btrfs developer, ref %Lu\n",
+ dback->disk_bytenr);
+ ret = -EINVAL;
+ goto out;
+ }
+ offset += off_diff;
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
+ btrfs_set_file_extent_offset(leaf, fi, offset);
+ } else if (dback->disk_bytenr < entry->bytenr) {
+ u64 offset;
+
+ offset = btrfs_file_extent_offset(leaf, fi);
+ if (dback->disk_bytenr + offset < entry->bytenr) {
+ fprintf(stderr, "Ref is before the entry start, please"
+ " take a btrfs-image of this file system and "
+ "send it to a btrfs developer, ref %Lu\n",
+ dback->disk_bytenr);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ offset += dback->disk_bytenr;
+ offset -= entry->bytenr;
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
+ btrfs_set_file_extent_offset(leaf, fi, offset);
+ }
+
+ btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
+
+ /*
+ * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
+ * only do this if we aren't using compression, otherwise it's a
+ * trickier case.
+ */
+ if (!btrfs_file_extent_compression(leaf, fi))
+ btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
+ else
+ printf("ram bytes may be wrong?\n");
+ btrfs_mark_buffer_dirty(leaf);
+out:
+ err = btrfs_commit_transaction(trans, root);
+ btrfs_release_path(path);
+ return ret ? ret : err;
+}
+/*
+ * Check that the data backrefs of @rec agree on the extent's disk bytenr and
+ * size, and rewrite the file extent items that disagree.
+ *
+ * Only non-full data backrefs with at least one found ref take part. Each
+ * distinct (disk_bytenr, bytes) pair gets an extent_entry and the pair with
+ * the most votes wins; on a tie the extent record's own (start, nr) is added
+ * as a further vote.
+ *
+ * Returns 0 when nothing had to be done (always the case for metadata),
+ * -EAGAIN once the repair pass over the backrefs completed without error,
+ * -ENOMEM on allocation failure, -EINVAL when no winner can be determined or
+ * the winner's bytenr differs from rec->start, or the error returned by
+ * repair_ref().
+ */
+static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
+ struct extent_record *rec)
+{
+ struct extent_backref *back;
+ struct data_backref *dback;
+ struct extent_entry *entry, *best = NULL;
+ LIST_HEAD(entries);
+ int nr_entries = 0;
+ int broken_entries = 0;
+ int ret = 0;
+ short mismatch = 0;
+
+ /*
+ * Metadata is easy and the backrefs should always agree on bytenr and
+ * size, if not we've got bigger issues.
+ */
+ if (rec->metadata)
+ return 0;
+
+ list_for_each_entry(back, &rec->backrefs, list) {
+ if (back->full_backref || !back->is_data)
+ continue;
+
+ dback = (struct data_backref *)back;
+
+ /*
+ * We only pay attention to backrefs that we found a real
+ * backref for.
+ */
+ if (dback->found_ref == 0)
+ continue;
+
+ /*
+ * For now we only catch when the bytes don't match, not the
+ * bytenr. We can easily do this at the same time, but I want
+ * to have a fs image to test on before we just add repair
+ * functionality willy-nilly so we know we won't screw up the
+ * repair.
+ */
+
+ entry = find_entry(&entries, dback->disk_bytenr,
+ dback->bytes);
+ if (!entry) {
+ entry = malloc(sizeof(struct extent_entry));
+ if (!entry) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memset(entry, 0, sizeof(*entry));
+ entry->bytenr = dback->disk_bytenr;
+ entry->bytes = dback->bytes;
+ list_add_tail(&entry->list, &entries);
+ nr_entries++;
+ }
+
+ /*
+ * If we only have one entry we may think the entries agree when
+ * in reality they don't so we have to do some extra checking.
+ */
+ if (dback->disk_bytenr != rec->start ||
+ dback->bytes != rec->nr || back->broken)
+ mismatch = 1;
+
+ if (back->broken) {
+ entry->broken++;
+ broken_entries++;
+ }
+
+ entry->count++;
+ }
+
+ /* Yay all the backrefs agree, carry on good sir */
+ if (nr_entries <= 1 && !mismatch)
+ goto out;
+
+ fprintf(stderr, "attempting to repair backref discrepency for bytenr "
+ "%Lu\n", rec->start);
+
+ /*
+ * First we want to see if the backrefs can agree amongst themselves who
+ * is right, so figure out which one of the entries has the highest
+ * count.
+ */
+ best = find_most_right_entry(&entries);
+
+ /*
+ * Ok so we may have an even split between what the backrefs think, so
+ * this is where we use the extent ref to see what it thinks.
+ */
+ if (!best) {
+ entry = find_entry(&entries, rec->start, rec->nr);
+ if (!entry && (!broken_entries || !rec->found_rec)) {
+ fprintf(stderr, "Backrefs don't agree with each other "
+ "and extent record doesn't agree with anybody,"
+ " so we can't fix bytenr %Lu bytes %Lu\n",
+ rec->start, rec->nr);
+ ret = -EINVAL;
+ goto out;
+ } else if (!entry) {
+ /*
+ * Ok our backrefs were broken, we'll assume this is the
+ * correct value and add an entry for this range.
+ */
+ entry = malloc(sizeof(struct extent_entry));
+ if (!entry) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memset(entry, 0, sizeof(*entry));
+ entry->bytenr = rec->start;
+ entry->bytes = rec->nr;
+ list_add_tail(&entry->list, &entries);
+ nr_entries++;
+ }
+ entry->count++;
+ best = find_most_right_entry(&entries);
+ if (!best) {
+ fprintf(stderr, "Backrefs and extent record evenly "
+ "split on who is right, this is going to "
+ "require user input to fix bytenr %Lu bytes "
+ "%Lu\n", rec->start, rec->nr);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /*
+ * I don't think this can happen currently as we'll abort() if we catch
+ * this case higher up, but in case somebody removes that we still can't
+ * deal with it properly here yet, so just bail out if that's the case.
+ */
+ if (best->bytenr != rec->start) {
+ fprintf(stderr, "Extent start and backref starts don't match, "
+ "please use btrfs-image on this file system and send "
+ "it to a btrfs developer so they can make fsck fix "
+ "this particular case. bytenr is %Lu, bytes is %Lu\n",
+ rec->start, rec->nr);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Ok great we all agreed on an extent record, let's go find the real
+ * references and fix up the ones that don't match.
+ */
+ list_for_each_entry(back, &rec->backrefs, list) {
+ if (back->full_backref || !back->is_data)
+ continue;
+
+ dback = (struct data_backref *)back;
+
+ /*
+ * Still ignoring backrefs that don't have a real ref attached
+ * to them.
+ */
+ if (dback->found_ref == 0)
+ continue;
+
+ if (dback->bytes == best->bytes &&
+ dback->disk_bytenr == best->bytenr)
+ continue;
+
+ ret = repair_ref(info, path, dback, best);
+ if (ret)
+ goto out;
+ }
+
+ /*
+ * Ok we messed with the actual refs, which means we need to drop our
+ * entire cache and go back and rescan. I know this is a huge pain and
+ * adds a lot of extra work, but it's the only way to be safe. Once all
+ * the backrefs agree we may not need to do anything to the extent
+ * record itself.
+ */
+ ret = -EAGAIN;
+out:
+ while (!list_empty(&entries)) {
+ entry = list_entry(entries.next, struct extent_entry, list);
+ list_del_init(&entry->list);
+ free(entry);
+ }
+ return ret;
+}
+/*
+ * Replace @rec, which has no extent item of its own, by its single duplicate
+ * record.
+ *
+ * Nothing is done (and 0 is returned) when rec->found_rec is set or when
+ * there is more than one duplicate. Otherwise the first record on rec->dups
+ * takes over @rec's refs and backrefs, absorbs every record in
+ * @extent_cache that lookup_cache_extent() still returns for its range, is
+ * inserted into the cache, and @rec is freed.
+ *
+ * Returns 0 if the surviving record ended up with duplicates of its own and
+ * 1 otherwise. @root is not used.
+ */
+static int process_duplicates(struct btrfs_root *root,
+ struct cache_tree *extent_cache,
+ struct extent_record *rec)
+{
+ struct extent_record *good, *tmp;
+ struct cache_extent *cache;
+ int ret;
+
+ /*
+ * If we found a extent record for this extent then return, or if we
+ * have more than one duplicate we are likely going to need to delete
+ * something.
+ */
+ if (rec->found_rec || rec->num_duplicates > 1)
+ return 0;
+
+ /* Shouldn't happen but just in case */
+ BUG_ON(!rec->num_duplicates);
+
+ /*
+ * So this happens if we end up with a backref that doesn't match the
+ * actual extent entry. So either the backref is bad or the extent
+ * entry is bad. Either way we want to have the extent_record actually
+ * reflect what we found in the extent_tree, so we need to take the
+ * duplicate out and use that as the extent_record since the only way we
+ * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
+ */
+ remove_cache_extent(extent_cache, &rec->cache);
+
+ good = list_entry(rec->dups.next, struct extent_record, list);
+ list_del_init(&good->list);
+ INIT_LIST_HEAD(&good->backrefs);
+ INIT_LIST_HEAD(&good->dups);
+ good->cache.start = good->start;
+ good->cache.size = good->nr;
+ good->content_checked = 0;
+ good->owner_ref_checked = 0;
+ good->num_duplicates = 0;
+ good->refs = rec->refs;
+ list_splice_init(&rec->backrefs, &good->backrefs);
+ while (1) {
+ cache = lookup_cache_extent(extent_cache, good->start,
+ good->nr);
+ if (!cache)
+ break;
+ tmp = container_of(cache, struct extent_record, cache);
+
+ /*
+ * If we find another overlapping extent and it's found_rec is
+ * set then it's a duplicate and we need to try and delete
+ * something.
+ */
+ if (tmp->found_rec || tmp->num_duplicates > 0) {
+ if (list_empty(&good->list))
+ list_add_tail(&good->list,
+ &duplicate_extents);
+ good->num_duplicates += tmp->num_duplicates + 1;
+ list_splice_init(&tmp->dups, &good->dups);
+ list_del_init(&tmp->list);
+ list_add_tail(&tmp->list, &good->dups);
+ remove_cache_extent(extent_cache, &tmp->cache);
+ continue;
+ }
+
+ /*
+ * Ok we have another non extent item backed extent rec, so lets
+ * just add it to this extent and carry on like we did above.
+ */
+ good->refs += tmp->refs;
+ list_splice_init(&tmp->backrefs, &good->backrefs);
+ remove_cache_extent(extent_cache, &tmp->cache);
+ free(tmp);
+ }
+ ret = insert_cache_extent(extent_cache, &good->cache);
+ BUG_ON(ret);
+ free(rec);
+ return good->num_duplicates ? 0 : 1;
+}
+/*
+ * Delete the extent items of the duplicates of @rec, keeping only the record
+ * that covers the others.
+ *
+ * The covering record is chosen among @rec and rec->dups; every other record
+ * with found_rec set has its EXTENT_ITEM removed from the extent tree in a
+ * single transaction. The function abort()s when two records overlap without
+ * one covering the other, or when a record to delete is metadata.
+ *
+ * All records on rec->dups and on the local delete list are freed before
+ * returning, except @rec itself. rec->num_duplicates is reset to 0 when
+ * nothing was deleted and no error occurred.
+ *
+ * Returns the number of extent items deleted, or a negative errno.
+ */
+static int delete_duplicate_records(struct btrfs_root *root,
+ struct extent_record *rec)
+{
+ struct btrfs_trans_handle *trans;
+ LIST_HEAD(delete_list);
+ struct btrfs_path *path;
+ struct extent_record *tmp, *good, *n;
+ int nr_del = 0;
+ int ret = 0, err;
+ struct btrfs_key key;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ good = rec;
+ /* Find the record that covers all of the duplicates. */
+ list_for_each_entry(tmp, &rec->dups, list) {
+ if (good->start < tmp->start)
+ continue;
+ if (good->nr > tmp->nr)
+ continue;
+
+ if (tmp->start + tmp->nr < good->start + good->nr) {
+ fprintf(stderr, "Ok we have overlapping extents that "
+ "aren't completely covered by eachother, this "
+ "is going to require more careful thought. "
+ "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
+ tmp->start, tmp->nr, good->start, good->nr);
+ abort();
+ }
+ good = tmp;
+ }
+
+ if (good != rec)
+ list_add_tail(&rec->list, &delete_list);
+
+ list_for_each_entry_safe(tmp, n, &rec->dups, list) {
+ if (tmp == good)
+ continue;
+ list_move_tail(&tmp->list, &delete_list);
+ }
+
+ root = root->fs_info->extent_root;
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ list_for_each_entry(tmp, &delete_list, list) {
+ if (tmp->found_rec == 0)
+ continue;
+ key.objectid = tmp->start;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = tmp->nr;
+
+ /* Shouldn't happen but just in case */
+ if (tmp->metadata) {
+ fprintf(stderr, "Well this shouldn't happen, extent "
+ "record overlaps but is metadata? "
+ "[%Lu, %Lu]\n", tmp->start, tmp->nr);
+ abort();
+ }
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret) {
+ if (ret > 0)
+ ret = -EINVAL;
+ break;
+ }
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ break;
+ btrfs_release_path(path);
+ nr_del++;
+ }
+ err = btrfs_commit_transaction(trans, root);
+ if (err && !ret)
+ ret = err;
+out:
+ while (!list_empty(&delete_list)) {
+ tmp = list_entry(delete_list.next, struct extent_record, list);
+ list_del_init(&tmp->list);
+ if (tmp == rec)
+ continue;
+ free(tmp);
+ }
+
+ while (!list_empty(&rec->dups)) {
+ tmp = list_entry(rec->dups.next, struct extent_record, list);
+ list_del_init(&tmp->list);
+ free(tmp);
+ }
+
+ btrfs_free_path(path);
+
+ if (!ret && !nr_del)
+ rec->num_duplicates = 0;
+
+ return ret ? ret : nr_del;
+}
+/*
+ * For every non-full data backref of @rec that has no found ref yet, look up
+ * the file extent item it names and adopt that item's disk bytenr and size.
+ *
+ * A backref is skipped when its root does not exist (-ENOENT), when no item
+ * with the exact key (owner, EXTENT_DATA, offset) exists, or when
+ * @extent_cache already holds a record with found_rec set at the item's
+ * bytenr. Adopted backrefs get found_ref incremented and are marked broken
+ * so that verify_backrefs() does not trust their values.
+ *
+ * Returns 0 on success, or a negative errno from reading the root or from
+ * the search.
+ */
+static int find_possible_backrefs(struct btrfs_fs_info *info,
+ struct btrfs_path *path,
+ struct cache_tree *extent_cache,
+ struct extent_record *rec)
+{
+ struct btrfs_root *root;
+ struct extent_backref *back;
+ struct data_backref *dback;
+ struct cache_extent *cache;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+ u64 bytenr, bytes;
+ int ret;
+
+ list_for_each_entry(back, &rec->backrefs, list) {
+ /* Don't care about full backrefs (poor unloved backrefs) */
+ if (back->full_backref || !back->is_data)
+ continue;
+
+ dback = (struct data_backref *)back;
+
+ /* We found this one, we don't need to do a lookup */
+ if (dback->found_ref)
+ continue;
+
+ key.objectid = dback->root;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ root = btrfs_read_fs_root(info, &key);
+
+ /* No root, definitely a bad ref, skip */
+ if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
+ continue;
+ /* Other err, exit */
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+
+ key.objectid = dback->owner;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = dback->offset;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret) {
+ btrfs_release_path(path);
+ if (ret < 0)
+ return ret;
+ /* Didn't find it, we can carry on */
+ ret = 0;
+ continue;
+ }
+
+ fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
+ bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
+ btrfs_release_path(path);
+ cache = lookup_cache_extent(extent_cache, bytenr, 1);
+ if (cache) {
+ struct extent_record *tmp;
+ tmp = container_of(cache, struct extent_record, cache);
+
+ /*
+ * If we found an extent record for the bytenr for this
+ * particular backref then we can't add it to our
+ * current extent record. We only want to add backrefs
+ * that don't have a corresponding extent item in the
+ * extent tree since they likely belong to this record
+ * and we need to fix it if it doesn't match bytenrs.
+ */
+ if (tmp->found_rec)
+ continue;
+ }
+
+ dback->found_ref += 1;
+ dback->disk_bytenr = bytenr;
+ dback->bytes = bytes;
+
+ /*
+ * Set this so the verify backref code knows not to trust the
+ * values in this backref.
+ */
+ back->broken = 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Record orphan data refs into their corresponding roots.
+ *
+ * For every non-full data backref of @rec that was found in the extent tree
+ * but has no found ref, the root named by the backref is read and searched
+ * for the file extent key (owner, EXTENT_DATA, offset).  When that key is
+ * not found, an orphan_data_extent describing the backref and the range
+ * rec->cache.start/rec->cache.size is appended to the root's
+ * orphan_data_extents list.
+ *
+ * Return 0 if the extent item contains a data ref and it was recorded.
+ * Return 1 if the extent item contains no useful data ref.
+ * In that case it may contain only shared data refs or metadata backrefs,
+ * or the file extent exists (this should be handled by the extent bytenr
+ * recovery routine).
+ * Return <0 if something goes wrong.
+ */
+static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
+				      struct extent_record *rec)
+{
+	struct btrfs_key key;
+	struct btrfs_root *dest_root;
+	struct extent_backref *back;
+	struct data_backref *dback;
+	struct orphan_data_extent *orphan;
+	struct btrfs_path *path;
+	int recorded_data_ref = 0;
+	int ret = 0;
+
+	if (rec->metadata)
+		return 1;
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	list_for_each_entry(back, &rec->backrefs, list) {
+		if (back->full_backref || !back->is_data ||
+		    !back->found_extent_tree)
+			continue;
+		dback = (struct data_backref *)back;
+		if (dback->found_ref)
+			continue;
+		key.objectid = dback->root;
+		key.type = BTRFS_ROOT_ITEM_KEY;
+		key.offset = (u64)-1;
+
+		dest_root = btrfs_read_fs_root(fs_info, &key);
+
+		/* For non-exist root we just skip it */
+		if (IS_ERR(dest_root) || !dest_root)
+			continue;
+
+		key.objectid = dback->owner;
+		key.type = BTRFS_EXTENT_DATA_KEY;
+		key.offset = dback->offset;
+
+		ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
+		/*
+		 * Only the return value is needed.  Drop the path right away
+		 * so that the next iteration does not search with a path that
+		 * still holds the buffers of this lookup.
+		 */
+		btrfs_release_path(path);
+		/*
+		 * For ret < 0, it's OK since the fs-tree may be corrupted,
+		 * we need to record it for inode/file extent rebuild.
+		 * For ret > 0, we record it only for file extent rebuild.
+		 * For ret == 0, the file extent exists but only bytenr
+		 * mismatch, let the original bytenr fix routine to handle,
+		 * don't record it.
+		 */
+		if (ret == 0)
+			continue;
+		ret = 0;
+		orphan = malloc(sizeof(*orphan));
+		if (!orphan) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		INIT_LIST_HEAD(&orphan->list);
+		orphan->root = dback->root;
+		orphan->objectid = dback->owner;
+		orphan->offset = dback->offset;
+		orphan->disk_bytenr = rec->cache.start;
+		orphan->disk_len = rec->cache.size;
+		/*
+		 * list_add() takes (new, head).  With the arguments the other
+		 * way round the root's list head is re-linked around this
+		 * single orphan and every earlier orphan is dropped.
+		 */
+		list_add(&orphan->list, &dest_root->orphan_data_extents);
+		recorded_data_ref = 1;
+	}
+out:
+	btrfs_free_path(path);
+	if (!ret)
+		return !recorded_data_ref;
+	else
+		return ret;
+}
+
+/*
+ * when an incorrect extent item is found, this will delete
+ * all of the existing entries for it and recreate them
+ * based on what the tree scan found.
+ *
+ * Steps: for data records whose ref counts disagree, look for possible
+ * backrefs first; verify that the backrefs agree; then, in one
+ * transaction, delete the existing extent records and re-create a record
+ * for each backref with a found ref.  Nothing is re-created when the range
+ * is listed in info->corrupt_blocks.
+ *
+ * Returns 0 on success or the error of the first failing step (including
+ * -EAGAIN from verify_backrefs()); a commit error is reported only when no
+ * earlier error occurred.
+ */
+static int fixup_extent_refs(struct btrfs_fs_info *info,
+			     struct cache_tree *extent_cache,
+			     struct extent_record *rec)
+{
+	struct btrfs_trans_handle *trans = NULL;
+	int ret;
+	struct btrfs_path *path;
+	struct list_head *cur = rec->backrefs.next;
+	struct cache_extent *cache;
+	struct extent_backref *back;
+	int allocated = 0;
+	u64 flags = 0;
+
+	if (rec->flag_block_full_backref)
+		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	if (rec->refs != rec->extent_item_refs && !rec->metadata) {
+		/*
+		 * Sometimes the backrefs themselves are so broken they don't
+		 * get attached to any meaningful rec, so first go back and
+		 * check any of our backrefs that we couldn't find and throw
+		 * them into the list if we find the backref so that
+		 * verify_backrefs can figure out what to do.
+		 */
+		ret = find_possible_backrefs(info, path, extent_cache, rec);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* step one, make sure all of the backrefs agree */
+	ret = verify_backrefs(info, path, rec);
+	if (ret < 0)
+		goto out;
+
+	trans = btrfs_start_transaction(info->extent_root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		/*
+		 * An error pointer is not NULL; clear it so the cleanup
+		 * below does not try to commit through it.
+		 */
+		trans = NULL;
+		goto out;
+	}
+
+	/* step two, delete all the existing records */
+	ret = delete_extent_records(trans, info->extent_root, path,
+				    rec->start, rec->max_size);
+
+	if (ret < 0)
+		goto out;
+
+	/* was this block corrupt?  If so, don't add references to it */
+	cache = lookup_cache_extent(info->corrupt_blocks,
+				    rec->start, rec->max_size);
+	if (cache) {
+		ret = 0;
+		goto out;
+	}
+
+	/* step three, recreate all the refs we did find */
+	while (cur != &rec->backrefs) {
+		back = list_entry(cur, struct extent_backref, list);
+		cur = cur->next;
+
+		/*
+		 * if we didn't find any references, don't create a
+		 * new extent record
+		 */
+		if (!back->found_ref)
+			continue;
+
+		rec->bad_full_backref = 0;
+		ret = record_extent(trans, info, path, rec, back, allocated, flags);
+		/* only the first call inserts the extent item itself */
+		allocated = 1;
+
+		if (ret)
+			goto out;
+	}
+out:
+	if (trans) {
+		int err = btrfs_commit_transaction(trans, info->extent_root);
+		if (!ret)
+			ret = err;
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Bring the BTRFS_BLOCK_FLAG_FULL_BACKREF bit of @rec's extent item in line
+ * with rec->flag_block_full_backref.
+ *
+ * The item is looked up as (rec->start, METADATA_ITEM, rec->info_level) for
+ * metadata records and as (rec->start, EXTENT_ITEM, rec->max_size)
+ * otherwise.  Once a transaction has been started it is committed on every
+ * path.
+ *
+ * Returns the commit result on success, -ENOMEM if no path could be
+ * allocated, -ENOENT if the item does not exist, or the error from starting
+ * the transaction or from the search.
+ */
+static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
+			      struct extent_record *rec)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_extent_item *item;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	u64 flags;
+	int ret = 0;
+
+	key.objectid = rec->start;
+	key.type = rec->metadata ? BTRFS_METADATA_ITEM_KEY :
+				   BTRFS_EXTENT_ITEM_KEY;
+	key.offset = rec->metadata ? rec->info_level : rec->max_size;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(extent_root, 0);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		btrfs_free_path(path);
+		return ret;
+	}
+
+	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
+	if (ret > 0) {
+		fprintf(stderr, "Didn't find extent for %llu\n",
+			(unsigned long long)rec->start);
+		ret = -ENOENT;
+	}
+	if (ret < 0) {
+		/* nothing to change; the commit result is ignored here */
+		btrfs_free_path(path);
+		btrfs_commit_transaction(trans, extent_root);
+		return ret;
+	}
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0],
+			      struct btrfs_extent_item);
+	flags = btrfs_extent_flags(leaf, item);
+	if (!rec->flag_block_full_backref) {
+		fprintf(stderr, "clearing full backref on %llu\n",
+			(unsigned long long)key.objectid);
+		flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
+	} else {
+		fprintf(stderr, "setting full backref on %llu\n",
+			(unsigned long long)key.objectid);
+		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+	}
+	btrfs_set_extent_flags(leaf, item, flags);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_free_path(path);
+
+	return btrfs_commit_transaction(trans, extent_root);
+}
+
+/*
+ * Remove the pointer to one corrupt block from the node that refers to it.
+ *
+ * Right now we only prune from the extent allocation tree.  The search stops
+ * one level above the corrupt block; whenever the node it lands on holds no
+ * pointer to the block, the search is repeated one level higher, until the
+ * node examined is the root node of the extent tree.
+ *
+ * Returns the result of btrfs_del_ptr() when the pointer was found, a negative
+ * value from the tree search, or -ENOENT when no examined node refers to the
+ * block.
+ */
+static int prune_one_block(struct btrfs_trans_handle *trans,
+			   struct btrfs_fs_info *info,
+			   struct btrfs_corrupt_block *corrupt)
+{
+	struct btrfs_path path;
+	struct extent_buffer *node;
+	int level = corrupt->level + 1;
+	int nr;
+	int slot;
+	int ret;
+
+	btrfs_init_path(&path);
+
+	while (1) {
+		/* we want to stop at the parent to our busted block */
+		path.lowest_level = level;
+
+		ret = btrfs_search_slot(trans, info->extent_root,
+					&corrupt->key, &path, -1, 1);
+		if (ret < 0)
+			goto out;
+
+		node = path.nodes[level];
+		if (!node) {
+			ret = -ENOENT;
+			goto out;
+		}
+
+		/*
+		 * hopefully the search gave us the block we want to prune,
+		 * lets try that first
+		 */
+		slot = path.slots[level];
+		if (btrfs_node_blockptr(node, slot) == corrupt->cache.start)
+			break;
+
+		/* the search failed, lets scan this node and hope we find it */
+		nr = btrfs_header_nritems(node);
+		for (slot = 0; slot < nr; slot++) {
+			if (btrfs_node_blockptr(node, slot) ==
+			    corrupt->cache.start)
+				break;
+		}
+		if (slot < nr)
+			break;
+
+		/*
+		 * we couldn't find the bad block.  TODO, search all the nodes
+		 * for pointers to this block
+		 */
+		if (node == info->extent_root->node) {
+			ret = -ENOENT;
+			goto out;
+		}
+
+		/* retry one level closer to the root */
+		level++;
+		btrfs_release_path(&path);
+	}
+
+	printk("deleting pointer to block %Lu\n", corrupt->cache.start);
+	ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
+
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Drain info->corrupt_blocks, pruning the tree pointer of every entry.
+ *
+ * A transaction is started lazily when the first entry is found and committed
+ * once the cache is empty.  The result of prune_one_block() is not checked, so
+ * a block whose pointer cannot be removed is still dropped from the cache.
+ *
+ * Returns 0 if there was nothing to do, the error from starting the
+ * transaction, or the result of the commit.
+ */
+static int prune_corrupt_blocks(struct btrfs_fs_info *info)
+{
+	struct btrfs_trans_handle *trans = NULL;
+	struct cache_extent *cache;
+
+	while ((cache = search_cache_extent(info->corrupt_blocks, 0)) != NULL) {
+		if (!trans) {
+			trans = btrfs_start_transaction(info->extent_root, 1);
+			if (IS_ERR(trans))
+				return PTR_ERR(trans);
+		}
+		prune_one_block(trans, info,
+				container_of(cache, struct btrfs_corrupt_block,
+					     cache));
+		remove_cache_extent(info->corrupt_blocks, cache);
+	}
+
+	return trans ? btrfs_commit_transaction(trans, info->extent_root) : 0;
+}
+
+/*
+ * Forget all cached free-space state: clear every EXTENT_DIRTY range in
+ * fs_info->free_space_cache and reset the 'cached' flag of every block group.
+ */
+static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_block_group_cache *bg;
+	u64 start;
+	u64 end;
+
+	/* always search from offset 0; each pass clears the range it found */
+	while (find_first_extent_bit(&fs_info->free_space_cache, 0,
+				     &start, &end, EXTENT_DIRTY) == 0)
+		clear_extent_dirty(&fs_info->free_space_cache, start, end,
+				   GFP_NOFS);
+
+	/* step from one block group to the next by its key range */
+	for (start = 0;
+	     (bg = btrfs_lookup_first_block_group(fs_info, start)) != NULL;
+	     start = bg->key.objectid + bg->key.offset)
+		bg->cached = 0;
+}
+
+/*
+ * Verify every extent record gathered during the tree scan.
+ *
+ * For each record in @extent_cache this reports duplicate extent items,
+ * reference count mismatches, backpointer mismatches, failed owner checks,
+ * wrong FULL_BACKREF flags, metadata crossing a stripe boundary and extents
+ * whose type does not match their chunk.  Every record is removed from the
+ * cache and freed once it has been examined.
+ *
+ * When the global 'repair' flag is set, the problem extents and the corrupt
+ * blocks are first excluded from allocation, corrupt blocks are pruned,
+ * duplicate extent items are deleted, and refs / flags are rewritten through
+ * fixup_extent_refs() and fixup_extent_flags().
+ *
+ * Returns:
+ *   without repair: 0 if nothing was reported, 1 otherwise;
+ *   with repair: 0 on success, -EAGAIN if duplicates were deleted (the caller
+ *   is expected to rescan), or a negative value from
+ *   delete_duplicate_records().  Any other repair failure ends the program
+ *   with exit(1).
+ */
+static int check_extent_refs(struct btrfs_root *root,
+			     struct cache_tree *extent_cache)
+{
+	struct extent_record *rec;
+	struct cache_extent *cache;
+	int err = 0;
+	int ret = 0;
+	int fixed = 0;
+	int had_dups = 0;
+	int recorded = 0;
+
+	if (repair) {
+		/*
+		 * if we're doing a repair, we have to make sure
+		 * we don't allocate from the problem extents.
+		 * In the worst case, this will be all the
+		 * extents in the FS
+		 */
+		cache = search_cache_extent(extent_cache, 0);
+		while(cache) {
+			rec = container_of(cache, struct extent_record, cache);
+			set_extent_dirty(root->fs_info->excluded_extents,
+					 rec->start,
+					 rec->start + rec->max_size - 1,
+					 GFP_NOFS);
+			cache = next_cache_extent(cache);
+		}
+
+		/* pin down all the corrupted blocks too */
+		cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
+		while(cache) {
+			set_extent_dirty(root->fs_info->excluded_extents,
+					 cache->start,
+					 cache->start + cache->size - 1,
+					 GFP_NOFS);
+			cache = next_cache_extent(cache);
+		}
+		prune_corrupt_blocks(root->fs_info);
+	}
+
+	/*
+	 * Runs in both modes.  A second call used to sit inside the repair
+	 * branch directly in front of this one; it was dropped because nothing
+	 * happened between the two calls.
+	 */
+	reset_cached_block_groups(root->fs_info);
+
+	/*
+	 * We need to delete any duplicate entries we find first otherwise we
+	 * could mess up the extent tree when we have backrefs that actually
+	 * belong to a different extent item and not the weird duplicate one.
+	 */
+	while (repair && !list_empty(&duplicate_extents)) {
+		rec = list_entry(duplicate_extents.next, struct extent_record,
+				 list);
+		list_del_init(&rec->list);
+
+		/* Sometimes we can find a backref before we find an actual
+		 * extent, so we need to process it a little bit to see if there
+		 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
+		 * if this is a backref screwup.  If we need to delete stuff
+		 * process_duplicates() will return 0, otherwise it will return
+		 * non-zero and we move on to the next duplicate record.
+		 */
+		if (process_duplicates(root, extent_cache, rec))
+			continue;
+		ret = delete_duplicate_records(root, rec);
+		if (ret < 0)
+			return ret;
+		/*
+		 * delete_duplicate_records will return the number of entries
+		 * deleted, so if it's greater than 0 then we know we actually
+		 * did something and we need to remove.
+		 */
+		if (ret)
+			had_dups = 1;
+	}
+
+	/* the tree changed under us, ask the caller to start over */
+	if (had_dups)
+		return -EAGAIN;
+
+	while(1) {
+		int cur_err = 0;
+
+		/* per-record state: was it repaired / recorded as orphan data */
+		fixed = 0;
+		recorded = 0;
+		cache = search_cache_extent(extent_cache, 0);
+		if (!cache)
+			break;
+		rec = container_of(cache, struct extent_record, cache);
+		if (rec->num_duplicates) {
+			fprintf(stderr, "extent item %llu has multiple extent "
+				"items\n", (unsigned long long)rec->start);
+			err = 1;
+			cur_err = 1;
+		}
+
+		if (rec->refs != rec->extent_item_refs) {
+			fprintf(stderr, "ref mismatch on [%llu %llu] ",
+				(unsigned long long)rec->start,
+				(unsigned long long)rec->nr);
+			fprintf(stderr, "extent item %llu, found %llu\n",
+				(unsigned long long)rec->extent_item_refs,
+				(unsigned long long)rec->refs);
+			ret = record_orphan_data_extents(root->fs_info, rec);
+			if (ret < 0)
+				goto repair_abort;
+			if (ret == 0) {
+				recorded = 1;
+			} else {
+				/*
+				 * we can't use the extent to repair file
+				 * extent, let the fallback method handle it.
+				 */
+				if (!fixed && repair) {
+					ret = fixup_extent_refs(
+							root->fs_info,
+							extent_cache, rec);
+					if (ret)
+						goto repair_abort;
+					fixed = 1;
+				}
+			}
+			err = 1;
+			cur_err = 1;
+		}
+		if (all_backpointers_checked(rec, 1)) {
+			fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
+				(unsigned long long)rec->start,
+				(unsigned long long)rec->nr);
+
+			if (!fixed && !recorded && repair) {
+				ret = fixup_extent_refs(root->fs_info,
+							extent_cache, rec);
+				if (ret)
+					goto repair_abort;
+				fixed = 1;
+			}
+			cur_err = 1;
+			err = 1;
+		}
+		if (!rec->owner_ref_checked) {
+			fprintf(stderr, "owner ref check failed [%llu %llu]\n",
+				(unsigned long long)rec->start,
+				(unsigned long long)rec->nr);
+			if (!fixed && !recorded && repair) {
+				ret = fixup_extent_refs(root->fs_info,
+							extent_cache, rec);
+				if (ret)
+					goto repair_abort;
+				fixed = 1;
+			}
+			err = 1;
+			cur_err = 1;
+		}
+		if (rec->bad_full_backref) {
+			fprintf(stderr, "bad full backref, on [%llu]\n",
+				(unsigned long long)rec->start);
+			if (repair) {
+				ret = fixup_extent_flags(root->fs_info, rec);
+				if (ret)
+					goto repair_abort;
+				fixed = 1;
+			}
+			err = 1;
+			cur_err = 1;
+		}
+		/*
+		 * Although it's not a extent ref's problem, we reuse this
+		 * routine for error reporting.
+		 * No repair function yet.
+		 */
+		if (rec->crossing_stripes) {
+			/* cast like the other messages: %llu needs unsigned long long */
+			fprintf(stderr,
+				"bad metadata [%llu, %llu) crossing stripe boundary\n",
+				(unsigned long long)rec->start,
+				(unsigned long long)(rec->start + rec->max_size));
+			err = 1;
+			cur_err = 1;
+		}
+
+		if (rec->wrong_chunk_type) {
+			fprintf(stderr,
+				"bad extent [%llu, %llu), type mismatch with chunk\n",
+				(unsigned long long)rec->start,
+				(unsigned long long)(rec->start + rec->max_size));
+			err = 1;
+			cur_err = 1;
+		}
+
+		remove_cache_extent(extent_cache, cache);
+		free_all_extent_backrefs(rec);
+		/* a clean or repaired extent may be allocated from again */
+		if (!init_extent_tree && repair && (!cur_err || fixed))
+			clear_extent_dirty(root->fs_info->excluded_extents,
+					   rec->start,
+					   rec->start + rec->max_size - 1,
+					   GFP_NOFS);
+		free(rec);
+	}
+repair_abort:
+	if (repair) {
+		if (ret && ret != -EAGAIN) {
+			fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
+			exit(1);
+		} else if (!ret) {
+			struct btrfs_trans_handle *trans;
+
+			root = root->fs_info->extent_root;
+			trans = btrfs_start_transaction(root, 1);
+			if (IS_ERR(trans)) {
+				/* jumps back up with ret set, ending in exit(1) */
+				ret = PTR_ERR(trans);
+				goto repair_abort;
+			}
+
+			btrfs_fix_block_accounting(trans, root);
+			ret = btrfs_commit_transaction(trans, root);
+			if (ret)
+				goto repair_abort;
+		}
+		if (err)
+			fprintf(stderr, "repaired damaged extent references\n");
+		return ret;
+	}
+	return err;
+}
+
+/*
+ * Compute the length each stripe of a chunk occupies on its device.
+ *
+ * @type:        block group flags of the chunk (BTRFS_BLOCK_GROUP_*)
+ * @length:      logical length of the chunk
+ * @num_stripes: number of stripes recorded in the chunk item
+ *
+ * RAID0 divides the length by the stripe count, RAID10 divides twice the
+ * length by it, RAID5 and RAID6 divide by the stripe count minus one and minus
+ * two.  Every other profile returns @length unchanged.
+ *
+ * The stripe count comes from the filesystem being checked, so it cannot be
+ * trusted: when the divisor for the profile would not be positive, @length is
+ * returned unchanged instead of dividing by zero.
+ */
+u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
+{
+	u64 stripe_size = length;
+	int divisor;
+
+	if (type & BTRFS_BLOCK_GROUP_RAID0) {
+		divisor = num_stripes;
+	} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
+		stripe_size = length * 2;
+		divisor = num_stripes;
+	} else if (type & BTRFS_BLOCK_GROUP_RAID5) {
+		divisor = num_stripes - 1;
+	} else if (type & BTRFS_BLOCK_GROUP_RAID6) {
+		divisor = num_stripes - 2;
+	} else {
+		return length;
+	}
+
+	/* too few stripes for the profile: avoid a division by zero */
+	if (divisor <= 0)
+		return length;
+
+	return stripe_size / (u64)divisor;
+}
+
+/*
+ * Check the chunk with its block group/dev list ref:
+ * Return 0 if all refs seems valid.
+ * Return 1 if part of refs seems valid, need later check for rebuild ref
+ * like missing block group and needs to search extent tree to rebuild them.
+ * Return -1 if essential refs are missing and unable to rebuild.
+ *
+ * A matching block group is unlinked from its list and attached to
+ * chunk_rec->bg_rec; every matching device extent is moved onto
+ * chunk_rec->dextents.  Messages go to stderr unless @silent is set.
+ */
+static int check_chunk_refs(struct chunk_record *chunk_rec,
+			    struct block_group_tree *block_group_cache,
+			    struct device_extent_tree *dev_extent_cache,
+			    int silent)
+{
+	struct cache_extent *item;
+	struct block_group_record *bg;
+	struct device_extent_record *dext;
+	u64 stripe_len;
+	u64 devid;
+	u64 offset;
+	int metadump_v2 = 0;
+	int ret = 0;
+	int i;
+
+	/* chunk -> block group */
+	item = lookup_cache_extent(&block_group_cache->tree,
+				   chunk_rec->offset,
+				   chunk_rec->length);
+	if (!item) {
+		if (!silent)
+			fprintf(stderr,
+				"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
+				chunk_rec->objectid,
+				chunk_rec->type,
+				chunk_rec->offset,
+				chunk_rec->length,
+				chunk_rec->offset,
+				chunk_rec->type_flags);
+		ret = 1;
+	} else {
+		bg = container_of(item, struct block_group_record, cache);
+		if (chunk_rec->length == bg->offset &&
+		    chunk_rec->offset == bg->objectid &&
+		    (metadump_v2 || chunk_rec->type_flags == bg->flags)) {
+			list_del_init(&bg->list);
+			chunk_rec->bg_rec = bg;
+		} else {
+			if (!silent)
+				fprintf(stderr,
+					"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
+					chunk_rec->objectid,
+					chunk_rec->type,
+					chunk_rec->offset,
+					chunk_rec->length,
+					chunk_rec->offset,
+					chunk_rec->type_flags,
+					bg->objectid,
+					bg->type,
+					bg->offset,
+					bg->offset,
+					bg->objectid,
+					bg->flags);
+			ret = -1;
+		}
+	}
+
+	/* metadump_v2 is fixed to 0 above, so this return is never taken */
+	if (metadump_v2)
+		return ret;
+
+	/* chunk stripes -> device extents */
+	stripe_len = calc_stripe_length(chunk_rec->type_flags,
+					chunk_rec->length,
+					chunk_rec->num_stripes);
+	for (i = 0; i < chunk_rec->num_stripes; ++i) {
+		devid = chunk_rec->stripes[i].devid;
+		offset = chunk_rec->stripes[i].offset;
+
+		item = lookup_cache_extent2(&dev_extent_cache->tree,
+					    devid, offset, stripe_len);
+		if (!item) {
+			if (!silent)
+				fprintf(stderr,
+					"Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
+					chunk_rec->objectid,
+					chunk_rec->type,
+					chunk_rec->offset,
+					chunk_rec->stripes[i].devid,
+					chunk_rec->stripes[i].offset);
+			ret = -1;
+			continue;
+		}
+
+		dext = container_of(item, struct device_extent_record, cache);
+		if (dext->objectid == devid &&
+		    dext->offset == offset &&
+		    dext->chunk_offset == chunk_rec->offset &&
+		    dext->length == stripe_len) {
+			list_move(&dext->chunk_list, &chunk_rec->dextents);
+			continue;
+		}
+
+		if (!silent)
+			fprintf(stderr,
+				"Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
+				chunk_rec->objectid,
+				chunk_rec->type,
+				chunk_rec->offset,
+				chunk_rec->stripes[i].devid,
+				chunk_rec->stripes[i].offset,
+				dext->objectid,
+				dext->offset,
+				dext->length);
+		ret = -1;
+	}
+	return ret;
+}
+
+/*
+ * check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item
+ *
+ * Every chunk in @chunk_cache is run through check_chunk_refs() and, if the
+ * matching list was supplied, queued on @good (result 0), @rebuild (result
+ * > 0) or @bad (result < 0).  Block groups and device extents still on their
+ * orphan lists afterwards are reported as having no chunk.
+ *
+ * Returns the last negative result of check_chunk_refs() if there was one,
+ * otherwise 1 if any orphan was found, otherwise 0.
+ */
+int check_chunks(struct cache_tree *chunk_cache,
+		 struct block_group_tree *block_group_cache,
+		 struct device_extent_tree *dev_extent_cache,
+		 struct list_head *good, struct list_head *bad,
+		 struct list_head *rebuild, int silent)
+{
+	struct cache_extent *item;
+	struct chunk_record *chunk_rec;
+	struct block_group_record *bg_rec;
+	struct device_extent_record *dext_rec;
+	struct list_head *dest;
+	int err;
+	int ret = 0;
+
+	for (item = first_cache_extent(chunk_cache); item;
+	     item = next_cache_extent(item)) {
+		chunk_rec = container_of(item, struct chunk_record, cache);
+		err = check_chunk_refs(chunk_rec, block_group_cache,
+				       dev_extent_cache, silent);
+		if (err < 0) {
+			ret = err;
+			dest = bad;
+		} else if (err == 0) {
+			dest = good;
+		} else {
+			dest = rebuild;
+		}
+		/* the caller may pass NULL for a list it is not interested in */
+		if (dest)
+			list_add_tail(&chunk_rec->list, dest);
+	}
+
+	list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
+		if (!silent)
+			fprintf(stderr,
+				"Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
+				bg_rec->objectid,
+				bg_rec->offset,
+				bg_rec->flags);
+		if (ret == 0)
+			ret = 1;
+	}
+
+	list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
+			    chunk_list) {
+		if (!silent)
+			fprintf(stderr,
+				"Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
+				dext_rec->objectid,
+				dext_rec->offset,
+				dext_rec->length);
+		if (ret == 0)
+			ret = 1;
+	}
+	return ret;
+}
+
+
+/*
+ * Sum the lengths of all device extents recorded for @dev_rec and compare the
+ * total with the device's byte_used.  Each extent visited is unlinked from its
+ * device_list.
+ *
+ * Returns 0 when the totals agree, -1 (after printing a message) otherwise.
+ */
+static int check_device_used(struct device_record *dev_rec,
+			     struct device_extent_tree *dext_cache)
+{
+	struct cache_extent *cache;
+	struct device_extent_record *dext;
+	u64 total_byte = 0;
+
+	for (cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
+	     cache; cache = next_cache_extent(cache)) {
+		dext = container_of(cache, struct device_extent_record, cache);
+		/* stop at the first extent that belongs to another device */
+		if (dext->objectid != dev_rec->devid)
+			break;
+
+		list_del_init(&dext->device_list);
+		total_byte += dext->length;
+	}
+
+	if (total_byte == dev_rec->byte_used)
+		return 0;
+
+	fprintf(stderr,
+		"Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
+		total_byte, dev_rec->byte_used,	dev_rec->objectid,
+		dev_rec->type, dev_rec->offset);
+	return -1;
+}
+
+/*
+ * check btrfs_dev_item -> btrfs_dev_extent
+ *
+ * Runs check_device_used() on every device in @dev_cache, then reports the
+ * device extents left on the no_device_orphans list.
+ *
+ * Returns the last non-zero result of check_device_used() if there was one,
+ * otherwise 1 if an orphan device extent was found, otherwise 0.
+ */
+static int check_devices(struct rb_root *dev_cache,
+			 struct device_extent_tree *dev_extent_cache)
+{
+	struct rb_node *node;
+	struct device_extent_record *dext_rec;
+	int err;
+	int ret = 0;
+
+	for (node = rb_first(dev_cache); node; node = rb_next(node)) {
+		err = check_device_used(container_of(node, struct device_record,
+						     node),
+					dev_extent_cache);
+		if (err)
+			ret = err;
+	}
+
+	list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
+			    device_list) {
+		fprintf(stderr,
+			"Device extent[%llu, %llu, %llu] didn't find its device.\n",
+			dext_rec->objectid, dext_rec->offset, dext_rec->length);
+		if (ret == 0)
+			ret = 1;
+	}
+	return ret;
+}
+
+/*
+ * Allocate a root_item_record describing one tree root and append it to @head.
+ *
+ * @objectid, @bytenr, @last_snapshot, @level, @drop_level and @level_size are
+ * stored as given.  @drop_key may be NULL; the record is allocated zeroed, so
+ * its drop_key then reads as all zero instead of being left uninitialised.
+ *
+ * Returns 0 on success or -ENOMEM if the record cannot be allocated.
+ */
+static int add_root_item_to_list(struct list_head *head,
+				  u64 objectid, u64 bytenr, u64 last_snapshot,
+				  u8 level, u8 drop_level,
+				  int level_size, struct btrfs_key *drop_key)
+{
+
+	struct root_item_record *ri_rec;
+
+	/* zeroed so no field carries heap garbage */
+	ri_rec = calloc(1, sizeof(*ri_rec));
+	if (!ri_rec)
+		return -ENOMEM;
+	ri_rec->bytenr = bytenr;
+	ri_rec->objectid = objectid;
+	ri_rec->level = level;
+	ri_rec->level_size = level_size;
+	ri_rec->drop_level = drop_level;
+	ri_rec->last_snapshot = last_snapshot;
+	if (drop_key)
+		memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
+	list_add_tail(&ri_rec->list, head);
+
+	return 0;
+}
+
+/* Unlink and free every root_item_record queued on @list. */
+static void free_root_item_list(struct list_head *list)
+{
+	struct list_head *node;
+
+	/* the list is empty once the head points back at itself */
+	while ((node = list->next) != list) {
+		list_del_init(node);
+		free(list_entry(node, struct root_item_record, list));
+	}
+}
+
+/*
+ * Scan the trees whose roots are queued on @list.
+ *
+ * For every record the root block is read and queued with
+ * add_root_to_pending(), then run_next_block() is called until it returns
+ * non-zero; the record is then unlinked and freed.  Once the list is empty,
+ * run_next_block() keeps being called without a root record until it returns
+ * non-zero.
+ *
+ * Returns 0 on success (a positive result of run_next_block() only ends the
+ * scan), -EIO if a root block cannot be read, or the negative value returned
+ * by run_next_block().
+ */
+static int deal_root_from_list(struct list_head *list,
+			       struct btrfs_root *root,
+			       struct block_info *bits,
+			       int bits_nr,
+			       struct cache_tree *pending,
+			       struct cache_tree *seen,
+			       struct cache_tree *reada,
+			       struct cache_tree *nodes,
+			       struct cache_tree *extent_cache,
+			       struct cache_tree *chunk_cache,
+			       struct rb_root *dev_cache,
+			       struct block_group_tree *block_group_cache,
+			       struct device_extent_tree *dev_extent_cache)
+{
+	int ret = 0;
+	/*
+	 * Initialised here as well as per root below: with an empty @list the
+	 * trailing loop would otherwise hand run_next_block() an
+	 * uninitialised value.
+	 */
+	u64 last = 0;
+
+	while (!list_empty(list)) {
+		struct root_item_record *rec;
+		struct extent_buffer *buf;
+		rec = list_entry(list->next,
+				 struct root_item_record, list);
+		last = 0;
+		buf = read_tree_block(root->fs_info->tree_root,
+				      rec->bytenr, rec->level_size, 0);
+		if (!extent_buffer_uptodate(buf)) {
+			free_extent_buffer(buf);
+			ret = -EIO;
+			break;
+		}
+		add_root_to_pending(buf, extent_cache, pending,
+				    seen, nodes, rec->objectid);
+		/*
+		 * To rebuild extent tree, we need deal with snapshot
+		 * one by one, otherwise we deal with node firstly which
+		 * can maximize readahead.
+		 */
+		while (1) {
+			ret = run_next_block(root, bits, bits_nr, &last,
+					     pending, seen, reada, nodes,
+					     extent_cache, chunk_cache,
+					     dev_cache, block_group_cache,
+					     dev_extent_cache, rec);
+			if (ret != 0)
+				break;
+		}
+		free_extent_buffer(buf);
+		list_del(&rec->list);
+		free(rec);
+		if (ret < 0)
+			break;
+	}
+	/* skipped entirely when the loop above stopped on an error */
+	while (ret >= 0) {
+		ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
+				     reada, nodes, extent_cache, chunk_cache,
+				     dev_cache, block_group_cache,
+				     dev_extent_cache, NULL);
+		if (ret != 0) {
+			/* positive is not reported to the caller as an error */
+			if (ret > 0)
+				ret = 0;
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Scan all trees reachable from the tree root and the chunk root, then
+ * cross-check chunks, block groups, device extents, devices and extent
+ * references.
+ *
+ * Roots with a zero drop_progress objectid are queued as normal trees, the
+ * others as trees being dropped; the normal trees are scanned first.  Any
+ * step that returns -EAGAIN makes the function drop everything it has
+ * collected and start over from the 'again' label.
+ *
+ * In repair mode the fsck hooks (excluded extents, extent cache, free extent
+ * hook, corrupt block tree) are installed in fs_info for the duration of the
+ * call and removed before returning.
+ *
+ * Returns a negative value if a scan step failed, otherwise the result of
+ * check_devices(), replaced by the check_chunks() result when both are
+ * non-zero.
+ */
+static int check_chunks_and_extents(struct btrfs_root *root)
+{
+	struct rb_root dev_cache;
+	struct cache_tree chunk_cache;
+	struct block_group_tree block_group_cache;
+	struct device_extent_tree dev_extent_cache;
+	struct cache_tree extent_cache;
+	struct cache_tree seen;
+	struct cache_tree pending;
+	struct cache_tree reada;
+	struct cache_tree nodes;
+	struct extent_io_tree excluded_extents;
+	struct cache_tree corrupt_blocks;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	int ret, err = 0;
+	struct block_info *bits;
+	int bits_nr;
+	struct extent_buffer *leaf;
+	int slot;
+	struct btrfs_root_item ri;
+	struct list_head dropping_trees;
+	struct list_head normal_trees;
+	struct btrfs_root *root1;
+	u64 objectid;
+	u32 level_size;
+	u8 level;
+
+	dev_cache = RB_ROOT;
+	cache_tree_init(&chunk_cache);
+	block_group_tree_init(&block_group_cache);
+	device_extent_tree_init(&dev_extent_cache);
+
+	cache_tree_init(&extent_cache);
+	cache_tree_init(&seen);
+	cache_tree_init(&pending);
+	cache_tree_init(&nodes);
+	cache_tree_init(&reada);
+	cache_tree_init(&corrupt_blocks);
+	extent_io_tree_init(&excluded_extents);
+	INIT_LIST_HEAD(&dropping_trees);
+	INIT_LIST_HEAD(&normal_trees);
+
+	if (repair) {
+		root->fs_info->excluded_extents = &excluded_extents;
+		root->fs_info->fsck_extent_cache = &extent_cache;
+		root->fs_info->free_extent_hook = free_extent_hook;
+		root->fs_info->corrupt_blocks = &corrupt_blocks;
+	}
+
+	bits_nr = 1024;
+	bits = malloc(bits_nr * sizeof(struct block_info));
+	if (!bits) {
+		perror("malloc");
+		exit(1);
+	}
+
+	if (ctx.progress_enabled) {
+		ctx.tp = TASK_EXTENTS;
+		task_start(ctx.info);
+	}
+
+again:
+	/* the tree root and the chunk root are always scanned */
+	root1 = root->fs_info->tree_root;
+	level = btrfs_header_level(root1->node);
+	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
+				    root1->node->start, 0, level, 0,
+				    btrfs_level_size(root1, level), NULL);
+	if (ret < 0)
+		goto out;
+	root1 = root->fs_info->chunk_root;
+	level = btrfs_header_level(root1->node);
+	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
+				    root1->node->start, 0, level, 0,
+				    btrfs_level_size(root1, level), NULL);
+	if (ret < 0)
+		goto out;
+
+	/* queue every root item found in the tree root */
+	btrfs_init_path(&path);
+	key.offset = 0;
+	key.objectid = 0;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
+					&key, &path, 0, 0);
+	if (ret < 0) {
+		/* do not leave the path holding buffers on the error exit */
+		btrfs_release_path(&path);
+		goto out;
+	}
+	while(1) {
+		leaf = path.nodes[0];
+		slot = path.slots[0];
+		if (slot >= btrfs_header_nritems(path.nodes[0])) {
+			ret = btrfs_next_leaf(root, &path);
+			if (ret != 0)
+				break;
+			leaf = path.nodes[0];
+			slot = path.slots[0];
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
+		if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
+			unsigned long offset;
+			u64 last_snapshot;
+
+			offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
+			read_extent_buffer(leaf, &ri, offset, sizeof(ri));
+			last_snapshot = btrfs_root_last_snapshot(&ri);
+			if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
+				level = btrfs_root_level(&ri);
+				level_size = btrfs_level_size(root, level);
+				ret = add_root_item_to_list(&normal_trees,
+						found_key.objectid,
+						btrfs_root_bytenr(&ri),
+						last_snapshot, level,
+						0, level_size, NULL);
+				if (ret < 0) {
+					btrfs_release_path(&path);
+					goto out;
+				}
+			} else {
+				level = btrfs_root_level(&ri);
+				level_size = btrfs_level_size(root, level);
+				objectid = found_key.objectid;
+				/* found_key is reused to carry the drop key */
+				btrfs_disk_key_to_cpu(&found_key,
+						      &ri.drop_progress);
+				ret = add_root_item_to_list(&dropping_trees,
+						objectid,
+						btrfs_root_bytenr(&ri),
+						last_snapshot, level,
+						ri.drop_level,
+						level_size, &found_key);
+				if (ret < 0) {
+					btrfs_release_path(&path);
+					goto out;
+				}
+			}
+		}
+		path.slots[0]++;
+	}
+	btrfs_release_path(&path);
+
+	/*
+	 * check_block can return -EAGAIN if it fixes something, please keep
+	 * this in mind when dealing with return values from these functions, if
+	 * we get -EAGAIN we want to fall through and restart the loop.
+	 */
+	ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
+				  &seen, &reada, &nodes, &extent_cache,
+				  &chunk_cache, &dev_cache, &block_group_cache,
+				  &dev_extent_cache);
+	if (ret < 0) {
+		if (ret == -EAGAIN)
+			goto loop;
+		goto out;
+	}
+	ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
+				  &pending, &seen, &reada, &nodes,
+				  &extent_cache, &chunk_cache, &dev_cache,
+				  &block_group_cache, &dev_extent_cache);
+	if (ret < 0) {
+		if (ret == -EAGAIN)
+			goto loop;
+		goto out;
+	}
+
+	ret = check_chunks(&chunk_cache, &block_group_cache,
+			   &dev_extent_cache, NULL, NULL, NULL, 0);
+	if (ret) {
+		if (ret == -EAGAIN)
+			goto loop;
+		err = ret;
+	}
+
+	ret = check_extent_refs(root, &extent_cache);
+	if (ret < 0) {
+		if (ret == -EAGAIN)
+			goto loop;
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): a chunk error in 'err' is only reported when
+	 * check_devices() fails as well, and a positive result of
+	 * check_extent_refs() is overwritten here - confirm this is intended.
+	 */
+	ret = check_devices(&dev_cache, &dev_extent_cache);
+	if (ret && err)
+		ret = err;
+
+out:
+	task_stop(ctx.info);
+	if (repair) {
+		free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
+		extent_io_tree_cleanup(&excluded_extents);
+		root->fs_info->fsck_extent_cache = NULL;
+		root->fs_info->free_extent_hook = NULL;
+		root->fs_info->corrupt_blocks = NULL;
+		root->fs_info->excluded_extents = NULL;
+	}
+	free(bits);
+	free_chunk_cache_tree(&chunk_cache);
+	free_device_cache_tree(&dev_cache);
+	free_block_group_tree(&block_group_cache);
+	free_device_extent_tree(&dev_extent_cache);
+	free_extent_cache_tree(&seen);
+	free_extent_cache_tree(&pending);
+	free_extent_cache_tree(&reada);
+	free_extent_cache_tree(&nodes);
+	/* records still queued after an early error exit must not leak */
+	free_root_item_list(&normal_trees);
+	free_root_item_list(&dropping_trees);
+	return ret;
+loop:
+	/* throw away everything collected so far and rescan from scratch */
+	free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
+	free_extent_cache_tree(&seen);
+	free_extent_cache_tree(&pending);
+	free_extent_cache_tree(&reada);
+	free_extent_cache_tree(&nodes);
+	free_chunk_cache_tree(&chunk_cache);
+	free_block_group_tree(&block_group_cache);
+	free_device_cache_tree(&dev_cache);
+	free_device_extent_tree(&dev_extent_cache);
+	free_extent_record_cache(root->fs_info, &extent_cache);
+	free_root_item_list(&normal_trees);
+	free_root_item_list(&dropping_trees);
+	extent_io_tree_cleanup(&excluded_extents);
+	goto again;
+}
+
+/*
+ * Replace the root node of @root with an empty level-0 block.
+ *
+ * With @overwrite set the existing root block is reused.  Otherwise a new
+ * block is allocated, and the existing block is reused only if that
+ * allocation fails.  The block header is rewritten from scratch (level,
+ * bytenr, generation, backref revision, owner, fsid, chunk tree uuid) and the
+ * block is marked dirty.
+ *
+ * Returns 0 on success or the error from btrfs_update_root().
+ */
+static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, int overwrite)
+{
+	struct extent_buffer *old = root->node;
+	struct extent_buffer *c = NULL;
+	struct btrfs_disk_key disk_key = {0,0,0};
+	int level = 0;
+	int ret;
+
+	if (!overwrite) {
+		c = btrfs_alloc_free_block(trans, root,
+					   btrfs_level_size(root, 0),
+					   root->root_key.objectid,
+					   &disk_key, level, 0, 0);
+		/* no block available: fall back to reusing the old one */
+		if (IS_ERR(c))
+			overwrite = 1;
+	}
+	if (overwrite) {
+		c = old;
+		extent_buffer_get(c);
+	}
+
+	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
+	btrfs_set_header_level(c, level);
+	btrfs_set_header_bytenr(c, c->start);
+	btrfs_set_header_generation(c, trans->transid);
+	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
+	btrfs_set_header_owner(c, root->root_key.objectid);
+
+	write_extent_buffer(c, root->fs_info->fsid,
+			    btrfs_header_fsid(), BTRFS_FSID_SIZE);
+
+	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
+			    btrfs_header_chunk_tree_uuid(c),
+			    BTRFS_UUID_SIZE);
+
+	btrfs_mark_buffer_dirty(c);
+
+	/*
+	 * The new block can sit at the same bytenr as the old one in two
+	 * cases:
+	 *
+	 * 1. the previous root was overwritten.
+	 *
+	 * 2. the reloc data root is being re-initialised: the reloc data tree
+	 *    was not pinned down beforehand, so the allocator may hand out
+	 *    the very same block again.
+	 *
+	 * In both cases the root item has to be updated here.
+	 */
+	if (c->start == old->start) {
+		btrfs_set_root_generation(&root->root_item,
+					  trans->transid);
+		root->root_item.level = btrfs_header_level(root->node);
+		ret = btrfs_update_root(trans, root->fs_info->tree_root,
+					&root->root_key, &root->root_item);
+		if (ret) {
+			free_extent_buffer(c);
+			return ret;
+		}
+	}
+
+	free_extent_buffer(old);
+	root->node = c;
+	add_root_to_dirty_list(root);
+	return 0;
+}
+
+/*
+ * Recursively pin the extent of @eb and of the tree blocks below it.
+ *
+ * In a leaf, every ROOT_ITEM (except those of the extent tree and the two
+ * reloc trees) is followed: the root block it points to is read and pinned
+ * with @tree_root cleared.  In a node, each child is read and pinned
+ * recursively, except that below level 1 of a tree that is not the tree root
+ * the children are pinned without being read.
+ *
+ * Returns 0 on success (also when @eb was already pinned) or -EIO when a
+ * block cannot be read.
+ */
+static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
+				struct extent_buffer *eb, int tree_root)
+{
+	struct extent_buffer *tmp;
+	struct btrfs_root_item *ri;
+	struct btrfs_key key;
+	u64 bytenr;
+	u32 leafsize;
+	int level = btrfs_header_level(eb);
+	int nritems;
+	int ret;
+	int i;
+
+	/*
+	 * If we have pinned this block before, don't pin it again.
+	 * This can not only avoid forever loop with broken filesystem
+	 * but also give us some speedups.
+	 */
+	if (test_range_bit(&fs_info->pinned_extents, eb->start,
+			   eb->start + eb->len - 1, EXTENT_DIRTY, 0))
+		return 0;
+
+	btrfs_pin_extent(fs_info, eb->start, eb->len);
+
+	leafsize = btrfs_super_leafsize(fs_info->super_copy);
+	nritems = btrfs_header_nritems(eb);
+	for (i = 0; i < nritems; i++) {
+		if (level == 0) {
+			btrfs_item_key_to_cpu(eb, &key, i);
+			if (key.type != BTRFS_ROOT_ITEM_KEY)
+				continue;
+			/* Skip the extent root and reloc roots */
+			if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
+			    key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+			    key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+				continue;
+			ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
+			bytenr = btrfs_disk_root_bytenr(eb, ri);
+
+			/*
+			 * If at any point we start needing the real root we
+			 * will have to build a stump root for the root we are
+			 * in, but for now this doesn't actually use the root so
+			 * just pass in extent_root.
+			 */
+			tmp = read_tree_block(fs_info->extent_root, bytenr,
+					      leafsize, 0);
+			if (!extent_buffer_uptodate(tmp)) {
+				fprintf(stderr, "Error reading root block\n");
+				/* drop the buffer, as deal_root_from_list() does */
+				free_extent_buffer(tmp);
+				return -EIO;
+			}
+			ret = pin_down_tree_blocks(fs_info, tmp, 0);
+			free_extent_buffer(tmp);
+			if (ret)
+				return ret;
+		} else {
+			bytenr = btrfs_node_blockptr(eb, i);
+
+			/* If we aren't the tree root don't read the block */
+			if (level == 1 && !tree_root) {
+				btrfs_pin_extent(fs_info, bytenr, leafsize);
+				continue;
+			}
+
+			tmp = read_tree_block(fs_info->extent_root, bytenr,
+					      leafsize, 0);
+			if (!extent_buffer_uptodate(tmp)) {
+				fprintf(stderr, "Error reading tree block\n");
+				/* drop the buffer, as deal_root_from_list() does */
+				free_extent_buffer(tmp);
+				return -EIO;
+			}
+			ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
+			free_extent_buffer(tmp);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Pin the blocks of the chunk tree, then those reachable from the tree root.
+ * Returns 0 on success or the first error from pin_down_tree_blocks().
+ */
+static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
+{
+	int ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
+
+	if (ret == 0)
+		ret = pin_down_tree_blocks(fs_info, fs_info->tree_root->node,
+					   1);
+	return ret;
+}
+
+/*
+ * Recreate the in-memory block groups from the chunk tree.
+ *
+ * The available-allocation bits in @fs_info are cleared first, then one block
+ * group is added for every CHUNK_ITEM, its range is marked dirty in the free
+ * space cache, and finally every block group is flagged as cached.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or the
+ * negative error from searching / walking the chunk tree.
+ */
+static int reset_block_groups(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_block_group_cache *bg;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_chunk *chunk;
+	struct btrfs_key key;
+	u64 chunk_len;
+	u64 start;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = 0;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * We do this in case the block groups were screwed up and had alloc
+	 * bits that aren't actually set on the chunks.  This happens with
+	 * restored images every time and could happen in real life I guess.
+	 */
+	fs_info->avail_data_alloc_bits = 0;
+	fs_info->avail_metadata_alloc_bits = 0;
+	fs_info->avail_system_alloc_bits = 0;
+
+	/* First we need to create the in-memory block groups */
+	while (1) {
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(fs_info->chunk_root, path);
+			if (ret < 0)
+				goto out;
+			/* positive: there is no further leaf to walk */
+			if (ret)
+				break;
+		}
+
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+			chunk = btrfs_item_ptr(leaf, path->slots[0],
+					       struct btrfs_chunk);
+			chunk_len = btrfs_chunk_length(leaf, chunk);
+			btrfs_add_block_group(fs_info, 0,
+					      btrfs_chunk_type(leaf, chunk),
+					      key.objectid, key.offset,
+					      chunk_len);
+			/*
+			 * NOTE(review): other set_extent_dirty() callers in
+			 * this file pass 'start + size - 1' as the end; here
+			 * it is 'start + length' - confirm which is meant.
+			 */
+			set_extent_dirty(&fs_info->free_space_cache, key.offset,
+					 key.offset + chunk_len, GFP_NOFS);
+		}
+		path->slots[0]++;
+	}
+
+	for (start = 0;
+	     (bg = btrfs_lookup_first_block_group(fs_info, start)) != NULL;
+	     start = bg->key.objectid + bg->key.offset)
+		bg->cached = 1;
+
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Remove the state of an interrupted balance and re-create the data reloc
+ * tree.
+ *
+ * If a balance item exists in the tree root it is deleted together with every
+ * item whose objectid is BTRFS_TREE_RELOC_OBJECTID.  In either case the data
+ * reloc tree is then re-initialised with an empty root and a fresh root
+ * directory.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
+ * of the first tree operation that failed.
+ */
+static int reset_balance(struct btrfs_trans_handle *trans,
+			 struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *root = fs_info->tree_root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int del_slot = 0;
+	int del_nr = 0;
+	int found = 0;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = BTRFS_BALANCE_OBJECTID;
+	key.type = BTRFS_BALANCE_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		/* no balance item: only the data reloc tree needs work */
+		ret = 0;
+		goto reinit_data_reloc;
+	}
+
+	ret = btrfs_del_item(trans, root, path);
+	if (ret)
+		goto out;
+	btrfs_release_path(path);
+
+	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+	while (1) {
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			/* ran off the leaf without looking at any item */
+			if (!found)
+				break;
+
+			/* flush the pending run before leaving this leaf */
+			if (del_nr) {
+				ret = btrfs_del_items(trans, root, path,
+						      del_slot, del_nr);
+				del_nr = 0;
+				if (ret)
+					goto out;
+			}
+
+			/* search again just past the last key looked at */
+			key.offset++;
+			btrfs_release_path(path);
+
+			found = 0;
+			ret = btrfs_search_slot(trans, root, &key, path,
+						-1, 1);
+			if (ret < 0)
+				goto out;
+			continue;
+		}
+
+		found = 1;
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
+			break;
+		if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+			/* remember where the run of items to delete starts */
+			if (del_nr == 0)
+				del_slot = path->slots[0];
+			del_nr++;
+		}
+		path->slots[0]++;
+	}
+
+	if (del_nr) {
+		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+		if (ret)
+			goto out;
+	}
+	btrfs_release_path(path);
+
+reinit_data_reloc:
+	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	root = btrfs_read_fs_root(fs_info, &key);
+	if (IS_ERR(root)) {
+		fprintf(stderr, "Error reading data reloc tree\n");
+		ret = PTR_ERR(root);
+		goto out;
+	}
+	record_root_in_trans(trans, root);
+	ret = btrfs_fsck_reinit_root(trans, root, 0);
+	if (ret)
+		goto out;
+	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Rebuild the extent tree from scratch inside the given transaction.
+ *
+ * Metadata currently in use is pinned first, the in-memory block groups are
+ * dropped and recreated, the extent root is re-initialized, one block group
+ * item is inserted per in-memory block group and finally any pending balance
+ * is reset. Mixed block group filesystems are rejected with -EINVAL.
+ *
+ * Returns 0 on success or the error of the first failing step.
+ */
+static int reinit_extent_tree(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_block_group_cache *bg;
+	u64 search_from = 0;
+	int ret;
+
+	/*
+	 * Only the trees themselves are walked and pinned, leaves are not
+	 * inspected. Supporting mixed groups would additionally require
+	 * pinning the bytes of every file extent found in the fs roots, so
+	 * it is simply refused for now.
+	 */
+	if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
+		fprintf(stderr, "We don't support re-initing the extent tree "
+			"for mixed block groups yet, please notify a btrfs "
+			"developer you want to do this so they can add this "
+			"functionality.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Pin everything reachable from the trees other than the extent tree
+	 * so that new allocations cannot overwrite live metadata.
+	 */
+	ret = pin_metadata_blocks(fs_info);
+	if (ret) {
+		fprintf(stderr, "error pinning down used bytes\n");
+		return ret;
+	}
+
+	/* All block groups are recreated below, so throw the old ones away. */
+	btrfs_free_block_groups(fs_info);
+	ret = reset_block_groups(fs_info);
+	if (ret) {
+		fprintf(stderr, "error resetting the block groups\n");
+		return ret;
+	}
+
+	/* Allocation works again from here on: reinit the extent root. */
+	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
+	if (ret) {
+		fprintf(stderr, "extent root initialization failed\n");
+		/*
+		 * Ideally the transaction would be ended here, but progs
+		 * only knows how to commit, so just report the error.
+		 */
+		return ret;
+	}
+
+	/*
+	 * The in-memory block groups are set up and the metadata we care
+	 * about is pinned, so persist one item per block group.
+	 */
+	for (;;) {
+		bg = btrfs_lookup_first_block_group(fs_info, search_from);
+		if (!bg)
+			break;
+		search_from = bg->key.objectid + bg->key.offset;
+
+		ret = btrfs_insert_item(trans, fs_info->extent_root,
+					&bg->key, &bg->item, sizeof(bg->item));
+		if (ret) {
+			fprintf(stderr, "Error adding block group\n");
+			return ret;
+		}
+		btrfs_extent_post_op(trans, fs_info->extent_root);
+	}
+
+	ret = reset_balance(trans, fs_info);
+	if (ret)
+		fprintf(stderr, "error reseting the pending balance\n");
+
+	return ret;
+}
+
+/*
+ * Force a metadata block to be COWed again.
+ *
+ * The owner root of @eb is looked up from its header, a transaction is
+ * started on that root and a COW search down to the level of @eb is done
+ * for the first key stored in @eb. The transaction is then committed.
+ *
+ * @root: any root of the filesystem, only used to reach fs_info
+ * @eb:   the extent buffer to re-COW
+ *
+ * Returns the result of the search; if the search did not fail but the
+ * commit did, the commit error is returned instead of being dropped.
+ */
+static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
+{
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	int ret;
+	int commit_ret;
+
+	printf("Recowing metadata block %llu\n", eb->start);
+	key.objectid = btrfs_header_owner(eb);
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	/* from here on 'root' is the tree that owns eb */
+	root = btrfs_read_fs_root(root->fs_info, &key);
+	if (IS_ERR(root)) {
+		fprintf(stderr, "Couldn't find owner root %llu\n",
+			key.objectid);
+		return PTR_ERR(root);
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	/* stop the search at the level of eb; nodes and leaves store keys differently */
+	path->lowest_level = btrfs_header_level(eb);
+	if (path->lowest_level)
+		btrfs_node_key_to_cpu(eb, &key, 0);
+	else
+		btrfs_item_key_to_cpu(eb, &key, 0);
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	commit_ret = btrfs_commit_transaction(trans, root);
+	/* a search error takes precedence, otherwise report a failed commit */
+	if (commit_ret && ret >= 0)
+		ret = commit_ret;
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Delete one item that was recorded as bad.
+ *
+ * The root named by bad->root_id is read, a transaction is started on it
+ * and the item with key bad->key is removed. A missing item is not treated
+ * as an error. The transaction is committed in every case once it has been
+ * started.
+ *
+ * @root: any root of the filesystem, only used to reach fs_info
+ * @bad:  description of the item to delete (owning root id and key)
+ *
+ * Returns 0 on success (or if the item does not exist) and a negative
+ * error otherwise. A failing commit is reported unless an earlier error
+ * already occurred.
+ */
+static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
+{
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	int ret;
+	int commit_ret;
+
+	printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
+	       bad->key.type, bad->key.offset);
+	key.objectid = bad->root_id;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	/* from here on 'root' is the tree that contains the bad item */
+	root = btrfs_read_fs_root(root->fs_info, &key);
+	if (IS_ERR(root)) {
+		fprintf(stderr, "Couldn't find owner root %llu\n",
+			key.objectid);
+		return PTR_ERR(root);
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
+	if (ret) {
+		/* ret > 0: the item is already gone, nothing to do */
+		if (ret > 0)
+			ret = 0;
+		goto out;
+	}
+	ret = btrfs_del_item(trans, root, path);
+out:
+	commit_ret = btrfs_commit_transaction(trans, root);
+	/* do not hide a commit failure behind a successful delete */
+	if (commit_ret && !ret)
+		ret = commit_ret;
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Clear the log root pointer and log root level in the super block copy
+ * and commit that change in a transaction of its own.
+ *
+ * Returns the result of the commit, or the error from starting the
+ * transaction.
+ */
+static int zero_log_tree(struct btrfs_root *root)
+{
+	struct btrfs_trans_handle *trans = btrfs_start_transaction(root, 1);
+
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
+	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
+
+	return btrfs_commit_transaction(trans, root);
+}
+
+/*
+ * Read the byte range [start, start + len) piece by piece into @buf and
+ * add a checksum for every piece to @csum_root.
+ *
+ * Each read asks for csum_root->sectorsize bytes; the length handed back by
+ * read_extent_data() is what gets checksummed and what the cursor advances
+ * by. @buf is supplied by the caller.
+ *
+ * Returns 0 on success or the first non-zero result of a read or a
+ * checksum insertion.
+ */
+static int populate_csum(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *csum_root, char *buf, u64 start,
+			 u64 len)
+{
+	u64 done;
+	u64 chunk;
+	int ret = 0;
+
+	for (done = 0; done < len; done += chunk) {
+		chunk = csum_root->sectorsize;
+
+		ret = read_extent_data(csum_root, buf, start + done,
+				       &chunk, 0);
+		if (ret)
+			break;
+
+		ret = btrfs_csum_file_block(trans, csum_root, start + len,
+					    start + done, buf, chunk);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Walk every item of @cur_root and (re)create the checksums of all regular
+ * file extents in @csum_root.
+ *
+ * Items that are not EXTENT_DATA, and file extents that are not of type
+ * BTRFS_FILE_EXTENT_REG, are skipped. An -EEXIST from populate_csum() is
+ * not treated as an error.
+ *
+ * Returns 0 once all items were visited, or a negative error.
+ */
+static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *csum_root,
+				      struct btrfs_root *cur_root)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	char *buf = NULL;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	buf = malloc(cur_root->fs_info->csum_root->sectorsize);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* start from the smallest possible key */
+	key.objectid = 0;
+	key.offset = 0;
+	key.type = 0;
+
+	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	for (;;) {
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+		if (key.type == BTRFS_EXTENT_DATA_KEY) {
+			struct extent_buffer *node = path->nodes[0];
+			int slot = path->slots[0];
+			struct btrfs_file_extent_item *fi;
+
+			fi = btrfs_item_ptr(node, slot,
+					    struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(node, fi) ==
+			    BTRFS_FILE_EXTENT_REG) {
+				u64 start = btrfs_file_extent_disk_bytenr(node, fi);
+				u64 len = btrfs_file_extent_disk_num_bytes(node, fi);
+
+				ret = populate_csum(trans, csum_root, buf,
+						    start, len);
+				if (ret == -EEXIST)
+					ret = 0;
+				if (ret < 0)
+					goto out;
+			}
+		}
+
+		/*
+		 * TODO: if next leaf is corrupted, jump to nearest next valid
+		 * leaf.
+		 */
+		ret = btrfs_next_item(cur_root, path);
+		if (ret < 0)
+			goto out;
+		if (ret > 0) {
+			/* no more items: finished without error */
+			ret = 0;
+			goto out;
+		}
+	}
+
+out:
+	btrfs_free_path(path);
+	free(buf);
+	return ret;
+}
+
+/*
+ * Rebuild the checksum tree by walking every fs/subvolume tree.
+ *
+ * Root items in the tree root are iterated starting at
+ * BTRFS_FS_TREE_OBJECTID; for every root item for which is_fstree() holds,
+ * the tree is read and handed to fill_csum_tree_from_one_fs_root(). The
+ * walk stops at the first key whose objectid exceeds
+ * BTRFS_LAST_FREE_OBJECTID or when the tree root runs out of items.
+ *
+ * Returns 0 on success, -ENOENT if the root item of the fs tree is missing,
+ * or a negative error. A tree that cannot be read is reported as an error
+ * (it used to leave the function with a stale 0).
+ */
+static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *csum_root)
+{
+	struct btrfs_fs_info *fs_info = csum_root->fs_info;
+	struct btrfs_path *path;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *cur_root;
+	struct extent_buffer *node;
+	struct btrfs_key key;
+	int slot = 0;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = BTRFS_FS_TREE_OBJECTID;
+	key.offset = 0;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+
+	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (1) {
+		node = path->nodes[0];
+		slot = path->slots[0];
+		btrfs_item_key_to_cpu(node, &key, slot);
+		if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
+			goto out;
+		if (key.type != BTRFS_ROOT_ITEM_KEY)
+			goto next;
+		if (!is_fstree(key.objectid))
+			goto next;
+		/* the lookup below is done with offset (u64)-1, not the item's own offset */
+		key.offset = (u64)-1;
+
+		cur_root = btrfs_read_fs_root(fs_info, &key);
+		if (IS_ERR(cur_root) || !cur_root) {
+			fprintf(stderr, "Fail to read fs/subvol tree: %llu\n",
+				key.objectid);
+			/* ret is still 0 here, so set an explicit error */
+			ret = cur_root ? PTR_ERR(cur_root) : -ENOENT;
+			goto out;
+		}
+		ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
+						      cur_root);
+		if (ret < 0)
+			goto out;
+next:
+		ret = btrfs_next_item(tree_root, path);
+		if (ret > 0) {
+			/* no more items in the tree root: done */
+			ret = 0;
+			goto out;
+		}
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Rebuild the checksum tree from the extent tree.
+ *
+ * Every EXTENT_ITEM in the extent root that carries
+ * BTRFS_EXTENT_FLAG_DATA is passed to populate_csum(), using the item key's
+ * objectid as start and its offset as length.
+ *
+ * Returns 0 when the whole extent tree was walked, otherwise the first
+ * error encountered.
+ */
+static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *csum_root)
+{
+	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	char *buf = NULL;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = 0;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	buf = malloc(csum_root->sectorsize);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (;;) {
+		struct extent_buffer *leaf;
+		struct btrfs_extent_item *ei;
+
+		/* ran off the current leaf: move on or finish */
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(extent_root, path);
+			if (ret < 0)
+				break;
+			if (ret) {
+				ret = 0;
+				break;
+			}
+		}
+		leaf = path->nodes[0];
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+			ei = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_extent_item);
+			if (btrfs_extent_flags(leaf, ei) &
+			    BTRFS_EXTENT_FLAG_DATA) {
+				ret = populate_csum(trans, csum_root, buf,
+						    key.objectid, key.offset);
+				if (ret)
+					break;
+			}
+		}
+		path->slots[0]++;
+	}
+
+out:
+	btrfs_free_path(path);
+	free(buf);
+	return ret;
+}
+
+/*
+ * Recompute all data checksums and store them in the csum tree.
+ *
+ * After an extent tree re-init the extent tree holds no extent info, so it
+ * cannot be used as the source. In that case the caller sets
+ * @search_fs_tree and the fs/subvolume trees are walked instead; otherwise
+ * the extent tree is used.
+ */
+static int fill_csum_tree(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *csum_root,
+			  int search_fs_tree)
+{
+	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
+			      : fill_csum_tree_from_extent(trans, csum_root);
+}
+
+/*
+ * What the extent tree tells us about the root node of one tree. Entries
+ * are kept in roots_info_cache, keyed by root id (cache_extent.start) with
+ * a size of 1.
+ */
+struct root_item_info {
+ /* level of the root */
+ u8 level;
+ /* number of nodes at this level, must be 1 for a root */
+ int node_count;
+ /* bytenr of the tree block found at 'level' */
+ u64 bytenr;
+ /* generation taken from that block's extent item */
+ u64 gen;
+ /* linkage into roots_info_cache */
+ struct cache_extent cache_extent;
+};
+
+/* Allocated by build_roots_info_cache(), released by free_roots_info_cache(). */
+static struct cache_tree *roots_info_cache = NULL;
+
+/*
+ * Release every root_item_info stored in roots_info_cache, then the cache
+ * tree itself, and reset the global pointer. Does nothing when the cache
+ * was never allocated.
+ */
+static void free_roots_info_cache(void)
+{
+	struct cache_extent *entry;
+
+	if (!roots_info_cache)
+		return;
+
+	while (!cache_tree_empty(roots_info_cache) &&
+	       (entry = first_cache_extent(roots_info_cache)) != NULL) {
+		remove_cache_extent(roots_info_cache, entry);
+		free(container_of(entry, struct root_item_info, cache_extent));
+	}
+
+	free(roots_info_cache);
+	roots_info_cache = NULL;
+}
+
+/*
+ * Scan the extent tree and record, for every root id seen in a tree block
+ * backref, the highest level tree block that references it.
+ *
+ * Results go into the global roots_info_cache (allocated here if needed):
+ * one root_item_info per root id holding the highest level seen, the bytenr
+ * and generation of the block at that level, and how many blocks were found
+ * at that level.
+ *
+ * Returns 0 on success or a negative error. The cache is not released here
+ * on failure.
+ */
+static int build_roots_info_cache(struct btrfs_fs_info *info)
+{
+ int ret = 0;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct btrfs_path *path;
+
+ if (!roots_info_cache) {
+ roots_info_cache = malloc(sizeof(*roots_info_cache));
+ if (!roots_info_cache)
+ return -ENOMEM;
+ cache_tree_init(roots_info_cache);
+ }
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ leaf = path->nodes[0];
+
+ while (1) {
+ struct btrfs_key found_key;
+ struct btrfs_extent_item *ei;
+ struct btrfs_extent_inline_ref *iref;
+ int slot = path->slots[0];
+ int type;
+ u64 flags;
+ u64 root_id;
+ u8 level;
+ struct cache_extent *entry;
+ struct root_item_info *rii;
+
+ /* past the last item of this leaf: advance, or stop at the end of the tree */
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(info->extent_root, path);
+ if (ret < 0) {
+ break;
+ } else if (ret) {
+ ret = 0;
+ break;
+ }
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
+ found_key.type != BTRFS_METADATA_ITEM_KEY)
+ goto next;
+
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+ flags = btrfs_extent_flags(leaf, ei);
+
+ /* an EXTENT_ITEM without the tree block flag is skipped */
+ if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
+ !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
+ goto next;
+
+ /*
+ * METADATA_ITEM: the level is taken from the key offset and the
+ * inline ref follows the extent item directly. EXTENT_ITEM: a
+ * btrfs_tree_block_info sits in between and carries the level.
+ */
+ if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
+ iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ level = found_key.offset;
+ } else {
+ struct btrfs_tree_block_info *binfo;
+
+ binfo = (struct btrfs_tree_block_info *)(ei + 1);
+ iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
+ level = btrfs_tree_block_level(leaf, binfo);
+ }
+
+ /*
+ * For a root extent, it must be of the following type and the
+ * first (and only one) iref in the item.
+ */
+ type = btrfs_extent_inline_ref_type(leaf, iref);
+ if (type != BTRFS_TREE_BLOCK_REF_KEY)
+ goto next;
+
+ root_id = btrfs_extent_inline_ref_offset(leaf, iref);
+ entry = lookup_cache_extent(roots_info_cache, root_id, 1);
+ if (!entry) {
+ rii = malloc(sizeof(struct root_item_info));
+ if (!rii) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ rii->cache_extent.start = root_id;
+ rii->cache_extent.size = 1;
+ /* (u8)-1 marks "no level recorded yet" */
+ rii->level = (u8)-1;
+ entry = &rii->cache_extent;
+ ret = insert_cache_extent(roots_info_cache, entry);
+ ASSERT(ret == 0);
+ } else {
+ rii = container_of(entry, struct root_item_info,
+ cache_extent);
+ }
+
+ ASSERT(rii->cache_extent.start == root_id);
+ ASSERT(rii->cache_extent.size == 1);
+
+ /* keep the highest level; count further blocks found at that level */
+ if (level > rii->level || rii->level == (u8)-1) {
+ rii->level = level;
+ rii->bytenr = found_key.objectid;
+ rii->gen = btrfs_extent_generation(leaf, ei);
+ rii->node_count = 1;
+ } else if (level == rii->level) {
+ rii->node_count++;
+ }
+next:
+ path->slots[0]++;
+ }
+
+out:
+ btrfs_free_path(path);
+
+ return ret;
+}
+
+/*
+ * Compare the root item at the current position of @path with what
+ * roots_info_cache recorded for that root and, unless @read_only_mode is
+ * set, rewrite bytenr, level and generation of the item in place.
+ *
+ * Returns 0 if the root item already matches, 1 if it differs (and was
+ * updated when not in read-only mode), -ENOENT if no usable information
+ * exists for the root and -EINVAL if the root item has a newer generation
+ * than the root node that was found.
+ */
+static int maybe_repair_root_item(struct btrfs_fs_info *info,
+				  struct btrfs_path *path,
+				  const struct btrfs_key *root_key,
+				  const int read_only_mode)
+{
+	const u64 root_id = root_key->objectid;
+	struct extent_buffer *leaf = path->nodes[0];
+	struct cache_extent *entry;
+	struct root_item_info *rii;
+	struct btrfs_root_item ri;
+	unsigned long offset;
+
+	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
+	if (!entry) {
+		fprintf(stderr,
+			"Error: could not find extent items for root %llu\n",
+			root_key->objectid);
+		return -ENOENT;
+	}
+
+	rii = container_of(entry, struct root_item_info, cache_extent);
+	ASSERT(rii->cache_extent.start == root_id);
+	ASSERT(rii->cache_extent.size == 1);
+
+	/* a real root has exactly one block at its top level */
+	if (rii->node_count != 1) {
+		fprintf(stderr,
+			"Error: could not find btree root extent for root %llu\n",
+			root_id);
+		return -ENOENT;
+	}
+
+	offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
+	read_extent_buffer(leaf, &ri, offset, sizeof(ri));
+
+	/* nothing to do when the item agrees with the extent tree */
+	if (btrfs_root_bytenr(&ri) == rii->bytenr &&
+	    btrfs_root_level(&ri) == rii->level &&
+	    btrfs_root_generation(&ri) == rii->gen)
+		return 0;
+
+	/*
+	 * In repair mode a read-only call is only a probe: the caller opens
+	 * a transaction and calls again for the same root item right after,
+	 * so the message is printed on that second call only.
+	 */
+	if (!read_only_mode || !repair)
+		fprintf(stderr,
+			"%sroot item for root %llu,"
+			" current bytenr %llu, current gen %llu, current level %u,"
+			" new bytenr %llu, new gen %llu, new level %u\n",
+			(read_only_mode ? "" : "fixing "),
+			root_id,
+			btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
+			btrfs_root_level(&ri),
+			rii->bytenr, rii->gen, rii->level);
+
+	if (btrfs_root_generation(&ri) > rii->gen) {
+		fprintf(stderr,
+			"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
+			root_id, btrfs_root_generation(&ri), rii->gen);
+		return -EINVAL;
+	}
+
+	if (!read_only_mode) {
+		btrfs_set_root_bytenr(&ri, rii->bytenr);
+		btrfs_set_root_level(&ri, rii->level);
+		btrfs_set_root_generation(&ri, rii->gen);
+		write_extent_buffer(leaf, &ri, offset, sizeof(ri));
+	}
+
+	return 1;
+}
+
+/*
+ * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
+ * caused read-only snapshots to be corrupted if they were created at a moment
+ * when the source subvolume/snapshot had orphan items. The issue was that the
+ * on-disk root items became incorrect, referring to the pre orphan cleanup root
+ * node instead of the post orphan cleanup root node.
+ * So this function, and its callees, just detects and fixes those cases. Even
+ * though the regression was for read-only snapshots, this function applies to
+ * any snapshot/subvolume root.
+ * This must be run before any other repair code - otherwise the other repair
+ * code may delete or modify backrefs in the extent tree, for example, which
+ * would result in an inconsistent fs after repairing the root items.
+ *
+ * Returns a negative error on failure, otherwise the number of root items
+ * that were found to be outdated (and fixed, when the global 'repair' is set).
+ */
+static int repair_root_items(struct btrfs_fs_info *info)
+{
+ struct btrfs_path *path = NULL;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct btrfs_trans_handle *trans = NULL;
+ int ret = 0;
+ int bad_roots = 0;
+ /* set when the next pass over the current position must run in a transaction */
+ int need_trans = 0;
+
+ ret = build_roots_info_cache(info);
+ if (ret)
+ goto out;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = 0;
+
+again:
+ /*
+ * Avoid opening and committing transactions if a leaf doesn't have
+ * any root items that need to be fixed, so that we avoid rotating
+ * backup roots unnecessarily.
+ */
+ if (need_trans) {
+ trans = btrfs_start_transaction(info->tree_root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+ }
+
+ /* COW is requested only when a transaction is open */
+ ret = btrfs_search_slot(trans, info->tree_root, &key, path,
+ 0, trans ? 1 : 0);
+ if (ret < 0)
+ goto out;
+ leaf = path->nodes[0];
+
+ while (1) {
+ struct btrfs_key found_key;
+
+ /*
+ * End of the leaf: remember where to continue, commit a
+ * transaction that was opened for this leaf and restart the
+ * search, or stop when there are no more keys.
+ */
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ int no_more_keys = find_next_key(path, &key);
+
+ btrfs_release_path(path);
+ if (trans) {
+ ret = btrfs_commit_transaction(trans,
+ info->tree_root);
+ trans = NULL;
+ if (ret < 0)
+ goto out;
+ }
+ need_trans = 0;
+ if (no_more_keys)
+ break;
+ goto again;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.type != BTRFS_ROOT_ITEM_KEY)
+ goto next;
+ if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+ goto next;
+
+ /* without a transaction this is a read-only probe */
+ ret = maybe_repair_root_item(info, path, &found_key,
+ trans ? 0 : 1);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ /*
+ * An outdated item was found by a probe in repair mode:
+ * restart from this key inside a transaction so that it
+ * can be rewritten. It is counted on that second visit.
+ */
+ if (!trans && repair) {
+ need_trans = 1;
+ key = found_key;
+ btrfs_release_path(path);
+ goto again;
+ }
+ bad_roots++;
+ }
+next:
+ path->slots[0]++;
+ }
+ ret = 0;
+out:
+ free_roots_info_cache();
+ btrfs_free_path(path);
+ /* a transaction still open here is committed; its result is not checked */
+ if (trans)
+ btrfs_commit_transaction(trans, info->tree_root);
+ if (ret < 0)
+ return ret;
+
+ return bad_roots;
+}
+
+/*
+ * Help text of "btrfs check": synopsis, one-line summary, long description
+ * and the option list, terminated by NULL.
+ */
+const char * const cmd_check_usage[] = {
+	"btrfs check [options] <device>",
+	"Check structural integrity of a filesystem (unmounted).",
+	"Check structural integrity of an unmounted filesystem. Verify internal",
+	"trees' consistency and item connectivity. In the repair mode try to",
+	"fix the problems found.",
+	"WARNING: the repair mode is considered dangerous",
+	"",
+	"-s|--super <superblock> use this superblock copy",
+	"-b|--backup use the backup root copy",
+	"--repair try to repair the filesystem",
+	"--readonly run in read-only mode (default)",
+	"--init-csum-tree create a new CRC tree",
+	"--init-extent-tree create a new extent tree",
+	"--check-data-csum verify checksums of data blocks",
+	"-Q|--qgroup-report print a report on qgroup consistency",
+	"-E|--subvol-extents <subvolid>",
+	" print subvolume extents and sharing state",
+	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
+	"-p|--progress indicate progress",
+	NULL
+};
+
+/*
+ * Entry point of "btrfs check".
+ *
+ * Parses the command line, refuses to run on a mounted device, opens the
+ * filesystem and then either prints a qgroup report, prints the extent
+ * state of one subvolume, or runs the full check: optional re-init of the
+ * extent and/or csum tree, extents and chunks, root items, free space
+ * cache/tree, fs roots, csums, root refs and quota groups.
+ *
+ * @argc/@argv: command line of the subcommand; exactly one non-option
+ *              argument (the device) is expected.
+ *
+ * Returns 0 if nothing wrong was reported, a non-zero value otherwise.
+ * Some usage errors terminate the process via exit(1).
+ */
+int cmd_check(int argc, char **argv)
+{
+	struct cache_tree root_cache;
+	struct btrfs_root *root;
+	struct btrfs_fs_info *info;
+	u64 bytenr = 0;
+	u64 subvolid = 0;
+	u64 tree_root_bytenr = 0;
+	char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+	int ret;
+	u64 num;
+	int init_csum_tree = 0;
+	int readonly = 0;
+	int qgroup_report = 0;
+	enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
+
+	while(1) {
+		int c;
+		enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
+			OPT_CHECK_CSUM, OPT_READONLY };
+		static const struct option long_options[] = {
+			{ "super", required_argument, NULL, 's' },
+			{ "repair", no_argument, NULL, OPT_REPAIR },
+			{ "readonly", no_argument, NULL, OPT_READONLY },
+			{ "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
+			{ "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
+			{ "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
+			{ "backup", no_argument, NULL, 'b' },
+			{ "subvol-extents", required_argument, NULL, 'E' },
+			{ "qgroup-report", no_argument, NULL, 'Q' },
+			{ "tree-root", required_argument, NULL, 'r' },
+			{ "progress", no_argument, NULL, 'p' },
+			{ NULL, 0, NULL, 0}
+		};
+
+		c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
+		if (c < 0)
+			break;
+		switch(c) {
+			case 'a': /* ignored */ break;
+			case 'b':
+				ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
+				break;
+			case 's':
+				num = arg_strtou64(optarg);
+				if (num >= BTRFS_SUPER_MIRROR_MAX) {
+					fprintf(stderr,
+						"ERROR: super mirror should be less than: %d\n",
+						BTRFS_SUPER_MIRROR_MAX);
+					exit(1);
+				}
+				bytenr = btrfs_sb_offset(((int)num));
+				printf("using SB copy %llu, bytenr %llu\n", num,
+				       (unsigned long long)bytenr);
+				break;
+			case 'Q':
+				qgroup_report = 1;
+				break;
+			case 'E':
+				subvolid = arg_strtou64(optarg);
+				break;
+			case 'r':
+				tree_root_bytenr = arg_strtou64(optarg);
+				break;
+			case 'p':
+				ctx.progress_enabled = true;
+				break;
+			case '?':
+			case 'h':
+				/*
+				 * NOTE(review): no break here; presumably
+				 * usage() does not return - confirm.
+				 */
+				usage(cmd_check_usage);
+			case OPT_REPAIR:
+				printf("enabling repair mode\n");
+				repair = 1;
+				ctree_flags |= OPEN_CTREE_WRITES;
+				break;
+			case OPT_READONLY:
+				readonly = 1;
+				break;
+			case OPT_INIT_CSUM:
+				printf("Creating a new CRC tree\n");
+				init_csum_tree = 1;
+				repair = 1;
+				ctree_flags |= OPEN_CTREE_WRITES;
+				break;
+			case OPT_INIT_EXTENT:
+				init_extent_tree = 1;
+				ctree_flags |= (OPEN_CTREE_WRITES |
+						OPEN_CTREE_NO_BLOCK_GROUPS);
+				repair = 1;
+				break;
+			case OPT_CHECK_CSUM:
+				check_data_csum = 1;
+				break;
+		}
+	}
+	argc = argc - optind;
+
+	if (check_argc_exact(argc, 1))
+		usage(cmd_check_usage);
+
+	if (ctx.progress_enabled) {
+		ctx.tp = TASK_NOTHING;
+		ctx.info = task_init(print_status_check, print_status_return, &ctx);
+	}
+
+	/* This check is the only reason for --readonly to exist */
+	if (readonly && repair) {
+		fprintf(stderr, "Repair options are not compatible with --readonly\n");
+		exit(1);
+	}
+
+	radix_tree_init();
+	cache_tree_init(&root_cache);
+
+	if((ret = check_mounted(argv[optind])) < 0) {
+		fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
+		goto err_out;
+	} else if(ret) {
+		fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
+		ret = -EBUSY;
+		goto err_out;
+	}
+
+	/* only allow partial opening under repair mode */
+	if (repair)
+		ctree_flags |= OPEN_CTREE_PARTIAL;
+
+	info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
+				  ctree_flags);
+	if (!info) {
+		fprintf(stderr, "Couldn't open file system\n");
+		ret = -EIO;
+		goto err_out;
+	}
+
+	global_info = info;
+	root = info->fs_root;
+
+	/*
+	 * repair mode will force us to commit transaction which
+	 * will make us fail to load log tree when mounting.
+	 */
+	if (repair && btrfs_super_log_root(info->super_copy)) {
+		ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
+		if (!ret) {
+			ret = 1;
+			goto close_out;
+		}
+		ret = zero_log_tree(root);
+		if (ret) {
+			fprintf(stderr, "fail to zero log tree\n");
+			goto close_out;
+		}
+	}
+
+	uuid_unparse(info->super_copy->fsid, uuidbuf);
+	if (qgroup_report) {
+		printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
+		       uuidbuf);
+		ret = qgroup_verify_all(info);
+		if (ret == 0)
+			print_qgroup_report(1);
+		goto close_out;
+	}
+	if (subvolid) {
+		printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
+		       subvolid, argv[optind], uuidbuf);
+		ret = print_extent_state(info, subvolid);
+		goto close_out;
+	}
+	printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
+
+	if (!extent_buffer_uptodate(info->tree_root->node) ||
+	    !extent_buffer_uptodate(info->dev_root->node) ||
+	    !extent_buffer_uptodate(info->chunk_root->node)) {
+		fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
+		ret = -EIO;
+		goto close_out;
+	}
+
+	if (init_extent_tree || init_csum_tree) {
+		struct btrfs_trans_handle *trans;
+
+		trans = btrfs_start_transaction(info->extent_root, 0);
+		if (IS_ERR(trans)) {
+			fprintf(stderr, "Error starting transaction\n");
+			ret = PTR_ERR(trans);
+			goto close_out;
+		}
+
+		if (init_extent_tree) {
+			printf("Creating a new extent tree\n");
+			ret = reinit_extent_tree(trans, info);
+			if (ret)
+				goto close_out;
+		}
+
+		if (init_csum_tree) {
+			fprintf(stderr, "Reinit crc root\n");
+			ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
+			if (ret) {
+				fprintf(stderr, "crc root initialization failed\n");
+				ret = -EIO;
+				goto close_out;
+			}
+
+			ret = fill_csum_tree(trans, info->csum_root,
+					     init_extent_tree);
+			if (ret) {
+				fprintf(stderr, "crc refilling failed\n");
+				/*
+				 * Leave through the common exit path so that
+				 * the filesystem is closed and the progress
+				 * task is torn down, like the failures above.
+				 */
+				ret = -EIO;
+				goto close_out;
+			}
+		}
+		/*
+		 * Ok now we commit and run the normal fsck, which will add
+		 * extent entries for all of the items it finds.
+		 */
+		ret = btrfs_commit_transaction(trans, info->extent_root);
+		if (ret)
+			goto close_out;
+	}
+	if (!extent_buffer_uptodate(info->extent_root->node)) {
+		fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
+		ret = -EIO;
+		goto close_out;
+	}
+	if (!extent_buffer_uptodate(info->csum_root->node)) {
+		fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
+		ret = -EIO;
+		goto close_out;
+	}
+
+	if (!ctx.progress_enabled)
+		fprintf(stderr, "checking extents\n");
+	ret = check_chunks_and_extents(root);
+	/* only reported here: ret is overwritten by repair_root_items() below */
+	if (ret)
+		fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
+
+	ret = repair_root_items(info);
+	if (ret < 0)
+		goto close_out;
+	if (repair) {
+		fprintf(stderr, "Fixed %d roots.\n", ret);
+		ret = 0;
+	} else if (ret > 0) {
+		fprintf(stderr,
+			"Found %d roots with an outdated root item.\n",
+			ret);
+		fprintf(stderr,
+			"Please run a filesystem check with the option --repair to fix them.\n");
+		ret = 1;
+		goto close_out;
+	}
+
+	if (!ctx.progress_enabled) {
+		if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
+			fprintf(stderr, "checking free space tree\n");
+		else
+			fprintf(stderr, "checking free space cache\n");
+	}
+	ret = check_space_cache(root);
+	if (ret)
+		goto out;
+
+	/*
+	 * We used to have to have these hole extents in between our real
+	 * extents so if we don't have this flag set we need to make sure there
+	 * are no gaps in the file extents for inodes, otherwise we can just
+	 * ignore it when this happens.
+	 */
+	no_holes = btrfs_fs_incompat(root->fs_info,
+				     BTRFS_FEATURE_INCOMPAT_NO_HOLES);
+	if (!ctx.progress_enabled)
+		fprintf(stderr, "checking fs roots\n");
+	ret = check_fs_roots(root, &root_cache);
+	if (ret)
+		goto out;
+
+	fprintf(stderr, "checking csums\n");
+	ret = check_csums(root);
+	if (ret)
+		goto out;
+
+	fprintf(stderr, "checking root refs\n");
+	ret = check_root_refs(root, &root_cache);
+	if (ret)
+		goto out;
+
+	/* in repair mode, re-COW every block queued on recow_ebs */
+	while (repair && !list_empty(&root->fs_info->recow_ebs)) {
+		struct extent_buffer *eb;
+
+		eb = list_first_entry(&root->fs_info->recow_ebs,
+				      struct extent_buffer, recow);
+		list_del_init(&eb->recow);
+		ret = recow_extent_buffer(root, eb);
+		if (ret)
+			break;
+	}
+
+	/* the list is always drained; items are only deleted in repair mode */
+	while (!list_empty(&delete_items)) {
+		struct bad_item *bad;
+
+		bad = list_first_entry(&delete_items, struct bad_item, list);
+		list_del_init(&bad->list);
+		if (repair)
+			ret = delete_bad_item(root, bad);
+		free(bad);
+	}
+
+	if (info->quota_enabled) {
+		int err;
+		fprintf(stderr, "checking quota groups\n");
+		err = qgroup_verify_all(info);
+		if (err)
+			goto out;
+	}
+
+	/* anything still queued here was not re-COWed (not in repair mode, or a recow failed) */
+	if (!list_empty(&root->fs_info->recow_ebs)) {
+		fprintf(stderr, "Transid errors in file system\n");
+		ret = 1;
+	}
+out:
+	print_qgroup_report(0);
+	if (found_old_backref) { /*
+				  * there was a disk format change when mixed
+				  * backref was in testing tree. The old format
+				  * existed about one week.
+				  */
+		printf("\n * Found old mixed backref format. "
+		       "The old format is not supported! *"
+		       "\n * Please mount the FS in readonly mode, "
+		       "backup data and re-format the FS. *\n\n");
+		ret = 1;
+	}
+	printf("found %llu bytes used err is %d\n",
+	       (unsigned long long)bytes_used, ret);
+	printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
+	printf("total tree bytes: %llu\n",
+	       (unsigned long long)total_btree_bytes);
+	printf("total fs tree bytes: %llu\n",
+	       (unsigned long long)total_fs_tree_bytes);
+	printf("total extent tree bytes: %llu\n",
+	       (unsigned long long)total_extent_tree_bytes);
+	printf("btree space waste bytes: %llu\n",
+	       (unsigned long long)btree_space_waste);
+	printf("file data blocks allocated: %llu\n referenced %llu\n",
+	       (unsigned long long)data_bytes_allocated,
+	       (unsigned long long)data_bytes_referenced);
+
+	free_root_recs_tree(&root_cache);
+close_out:
+	close_ctree(root);
+err_out:
+	if (ctx.progress_enabled)
+		task_deinit(ctx.info);
+
+	return ret;
+}
diff --git a/cmds-device.c b/cmds-device.c
new file mode 100644
index 00000000..50c1c5df
--- /dev/null
+++ b/cmds-device.c
@@ -0,0 +1,543 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+#include "volumes.h"
+#include "cmds-fi-usage.h"
+
+#include "commands.h"
+
+ /* Synopsis line shown for the "btrfs device" command group; NULL-terminated. */
+ static const char * const device_cmd_group_usage[] = {
+ "btrfs device <command> [<args>]",
+ NULL
+ };
+
+ /* Help text of "btrfs device add": synopsis, summary, then one line per option. */
+ static const char * const cmd_device_add_usage[] = {
+ 	"btrfs device add [options] <device> [<device>...] <path>",
+ 	"Add a device to a filesystem",
+ 	"-K|--nodiscard do not perform whole device TRIM",
+ 	"-f|--force force overwrite existing filesystem on the disk",
+ 	NULL
+ };
+
+ /*
+  * Entry point of "btrfs device add".
+  *
+  * Accepts -K/--nodiscard and -f/--force.  The last argument is the path that
+  * is opened with btrfs_open_dir(); every argument between the options and
+  * that path is a device.  Each device is checked with test_dev_for_mkfs(),
+  * prepared with btrfs_prepare_device() and passed to the kernel through
+  * BTRFS_IOC_ADD_DEV.
+  *
+  * A device that fails the mkfs check or cannot be opened is skipped and the
+  * remaining ones are still processed; a failure while preparing a device or
+  * canonicalizing its path stops the run.
+  *
+  * Returns 0 when no step failed, 1 otherwise.
+  */
+ static int cmd_device_add(int argc, char **argv)
+ {
+ 	static const struct option long_options[] = {
+ 		{ "nodiscard", optional_argument, NULL, 'K'},
+ 		{ "force", no_argument, NULL, 'f'},
+ 		{ NULL, 0, NULL, 0}
+ 	};
+ 	DIR *dirstream = NULL;
+ 	char *mount_path;
+ 	int failures = 0;
+ 	int discard = 1;
+ 	int force = 0;
+ 	int path_idx;
+ 	int mnt_fd;
+ 	int idx;
+
+ 	for (;;) {
+ 		int opt = getopt_long(argc, argv, "Kf", long_options, NULL);
+
+ 		if (opt < 0)
+ 			break;
+ 		if (opt == 'K')
+ 			discard = 0;
+ 		else if (opt == 'f')
+ 			force = 1;
+ 		else
+ 			usage(cmd_device_add_usage);
+ 	}
+
+ 	/* at least one device plus the filesystem path must remain */
+ 	if (check_argc_min(argc - optind, 2))
+ 		usage(cmd_device_add_usage);
+
+ 	path_idx = argc - 1;
+ 	mount_path = argv[path_idx];
+
+ 	mnt_fd = btrfs_open_dir(mount_path, &dirstream, 1);
+ 	if (mnt_fd < 0)
+ 		return 1;
+
+ 	for (idx = optind; idx < path_idx; idx++) {
+ 		struct btrfs_ioctl_vol_args ioctl_args;
+ 		u64 dev_block_count = 0;
+ 		char *canon;
+ 		int dev_fd;
+ 		int prep;
+
+ 		if (test_dev_for_mkfs(argv[idx], force)) {
+ 			failures++;
+ 			continue;
+ 		}
+
+ 		dev_fd = open(argv[idx], O_RDWR);
+ 		if (dev_fd < 0) {
+ 			error("unable to open device '%s'", argv[idx]);
+ 			failures++;
+ 			continue;
+ 		}
+
+ 		/* the descriptor is only needed while the device is prepared */
+ 		prep = btrfs_prepare_device(dev_fd, argv[idx], 1,
+ 				&dev_block_count, 0, discard);
+ 		close(dev_fd);
+ 		if (prep) {
+ 			failures++;
+ 			break;
+ 		}
+
+ 		canon = canonicalize_path(argv[idx]);
+ 		if (!canon) {
+ 			error("could not canonicalize pathname '%s': %s",
+ 				argv[idx], strerror(errno));
+ 			failures++;
+ 			break;
+ 		}
+
+ 		memset(&ioctl_args, 0, sizeof(ioctl_args));
+ 		strncpy_null(ioctl_args.name, canon);
+ 		if (ioctl(mnt_fd, BTRFS_IOC_ADD_DEV, &ioctl_args) < 0) {
+ 			error("error adding device '%s': %s",
+ 				canon, strerror(errno));
+ 			failures++;
+ 		}
+ 		free(canon);
+ 	}
+
+ 	close_file_or_dir(mnt_fd, dirstream);
+ 	return !!failures;
+ }
+
+ /*
+  * Shared body of "btrfs device remove" and "btrfs device delete".
+  *
+  * The last argument is the filesystem path; every argument before it (from
+  * argv[1] on) must be a block device or the literal word "missing" and is
+  * submitted through BTRFS_IOC_RM_DEV.  Failures are reported per device and
+  * the remaining devices are still processed.
+  *
+  * @usagestr is the help text printed when fewer than three arguments are
+  * given.
+  *
+  * Returns 0 when every device was removed, 1 otherwise.
+  */
+ static int _cmd_device_remove(int argc, char **argv,
+ 		const char * const *usagestr)
+ {
+ 	DIR *dirstream = NULL;
+ 	int failed = 0;
+ 	int mnt_fd;
+ 	int idx;
+
+ 	if (check_argc_min(argc, 3))
+ 		usage(usagestr);
+
+ 	mnt_fd = btrfs_open_dir(argv[argc - 1], &dirstream, 1);
+ 	if (mnt_fd < 0)
+ 		return 1;
+
+ 	for (idx = 1; idx < argc - 1; idx++) {
+ 		struct btrfs_ioctl_vol_args arg;
+ 		char *dev = argv[idx];
+ 		const char *reason;
+ 		int res;
+
+ 		if (is_block_device(dev) != 1 && strcmp(dev, "missing") != 0) {
+ 			error("not a block device: %s", dev);
+ 			failed++;
+ 			continue;
+ 		}
+
+ 		memset(&arg, 0, sizeof(arg));
+ 		strncpy_null(arg.name, dev);
+ 		res = ioctl(mnt_fd, BTRFS_IOC_RM_DEV, &arg);
+ 		if (res == 0)
+ 			continue;
+
+ 		/*
+ 		 * Positive values are from BTRFS_ERROR_DEV_*,
+ 		 * anything else is a generic errno failure.
+ 		 */
+ 		reason = (res > 0) ? btrfs_err_str(res) : strerror(errno);
+ 		error("error removing device '%s': %s",
+ 			dev, reason);
+ 		failed++;
+ 	}
+
+ 	close_file_or_dir(mnt_fd, dirstream);
+ 	return !!failed;
+ }
+
+ /* Help text of "btrfs device remove": synopsis and summary, NULL-terminated. */
+ static const char * const cmd_device_remove_usage[] = {
+ "btrfs device remove <device> [<device>...] <path>",
+ "Remove a device from a filesystem",
+ NULL
+ };
+
+ /*
+ * Entry point of "btrfs device remove": passes the arguments unchanged to
+ * _cmd_device_remove() together with the "remove" help text and returns
+ * its result.
+ */
+ static int cmd_device_remove(int argc, char **argv)
+ {
+ return _cmd_device_remove(argc, argv, cmd_device_remove_usage);
+ }
+
+ /* Help text of "btrfs device delete": synopsis and summary, NULL-terminated. */
+ static const char * const cmd_device_delete_usage[] = {
+ "btrfs device delete <device> [<device>...] <path>",
+ "Remove a device from a filesystem",
+ NULL
+ };
+
+ /*
+ * Entry point of "btrfs device delete": same implementation as
+ * "btrfs device remove" (_cmd_device_remove()), only the help text differs.
+ */
+ static int cmd_device_delete(int argc, char **argv)
+ {
+ return _cmd_device_remove(argc, argv, cmd_device_delete_usage);
+ }
+
+ /* Help text of "btrfs device scan": synopsis, summary and the one option. */
+ static const char * const cmd_device_scan_usage[] = {
+ 	"btrfs device scan [(-d|--all-devices)|<device> [<device>...]]",
+ 	"Scan devices for a btrfs filesystem",
+ 	" -d|--all-devices (deprecated)",
+ 	NULL
+ };
+
+ /*
+  * Entry point of "btrfs device scan".
+  *
+  * With -d/--all-devices, or when no device argument is left after option
+  * parsing, all devices are scanned with btrfs_scan_lblkid() and registered
+  * with btrfs_register_all_devices().  Otherwise every remaining argument
+  * must be a block device and is registered on its own; the first failure
+  * stops the loop.
+  *
+  * Returns 0 on success, 1 when scanning or registering failed.
+  */
+ static int cmd_device_scan(int argc, char **argv)
+ {
+ 	int i;
+ 	int devstart;
+ 	int all = 0;
+ 	int ret = 0;
+
+ 	optind = 1;
+ 	while (1) {
+ 		int c;
+ 		static const struct option long_options[] = {
+ 			{ "all-devices", no_argument, NULL, 'd'},
+ 			{ NULL, 0, NULL, 0}
+ 		};
+
+ 		c = getopt_long(argc, argv, "d", long_options, NULL);
+ 		if (c < 0)
+ 			break;
+ 		switch (c) {
+ 		case 'd':
+ 			all = 1;
+ 			break;
+ 		default:
+ 			usage(cmd_device_scan_usage);
+ 		}
+ 	}
+
+ 	/*
+ 	 * getopt_long() leaves optind at the first non-option argument, so
+ 	 * start there instead of at a fixed argv[1]; otherwise a "--"
+ 	 * separator would be taken for a device name.
+ 	 */
+ 	devstart = optind;
+
+ 	if (all && check_argc_max(argc, 2))
+ 		usage(cmd_device_scan_usage);
+
+ 	if (all || devstart >= argc) {
+ 		int scan_ret;
+
+ 		printf("Scanning for Btrfs filesystems\n");
+ 		scan_ret = btrfs_scan_lblkid();
+ 		error_on(scan_ret, "error %d while scanning", scan_ret);
+ 		ret = btrfs_register_all_devices();
+ 		error_on(ret, "error %d while registering devices", ret);
+ 		/* a failed scan must still be visible in the exit status */
+ 		if (!ret)
+ 			ret = scan_ret;
+ 		goto out;
+ 	}
+
+ 	for (i = devstart; i < argc; i++) {
+ 		char *path;
+
+ 		if (is_block_device(argv[i]) != 1) {
+ 			error("not a block device: %s", argv[i]);
+ 			ret = 1;
+ 			goto out;
+ 		}
+ 		path = canonicalize_path(argv[i]);
+ 		if (!path) {
+ 			error("could not canonicalize path '%s': %s",
+ 				argv[i], strerror(errno));
+ 			ret = 1;
+ 			goto out;
+ 		}
+ 		printf("Scanning for Btrfs filesystems in '%s'\n", path);
+ 		if (btrfs_register_one_device(path) != 0) {
+ 			ret = 1;
+ 			free(path);
+ 			goto out;
+ 		}
+ 		free(path);
+ 	}
+
+ out:
+ 	return !!ret;
+ }
+
+ /* Help text of "btrfs device ready": synopsis and summary, NULL-terminated. */
+ static const char * const cmd_device_ready_usage[] = {
+ 	"btrfs device ready <device>",
+ 	"Check device to see if it has all of its devices in cache for mounting",
+ 	NULL
+ };
+
+ /*
+  * Entry point of "btrfs device ready".
+  *
+  * Canonicalizes the last argument, requires it to be a block device and
+  * asks the kernel through BTRFS_IOC_DEVICES_READY on /dev/btrfs-control.
+  *
+  * Returns the (non-negative) result of the ioctl when it succeeds and 1 on
+  * any failure.
+  */
+ static int cmd_device_ready(int argc, char **argv)
+ {
+ 	struct btrfs_ioctl_vol_args args;
+ 	char *canon;
+ 	int ctl_fd;
+ 	int status;
+
+ 	if (check_argc_min(argc, 2))
+ 		usage(cmd_device_ready_usage);
+
+ 	ctl_fd = open("/dev/btrfs-control", O_RDWR);
+ 	if (ctl_fd < 0) {
+ 		perror("failed to open /dev/btrfs-control");
+ 		return 1;
+ 	}
+
+ 	canon = canonicalize_path(argv[argc - 1]);
+ 	if (!canon) {
+ 		error("could not canonicalize pathname '%s': %s",
+ 			argv[argc - 1], strerror(errno));
+ 		status = 1;
+ 	} else if (is_block_device(canon) != 1) {
+ 		error("not a block device: %s", canon);
+ 		status = 1;
+ 	} else {
+ 		memset(&args, 0, sizeof(args));
+ 		strncpy_null(args.name, canon);
+ 		status = ioctl(ctl_fd, BTRFS_IOC_DEVICES_READY, &args);
+ 		if (status < 0) {
+ 			error("unable to determine if device '%s' is ready for mount: %s",
+ 				canon, strerror(errno));
+ 			status = 1;
+ 		}
+ 	}
+
+ 	/* canon is NULL when canonicalization failed; free(NULL) is a no-op */
+ 	free(canon);
+ 	close(ctl_fd);
+ 	return status;
+ }
+
+ /* Help text of "btrfs device stats": synopsis, summary and the one option. */
+ static const char * const cmd_device_stats_usage[] = {
+ 	"btrfs device stats [-z] <path>|<device>",
+ 	"Show current device IO stats.",
+ 	"",
+ 	"-z show current stats and reset values to zero",
+ 	NULL
+ };
+
+ /*
+  * Entry point of "btrfs device stats".
+  *
+  * Opens the filesystem named by the single non-option argument (a path or a
+  * device), fetches the device list with get_fs_info() and prints the error
+  * counters returned by BTRFS_IOC_GET_DEV_STATS for every device.  With -z
+  * the BTRFS_DEV_STATS_RESET flag is passed along with the request.
+  *
+  * Returns 0 on success, 1 when the filesystem cannot be opened, the device
+  * list cannot be obtained or the stats ioctl failed for any device.
+  */
+ static int cmd_device_stats(int argc, char **argv)
+ {
+ 	char *dev_path;
+ 	struct btrfs_ioctl_fs_info_args fi_args;
+ 	struct btrfs_ioctl_dev_info_args *di_args = NULL;
+ 	int ret;
+ 	int fdmnt;
+ 	int i;
+ 	int c;
+ 	int err = 0;
+ 	__u64 flags = 0;
+ 	DIR *dirstream = NULL;
+
+ 	optind = 1;
+ 	while ((c = getopt(argc, argv, "z")) != -1) {
+ 		switch (c) {
+ 		case 'z':
+ 			flags = BTRFS_DEV_STATS_RESET;
+ 			break;
+ 		case '?':
+ 		default:
+ 			usage(cmd_device_stats_usage);
+ 		}
+ 	}
+
+ 	argc = argc - optind;
+ 	if (check_argc_exact(argc, 1))
+ 		usage(cmd_device_stats_usage);
+
+ 	dev_path = argv[optind];
+
+ 	fdmnt = open_path_or_dev_mnt(dev_path, &dirstream, 1);
+ 	if (fdmnt < 0)
+ 		return 1;
+
+ 	ret = get_fs_info(dev_path, &fi_args, &di_args);
+ 	if (ret) {
+ 		error("getting dev info for devstats failed: %s",
+ 			strerror(-ret));
+ 		err = 1;
+ 		goto out;
+ 	}
+ 	if (!fi_args.num_devices) {
+ 		error("no devices found");
+ 		err = 1;
+ 		goto out;
+ 	}
+
+ 	for (i = 0; i < fi_args.num_devices; i++) {
+ 		struct btrfs_ioctl_get_dev_stats args = {0};
+ 		__u8 path[BTRFS_DEVICE_PATH_NAME_MAX + 1];
+
+ 		/* bounded copy, terminated explicitly below */
+ 		strncpy((char *)path, (char *)di_args[i].path,
+ 			BTRFS_DEVICE_PATH_NAME_MAX);
+ 		path[BTRFS_DEVICE_PATH_NAME_MAX] = '\0';
+
+ 		args.devid = di_args[i].devid;
+ 		args.nr_items = BTRFS_DEV_STAT_VALUES_MAX;
+ 		args.flags = flags;
+
+ 		if (ioctl(fdmnt, BTRFS_IOC_GET_DEV_STATS, &args) < 0) {
+ 			error("DEV_STATS ioctl failed on %s: %s",
+ 				path, strerror(errno));
+ 			err = 1;
+ 		} else {
+ 			char *canonical_path;
+ 			char devid_label[32];
+ 			const char *label;
+
+ 			/*
+ 			 * canonicalize_path() can return NULL (the other
+ 			 * callers in this file check for it); passing NULL
+ 			 * to "%s" is undefined, so fall back to naming the
+ 			 * device by its id.
+ 			 */
+ 			canonical_path = canonicalize_path((char *)path);
+ 			if (canonical_path) {
+ 				label = canonical_path;
+ 			} else {
+ 				snprintf(devid_label, sizeof(devid_label),
+ 					"devid:%llu",
+ 					(unsigned long long)args.devid);
+ 				label = devid_label;
+ 			}
+
+ 			/* print only the counters the kernel filled in */
+ 			if (args.nr_items >= BTRFS_DEV_STAT_WRITE_ERRS + 1)
+ 				printf("[%s].write_io_errs %llu\n",
+ 					label,
+ 					(unsigned long long) args.values[
+ 					BTRFS_DEV_STAT_WRITE_ERRS]);
+ 			if (args.nr_items >= BTRFS_DEV_STAT_READ_ERRS + 1)
+ 				printf("[%s].read_io_errs %llu\n",
+ 					label,
+ 					(unsigned long long) args.values[
+ 					BTRFS_DEV_STAT_READ_ERRS]);
+ 			if (args.nr_items >= BTRFS_DEV_STAT_FLUSH_ERRS + 1)
+ 				printf("[%s].flush_io_errs %llu\n",
+ 					label,
+ 					(unsigned long long) args.values[
+ 					BTRFS_DEV_STAT_FLUSH_ERRS]);
+ 			if (args.nr_items >= BTRFS_DEV_STAT_CORRUPTION_ERRS + 1)
+ 				printf("[%s].corruption_errs %llu\n",
+ 					label,
+ 					(unsigned long long) args.values[
+ 					BTRFS_DEV_STAT_CORRUPTION_ERRS]);
+ 			if (args.nr_items >= BTRFS_DEV_STAT_GENERATION_ERRS + 1)
+ 				printf("[%s].generation_errs %llu\n",
+ 					label,
+ 					(unsigned long long) args.values[
+ 					BTRFS_DEV_STAT_GENERATION_ERRS]);
+
+ 			free(canonical_path);
+ 		}
+ 	}
+
+ out:
+ 	free(di_args);
+ 	close_file_or_dir(fdmnt, dirstream);
+
+ 	return err;
+ }
+
+ /* Help text of "btrfs device usage"; the unit options come from a macro. */
+ static const char * const cmd_device_usage_usage[] = {
+ 	"btrfs device usage [options] <path> [<path>..]",
+ 	"Show detailed information about internal allocations in devices.",
+ 	HELPINFO_UNITS_SHORT_LONG,
+ 	NULL
+ };
+
+ /*
+  * Print, for every device of the filesystem open on @fd, a header line with
+  * its path and id followed by its sizes and its chunk allocations.
+  *
+  * @path is not used here.  Sizes are formatted according to @unit_mode.
+  *
+  * Returns 0 on success or the error of load_chunk_and_device_info().
+  */
+ static int _cmd_device_usage(int fd, char *path, unsigned unit_mode)
+ {
+ 	struct chunk_info *chunks = NULL;
+ 	struct device_info *devices = NULL;
+ 	int nr_chunks = 0;
+ 	int nr_devices = 0;
+ 	int status;
+ 	int d;
+
+ 	status = load_chunk_and_device_info(fd, &chunks, &nr_chunks, &devices,
+ 			&nr_devices);
+ 	if (!status) {
+ 		for (d = 0; d < nr_devices; d++) {
+ 			struct device_info *dev = &devices[d];
+
+ 			printf("%s, ID: %llu\n", dev->path, dev->devid);
+ 			print_device_sizes(fd, dev, unit_mode);
+ 			print_device_chunks(fd, dev, chunks, nr_chunks,
+ 					unit_mode);
+ 			printf("\n");
+ 		}
+ 	}
+
+ 	/* both arrays stay NULL when loading failed early */
+ 	free(devices);
+ 	free(chunks);
+
+ 	return status;
+ }
+
+ /*
+  * Entry point of "btrfs device usage".
+  *
+  * Takes the unit options out of the argument list, then prints the
+  * per-device usage of every path given, separated by an empty line.
+  * Processing stops at the first path that cannot be opened or reported.
+  *
+  * Returns 0 on success, 1 otherwise.
+  */
+ static int cmd_device_usage(int argc, char **argv)
+ {
+ 	unsigned unit_mode;
+ 	int status = 0;
+ 	int idx;
+
+ 	unit_mode = get_unit_mode_from_arg(&argc, argv, 1);
+
+ 	if (check_argc_min(argc, 2) || argv[1][0] == '-')
+ 		usage(cmd_device_usage_usage);
+
+ 	for (idx = 1; idx < argc && !status; idx++) {
+ 		DIR *dirstream = NULL;
+ 		int fd;
+
+ 		if (idx > 1)
+ 			printf("\n");
+
+ 		fd = btrfs_open_dir(argv[idx], &dirstream, 1);
+ 		if (fd < 0) {
+ 			status = 1;
+ 		} else {
+ 			status = _cmd_device_usage(fd, argv[idx], unit_mode);
+ 			close_file_or_dir(fd, dirstream);
+ 		}
+ 	}
+
+ 	return !!status;
+ }
+
+ /* One-line description of the "btrfs device" command group. */
+ static const char device_cmd_group_info[] =
+ "manage and query devices in the filesystem";
+
+ /*
+ * Command table of "btrfs device": maps each subcommand name to its handler
+ * and help text.  "delete" is flagged CMD_ALIAS; the table is terminated by
+ * NULL_CMD_STRUCT.
+ */
+ const struct cmd_group device_cmd_group = {
+ device_cmd_group_usage, device_cmd_group_info, {
+ { "add", cmd_device_add, cmd_device_add_usage, NULL, 0 },
+ { "delete", cmd_device_delete, cmd_device_delete_usage, NULL,
+ CMD_ALIAS },
+ { "remove", cmd_device_remove, cmd_device_remove_usage, NULL, 0 },
+ { "scan", cmd_device_scan, cmd_device_scan_usage, NULL, 0 },
+ { "ready", cmd_device_ready, cmd_device_ready_usage, NULL, 0 },
+ { "stats", cmd_device_stats, cmd_device_stats_usage, NULL, 0 },
+ { "usage", cmd_device_usage,
+ cmd_device_usage_usage, NULL, 0 },
+ NULL_CMD_STRUCT
+ }
+ };
+
+ /*
+ * Entry point of the "btrfs device" group: hands the arguments to
+ * handle_command_group() with the table above and returns its result.
+ */
+ int cmd_device(int argc, char **argv)
+ {
+ return handle_command_group(&device_cmd_group, argc, argv);
+ }
diff --git a/cmds-fi-usage.c b/cmds-fi-usage.c
new file mode 100644
index 00000000..33bf403a
--- /dev/null
+++ b/cmds-fi-usage.c
@@ -0,0 +1,1036 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <getopt.h>
+
+#include "utils.h"
+#include "kerncompat.h"
+#include "ctree.h"
+#include "string-table.h"
+#include "cmds-fi-usage.h"
+#include "commands.h"
+
+#include "version.h"
+
+/*
+ * Add the chunk info to the chunk_info list
+ */
+static int add_info_to_list(struct chunk_info **info_ptr,
+ int *info_count,
+ struct btrfs_chunk *chunk)
+{
+
+ u64 type = btrfs_stack_chunk_type(chunk);
+ u64 size = btrfs_stack_chunk_length(chunk);
+ int num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+ int j;
+
+ for (j = 0 ; j < num_stripes ; j++) {
+ int i;
+ struct chunk_info *p = NULL;
+ struct btrfs_stripe *stripe;
+ u64 devid;
+
+ stripe = btrfs_stripe_nr(chunk, j);
+ devid = btrfs_stack_stripe_devid(stripe);
+
+ for (i = 0 ; i < *info_count ; i++)
+ if ((*info_ptr)[i].type == type &&
+ (*info_ptr)[i].devid == devid &&
+ (*info_ptr)[i].num_stripes == num_stripes ) {
+ p = (*info_ptr) + i;
+ break;
+ }
+
+ if (!p) {
+ int tmp = sizeof(struct btrfs_chunk) * (*info_count + 1);
+ struct chunk_info *res = realloc(*info_ptr, tmp);
+
+ if (!res) {
+ free(*info_ptr);
+ error("not enough memory");
+ return -ENOMEM;
+ }
+
+ *info_ptr = res;
+ p = res + *info_count;
+ (*info_count)++;
+
+ p->devid = devid;
+ p->type = type;
+ p->size = 0;
+ p->num_stripes = num_stripes;
+ }
+
+ p->size += size;
+
+ }
+
+ return 0;
+
+}
+
+/*
+ * Helper to sort the chunk type
+ */
+static int cmp_chunk_block_group(u64 f1, u64 f2)
+{
+
+ u64 mask;
+
+ if ((f1 & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
+ (f2 & BTRFS_BLOCK_GROUP_TYPE_MASK))
+ mask = BTRFS_BLOCK_GROUP_PROFILE_MASK;
+ else if (f2 & BTRFS_BLOCK_GROUP_SYSTEM)
+ return -1;
+ else if (f1 & BTRFS_BLOCK_GROUP_SYSTEM)
+ return +1;
+ else
+ mask = BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ if ((f1 & mask) > (f2 & mask))
+ return +1;
+ else if ((f1 & mask) < (f2 & mask))
+ return -1;
+ else
+ return 0;
+}
+
+/*
+ * Helper to sort the chunk
+ */
+static int cmp_chunk_info(const void *a, const void *b)
+{
+ return cmp_chunk_block_group(
+ ((struct chunk_info *)a)->type,
+ ((struct chunk_info *)b)->type);
+}
+
+ /*
+  * Build the per-device chunk summary of the filesystem open on @fd.
+  *
+  * The chunk tree is read in batches with BTRFS_IOC_TREE_SEARCH, every chunk
+  * item is accounted with add_info_to_list() and the resulting array is
+  * sorted by chunk type.
+  *
+  * @info_ptr:   address of the result array (grown with realloc())
+  * @info_count: number of entries in *info_ptr
+  *
+  * Returns 0 on success, -EPERM when the search ioctl is not permitted and 1
+  * on any other failure.  After an allocation failure *info_ptr is NULL.
+  */
+ static int load_chunk_info(int fd, struct chunk_info **info_ptr, int *info_count)
+ {
+ 	int ret;
+ 	struct btrfs_ioctl_search_args args;
+ 	struct btrfs_ioctl_search_key *sk = &args.key;
+ 	struct btrfs_ioctl_search_header *sh;
+ 	unsigned long off = 0;
+ 	int i;
+
+ 	memset(&args, 0, sizeof(args));
+
+ 	/*
+ 	 * Search the chunk tree.  One call returns a limited batch of items,
+ 	 * so the search is restarted after the last key seen until nothing
+ 	 * more comes back.
+ 	 * NOTE(review): min_type/max_type look swapped; left as they were -
+ 	 * confirm against the TREE_SEARCH key semantics.
+ 	 */
+ 	sk->tree_id = BTRFS_CHUNK_TREE_OBJECTID;
+
+ 	sk->min_objectid = 0;
+ 	sk->max_objectid = (u64)-1;
+ 	sk->max_type = 0;
+ 	sk->min_type = (u8)-1;
+ 	sk->min_offset = 0;
+ 	sk->max_offset = (u64)-1;
+ 	sk->min_transid = 0;
+ 	sk->max_transid = (u64)-1;
+ 	sk->nr_items = 4096;
+
+ 	while (1) {
+ 		ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ 		if (ret < 0) {
+ 			/* errno is only meaningful after a failed call */
+ 			int e = errno;
+
+ 			if (e == EPERM)
+ 				return -e;
+
+ 			error("cannot look up chunk tree info: %s",
+ 				strerror(e));
+ 			return 1;
+ 		}
+ 		/* the ioctl returns the number of item it found in nr_items */
+
+ 		if (sk->nr_items == 0)
+ 			break;
+
+ 		off = 0;
+ 		for (i = 0; i < sk->nr_items; i++) {
+ 			struct btrfs_chunk *item;
+ 			sh = (struct btrfs_ioctl_search_header *)(args.buf +
+ 								  off);
+
+ 			off += sizeof(*sh);
+ 			item = (struct btrfs_chunk *)(args.buf + off);
+
+ 			/*
+ 			 * The search key does not restrict the item type, so
+ 			 * only items keyed as chunk items may be interpreted
+ 			 * as struct btrfs_chunk.
+ 			 */
+ 			if (sh->type == BTRFS_CHUNK_ITEM_KEY) {
+ 				ret = add_info_to_list(info_ptr, info_count,
+ 						item);
+ 				if (ret) {
+ 					*info_ptr = NULL;
+ 					return 1;
+ 				}
+ 			}
+
+ 			off += sh->len;
+
+ 			/* resume right after this key on the next search */
+ 			sk->min_objectid = sh->objectid;
+ 			sk->min_type = sh->type;
+ 			sk->min_offset = sh->offset+1;
+
+ 		}
+ 		/* nr_items was overwritten with the found count; restore the batch size */
+ 		sk->nr_items = 4096;
+
+ 		if (!sk->min_offset)	/* overflow */
+ 			sk->min_type++;
+ 		else
+ 			continue;
+
+ 		if (!sk->min_type)
+ 			sk->min_objectid++;
+ 		 else
+ 			continue;
+
+ 		if (!sk->min_objectid)
+ 			break;
+ 	}
+
+ 	qsort(*info_ptr, *info_count, sizeof(struct chunk_info),
+ 		cmp_chunk_info);
+
+ 	return 0;
+ }
+
+/*
+ * Helper to sort the struct btrfs_ioctl_space_info
+ */
+static int cmp_btrfs_ioctl_space_info(const void *a, const void *b)
+{
+ return cmp_chunk_block_group(
+ ((struct btrfs_ioctl_space_info *)a)->flags,
+ ((struct btrfs_ioctl_space_info *)b)->flags);
+}
+
+/*
+ * This function load all the information about the space usage
+ */
+static struct btrfs_ioctl_space_args *load_space_info(int fd, char *path)
+{
+ struct btrfs_ioctl_space_args *sargs = NULL, *sargs_orig = NULL;
+ int ret, count;
+
+ sargs_orig = sargs = calloc(1, sizeof(struct btrfs_ioctl_space_args));
+ if (!sargs) {
+ error("not enough memory");
+ return NULL;
+ }
+
+ sargs->space_slots = 0;
+ sargs->total_spaces = 0;
+
+ ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
+ if (ret < 0) {
+ error("cannot get space info on '%s': %s", path,
+ strerror(errno));
+ free(sargs);
+ return NULL;
+ }
+ if (!sargs->total_spaces) {
+ free(sargs);
+ printf("No chunks found\n");
+ return NULL;
+ }
+
+ count = sargs->total_spaces;
+
+ sargs = realloc(sargs, sizeof(struct btrfs_ioctl_space_args) +
+ (count * sizeof(struct btrfs_ioctl_space_info)));
+ if (!sargs) {
+ free(sargs_orig);
+ error("not enough memory");
+ return NULL;
+ }
+
+ sargs->space_slots = count;
+ sargs->total_spaces = 0;
+
+ ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
+ if (ret < 0) {
+ error("cannot get space info with %u slots: %s",
+ count, strerror(errno));
+ free(sargs);
+ return NULL;
+ }
+
+ qsort(&(sargs->spaces), count, sizeof(struct btrfs_ioctl_space_info),
+ cmp_btrfs_ioctl_space_info);
+
+ return sargs;
+}
+
+/*
+ * This function computes the space occuped by a *single* RAID5/RAID6 chunk.
+ * The computation is performed on the basis of the number of stripes
+ * which compose the chunk, which could be different from the number of devices
+ * if a disk is added later.
+ */
+static void get_raid56_used(int fd, struct chunk_info *chunks, int chunkcount,
+ u64 *raid5_used, u64 *raid6_used)
+{
+ struct chunk_info *info_ptr = chunks;
+ *raid5_used = 0;
+ *raid6_used = 0;
+
+ while (chunkcount-- > 0) {
+ if (info_ptr->type & BTRFS_BLOCK_GROUP_RAID5)
+ (*raid5_used) += info_ptr->size / (info_ptr->num_stripes - 1);
+ if (info_ptr->type & BTRFS_BLOCK_GROUP_RAID6)
+ (*raid6_used) += info_ptr->size / (info_ptr->num_stripes - 2);
+ info_ptr++;
+ }
+}
+
+ /*
+ * Unallocated raw space below this many bytes (16MiB) is not added to the
+ * free space estimates.
+ */
+ #define MIN_UNALOCATED_THRESH (16 * 1024 * 1024)
+ /*
+ * Print the "Overall:" section of the usage report: total device size,
+ * allocated, unallocated and missing raw space, used space, the estimated
+ * free space, the data and metadata ratios and the global reserve.
+ *
+ * The space infos are loaded here with load_space_info(); @devinfo/@devcount
+ * supply the device sizes and @chunkinfo/@chunkcount the chunk summary.
+ * @path is used in error messages, @unit_mode selects the size formatting.
+ *
+ * Returns 0 on success, 1 when the space info cannot be loaded or the sum of
+ * the device sizes is zero.
+ */
+ static int print_filesystem_usage_overall(int fd, struct chunk_info *chunkinfo,
+ int chunkcount, struct device_info *devinfo, int devcount,
+ char *path, unsigned unit_mode)
+ {
+ struct btrfs_ioctl_space_args *sargs = NULL;
+ int i;
+ int ret = 0;
+ int width = 10; /* default 10 for human units */
+ /*
+ * r_* prefix is for raw data
+ * l_* is for logical
+ */
+ u64 r_total_size = 0; /* filesystem size, sum of device sizes */
+ u64 r_total_chunks = 0; /* sum of chunks sizes on disk(s) */
+ u64 r_total_used = 0;
+ u64 r_total_unused = 0;
+ u64 r_total_missing = 0; /* sum of missing devices size */
+ u64 r_data_used = 0;
+ u64 r_data_chunks = 0;
+ u64 l_data_chunks = 0;
+ u64 r_metadata_used = 0;
+ u64 r_metadata_chunks = 0;
+ u64 l_metadata_chunks = 0;
+ u64 r_system_used = 0;
+ u64 r_system_chunks = 0;
+ double data_ratio;
+ double metadata_ratio;
+ /* logical */
+ u64 raid5_used = 0;
+ u64 raid6_used = 0;
+ u64 l_global_reserve = 0;
+ u64 l_global_reserve_used = 0;
+ u64 free_estimated = 0;
+ u64 free_min = 0;
+ int max_data_ratio = 1;
+ int mixed = 0;
+
+ sargs = load_space_info(fd, path);
+ if (!sargs) {
+ ret = 1;
+ goto exit;
+ }
+
+ /* a device whose device_size is 0 is counted as missing */
+ r_total_size = 0;
+ for (i = 0; i < devcount; i++) {
+ r_total_size += devinfo[i].size;
+ if (!devinfo[i].device_size)
+ r_total_missing += devinfo[i].size;
+ }
+
+ /*
+ * NOTE(review): no call in this branch sets errno, so the strerror()
+ * text below may describe an unrelated earlier error - confirm.
+ */
+ if (r_total_size == 0) {
+ error("cannot get space info on '%s': %s",
+ path, strerror(errno));
+
+ ret = 1;
+ goto exit;
+ }
+ /* the RAID5/6 sums are only referenced by the disabled "#if 0" code below */
+ get_raid56_used(fd, chunkinfo, chunkcount, &raid5_used, &raid6_used);
+
+ for (i = 0; i < sargs->total_spaces; i++) {
+ int ratio;
+ u64 flags = sargs->spaces[i].flags;
+
+ /*
+ * The raid5/raid6 ratio depends by the stripes number
+ * used by every chunk. It is computed separately
+ */
+ if (flags & BTRFS_BLOCK_GROUP_RAID0)
+ ratio = 1;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+ ratio = 2;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+ ratio = 0;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+ ratio = 0;
+ else if (flags & BTRFS_BLOCK_GROUP_DUP)
+ ratio = 2;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID10)
+ ratio = 2;
+ else
+ ratio = 1;
+
+ /* ratio 0 makes the raw sums below ignore RAID5/6 block groups */
+ if (!ratio)
+ warning("RAID56 detected, not implemented");
+
+ if (ratio > max_data_ratio)
+ max_data_ratio = ratio;
+
+ if (flags & BTRFS_SPACE_INFO_GLOBAL_RSV) {
+ l_global_reserve = sargs->spaces[i].total_bytes;
+ l_global_reserve_used = sargs->spaces[i].used_bytes;
+ }
+ /* a block group flagged both DATA and METADATA marks a mixed filesystem */
+ if ((flags & (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA))
+ == (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA)) {
+ mixed = 1;
+ }
+ if (flags & BTRFS_BLOCK_GROUP_DATA) {
+ r_data_used += sargs->spaces[i].used_bytes * ratio;
+ r_data_chunks += sargs->spaces[i].total_bytes * ratio;
+ l_data_chunks += sargs->spaces[i].total_bytes;
+ }
+ if (flags & BTRFS_BLOCK_GROUP_METADATA) {
+ r_metadata_used += sargs->spaces[i].used_bytes * ratio;
+ r_metadata_chunks += sargs->spaces[i].total_bytes * ratio;
+ l_metadata_chunks += sargs->spaces[i].total_bytes;
+ }
+ if (flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+ r_system_used += sargs->spaces[i].used_bytes * ratio;
+ r_system_chunks += sargs->spaces[i].total_bytes * ratio;
+ }
+ }
+
+ /* in the mixed case the metadata is already part of the data sums */
+ r_total_chunks = r_data_chunks + r_system_chunks;
+ r_total_used = r_data_used + r_system_used;
+ if (!mixed) {
+ r_total_chunks += r_metadata_chunks;
+ r_total_used += r_metadata_used;
+ }
+ r_total_unused = r_total_size - r_total_chunks;
+
+ /*
+ * NOTE(review): l_data_chunks (or l_metadata_chunks) is 0 when no such
+ * block group was reported; the divisions below are not guarded.
+ */
+ /* Raw / Logical = raid factor, >= 1 */
+ data_ratio = (double)r_data_chunks / l_data_chunks;
+ if (mixed)
+ metadata_ratio = data_ratio;
+ else
+ metadata_ratio = (double)r_metadata_chunks / l_metadata_chunks;
+
+ #if 0
+ /* add the raid5/6 allocated space */
+ total_chunks += raid5_used + raid6_used;
+ #endif
+
+ /*
+ * We're able to fill at least DATA for the unused space
+ *
+ * With mixed raid levels, this gives a rough estimate but more
+ * accurate than just counting the logical free space
+ * (l_data_chunks - l_data_used)
+ *
+ * In non-mixed case there's no difference.
+ */
+ free_estimated = (r_data_chunks - r_data_used) / data_ratio;
+ /*
+ * For mixed-bg the metadata are left out in calculations thus global
+ * reserve would be lost. Part of it could be permanently allocated,
+ * we have to subtract the used bytes so we don't go under zero free.
+ */
+ if (mixed)
+ free_estimated -= l_global_reserve - l_global_reserve_used;
+ free_min = free_estimated;
+
+ /* Chop unallocatable space */
+ /* FIXME: must be applied per device */
+ if (r_total_unused >= MIN_UNALOCATED_THRESH) {
+ free_estimated += r_total_unused / data_ratio;
+ /* Match the calculation of 'df', use the highest raid ratio */
+ free_min += r_total_unused / max_data_ratio;
+ }
+
+ /* non-human unit modes get a wider value column */
+ if (unit_mode != UNITS_HUMAN)
+ width = 18;
+
+ printf("Overall:\n");
+
+ printf(" Device size:\t\t%*s\n", width,
+ pretty_size_mode(r_total_size, unit_mode));
+ printf(" Device allocated:\t\t%*s\n", width,
+ pretty_size_mode(r_total_chunks, unit_mode));
+ printf(" Device unallocated:\t\t%*s\n", width,
+ pretty_size_mode(r_total_unused, unit_mode));
+ printf(" Device missing:\t\t%*s\n", width,
+ pretty_size_mode(r_total_missing, unit_mode));
+ printf(" Used:\t\t\t%*s\n", width,
+ pretty_size_mode(r_total_used, unit_mode));
+ printf(" Free (estimated):\t\t%*s\t(",
+ width,
+ pretty_size_mode(free_estimated, unit_mode));
+ printf("min: %s)\n", pretty_size_mode(free_min, unit_mode));
+ printf(" Data ratio:\t\t\t%*.2f\n",
+ width, data_ratio);
+ printf(" Metadata ratio:\t\t%*.2f\n",
+ width, metadata_ratio);
+ printf(" Global reserve:\t\t%*s\t(used: %s)\n", width,
+ pretty_size_mode(l_global_reserve, unit_mode),
+ pretty_size_mode(l_global_reserve_used, unit_mode));
+
+ exit:
+
+ if (sargs)
+ free(sargs);
+
+ return ret;
+ }
+
+/*
+ * Helper to sort the device_info structure
+ */
+static int cmp_device_info(const void *a, const void *b)
+{
+ return strcmp(((struct device_info *)a)->path,
+ ((struct device_info *)b)->path);
+}
+
+/*
+ * This function loads the device_info structure and put them in an array
+ */
+static int load_device_info(int fd, struct device_info **device_info_ptr,
+ int *device_info_count)
+{
+ int ret, i, ndevs;
+ struct btrfs_ioctl_fs_info_args fi_args;
+ struct btrfs_ioctl_dev_info_args dev_info;
+ struct device_info *info;
+
+ *device_info_count = 0;
+ *device_info_ptr = NULL;
+
+ ret = ioctl(fd, BTRFS_IOC_FS_INFO, &fi_args);
+ if (ret < 0) {
+ if (errno == EPERM)
+ return -errno;
+ error("cannot get filesystem info: %s",
+ strerror(errno));
+ return 1;
+ }
+
+ info = calloc(fi_args.num_devices, sizeof(struct device_info));
+ if (!info) {
+ error("not enough memory");
+ return 1;
+ }
+
+ for (i = 0, ndevs = 0 ; i <= fi_args.max_id ; i++) {
+ BUG_ON(ndevs >= fi_args.num_devices);
+ memset(&dev_info, 0, sizeof(dev_info));
+ ret = get_device_info(fd, i, &dev_info);
+
+ if (ret == -ENODEV)
+ continue;
+ if (ret) {
+ error("cannot get info about device devid=%d", i);
+ free(info);
+ return ret;
+ }
+
+ info[ndevs].devid = dev_info.devid;
+ if (!dev_info.path[0]) {
+ strcpy(info[ndevs].path, "missing");
+ } else {
+ strcpy(info[ndevs].path, (char *)dev_info.path);
+ info[ndevs].device_size =
+ get_partition_size((char *)dev_info.path);
+ }
+ info[ndevs].size = dev_info.total_bytes;
+ ++ndevs;
+ }
+
+ BUG_ON(ndevs != fi_args.num_devices);
+ qsort(info, fi_args.num_devices,
+ sizeof(struct device_info), cmp_device_info);
+
+ *device_info_count = fi_args.num_devices;
+ *device_info_ptr = info;
+
+ return 0;
+}
+
+ /*
+  * Load the chunk summary and the device list of the filesystem open on @fd.
+  *
+  * A permission failure (-EPERM) of either loader only produces a warning:
+  * after one from load_chunk_info() the device list is still loaded, and one
+  * from load_device_info() is turned into success.  Any other error of
+  * load_chunk_info() is returned right away.
+  *
+  * Returns 0 on success or the error of the failing loader.
+  */
+ int load_chunk_and_device_info(int fd, struct chunk_info **chunkinfo,
+ 		int *chunkcount, struct device_info **devinfo, int *devcount)
+ {
+ 	int rc;
+
+ 	rc = load_chunk_info(fd, chunkinfo, chunkcount);
+ 	if (rc && rc != -EPERM)
+ 		return rc;
+ 	if (rc == -EPERM)
+ 		warning(
+ "cannot read detailed chunk info, RAID5/6 numbers will be incorrect, run as root");
+
+ 	rc = load_device_info(fd, devinfo, devcount);
+ 	if (rc != -EPERM)
+ 		return rc;
+
+ 	warning(
+ 		"cannot get filesystem info from ioctl(FS_INFO), run as root");
+ 	return 0;
+ }
+
+/*
+ * This function computes the size of a chunk in a disk
+ */
+static u64 calc_chunk_size(struct chunk_info *ci)
+{
+ if (ci->type & BTRFS_BLOCK_GROUP_RAID0)
+ return ci->size / ci->num_stripes;
+ else if (ci->type & BTRFS_BLOCK_GROUP_RAID1)
+ return ci->size ;
+ else if (ci->type & BTRFS_BLOCK_GROUP_DUP)
+ return ci->size ;
+ else if (ci->type & BTRFS_BLOCK_GROUP_RAID5)
+ return ci->size / (ci->num_stripes -1);
+ else if (ci->type & BTRFS_BLOCK_GROUP_RAID6)
+ return ci->size / (ci->num_stripes -2);
+ else if (ci->type & BTRFS_BLOCK_GROUP_RAID10)
+ return ci->size / ci->num_stripes;
+ return ci->size;
+}
+
+/*
+ * This function print the results of the command "btrfs fi usage"
+ * in tabular format
+ */
+static void _cmd_filesystem_usage_tabular(unsigned unit_mode,
+ struct btrfs_ioctl_space_args *sargs,
+ struct chunk_info *chunks_info_ptr,
+ int chunks_info_count,
+ struct device_info *device_info_ptr,
+ int device_info_count)
+{
+ int i;
+ u64 total_unused = 0;
+ struct string_table *matrix = NULL;
+ int ncols, nrows;
+ int col;
+ int unallocated_col;
+ int spaceinfos_col;
+ const int vhdr_skip = 3; /* amount of vertical header space */
+
+ /* id, path, unallocated */
+ ncols = 3;
+ spaceinfos_col = 2;
+ /* Properly count the real space infos */
+ for (i = 0; i < sargs->total_spaces; i++) {
+ if (sargs->spaces[i].flags & BTRFS_SPACE_INFO_GLOBAL_RSV)
+ continue;
+ ncols++;
+ }
+
+ /* 2 for header, empty line, devices, ===, total, used */
+ nrows = vhdr_skip + device_info_count + 1 + 2;
+
+ matrix = table_create(ncols, nrows);
+ if (!matrix) {
+ error("not enough memory");
+ return;
+ }
+
+ /*
+ * We have to skip the global block reserve everywhere as it's an
+ * artificial blockgroup
+ */
+
+ /* header */
+ for (i = 0, col = spaceinfos_col; i < sargs->total_spaces; i++) {
+ u64 flags = sargs->spaces[i].flags;
+
+ if (flags & BTRFS_SPACE_INFO_GLOBAL_RSV)
+ continue;
+
+ table_printf(matrix, col, 0, "<%s",
+ btrfs_group_type_str(flags));
+ table_printf(matrix, col, 1, "<%s",
+ btrfs_group_profile_str(flags));
+ col++;
+ }
+ unallocated_col = col;
+
+ table_printf(matrix, 0, 1, "<Id");
+ table_printf(matrix, 1, 1, "<Path");
+ table_printf(matrix, unallocated_col, 1, "<Unallocated");
+
+ /* body */
+ for (i = 0; i < device_info_count; i++) {
+ int k;
+ char *p;
+
+ u64 total_allocated = 0, unused;
+
+ p = strrchr(device_info_ptr[i].path, '/');
+ if (!p)
+ p = device_info_ptr[i].path;
+ else
+ p++;
+
+ table_printf(matrix, 0, vhdr_skip + i, ">%llu",
+ device_info_ptr[i].devid);
+ table_printf(matrix, 1, vhdr_skip + i, "<%s",
+ device_info_ptr[i].path);
+
+ for (col = spaceinfos_col, k = 0; k < sargs->total_spaces; k++) {
+ u64 flags = sargs->spaces[k].flags;
+ u64 devid = device_info_ptr[i].devid;
+ int j;
+ u64 size = 0;
+
+ if (flags & BTRFS_SPACE_INFO_GLOBAL_RSV)
+ continue;
+
+ for (j = 0 ; j < chunks_info_count ; j++) {
+ if (chunks_info_ptr[j].type != flags )
+ continue;
+ if (chunks_info_ptr[j].devid != devid)
+ continue;
+
+ size += calc_chunk_size(chunks_info_ptr+j);
+ }
+
+ if (size)
+ table_printf(matrix, col, vhdr_skip+ i,
+ ">%s", pretty_size_mode(size, unit_mode));
+ else
+ table_printf(matrix, col, vhdr_skip + i, ">-");
+
+ total_allocated += size;
+ col++;
+ }
+
+ unused = get_partition_size(device_info_ptr[i].path)
+ - total_allocated;
+
+ table_printf(matrix, unallocated_col, vhdr_skip + i,
+ ">%s", pretty_size_mode(unused, unit_mode));
+ total_unused += unused;
+
+ }
+
+ for (i = 0; i < spaceinfos_col; i++) {
+ table_printf(matrix, i, vhdr_skip - 1, "*-");
+ table_printf(matrix, i, vhdr_skip + device_info_count, "*-");
+ }
+
+ for (i = 0, col = spaceinfos_col; i < sargs->total_spaces; i++) {
+ if (sargs->spaces[i].flags & BTRFS_SPACE_INFO_GLOBAL_RSV)
+ continue;
+
+ table_printf(matrix, col, vhdr_skip - 1, "*-");
+ table_printf(matrix, col, vhdr_skip + device_info_count, "*-");
+ col++;
+ }
+ /* One for Unallocated */
+ table_printf(matrix, col, vhdr_skip - 1, "*-");
+ table_printf(matrix, col, vhdr_skip + device_info_count, "*-");
+
+ /* footer */
+ table_printf(matrix, 1, vhdr_skip + device_info_count + 1, "<Total");
+ for (i = 0, col = spaceinfos_col; i < sargs->total_spaces; i++) {
+ if (sargs->spaces[i].flags & BTRFS_SPACE_INFO_GLOBAL_RSV)
+ continue;
+
+ table_printf(matrix, col++, vhdr_skip + device_info_count + 1,
+ ">%s",
+ pretty_size_mode(sargs->spaces[i].total_bytes, unit_mode));
+ }
+
+ table_printf(matrix, unallocated_col, vhdr_skip + device_info_count + 1,
+ ">%s", pretty_size_mode(total_unused, unit_mode));
+
+ table_printf(matrix, 1, vhdr_skip + device_info_count + 2, "<Used");
+ for (i = 0, col = spaceinfos_col; i < sargs->total_spaces; i++) {
+ if (sargs->spaces[i].flags & BTRFS_SPACE_INFO_GLOBAL_RSV)
+ continue;
+
+ table_printf(matrix, col++, vhdr_skip + device_info_count + 2,
+ ">%s",
+ pretty_size_mode(sargs->spaces[i].used_bytes, unit_mode));
+ }
+
+ table_dump(matrix);
+ table_free(matrix);
+}
+
+/*
+ * Print, for every device, the space that is not covered by any chunk:
+ * the device's filesystem-occupied size minus the sum of its chunk sizes.
+ */
+static void print_unused(struct chunk_info *info_ptr,
+		int info_count,
+		struct device_info *device_info_ptr,
+		int device_info_count,
+		unsigned unit_mode)
+{
+	int dev;
+
+	for (dev = 0; dev < device_info_count; dev++) {
+		struct device_info *d = &device_info_ptr[dev];
+		u64 allocated = 0;
+		int c;
+
+		/* add up every chunk record that lives on this device */
+		for (c = 0; c < info_count; c++) {
+			if (info_ptr[c].devid != d->devid)
+				continue;
+			allocated += calc_chunk_size(&info_ptr[c]);
+		}
+
+		printf(" %s\t%10s\n", d->path,
+			pretty_size_mode(d->size - allocated, unit_mode));
+	}
+}
+
+/*
+ * For one chunk type, print the amount allocated on each device.
+ * Devices that hold nothing of that type are not printed.
+ */
+static void print_chunk_device(u64 chunk_type,
+		struct chunk_info *chunks_info_ptr,
+		int chunks_info_count,
+		struct device_info *device_info_ptr,
+		int device_info_count,
+		unsigned unit_mode)
+{
+	int dev;
+
+	for (dev = 0; dev < device_info_count; dev++) {
+		u64 sum = 0;
+		int c;
+
+		for (c = 0; c < chunks_info_count; c++) {
+			struct chunk_info *ci = chunks_info_ptr + c;
+
+			if (ci->type == chunk_type &&
+			    ci->devid == device_info_ptr[dev].devid)
+				sum += calc_chunk_size(ci);
+		}
+
+		/* nothing of this type on the device: no output line */
+		if (sum == 0)
+			continue;
+
+		printf(" %s\t%10s\n", device_info_ptr[dev].path,
+			pretty_size_mode(sum, unit_mode));
+	}
+}
+
+/*
+ * Print the per-chunk-type part of "btrfs fi usage" in linear format:
+ * one "type,profile: Size/Used" header per space info followed by the
+ * per-device allocation, and finally the unallocated space per device.
+ * Space infos flagged BTRFS_SPACE_INFO_GLOBAL_RSV are skipped.
+ */
+static void _cmd_filesystem_usage_linear(unsigned unit_mode,
+		struct btrfs_ioctl_space_args *sargs,
+		struct chunk_info *info_ptr,
+		int info_count,
+		struct device_info *device_info_ptr,
+		int device_info_count)
+{
+	int idx;
+
+	for (idx = 0; idx < sargs->total_spaces; idx++) {
+		struct btrfs_ioctl_space_info *sp = &sargs->spaces[idx];
+		u64 bg_flags = sp->flags;
+		const char *type_name;
+		const char *profile_name;
+
+		if (bg_flags & BTRFS_SPACE_INFO_GLOBAL_RSV)
+			continue;
+
+		type_name = btrfs_group_type_str(bg_flags);
+		profile_name = btrfs_group_profile_str(bg_flags);
+
+		printf("%s,%s: Size:%s, ", type_name, profile_name,
+			pretty_size_mode(sp->total_bytes, unit_mode));
+		printf("Used:%s\n",
+			pretty_size_mode(sp->used_bytes, unit_mode));
+		print_chunk_device(bg_flags, info_ptr, info_count,
+				device_info_ptr, device_info_count,
+				unit_mode);
+		printf("\n");
+	}
+
+	printf("Unallocated:\n");
+	print_unused(info_ptr, info_count, device_info_ptr,
+			device_info_count, unit_mode);
+}
+
+/*
+ * Load the space info for @path and print the per-chunk usage either as a
+ * table (@tabular != 0) or in linear form.
+ *
+ * Returns 0 on success or when there is no chunk info to print, 1 when the
+ * space info could not be loaded.
+ */
+static int print_filesystem_usage_by_chunk(int fd,
+		struct chunk_info *chunkinfo, int chunkcount,
+		struct device_info *devinfo, int devcount,
+		char *path, unsigned unit_mode, int tabular)
+{
+	struct btrfs_ioctl_space_args *space;
+
+	if (!chunkinfo)
+		return 0;
+
+	space = load_space_info(fd, path);
+	if (!space)
+		return 1;
+
+	if (!tabular)
+		_cmd_filesystem_usage_linear(unit_mode, space, chunkinfo,
+				chunkcount, devinfo, devcount);
+	else
+		_cmd_filesystem_usage_tabular(unit_mode, space, chunkinfo,
+				chunkcount, devinfo, devcount);
+
+	free(space);
+	return 0;
+}
+
+/* Usage/help lines for "btrfs filesystem usage"; NULL terminated. */
+const char * const cmd_filesystem_usage_usage[] = {
+ "btrfs filesystem usage [options] <path> [<path>..]",
+ "Show detailed information about internal filesystem usage .",
+ HELPINFO_UNITS_SHORT_LONG,
+ "-T show data in tabular format",
+ NULL
+};
+
+/*
+ * Entry point of "btrfs filesystem usage".
+ *
+ * For every path argument: open it, load chunk and device info, print the
+ * overall summary and then the per-chunk breakdown (tabular with -T).
+ * Processing stops at the first path that fails.
+ *
+ * Returns 0 on success, 1 if any step failed.
+ */
+int cmd_filesystem_usage(int argc, char **argv)
+{
+ int ret = 0;
+ unsigned unit_mode;
+ int i;
+ int more_than_one = 0;
+ int tabular = 0;
+
+ /* presumably consumes the unit options from argv - TODO confirm */
+ unit_mode = get_unit_mode_from_arg(&argc, argv, 1);
+
+ /* restart getopt scanning from the first argument */
+ optind = 1;
+ while (1) {
+ int c;
+
+ c = getopt(argc, argv, "T");
+ if (c < 0)
+ break;
+
+ switch (c) {
+ case 'T':
+ tabular = 1;
+ break;
+ default:
+ usage(cmd_filesystem_usage_usage);
+ }
+ }
+
+ if (check_argc_min(argc - optind, 1))
+ usage(cmd_filesystem_usage_usage);
+
+ for (i = optind; i < argc; i++) {
+ int fd;
+ DIR *dirstream = NULL;
+ struct chunk_info *chunkinfo = NULL;
+ struct device_info *devinfo = NULL;
+ int chunkcount = 0;
+ int devcount = 0;
+
+ fd = btrfs_open_dir(argv[i], &dirstream, 1);
+ if (fd < 0) {
+ ret = 1;
+ goto out;
+ }
+ /* blank line between the reports of consecutive paths */
+ if (more_than_one)
+ printf("\n");
+
+ ret = load_chunk_and_device_info(fd, &chunkinfo, &chunkcount,
+ &devinfo, &devcount);
+ if (ret)
+ goto cleanup;
+
+ ret = print_filesystem_usage_overall(fd, chunkinfo, chunkcount,
+ devinfo, devcount, argv[i], unit_mode);
+ if (ret)
+ goto cleanup;
+ printf("\n");
+ ret = print_filesystem_usage_by_chunk(fd, chunkinfo, chunkcount,
+ devinfo, devcount, argv[i], unit_mode, tabular);
+ /* per-path cleanup, reached on success and on failure */
+cleanup:
+ close_file_or_dir(fd, dirstream);
+ free(chunkinfo);
+ free(devinfo);
+
+ if (ret)
+ goto out;
+ more_than_one = 1;
+ }
+
+out:
+ /* normalize any nonzero error to 1 */
+ return !!ret;
+}
+
+/*
+ * Print one "type,profile: size" line for every chunk record that belongs
+ * to @devinfo, followed by an "Unallocated" line holding the device's
+ * filesystem-occupied size minus the sum of the printed sizes.
+ * @fd is not used by this function.
+ */
+void print_device_chunks(int fd, struct device_info *devinfo,
+		struct chunk_info *chunks_info_ptr,
+		int chunks_info_count, unsigned unit_mode)
+{
+	u64 sum = 0;
+	int n;
+
+	for (n = 0; n < chunks_info_count; n++) {
+		struct chunk_info *ci = &chunks_info_ptr[n];
+		const char *type_name;
+		const char *profile_name;
+		u64 bytes;
+
+		if (ci->devid != devinfo->devid)
+			continue;
+
+		type_name = btrfs_group_type_str(ci->type);
+		profile_name = btrfs_group_profile_str(ci->type);
+		bytes = calc_chunk_size(ci);
+
+		/* pad so that the size column lines up across rows */
+		printf(" %s,%s:%*s%10s\n",
+			type_name,
+			profile_name,
+			(int)(20 - strlen(type_name) - strlen(profile_name)), "",
+			pretty_size_mode(bytes, unit_mode));
+
+		sum += bytes;
+	}
+
+	printf(" Unallocated: %*s%10s\n",
+		(int)(20 - strlen("Unallocated")), "",
+		pretty_size_mode(devinfo->size - sum, unit_mode));
+}
+
+/*
+ * Print the "Device size" line for @devinfo (the block device size).
+ * @fd is not used by this function.
+ */
+void print_device_sizes(int fd, struct device_info *devinfo, unsigned unit_mode)
+{
+	const char *human = pretty_size_mode(devinfo->device_size, unit_mode);
+
+	printf(" Device size: %*s%10s\n",
+		(int)(20 - strlen("Device size")), "", human);
+#if 0
+	/*
+	 * The term has not seen an agreement and we don't want to change it
+	 * once it's in non-development branches or even released.
+	 */
+	printf(" FS occupied: %*s%10s\n",
+		(int)(20 - strlen("FS occupied")), "",
+		pretty_size_mode(devinfo->size, unit_mode));
+#endif
+}
diff --git a/cmds-fi-usage.h b/cmds-fi-usage.h
new file mode 100644
index 00000000..adb8ad30
--- /dev/null
+++ b/cmds-fi-usage.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __CMDS_FI_USAGE_H__
+#define __CMDS_FI_USAGE_H__
+
+#include "kerncompat.h"
+#include "ioctl.h"
+
+/* Help text and entry point of the "btrfs filesystem usage" command. */
+extern const char * const cmd_filesystem_usage_usage[];
+int cmd_filesystem_usage(int argc, char **argv);
+
+/* Per-device record handed to the usage printing helpers. */
+struct device_info {
+ /* Device id; matched against chunk_info.devid */
+ u64 devid;
+ /* Device path as printed in the reports */
+ char path[BTRFS_DEVICE_PATH_NAME_MAX];
+ /* Size of the block device */
+ u64 device_size;
+ /* Size that's occupied by the filesystem, can be changed via resize */
+ u64 size;
+};
+
+/*
+ * To store the size information about the chunks:
+ * the chunk infos are grouped by the tuple (type, devid, num_stripes),
+ * i.e. if two chunks are of the same type (RAID1, DUP...), are on the
+ * same disk and have the same number of stripes, then their sizes are
+ * grouped
+ */
+struct chunk_info {
+ /* Block group flags (type and profile bits) */
+ u64 type;
+ /* Grouped size; presumably converted by calc_chunk_size() - TODO confirm */
+ u64 size;
+ /* Id of the device this record refers to */
+ u64 devid;
+ u64 num_stripes;
+};
+
+/*
+ * Fill *chunkinfo / *devinfo and their counts for the filesystem open at fd.
+ * Callers treat a nonzero return as failure and free() both arrays.
+ */
+int load_chunk_and_device_info(int fd, struct chunk_info **chunkinfo,
+ int *chunkcount, struct device_info **devinfo, int *devcount);
+/* Print the per-type allocation and the unallocated space of one device. */
+void print_device_chunks(int fd, struct device_info *devinfo,
+ struct chunk_info *chunks_info_ptr,
+ int chunks_info_count, unsigned unit_mode);
+/* Print the "Device size" line of one device. */
+void print_device_sizes(int fd, struct device_info *devinfo, unsigned unit_mode);
+
+#endif
diff --git a/cmds-filesystem.c b/cmds-filesystem.c
new file mode 100644
index 00000000..4c6e856b
--- /dev/null
+++ b/cmds-filesystem.c
@@ -0,0 +1,1295 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <uuid/uuid.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <mntent.h>
+#include <linux/limits.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+#include "volumes.h"
+#include "commands.h"
+#include "cmds-fi-usage.h"
+#include "list_sort.h"
+#include "disk-io.h"
+
+
+/*
+ * for btrfs fi show, we maintain a hash of fsids we've already printed.
+ * This way we don't print dups if a given FS is mounted more than once.
+ *
+ * The bucket is selected by the first byte of the fsid; entries sharing a
+ * bucket are chained through ->next.
+ */
+#define SEEN_FSID_HASH_SIZE 256
+
+/* One recorded fsid, a node of a singly linked bucket chain. */
+struct seen_fsid {
+ u8 fsid[BTRFS_FSID_SIZE];
+ struct seen_fsid *next;
+};
+
+/* Bucket heads; all empty at start, released by free_seen_fsid(). */
+static struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
+
+/*
+ * Check whether @fsid has already been recorded in the seen-fsid hash.
+ *
+ * The bucket chosen by the first fsid byte is walked and every entry is
+ * compared in full, so two different filesystems whose fsids merely share
+ * the first byte are not mistaken for each other.
+ *
+ * Returns 1 if @fsid was recorded before, 0 otherwise.
+ */
+static int is_seen_fsid(u8 *fsid)
+{
+	u8 hash = fsid[0];
+	int slot = hash % SEEN_FSID_HASH_SIZE;
+	struct seen_fsid *seen;
+
+	for (seen = seen_fsid_hash[slot]; seen; seen = seen->next) {
+		if (memcmp(seen->fsid, fsid, BTRFS_FSID_SIZE) == 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Record @fsid in the seen-fsid hash, appending it to the end of its
+ * bucket chain.
+ *
+ * Returns 0 when the fsid was added, -EEXIST when it is already recorded
+ * and -ENOMEM when the new entry cannot be allocated.
+ */
+static int add_seen_fsid(u8 *fsid)
+{
+	u8 hash = fsid[0];
+	int slot = hash % SEEN_FSID_HASH_SIZE;
+	struct seen_fsid **link = &seen_fsid_hash[slot];
+	struct seen_fsid *entry;
+
+	/* walk to the tail, bailing out if the fsid is already there */
+	while (*link) {
+		if (memcmp((*link)->fsid, fsid, BTRFS_FSID_SIZE) == 0)
+			return -EEXIST;
+		link = &(*link)->next;
+	}
+
+	entry = malloc(sizeof(*entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->next = NULL;
+	memcpy(entry->fsid, fsid, BTRFS_FSID_SIZE);
+
+	/* *link is either the bucket head or the last entry's ->next */
+	*link = entry;
+
+	return 0;
+}
+
+/* Release every entry of the seen-fsid hash and reset all buckets. */
+static void free_seen_fsid(void)
+{
+	int slot;
+
+	for (slot = 0; slot < SEEN_FSID_HASH_SIZE; slot++) {
+		struct seen_fsid *cur = seen_fsid_hash[slot];
+
+		while (cur) {
+			struct seen_fsid *following = cur->next;
+
+			free(cur);
+			cur = following;
+		}
+		seen_fsid_hash[slot] = NULL;
+	}
+}
+
+/* Usage line of the "btrfs filesystem" command group; NULL terminated. */
+static const char * const filesystem_cmd_group_usage[] = {
+ "btrfs filesystem [<group>] <command> [<args>]",
+ NULL
+};
+
+/* Usage/help lines for "btrfs filesystem df"; NULL terminated. */
+static const char * const cmd_filesystem_df_usage[] = {
+ "btrfs filesystem df [options] <path>",
+ "Show space usage information for a mount point",
+ HELPINFO_UNITS_SHORT_LONG,
+ NULL
+};
+
+/*
+ * Query the space info of the filesystem open at @fd.
+ *
+ * The ioctl is issued twice: first with zero slots to learn how many
+ * entries exist, then with a buffer large enough to hold them.
+ *
+ * On success returns 0 and stores a malloc()ed buffer in *@sargs_ret that
+ * the caller must free().  On failure returns a negative errno value and
+ * leaves *@sargs_ret untouched.
+ */
+static int get_df(int fd, struct btrfs_ioctl_space_args **sargs_ret)
+{
+	u64 count = 0;
+	int ret;
+	int e;
+	struct btrfs_ioctl_space_args *sargs;
+
+	sargs = malloc(sizeof(struct btrfs_ioctl_space_args));
+	if (!sargs)
+		return -ENOMEM;
+
+	sargs->space_slots = 0;
+	sargs->total_spaces = 0;
+
+	ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
+	if (ret < 0) {
+		/* save errno: error() and free() are allowed to change it */
+		e = errno;
+		error("cannot get space info: %s\n", strerror(e));
+		free(sargs);
+		return -e;
+	}
+	/* This really should never happen */
+	if (!sargs->total_spaces) {
+		free(sargs);
+		return -ENOENT;
+	}
+	count = sargs->total_spaces;
+	free(sargs);
+
+	sargs = malloc(sizeof(struct btrfs_ioctl_space_args) +
+			(count * sizeof(struct btrfs_ioctl_space_info)));
+	if (!sargs)
+		return -ENOMEM;
+
+	sargs->space_slots = count;
+	sargs->total_spaces = 0;
+	ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
+	if (ret < 0) {
+		/* same as above: report and return the ioctl's errno */
+		e = errno;
+		error("cannot get space info with %llu slots: %s",
+				count, strerror(e));
+		free(sargs);
+		return -e;
+	}
+	*sargs_ret = sargs;
+	return 0;
+}
+
+/* Print one "type, profile: total=..., used=..." line per space info. */
+static void print_df(struct btrfs_ioctl_space_args *sargs, unsigned unit_mode)
+{
+	u64 n;
+
+	for (n = 0; n < sargs->total_spaces; n++) {
+		struct btrfs_ioctl_space_info *info = &sargs->spaces[n];
+
+		printf("%s, %s: total=%s, used=%s\n",
+			btrfs_group_type_str(info->flags),
+			btrfs_group_profile_str(info->flags),
+			pretty_size_mode(info->total_bytes, unit_mode),
+			pretty_size_mode(info->used_bytes, unit_mode));
+	}
+}
+
+/*
+ * Entry point of "btrfs filesystem df": print the space info of the
+ * filesystem at the single path argument.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int cmd_filesystem_df(int argc, char **argv)
+{
+	struct btrfs_ioctl_space_args *space = NULL;
+	DIR *dirstream = NULL;
+	unsigned unit_mode;
+	int fd;
+	int err;
+
+	unit_mode = get_unit_mode_from_arg(&argc, argv, 1);
+
+	/* exactly one non-option argument is accepted */
+	if (argc != 2 || argv[1][0] == '-')
+		usage(cmd_filesystem_df_usage);
+
+	fd = btrfs_open_dir(argv[1], &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	err = get_df(fd, &space);
+	if (err) {
+		error("get_df failed %s", strerror(-err));
+	} else {
+		print_df(space, unit_mode);
+		free(space);
+	}
+
+	close_file_or_dir(fd, dirstream);
+	return !!err;
+}
+
+/*
+ * Decide whether a mounted filesystem matches the user's @search term.
+ *
+ * A match is a prefix match against the textual fsid, or an exact match
+ * against a non-empty @label, or an exact match against the mount point.
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+static int match_search_item_kernel(__u8 *fsid, char *mnt, char *label,
+		char *search)
+{
+	char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+	int cmp_len = strlen(search);
+
+	cmp_len = min(cmp_len, BTRFS_UUID_UNPARSED_SIZE);
+	uuid_unparse(fsid, uuidbuf);
+
+	if (strncmp(uuidbuf, search, cmp_len) == 0)
+		return 1;
+
+	if (label[0] != '\0' && !strcmp(label, search))
+		return 1;
+
+	if (!strcmp(mnt, search))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Decide whether a scanned filesystem matches the user's @search term.
+ *
+ * A match is a prefix match against the textual fsid, or an exact match
+ * against the label or the name of any of its devices.
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+static int uuid_search(struct btrfs_fs_devices *fs_devices, char *search)
+{
+	char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+	struct btrfs_device *dev;
+	int cmp_len = strlen(search);
+
+	cmp_len = min(cmp_len, BTRFS_UUID_UNPARSED_SIZE);
+	uuid_unparse(fs_devices->fsid, uuidbuf);
+	if (strncmp(uuidbuf, search, cmp_len) == 0)
+		return 1;
+
+	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
+		if (dev->label && !strcmp(dev->label, search))
+			return 1;
+		if (!strcmp(dev->name, search))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * list_sort() comparison callback: order devices by ascending devid.
+ * @priv is unused.
+ */
+static int cmp_device_id(void *priv, struct list_head *a,
+		struct list_head *b)
+{
+	const struct btrfs_device *lhs = list_entry(a, struct btrfs_device,
+			dev_list);
+	const struct btrfs_device *rhs = list_entry(b, struct btrfs_device,
+			dev_list);
+
+	if (lhs->devid < rhs->devid)
+		return -1;
+	if (lhs->devid > rhs->devid)
+		return 1;
+	return 0;
+}
+
+/*
+ * Merge the devices of a seed filesystem into @all_devices.
+ *
+ * When a devid occurs in both lists, the entry with the lower generation
+ * is unlinked and freed before the lists are spliced.  If both entries
+ * have the same generation neither is removed.
+ */
+static void splice_device_list(struct list_head *seed_devices,
+ struct list_head *all_devices)
+{
+ struct btrfs_device *in_all, *next_all;
+ struct btrfs_device *in_seed, *next_seed;
+
+ /* the _safe iterators are needed because entries are freed below */
+ list_for_each_entry_safe(in_all, next_all, all_devices, dev_list) {
+ list_for_each_entry_safe(in_seed, next_seed, seed_devices,
+ dev_list) {
+ if (in_all->devid == in_seed->devid) {
+ /*
+ * When do dev replace in a sprout fs
+ * to a dev in its seed fs, the replacing
+ * dev will reside in the sprout fs and
+ * the replaced dev will still exist
+ * in the seed fs.
+ * So pick the latest one when showing
+ * the sprout fs.
+ */
+ if (in_all->generation
+ < in_seed->generation) {
+ list_del(&in_all->dev_list);
+ free(in_all);
+ } else if (in_all->generation
+ > in_seed->generation) {
+ list_del(&in_seed->dev_list);
+ free(in_seed);
+ }
+ /* first devid match ends the scan for this in_all */
+ break;
+ }
+ }
+ }
+
+ list_splice(seed_devices, all_devices);
+}
+
+/*
+ * Print one line per device of @fs_devices, including the devices of its
+ * whole seed chain, sorted by devid.  *@devs_found is incremented once for
+ * every printed device.
+ */
+static void print_devices(struct btrfs_fs_devices *fs_devices,
+		u64 *devs_found, unsigned unit_mode)
+{
+	struct list_head *merged = &fs_devices->devices;
+	struct btrfs_fs_devices *seed;
+	struct btrfs_device *dev;
+
+	/* add all devices of seed fs to the fs to be printed */
+	for (seed = fs_devices->seed; seed; seed = seed->seed)
+		splice_device_list(&seed->devices, merged);
+
+	list_sort(NULL, merged, cmp_device_id);
+
+	list_for_each_entry(dev, merged, dev_list) {
+		printf("\tdevid %4llu size %s used %s path %s\n",
+			(unsigned long long)dev->devid,
+			pretty_size_mode(dev->total_bytes, unit_mode),
+			pretty_size_mode(dev->bytes_used, unit_mode),
+			dev->name);
+
+		*devs_found += 1;
+	}
+}
+
+/*
+ * Print the summary of one scanned (not mounted) filesystem: label, uuid,
+ * device count, bytes used and the device list.  Nothing is printed when
+ * the fsid cannot be newly recorded in the seen-fsid hash.
+ */
+static void print_one_uuid(struct btrfs_fs_devices *fs_devices,
+		unsigned unit_mode)
+{
+	char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+	struct btrfs_device *first;
+	u64 expected;
+	u64 printed = 0;
+
+	if (add_seen_fsid(fs_devices->fsid))
+		return;
+
+	uuid_unparse(fs_devices->fsid, uuidbuf);
+	first = list_entry(fs_devices->devices.next, struct btrfs_device,
+			dev_list);
+
+	if (!first->label || !first->label[0])
+		printf("Label: none ");
+	else
+		printf("Label: '%s' ", first->label);
+
+	/* read the totals before the device list gets spliced and sorted */
+	expected = first->total_devs;
+	printf(" uuid: %s\n\tTotal devices %llu FS bytes used %s\n", uuidbuf,
+		(unsigned long long)expected,
+		pretty_size_mode(first->super_bytes_used, unit_mode));
+
+	print_devices(fs_devices, &printed, unit_mode);
+
+	if (printed < expected)
+		printf("\t*** Some devices missing\n");
+	printf("\n");
+}
+
+/*
+ * Sum the used_bytes of every space info entry reported by the space
+ * info ioctl.
+ */
+static u64 calc_used_bytes(struct btrfs_ioctl_space_args *si)
+{
+	u64 sum = 0;
+	int n = 0;
+
+	while (n < si->total_spaces) {
+		sum += si->spaces[n].used_bytes;
+		n++;
+	}
+
+	return sum;
+}
+
+/*
+ * Print the summary of one mounted filesystem: label, uuid, device count,
+ * bytes used and one line per device that can be opened.  Devices that
+ * cannot be opened are reported collectively as missing.
+ *
+ * Returns 0 on success or when the fsid was already printed, otherwise the
+ * error returned by add_seen_fsid().
+ */
+static int print_one_fs(struct btrfs_ioctl_fs_info_args *fs_info,
+		struct btrfs_ioctl_dev_info_args *dev_info,
+		struct btrfs_ioctl_space_args *space_info,
+		char *label, unsigned unit_mode)
+{
+	char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+	int any_missing = 0;
+	int idx;
+	int err;
+
+	err = add_seen_fsid(fs_info->fsid);
+	if (err)
+		return err == -EEXIST ? 0 : err;
+
+	uuid_unparse(fs_info->fsid, uuidbuf);
+	if (!label || !*label)
+		printf("Label: none ");
+	else
+		printf("Label: '%s' ", label);
+
+	printf(" uuid: %s\n\tTotal devices %llu FS bytes used %s\n", uuidbuf,
+		fs_info->num_devices,
+		pretty_size_mode(calc_used_bytes(space_info),
+				unit_mode));
+
+	for (idx = 0; idx < fs_info->num_devices; idx++) {
+		struct btrfs_ioctl_dev_info_args *di;
+		char *resolved;
+		int probe;
+
+		di = (struct btrfs_ioctl_dev_info_args *)&dev_info[idx];
+
+		/* Add check for missing devices even mounted */
+		probe = open((char *)di->path, O_RDONLY);
+		if (probe < 0) {
+			any_missing = 1;
+			continue;
+		}
+		close(probe);
+
+		resolved = canonicalize_path((char *)di->path);
+		printf("\tdevid %4llu size %s used %s path %s\n",
+			di->devid,
+			pretty_size_mode(di->total_bytes, unit_mode),
+			pretty_size_mode(di->bytes_used, unit_mode),
+			resolved);
+
+		free(resolved);
+	}
+
+	if (any_missing)
+		printf("\t*** Some devices missing\n");
+	printf("\n");
+	return 0;
+}
+
+/*
+ * Walk /proc/self/mounts and print every mounted btrfs filesystem that
+ * matches @search (all of them when @search is NULL) and was not printed
+ * before.
+ *
+ * The device info array returned by get_fs_info() is released on every
+ * path out of a loop iteration, including the "already seen" skip.
+ *
+ * Returns 0 if at least one filesystem was printed, 1 otherwise (also when
+ * the mount table cannot be opened or an error stops the scan early).
+ */
+static int btrfs_scan_kernel(void *search, unsigned unit_mode)
+{
+	int ret = 0, fd;
+	int found = 0;
+	FILE *f;
+	struct mntent *mnt;
+	struct btrfs_ioctl_fs_info_args fs_info_arg;
+	struct btrfs_ioctl_dev_info_args *dev_info_arg = NULL;
+	struct btrfs_ioctl_space_args *space_info_arg = NULL;
+	char label[BTRFS_LABEL_SIZE];
+
+	f = setmntent("/proc/self/mounts", "r");
+	if (f == NULL)
+		return 1;
+
+	memset(label, 0, sizeof(label));
+	while ((mnt = getmntent(f)) != NULL) {
+		if (strcmp(mnt->mnt_type, "btrfs"))
+			continue;
+		ret = get_fs_info(mnt->mnt_dir, &fs_info_arg,
+				&dev_info_arg);
+		if (ret) {
+			kfree(dev_info_arg);
+			goto out;
+		}
+
+		/* skip all fs already shown as mounted fs */
+		if (is_seen_fsid(fs_info_arg.fsid)) {
+			/* do not leak the device info of the skipped fs */
+			kfree(dev_info_arg);
+			dev_info_arg = NULL;
+			continue;
+		}
+
+		ret = get_label_mounted(mnt->mnt_dir, label);
+		/* provide backward kernel compatibility */
+		if (ret == -ENOTTY)
+			ret = get_label_unmounted(
+				(const char *)dev_info_arg->path, label);
+
+		if (ret) {
+			kfree(dev_info_arg);
+			goto out;
+		}
+		if (search && !match_search_item_kernel(fs_info_arg.fsid,
+					mnt->mnt_dir, label, search)) {
+			kfree(dev_info_arg);
+			dev_info_arg = NULL;
+			continue;
+		}
+
+		fd = open(mnt->mnt_dir, O_RDONLY);
+		if ((fd != -1) && !get_df(fd, &space_info_arg)) {
+			print_one_fs(&fs_info_arg, dev_info_arg,
+					space_info_arg, label, unit_mode);
+			kfree(space_info_arg);
+			memset(label, 0, sizeof(label));
+			found = 1;
+		}
+		if (fd != -1)
+			close(fd);
+		kfree(dev_info_arg);
+		dev_info_arg = NULL;
+	}
+
+out:
+	endmntent(f);
+	return !found;
+}
+
+/*
+ * Read the super block of the device @dev and copy its fsid into @fsid.
+ *
+ * Returns 0 on success, -errno if the device cannot be opened, or the
+ * nonzero result of btrfs_read_dev_super() if the super block cannot be
+ * read.
+ */
+static int dev_to_fsid(char *dev, __u8 *fsid)
+{
+	char buf[BTRFS_SUPER_INFO_SIZE];
+	struct btrfs_super_block *sb = (struct btrfs_super_block *)buf;
+	int fd;
+	int ret;
+
+	fd = open(dev, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	ret = btrfs_read_dev_super(fd, sb, BTRFS_SUPER_INFO_OFFSET, 0);
+	if (!ret)
+		memcpy(fsid, sb->fsid, BTRFS_FSID_SIZE);
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Release a private fs_devices copy: every device on its list (with name
+ * and label), the fs_devices structures of its seed chain, and finally
+ * the structure itself after unlinking it from the list it is on.
+ */
+static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
+{
+	struct btrfs_fs_devices *seed;
+
+	while (!list_empty(&fs_devices->devices)) {
+		struct btrfs_device *dev;
+
+		dev = list_first_entry(&fs_devices->devices,
+				struct btrfs_device, dev_list);
+		list_del(&dev->dev_list);
+
+		free(dev->name);
+		free(dev->label);
+		free(dev);
+	}
+
+	/* free seed fs chain */
+	seed = fs_devices->seed;
+	fs_devices->seed = NULL;
+	while (seed) {
+		struct btrfs_fs_devices *following = seed->seed;
+
+		free(seed);
+		seed = following;
+	}
+
+	list_del(&fs_devices->list);
+	free(fs_devices);
+}
+
+/*
+ * Copy the fields of @src that "fi show" needs into @dst; name and label
+ * are duplicated with strdup().
+ *
+ * Returns 0 on success, -ENOMEM if a string cannot be duplicated (a name
+ * that was already duplicated is freed again in that case).
+ */
+static int copy_device(struct btrfs_device *dst,
+		struct btrfs_device *src)
+{
+	dst->devid = src->devid;
+	memcpy(dst->uuid, src->uuid, BTRFS_UUID_SIZE);
+
+	dst->name = src->name ? strdup(src->name) : NULL;
+	if (src->name && !dst->name)
+		return -ENOMEM;
+
+	dst->label = src->label ? strdup(src->label) : NULL;
+	if (src->label && !dst->label) {
+		free(dst->name);
+		return -ENOMEM;
+	}
+
+	dst->total_devs = src->total_devs;
+	dst->super_bytes_used = src->super_bytes_used;
+	dst->total_bytes = src->total_bytes;
+	dst->bytes_used = src->bytes_used;
+	dst->generation = src->generation;
+
+	return 0;
+}
+
+/*
+ * Initialize @dst as a private copy of @src: same fsid, no seed, and a
+ * freshly allocated copy of every device on @src's device list.
+ *
+ * Returns 0 on success or a negative errno; devices copied before the
+ * failure stay on @dst's list.
+ */
+static int copy_fs_devices(struct btrfs_fs_devices *dst,
+		struct btrfs_fs_devices *src)
+{
+	struct btrfs_device *from;
+
+	memcpy(dst->fsid, src->fsid, BTRFS_FSID_SIZE);
+	INIT_LIST_HEAD(&dst->devices);
+	dst->seed = NULL;
+
+	list_for_each_entry(from, &src->devices, dev_list) {
+		struct btrfs_device *clone;
+		int err;
+
+		clone = malloc(sizeof(*clone));
+		if (!clone)
+			return -ENOMEM;
+
+		err = copy_device(clone, from);
+		if (err) {
+			free(clone);
+			return err;
+		}
+
+		list_add(&clone->dev_list, &dst->devices);
+		clone->fs_devices = dst;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the filesystem with @seed's fsid on the @fs_uuids list and copy
+ * it into @copy.
+ *
+ * Returns the result of copy_fs_devices() for the first match, or 1 when
+ * no filesystem on the list has that fsid.
+ */
+static int find_and_copy_seed(struct btrfs_fs_devices *seed,
+		struct btrfs_fs_devices *copy,
+		struct list_head *fs_uuids) {
+	struct btrfs_fs_devices *candidate;
+
+	list_for_each_entry(candidate, fs_uuids, list) {
+		if (memcmp(seed->fsid, candidate->fsid, BTRFS_FSID_SIZE) != 0)
+			continue;
+		return copy_fs_devices(copy, candidate);
+	}
+
+	return 1;
+}
+
+/*
+ * Compare the number of devices on the list with the total_devs value of
+ * the first device.  Returns nonzero when they differ, which the caller
+ * takes as a sign that the filesystem may have seed devices.
+ */
+static int has_seed_devices(struct btrfs_fs_devices *fs_devices)
+{
+	struct btrfs_device *first;
+	struct list_head *pos;
+	int expected;
+	int counted = 0;
+
+	first = list_first_entry(&fs_devices->devices, struct btrfs_device,
+			dev_list);
+	expected = first->total_devs;
+
+	list_for_each(pos, &fs_devices->devices)
+		counted++;
+
+	return expected != counted;
+}
+
+/*
+ * Build private copies of the scanned filesystems on @all_uuids.
+ *
+ * With a @search term only matching filesystems are considered and
+ * *@found (if non-NULL) is set to 1 on the first match.  Filesystems that
+ * were already shown as mounted are skipped.
+ *
+ * Returns 0 on success or a negative errno on allocation/copy failure.
+ */
+static int search_umounted_fs_uuids(struct list_head *all_uuids,
+		char *search, int *found)
+{
+	struct list_head *scanned = btrfs_scanned_uuids();
+	struct btrfs_fs_devices *fs;
+
+	/*
+	 * The fs_uuids list is global, and open_ctree_* will
+	 * modify it, make a private copy here
+	 */
+	list_for_each_entry(fs, scanned, list) {
+		struct btrfs_fs_devices *clone;
+		int err;
+
+		/* don't bother handle all fs, if search target specified */
+		if (search) {
+			if (!uuid_search(fs, search))
+				continue;
+			if (found)
+				*found = 1;
+		}
+
+		/* skip all fs already shown as mounted fs */
+		if (is_seen_fsid(fs->fsid))
+			continue;
+
+		clone = calloc(1, sizeof(*clone));
+		if (!clone)
+			return -ENOMEM;
+
+		err = copy_fs_devices(clone, fs);
+		if (err) {
+			free(clone);
+			return err;
+		}
+
+		list_add(&clone->list, all_uuids);
+	}
+
+	return 0;
+}
+
+/*
+ * For every filesystem on @all_uuids that appears to have seed devices,
+ * open it to discover its seed chain and attach private copies of the
+ * seed filesystems under ->seed.
+ *
+ * Filesystems that cannot be opened are silently left without a seed
+ * chain.  Returns 0 on success, -ENOMEM or the nonzero result of
+ * find_and_copy_seed() on failure.
+ */
+static int map_seed_devices(struct list_head *all_uuids)
+{
+ struct btrfs_fs_devices *cur_fs, *cur_seed;
+ struct btrfs_fs_devices *seed_copy;
+ struct btrfs_fs_devices *opened_fs;
+ struct btrfs_device *device;
+ struct btrfs_fs_info *fs_info;
+ struct list_head *fs_uuids;
+ int ret = 0;
+
+ fs_uuids = btrfs_scanned_uuids();
+
+ list_for_each_entry(cur_fs, all_uuids, list) {
+ device = list_first_entry(&cur_fs->devices,
+ struct btrfs_device, dev_list);
+ /*
+ * NOTE(review): list_first_entry() presumably never yields NULL,
+ * even for an empty list - confirm this check is effective.
+ */
+ if (!device)
+ continue;
+
+ /* skip fs without seeds */
+ if (!has_seed_devices(cur_fs))
+ continue;
+
+ /*
+ * open_ctree_* detects seed/sprout mapping
+ */
+ fs_info = open_ctree_fs_info(device->name, 0, 0,
+ OPEN_CTREE_PARTIAL);
+ if (!fs_info)
+ continue;
+
+ /*
+ * copy the seed chain under the opened fs
+ */
+ opened_fs = fs_info->fs_devices;
+ cur_seed = cur_fs;
+ while (opened_fs->seed) {
+ seed_copy = malloc(sizeof(*seed_copy));
+ if (!seed_copy) {
+ ret = -ENOMEM;
+ goto fail_out;
+ }
+ ret = find_and_copy_seed(opened_fs->seed, seed_copy,
+ fs_uuids);
+ if (ret) {
+ free(seed_copy);
+ goto fail_out;
+ }
+
+ /* link the copy and descend one level in both chains */
+ cur_seed->seed = seed_copy;
+
+ opened_fs = opened_fs->seed;
+ cur_seed = cur_seed->seed;
+ }
+
+ close_ctree(fs_info->chunk_root);
+ }
+
+out:
+ return ret;
+ /* failure inside the seed loop: the fs is still open, close it first */
+fail_out:
+ close_ctree(fs_info->chunk_root);
+ goto out;
+}
+
+/* Usage/help lines for "btrfs filesystem show"; NULL terminated. */
+static const char * const cmd_filesystem_show_usage[] = {
+ "btrfs filesystem show [options] [<path>|<uuid>|<device>|label]",
+ "Show the structure of a filesystem",
+ "-d|--all-devices show only disks under /dev containing btrfs filesystem",
+ "-m|--mounted show only mounted btrfs",
+ HELPINFO_UNITS_LONG,
+ "If no argument is given, structure of all present filesystems is shown.",
+ NULL
+};
+
+/*
+ * Entry point of "btrfs filesystem show".
+ *
+ * Mounted filesystems are printed first (unless -d is given), then the
+ * unmounted ones found by the blkid scan (unless -m is given).  An optional
+ * search argument restricts the output; a block device argument is turned
+ * into its mount point if mounted, otherwise into its fsid.
+ *
+ * Returns 0 on success, nonzero on error or when the search term matched
+ * nothing.
+ */
+static int cmd_filesystem_show(int argc, char **argv)
+{
+ LIST_HEAD(all_uuids);
+ struct btrfs_fs_devices *fs_devices;
+ char *search = NULL;
+ int ret;
+ /* default, search both kernel and udev */
+ int where = -1;
+ int type = 0;
+ char mp[PATH_MAX];
+ char path[PATH_MAX];
+ __u8 fsid[BTRFS_FSID_SIZE];
+ char uuid_buf[BTRFS_UUID_UNPARSED_SIZE];
+ unsigned unit_mode;
+ int found = 0;
+
+ unit_mode = get_unit_mode_from_arg(&argc, argv, 0);
+
+ while (1) {
+ int c;
+ static const struct option long_options[] = {
+ { "all-devices", no_argument, NULL, 'd'},
+ { "mounted", no_argument, NULL, 'm'},
+ { NULL, 0, NULL, 0 }
+ };
+
+ c = getopt_long(argc, argv, "dm", long_options, NULL);
+ if (c < 0)
+ break;
+ switch (c) {
+ case 'd':
+ where = BTRFS_SCAN_LBLKID;
+ break;
+ case 'm':
+ where = BTRFS_SCAN_MOUNTED;
+ break;
+ default:
+ usage(cmd_filesystem_show_usage);
+ }
+ }
+
+ /* at most one search argument after the options */
+ if (check_argc_max(argc, optind + 1))
+ usage(cmd_filesystem_show_usage);
+
+ if (argc > optind) {
+ search = argv[optind];
+ if (*search == 0)
+ usage(cmd_filesystem_show_usage);
+ type = check_arg_type(search);
+
+ /*
+ * For search is a device:
+ * realpath do /dev/mapper/XX => /dev/dm-X
+ * which is required by BTRFS_SCAN_DEV
+ * For search is a mountpoint:
+ * realpath do /mnt/btrfs/ => /mnt/btrfs
+ * which shall be recognized by btrfs_scan_kernel()
+ */
+ if (realpath(search, path))
+ search = path;
+
+ /*
+ * Needs special handling if input arg is block dev And if
+ * input arg is mount-point just print it right away
+ */
+ if (type == BTRFS_ARG_BLKDEV && where != BTRFS_SCAN_LBLKID) {
+ ret = get_btrfs_mount(search, mp, sizeof(mp));
+ if (!ret) {
+ /* given block dev is mounted */
+ search = mp;
+ type = BTRFS_ARG_MNTPOINT;
+ } else {
+ /* not mounted: search the scanned devices by fsid */
+ ret = dev_to_fsid(search, fsid);
+ if (ret) {
+ error("no btrfs on %s", search);
+ return 1;
+ }
+ uuid_unparse(fsid, uuid_buf);
+ search = uuid_buf;
+ type = BTRFS_ARG_UUID;
+ goto devs_only;
+ }
+ }
+ }
+
+ if (where == BTRFS_SCAN_LBLKID)
+ goto devs_only;
+
+ /* show mounted btrfs */
+ ret = btrfs_scan_kernel(search, unit_mode);
+ if (search && !ret) {
+ /* since search is found we are done */
+ goto out;
+ }
+
+ /* shows mounted only */
+ if (where == BTRFS_SCAN_MOUNTED)
+ goto out;
+
+devs_only:
+ ret = btrfs_scan_lblkid();
+
+ if (ret) {
+ error("blkid device scan returned %d\n", ret);
+ return 1;
+ }
+
+ ret = search_umounted_fs_uuids(&all_uuids, search, &found);
+ if (ret < 0) {
+ error("searching target device returned error %d", ret);
+ return 1;
+ }
+
+ /*
+ * The seed/sprout mapping are not detected yet,
+ * do mapping build for all umounted fs
+ */
+ ret = map_seed_devices(&all_uuids);
+ if (ret) {
+ error("mapping seed devices returned error %d", ret);
+ return 1;
+ }
+
+ list_for_each_entry(fs_devices, &all_uuids, list)
+ print_one_uuid(fs_devices, unit_mode);
+
+ /* a search term that matched no scanned filesystem is an error */
+ if (search && !found)
+ ret = 1;
+
+ /* release the private copies built by search_umounted_fs_uuids() */
+ while (!list_empty(&all_uuids)) {
+ fs_devices = list_entry(all_uuids.next,
+ struct btrfs_fs_devices, list);
+ free_fs_devices(fs_devices);
+ }
+out:
+ free_seen_fsid();
+ return ret;
+}
+
+/* Usage/help lines for "btrfs filesystem sync"; NULL terminated. */
+static const char * const cmd_filesystem_sync_usage[] = {
+	"btrfs filesystem sync <path>",
+	"Force a sync on a filesystem",
+	NULL
+};
+
+/*
+ * Entry point of "btrfs filesystem sync": issue BTRFS_IOC_SYNC on the
+ * filesystem at the single path argument.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int cmd_filesystem_sync(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	const char *target;
+	int saved_errno;
+	int fd;
+	int rc;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_filesystem_sync_usage);
+
+	target = argv[1];
+
+	fd = btrfs_open_dir(target, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	printf("FSSync '%s'\n", target);
+	rc = ioctl(fd, BTRFS_IOC_SYNC);
+	/* keep the ioctl's errno, closing the fd may overwrite it */
+	saved_errno = errno;
+	close_file_or_dir(fd, dirstream);
+
+	if (rc >= 0)
+		return 0;
+
+	error("sync ioctl failed on '%s': %s", target, strerror(saved_errno));
+	return 1;
+}
+
+/*
+ * Map a compression name to its BTRFS_COMPRESS_* value.
+ *
+ * The name is taken from the parameter @s, not from getopt's global
+ * optarg, so the function works for any caller-supplied string.
+ *
+ * Returns BTRFS_COMPRESS_ZLIB for "zlib" and BTRFS_COMPRESS_LZO for "lzo";
+ * any other name prints an error and terminates the program with status 1.
+ */
+static int parse_compress_type(char *s)
+{
+	if (strcmp(s, "zlib") == 0)
+		return BTRFS_COMPRESS_ZLIB;
+	else if (strcmp(s, "lzo") == 0)
+		return BTRFS_COMPRESS_LZO;
+	else {
+		error("unknown compression type %s", s);
+		exit(1);
+	}
+}
+
+/* Usage/help lines for "btrfs filesystem defragment"; NULL terminated. */
+static const char * const cmd_filesystem_defrag_usage[] = {
+ "btrfs filesystem defragment [options] <file>|<dir> [<file>|<dir>...]",
+ "Defragment a file or a directory",
+ "",
+ "-v be verbose",
+ "-r defragment files recursively",
+ "-c[zlib,lzo] compress the file while defragmenting",
+ "-f flush data to disk immediately after defragmenting",
+ "-s start defragment only from byte onward",
+ "-l len defragment only up to len bytes",
+ "-t size target extent size hint",
+ NULL
+};
+
+/*
+ * Defragment the file open at @fd.  With @fancy_ioctl set the range ioctl
+ * is used with @range, otherwise the plain defrag ioctl without arguments.
+ *
+ * Returns the ioctl's return value.
+ */
+static int do_defrag(int fd, int fancy_ioctl,
+		struct btrfs_ioctl_defrag_range_args *range)
+{
+	if (fancy_ioctl)
+		return ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, range);
+
+	return ioctl(fd, BTRFS_IOC_DEFRAG, NULL);
+}
+
+/* State shared between cmd_filesystem_defrag() and the tree-walk callback. */
+static int defrag_global_fancy_ioctl;
+static struct btrfs_ioctl_defrag_range_args defrag_global_range;
+static int defrag_global_verbose;
+static int defrag_global_errors;
+
+/*
+ * File tree walk callback: defragment @fpath if it is a regular file.
+ *
+ * Failures on a single file are reported with the errno of the call that
+ * failed (open() or the defrag ioctl), counted in defrag_global_errors and
+ * do not stop the walk.  The only exception is a kernel without the range
+ * ioctl: ENOTTY is returned so that the walk is aborted.
+ *
+ * Returns 0 to continue the walk, ENOTTY to abort it.
+ */
+static int defrag_callback(const char *fpath, const struct stat *sb,
+		int typeflag, struct FTW *ftwbuf)
+{
+	int ret = 0;
+	int e = 0;
+	int fd = 0;
+
+	if ((typeflag == FTW_F) && S_ISREG(sb->st_mode)) {
+		if (defrag_global_verbose)
+			printf("%s\n", fpath);
+		fd = open(fpath, O_RDWR);
+		if (fd < 0) {
+			/* remember why open() failed for the message below */
+			e = errno;
+			goto error;
+		}
+		ret = do_defrag(fd, defrag_global_fancy_ioctl, &defrag_global_range);
+		/* save errno before close() can overwrite it */
+		e = errno;
+		close(fd);
+		if (ret && e == ENOTTY && defrag_global_fancy_ioctl) {
+			error("defrag range ioctl not "
+				"supported in this kernel, please try "
+				"without any options.");
+			defrag_global_errors++;
+			return ENOTTY;
+		}
+		if (ret)
+			goto error;
+	}
+	return 0;
+
+error:
+	error("defrag failed on %s: %s", fpath, strerror(e));
+	defrag_global_errors++;
+	return 0;
+}
+
+/*
+ * 'btrfs filesystem defragment': defragment each file or directory named on
+ * the command line.
+ *
+ * Any of -c, -f, -s, -l or -t selects the range ioctl (see do_defrag()).
+ * With -r, directories are walked with nftw() and every regular file found
+ * is defragmented by defrag_callback().
+ *
+ * Returns 0 when no failure was counted and 1 otherwise.
+ */
+static int cmd_filesystem_defrag(int argc, char **argv)
+{
+	int fd;
+	int flush = 0;
+	u64 start = 0;
+	u64 len = (u64)-1;
+	u64 thresh = 0;
+	int i;
+	int recursive = 0;
+	int ret = 0;
+	int e = 0;
+	int compress_type = BTRFS_COMPRESS_NONE;
+	DIR *dirstream;
+
+	defrag_global_errors = 0;
+	defrag_global_verbose = 0;
+	defrag_global_fancy_ioctl = 0;
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "vrc::fs:l:t:");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'c':
+			/* a bare -c means zlib */
+			compress_type = BTRFS_COMPRESS_ZLIB;
+			if (optarg)
+				compress_type = parse_compress_type(optarg);
+			defrag_global_fancy_ioctl = 1;
+			break;
+		case 'f':
+			flush = 1;
+			defrag_global_fancy_ioctl = 1;
+			break;
+		case 'v':
+			defrag_global_verbose = 1;
+			break;
+		case 's':
+			start = parse_size(optarg);
+			defrag_global_fancy_ioctl = 1;
+			break;
+		case 'l':
+			len = parse_size(optarg);
+			defrag_global_fancy_ioctl = 1;
+			break;
+		case 't':
+			thresh = parse_size(optarg);
+			/* extent_thresh is stored as u32 below, so clamp */
+			if (thresh > (u32)-1) {
+				warning(
+			    "target extent size %llu too big, trimmed to %u",
+					thresh, (u32)-1);
+				thresh = (u32)-1;
+			}
+			defrag_global_fancy_ioctl = 1;
+			break;
+		case 'r':
+			recursive = 1;
+			break;
+		default:
+			usage(cmd_filesystem_defrag_usage);
+		}
+	}
+
+	if (check_argc_min(argc - optind, 1))
+		usage(cmd_filesystem_defrag_usage);
+
+	memset(&defrag_global_range, 0, sizeof(defrag_global_range));
+	defrag_global_range.start = start;
+	defrag_global_range.len = len;
+	defrag_global_range.extent_thresh = (u32)thresh;
+	if (compress_type) {
+		defrag_global_range.flags |= BTRFS_DEFRAG_RANGE_COMPRESS;
+		defrag_global_range.compress_type = compress_type;
+	}
+	if (flush)
+		defrag_global_range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
+
+	for (i = optind; i < argc; i++) {
+		struct stat st;
+
+		dirstream = NULL;
+		fd = open_file_or_dir(argv[i], &dirstream);
+		if (fd < 0) {
+			/* no trailing newline, like the other error() calls here */
+			error("cannot open %s: %s", argv[i],
+					strerror(errno));
+			defrag_global_errors++;
+			close_file_or_dir(fd, dirstream);
+			continue;
+		}
+		if (fstat(fd, &st)) {
+			error("failed to stat %s: %s",
+					argv[i], strerror(errno));
+			defrag_global_errors++;
+			close_file_or_dir(fd, dirstream);
+			continue;
+		}
+		if (!(S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))) {
+			error("%s is not a directory or a regular file",
+					argv[i]);
+			defrag_global_errors++;
+			close_file_or_dir(fd, dirstream);
+			continue;
+		}
+		if (recursive) {
+			if (S_ISDIR(st.st_mode)) {
+				ret = nftw(argv[i], defrag_callback, 10,
+						FTW_MOUNT | FTW_PHYS);
+				/* the callback returns ENOTTY to abort the walk */
+				if (ret == ENOTTY)
+					exit(1);
+				/* errors are handled in the callback */
+				ret = 0;
+			} else {
+				if (defrag_global_verbose)
+					printf("%s\n", argv[i]);
+				ret = do_defrag(fd, defrag_global_fancy_ioctl,
+						&defrag_global_range);
+				e = errno;
+			}
+		} else {
+			if (defrag_global_verbose)
+				printf("%s\n", argv[i]);
+			ret = do_defrag(fd, defrag_global_fancy_ioctl,
+					&defrag_global_range);
+			e = errno;
+		}
+		close_file_or_dir(fd, dirstream);
+		if (ret && e == ENOTTY && defrag_global_fancy_ioctl) {
+			error("defrag range ioctl not "
+				"supported in this kernel, please try "
+				"without any options.");
+			defrag_global_errors++;
+			break;
+		}
+		if (ret) {
+			error("defrag failed on %s: %s", argv[i], strerror(e));
+			defrag_global_errors++;
+		}
+	}
+	if (defrag_global_errors)
+		fprintf(stderr, "total %d failures\n", defrag_global_errors);
+
+	return !!defrag_global_errors;
+}
+
+static const char * const cmd_filesystem_resize_usage[] = { /* NULL-terminated help text handed to usage() by cmd_filesystem_resize() */
+ "btrfs filesystem resize [devid:][+/-]<newsize>[kKmMgGtTpPeE]|[devid:]max <path>",
+ "Resize a filesystem",
+ "If 'max' is passed, the filesystem will occupy all available space",
+ "on the device 'devid'.",
+ "[kK] means KiB, which denotes 1KiB = 1024B, 1MiB = 1024KiB, etc.",
+ NULL
+};
+
+/*
+ * 'btrfs filesystem resize <amount> <path>': pass <amount> verbatim to the
+ * BTRFS_IOC_RESIZE ioctl on the directory <path>.
+ *
+ * <path> must be a directory; anything else is rejected before the ioctl
+ * (see the message below for the reason).
+ *
+ * Returns 0 on success and 1 on any failure.
+ */
+static int cmd_filesystem_resize(int argc, char **argv)
+{
+	struct btrfs_ioctl_vol_args args;
+	int fd, res, e;
+	size_t len;
+	char *amount, *path;
+	DIR *dirstream = NULL;
+	struct stat st;
+
+	if (check_argc_exact(argc, 3))
+		usage(cmd_filesystem_resize_usage);
+
+	amount = argv[1];
+	path = argv[2];
+
+	len = strlen(amount);
+	if (len == 0) {
+		/* an empty value is not "too long"; say what is wrong */
+		error("resize value is empty");
+		return 1;
+	}
+	if (len >= BTRFS_VOL_NAME_MAX) {
+		error("resize value too long (%s)", amount);
+		return 1;
+	}
+
+	res = stat(path, &st);
+	if (res < 0) {
+		error("resize: cannot stat %s: %s", path, strerror(errno));
+		return 1;
+	}
+	if (!S_ISDIR(st.st_mode)) {
+		error("resize works on mounted filesystems and accepts only\n"
+			"directories as argument. Passing file containing a btrfs image\n"
+			"would resize the underlying filesystem instead of the image.");
+		return 1;
+	}
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	printf("Resize '%s' of '%s'\n", path, amount);
+	memset(&args, 0, sizeof(args));
+	strncpy_null(args.name, amount);
+	res = ioctl(fd, BTRFS_IOC_RESIZE, &args);
+	/* save errno before close_file_or_dir() can overwrite it */
+	e = errno;
+	close_file_or_dir(fd, dirstream);
+	if (res < 0) {
+		switch (e) {
+		case EFBIG:
+			error("unable to resize '%s': not enough free space",
+				path);
+			break;
+		default:
+			error("unable to resize '%s': %s", path, strerror(e));
+			break;
+		}
+		return 1;
+	} else if (res > 0) {
+		/* a positive result is translated through btrfs_err_str() */
+		const char *err_str = btrfs_err_str(res);
+
+		if (err_str) {
+			error("resizing of '%s' failed: %s", path, err_str);
+		} else {
+			error("resizing of '%s' failed: unknown error %d",
+				path, res);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static const char * const cmd_filesystem_label_usage[] = { /* NULL-terminated help text handed to usage() by cmd_filesystem_label() */
+ "btrfs filesystem label [<device>|<mount_point>] [<newlabel>]",
+ "Get or change the label of a filesystem",
+ "With one argument, get the label of filesystem on <device>.",
+ "If <newlabel> is passed, set the filesystem label to <newlabel>.",
+ NULL
+};
+
+/*
+ * 'btrfs filesystem label': with a second argument the label is set through
+ * set_label(); with one argument the label is read through get_label() and
+ * printed on stdout.  The helper's return value is returned as is.
+ */
+static int cmd_filesystem_label(int argc, char **argv)
+{
+	char label[BTRFS_LABEL_SIZE];
+	int ret;
+
+	if (check_argc_min(argc, 2) || check_argc_max(argc, 3))
+		usage(cmd_filesystem_label_usage);
+
+	if (argc > 2)
+		return set_label(argv[1], argv[2]);
+
+	ret = get_label(argv[1], label);
+	if (ret)
+		return ret;
+
+	fprintf(stdout, "%s\n", label);
+	return 0;
+}
+
+static const char filesystem_cmd_group_info[] = /* one-line description stored in filesystem_cmd_group below */
+"overall filesystem tasks and information";
+
+const struct cmd_group filesystem_cmd_group = { /* subcommand table dispatched by cmd_filesystem() */
+ filesystem_cmd_group_usage, filesystem_cmd_group_info, {
+ { "df", cmd_filesystem_df, cmd_filesystem_df_usage, NULL, 0 },
+ { "show", cmd_filesystem_show, cmd_filesystem_show_usage, NULL,
+ 0 },
+ { "sync", cmd_filesystem_sync, cmd_filesystem_sync_usage, NULL,
+ 0 },
+ { "defragment", cmd_filesystem_defrag,
+ cmd_filesystem_defrag_usage, NULL, 0 },
+ { "balance", cmd_balance, NULL, &balance_cmd_group, /* the only entry flagged CMD_HIDDEN */
+ CMD_HIDDEN },
+ { "resize", cmd_filesystem_resize, cmd_filesystem_resize_usage,
+ NULL, 0 },
+ { "label", cmd_filesystem_label, cmd_filesystem_label_usage,
+ NULL, 0 },
+ { "usage", cmd_filesystem_usage,
+ cmd_filesystem_usage_usage, NULL, 0 },
+
+ NULL_CMD_STRUCT
+ }
+};
+
+int cmd_filesystem(int argc, char **argv) /* entry point of 'btrfs filesystem' */
+{
+ return handle_command_group(&filesystem_cmd_group, argc, argv); /* dispatch through the table above; its result is returned unchanged */
+}
diff --git a/cmds-inspect.c b/cmds-inspect.c
new file mode 100644
index 00000000..7fa4881a
--- /dev/null
+++ b/cmds-inspect.c
@@ -0,0 +1,644 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+
+#include "kerncompat.h"
+#include "ioctl.h"
+#include "utils.h"
+#include "ctree.h"
+#include "send-utils.h"
+#include "disk-io.h"
+#include "commands.h"
+#include "btrfs-list.h"
+
+static const char * const inspect_cmd_group_usage[] = { /* NULL-terminated synopsis stored in inspect_cmd_group */
+ "btrfs inspect-internal <command> <args>",
+ NULL
+};
+
+/*
+ * Print every path of inode @inum, as reported by BTRFS_IOC_INO_PATHS on @fd.
+ *
+ * Each path is printed on its own line, prefixed with "@prepend/" when
+ * @prepend is not NULL.  With @verbose set, the counters returned in the data
+ * container are printed first.
+ *
+ * Returns 0 when the ioctl returned 0 and 1 otherwise.
+ */
+static int __ino_to_path_fd(u64 inum, int fd, int verbose, const char *prepend)
+{
+	int ret;
+	unsigned int i;
+	struct btrfs_ioctl_ino_path_args ipa;
+	struct btrfs_data_container fspath[PATH_MAX];
+
+	/* only the container header needs clearing; the ioctl fills the rest */
+	memset(fspath, 0, sizeof(*fspath));
+	/* do not hand uninitialized stack contents to the kernel */
+	memset(&ipa, 0, sizeof(ipa));
+	ipa.inum = inum;
+	ipa.size = PATH_MAX;
+	ipa.fspath = ptr_to_u64(fspath);
+
+	ret = ioctl(fd, BTRFS_IOC_INO_PATHS, &ipa);
+	if (ret < 0) {
+		printf("ioctl ret=%d, error: %s\n", ret, strerror(errno));
+		goto out;
+	}
+
+	if (verbose)
+		printf("ioctl ret=%d, bytes_left=%lu, bytes_missing=%lu, "
+			"cnt=%u, missed=%u\n", ret,
+			(unsigned long)fspath->bytes_left,
+			(unsigned long)fspath->bytes_missing,
+			(unsigned int)fspath->elem_cnt,
+			(unsigned int)fspath->elem_missed);
+
+	for (i = 0; i < fspath->elem_cnt; ++i) {
+		u64 ptr;
+		char *str;
+
+		/* val[i] is a byte offset from the start of val[] to the string */
+		ptr = (u64)(unsigned long)fspath->val;
+		ptr += fspath->val[i];
+		str = (char *)(unsigned long)ptr;
+		if (prepend)
+			printf("%s/%s\n", prepend, str);
+		else
+			printf("%s\n", str);
+	}
+
+out:
+	return !!ret;
+}
+
+static const char * const cmd_inspect_inode_resolve_usage[] = { /* NULL-terminated help text handed to usage() by cmd_inspect_inode_resolve() */
+ "btrfs inspect-internal inode-resolve [-v] <inode> <path>",
+ "Get file system paths for the given inode",
+ "",
+ "-v verbose mode",
+ NULL
+};
+
+/*
+ * 'btrfs inspect-internal inode-resolve [-v] <inode> <path>': open <path> and
+ * print the paths of <inode> through __ino_to_path_fd(), using <path> as the
+ * prefix.  Returns 1 when the directory cannot be opened, otherwise the
+ * normalized (0/1) result of __ino_to_path_fd().
+ */
+static int cmd_inspect_inode_resolve(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	char *mnt;
+	int verbose = 0;
+	int opt;
+	int fd;
+	int ret;
+
+	optind = 1;
+	while ((opt = getopt(argc, argv, "v")) >= 0) {
+		if (opt == 'v')
+			verbose = 1;
+		else
+			usage(cmd_inspect_inode_resolve_usage);
+	}
+
+	if (check_argc_exact(argc - optind, 2))
+		usage(cmd_inspect_inode_resolve_usage);
+
+	mnt = argv[optind + 1];
+	fd = btrfs_open_dir(mnt, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	/* the inode number is parsed only after the directory was opened */
+	ret = __ino_to_path_fd(arg_strtou64(argv[optind]), fd, verbose, mnt);
+	close_file_or_dir(fd, dirstream);
+
+	return !!ret;
+}
+
+static const char * const cmd_inspect_logical_resolve_usage[] = { /* NULL-terminated help text handed to usage() by cmd_inspect_logical_resolve() */
+ "btrfs inspect-internal logical-resolve [-Pv] [-s bufsize] <logical> <path>",
+ "Get file system paths for the given logical address",
+ "-P skip the path resolving and print the inodes instead",
+ "-v verbose mode",
+ "-s bufsize set inode container's size. This is used to increase inode",
+ " container's size in case it is not enough to read all the ",
+ " resolved results. The max value one can set is 64k",
+ NULL
+};
+
+/*
+ * 'btrfs inspect-internal logical-resolve [-Pv] [-s bufsize] <logical> <path>'
+ *
+ * Ask BTRFS_IOC_LOGICAL_INO which (inode, offset, root) triples reference the
+ * logical address and either print them (-P) or resolve each inode to its
+ * paths via __ino_to_path_fd().
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_inspect_logical_resolve(int argc, char **argv)
+{
+	int ret;
+	int fd;
+	int i;
+	int verbose = 0;
+	int getpath = 1;
+	int bytes_left;
+	int len;
+	struct btrfs_ioctl_logical_ino_args loi;
+	struct btrfs_data_container *inodes;
+	u64 size = 4096;
+	char full_path[4096];
+	char *path_ptr;
+	DIR *dirstream = NULL;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "Pvs:");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'P':
+			getpath = 0;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 's':
+			size = arg_strtou64(optarg);
+			break;
+		default:
+			usage(cmd_inspect_logical_resolve_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 2))
+		usage(cmd_inspect_logical_resolve_usage);
+
+	size = min(size, (u64)64 * 1024);
+	/*
+	 * The container header is cleared below, so the buffer must hold at
+	 * least the header or the memset would overrun the allocation.
+	 */
+	if (size < sizeof(*inodes))
+		size = sizeof(*inodes);
+	inodes = malloc(size);
+	if (!inodes)
+		return 1;
+
+	memset(inodes, 0, sizeof(*inodes));
+	/* do not hand uninitialized stack contents to the kernel */
+	memset(&loi, 0, sizeof(loi));
+	loi.logical = arg_strtou64(argv[optind]);
+	loi.size = size;
+	loi.inodes = ptr_to_u64(inodes);
+
+	fd = btrfs_open_dir(argv[optind + 1], &dirstream, 1);
+	if (fd < 0) {
+		ret = 12;
+		goto out;
+	}
+
+	ret = ioctl(fd, BTRFS_IOC_LOGICAL_INO, &loi);
+	if (ret < 0) {
+		printf("ioctl ret=%d, error: %s\n", ret, strerror(errno));
+		goto out;
+	}
+
+	if (verbose)
+		printf("ioctl ret=%d, total_size=%llu, bytes_left=%lu, "
+			"bytes_missing=%lu, cnt=%d, missed=%d\n",
+			ret, size,
+			(unsigned long)inodes->bytes_left,
+			(unsigned long)inodes->bytes_missing,
+			inodes->elem_cnt, inodes->elem_missed);
+
+	/*
+	 * full_path starts as "<path>/"; path_ptr points just past that slash
+	 * so a subvolume name can be appended for each root.  The length goes
+	 * into 'len' rather than 'ret' so it cannot leak into the exit status.
+	 */
+	bytes_left = sizeof(full_path);
+	len = snprintf(full_path, bytes_left, "%s/", argv[optind+1]);
+	path_ptr = full_path + len;
+	bytes_left -= len + 1;
+	BUG_ON(bytes_left < 0);
+
+	/* val[] holds (inum, offset, root) triples */
+	for (i = 0; i < inodes->elem_cnt; i += 3) {
+		u64 inum = inodes->val[i];
+		u64 offset = inodes->val[i+1];
+		u64 root = inodes->val[i+2];
+		int path_fd;
+		char *name;
+		DIR *dirs = NULL;
+
+		if (getpath) {
+			name = btrfs_list_path_for_root(fd, root);
+			if (IS_ERR(name)) {
+				ret = PTR_ERR(name);
+				goto out;
+			}
+			if (!name) {
+				/* no name for this root: use <path> itself */
+				path_ptr[-1] = '\0';
+				path_fd = fd;
+			} else {
+				path_ptr[-1] = '/';
+				len = snprintf(path_ptr, bytes_left, "%s",
+						name);
+				BUG_ON(len >= bytes_left);
+				free(name);
+				path_fd = btrfs_open_dir(full_path, &dirs, 1);
+				if (path_fd < 0) {
+					ret = -ENOENT;
+					goto out;
+				}
+			}
+			__ino_to_path_fd(inum, path_fd, verbose, full_path);
+			if (path_fd != fd)
+				close_file_or_dir(path_fd, dirs);
+		} else {
+			printf("inode %llu offset %llu root %llu\n", inum,
+				offset, root);
+		}
+	}
+
+	/* every error path jumps straight to 'out' with its own value */
+	ret = 0;
+out:
+	close_file_or_dir(fd, dirstream);
+	free(inodes);
+	return !!ret;
+}
+
+static const char * const cmd_inspect_subvolid_resolve_usage[] = { /* NULL-terminated help text handed to usage() by cmd_inspect_subvolid_resolve() */
+ "btrfs inspect-internal subvolid-resolve <subvolid> <path>",
+ "Get file system paths for the given subvolume ID.",
+ NULL
+};
+
+/*
+ * 'btrfs inspect-internal subvolid-resolve <subvolid> <path>': open <path>,
+ * resolve the subvolume id with btrfs_subvolid_resolve() and print the
+ * resulting path.  Returns 0 on success and 1 on failure.
+ */
+static int cmd_inspect_subvolid_resolve(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	char path[PATH_MAX];
+	u64 subvol_id;
+	int fd = -1;
+	int ret;
+
+	if (check_argc_exact(argc, 3))
+		usage(cmd_inspect_subvolid_resolve_usage);
+
+	fd = btrfs_open_dir(argv[2], &dirstream, 1);
+	if (fd >= 0) {
+		subvol_id = arg_strtou64(argv[1]);
+		ret = btrfs_subvolid_resolve(fd, path, sizeof(path), subvol_id);
+		if (ret == 0) {
+			/* force termination before printing the buffer */
+			path[PATH_MAX - 1] = '\0';
+			printf("%s\n", path);
+		} else {
+			fprintf(stderr,
+				"%s: btrfs_subvolid_resolve(subvol_id %llu) failed with ret=%d\n",
+				argv[0], (unsigned long long)subvol_id, ret);
+		}
+	} else {
+		ret = -ENOENT;
+	}
+
+	close_file_or_dir(fd, dirstream);
+	return !!ret;
+}
+
+static const char* const cmd_inspect_rootid_usage[] = { /* NULL-terminated help text handed to usage() by cmd_inspect_rootid() */
+ "btrfs inspect-internal rootid <path>",
+ "Get tree ID of the containing subvolume of path.",
+ NULL
+};
+
+/*
+ * 'btrfs inspect-internal rootid <path>': open <path>, query the id with
+ * lookup_ino_rootid() and print it.  Returns 0 on success and 1 on failure.
+ */
+static int cmd_inspect_rootid(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	u64 rootid;
+	int fd = -1;
+	int ret;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_inspect_rootid_usage);
+
+	fd = btrfs_open_dir(argv[1], &dirstream, 1);
+	if (fd >= 0) {
+		ret = lookup_ino_rootid(fd, &rootid);
+		if (ret == 0)
+			printf("%llu\n", (unsigned long long)rootid);
+		else
+			fprintf(stderr, "%s: rootid failed with ret=%d\n",
+				argv[0], ret);
+	} else {
+		ret = -ENOENT;
+	}
+
+	close_file_or_dir(fd, dirstream);
+
+	return !!ret;
+}
+
+static const char* const cmd_inspect_min_dev_size_usage[] = { /* NULL-terminated help text handed to usage() by cmd_inspect_min_dev_size() */
+ "btrfs inspect-internal min-dev-size [options] <path>",
+ "Get the minimum size the device can be shrunk to. The",
+ "device id 1 is used by default.",
+ "--id DEVID specify the device id to query",
+ NULL
+};
+
+struct dev_extent_elem { /* one byte range on a device; used below for both the extent list and the hole list */
+ u64 start; /* first byte of the range */
+ /* inclusive end */
+ u64 end;
+ struct list_head list; /* linkage into the owning list */
+};
+
+/*
+ * Allocate a [start, end] range element and link it into @list: at the tail
+ * when @append is non-zero, at the head otherwise.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+static int add_dev_extent(struct list_head *list,
+		const u64 start, const u64 end,
+		const int append)
+{
+	struct dev_extent_elem *elem = malloc(sizeof(*elem));
+
+	if (elem == NULL)
+		return -ENOMEM;
+
+	elem->start = start;
+	elem->end = end;
+
+	if (!append)
+		list_add(&elem->list, list);
+	else
+		list_add_tail(&elem->list, list);
+
+	return 0;
+}
+
+/* Unlink and free every dev_extent_elem on @list, leaving the list empty. */
+static void free_dev_extent_list(struct list_head *list)
+{
+	struct dev_extent_elem *elem;
+
+	for (;;) {
+		if (list_empty(list))
+			break;
+
+		elem = list_first_entry(list, struct dev_extent_elem, list);
+		list_del(&elem->list);
+		free(elem);
+	}
+}
+
+/*
+ * Return 1 when any of the BTRFS_SUPER_MIRROR_MAX offsets reported by
+ * btrfs_sb_offset() lies inside the inclusive range [start, end], 0 otherwise.
+ */
+static int hole_includes_sb_mirror(const u64 start, const u64 end)
+{
+	int mirror;
+
+	for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+		const u64 bytenr = btrfs_sb_offset(mirror);
+
+		if (start <= bytenr && bytenr <= end)
+			return 1;
+	}
+
+	return 0;
+}
+
+static void adjust_dev_min_size(struct list_head *extents,
+ struct list_head *holes,
+ u64 *min_size)
+{ /* Grows *min_size in place; consumes (unlinks and frees) entries of both lists as it goes. */
+ /*
+ * If relocation of the block group of a device extent must happen (see
+ * below) scratch space is used for the relocation. So track here the
+ * size of the largest device extent that has to be relocated. We track
+ * only the largest and not the sum of the sizes of all relocated block
+ * groups because after each block group is relocated the running
+ * transaction is committed so that pinned space is released.
+ */
+ u64 scratch_space = 0;
+
+ /*
+ * List of device extents is sorted by descending order of the extent's
+ * end offset. If some extent goes beyond the computed minimum size,
+ * which initially matches the sum of the lengths of all extents,
+ * we need to check if the extent can be relocated to an hole in the
+ * device between [0, *min_size[ (which is what the resize ioctl does).
+ */
+ while (!list_empty(extents)) {
+ struct dev_extent_elem *e;
+ struct dev_extent_elem *h;
+ int found = 0;
+ u64 extent_len;
+ u64 hole_len = 0;
+
+ e = list_first_entry(extents, struct dev_extent_elem, list);
+ if (e->end <= *min_size)
+ break;
+
+ /*
+ * Our extent goes beyond the computed *min_size. See if we can
+ * find a hole large enough to relocate it to. If not we must stop
+ * and set *min_size to the end of the extent.
+ */
+ extent_len = e->end - e->start + 1;
+ list_for_each_entry(h, holes, list) { /* first fit: the first hole that is large enough wins */
+ hole_len = h->end - h->start + 1;
+ if (hole_len >= extent_len) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ *min_size = e->end + 1;
+ break;
+ }
+
+ /*
+ * If the hole found contains the location for a superblock
+ * mirror, we are pessimistic and require allocating one
+ * more extent of the same size. This is because the block
+ * group could be in the worst case used by a single extent
+ * with a size >= (block_group.length - superblock.size).
+ */
+ if (hole_includes_sb_mirror(h->start,
+ h->start + extent_len - 1))
+ *min_size += extent_len;
+
+ if (hole_len > extent_len) { /* shrink the hole from its start by the space just used */
+ h->start += extent_len;
+ } else { /* exact fit: the hole is fully consumed */
+ list_del(&h->list);
+ free(h);
+ }
+
+ list_del(&e->list);
+ free(e);
+
+ if (extent_len > scratch_space)
+ scratch_space = extent_len;
+ }
+
+ if (scratch_space) {
+ *min_size += scratch_space;
+ /*
+ * Chunk allocation requires inserting/updating items in the
+ * chunk tree, so often this can lead to the need of allocating
+ * a new system chunk too, which has a maximum size of 32Mb.
+ */
+ *min_size += 32 * 1024 * 1024;
+ }
+}
+
+static int print_min_dev_size(int fd, u64 devid) /* prints the computed minimum size for @devid; returns 0 on success, 1 on failure */
+{
+ int ret = 1;
+ /*
+ * Device allocations starts at 1Mb or at the value passed through the
+ * mount option alloc_start if it's bigger than 1Mb. The alloc_start
+ * option is used for debugging and testing only, and recently the
+ * possibility of deprecating/removing it has been discussed, so we
+ * ignore it here.
+ */
+ u64 min_size = 1 * 1024 * 1024ull;
+ struct btrfs_ioctl_search_args args;
+ struct btrfs_ioctl_search_key *sk = &args.key;
+ u64 last_pos = (u64)-1; /* end of the previous extent; (u64)-1 means none seen yet */
+ LIST_HEAD(extents);
+ LIST_HEAD(holes);
+
+ memset(&args, 0, sizeof(args));
+ sk->tree_id = BTRFS_DEV_TREE_OBJECTID; /* restrict the search to DEV_EXTENT items of @devid */
+ sk->min_objectid = devid;
+ sk->max_objectid = devid;
+ sk->max_type = BTRFS_DEV_EXTENT_KEY;
+ sk->min_type = BTRFS_DEV_EXTENT_KEY;
+ sk->min_offset = 0;
+ sk->max_offset = (u64)-1;
+ sk->min_transid = 0;
+ sk->max_transid = (u64)-1;
+ sk->nr_items = 4096;
+
+ while (1) {
+ int i;
+ struct btrfs_ioctl_search_header *sh;
+ unsigned long off = 0;
+
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Error invoking tree search ioctl: %s\n",
+ strerror(errno));
+ ret = 1;
+ goto out;
+ }
+
+ if (sk->nr_items == 0) /* nothing returned: the search is exhausted */
+ break;
+
+ for (i = 0; i < sk->nr_items; i++) { /* args.buf holds a header followed by sh->len payload bytes per item */
+ struct btrfs_dev_extent *extent;
+ u64 len;
+
+ sh = (struct btrfs_ioctl_search_header *)(args.buf +
+ off);
+ off += sizeof(*sh);
+ extent = (struct btrfs_dev_extent *)(args.buf + off);
+ off += sh->len;
+
+ sk->min_objectid = sh->objectid; /* move the search cursor just past the item read */
+ sk->min_type = sh->type;
+ sk->min_offset = sh->offset + 1;
+
+ if (sh->objectid != devid ||
+ sh->type != BTRFS_DEV_EXTENT_KEY)
+ continue;
+
+ len = btrfs_stack_dev_extent_length(extent);
+ min_size += len;
+ ret = add_dev_extent(&extents, sh->offset, /* inserted at the head (append == 0) */
+ sh->offset + len - 1, 0);
+
+ if (!ret && last_pos != (u64)-1 && /* a gap since the previous extent is recorded as a hole, appended at the tail */
+ last_pos != sh->offset)
+ ret = add_dev_extent(&holes, last_pos,
+ sh->offset - 1, 1);
+ if (ret) {
+ fprintf(stderr, "Error: %s\n", strerror(-ret));
+ ret = 1;
+ goto out;
+ }
+
+ last_pos = sh->offset + len;
+ }
+
+ if (sk->min_type != BTRFS_DEV_EXTENT_KEY || /* the last item read was outside the requested key range: stop */
+ sk->min_objectid != devid)
+ break;
+ }
+
+ adjust_dev_min_size(&extents, &holes, &min_size);
+ printf("%llu bytes (%s)\n", min_size, pretty_size(min_size));
+ ret = 0;
+out:
+ free_dev_extent_list(&extents);
+ free_dev_extent_list(&holes);
+
+ return ret;
+}
+
+/*
+ * 'btrfs inspect-internal min-dev-size [--id DEVID] <path>': open <path> and
+ * print the minimum size for the selected device (id 1 unless --id is given)
+ * through print_min_dev_size().  Returns 0 on success and 1 on failure.
+ */
+static int cmd_inspect_min_dev_size(int argc, char **argv)
+{
+	int ret;
+	int fd = -1;
+	DIR *dirstream = NULL;
+	u64 devid = 1;
+
+	/*
+	 * Restart option parsing at argv[1], as the other option-parsing
+	 * commands in this file do, so the result does not depend on a
+	 * previous getopt user.
+	 */
+	optind = 1;
+	while (1) {
+		int c;
+		enum { GETOPT_VAL_DEVID = 256 };
+		static const struct option long_options[] = {
+			{ "id", required_argument, NULL, GETOPT_VAL_DEVID },
+			{NULL, 0, NULL, 0}
+		};
+
+		c = getopt_long(argc, argv, "", long_options, NULL);
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case GETOPT_VAL_DEVID:
+			devid = arg_strtou64(optarg);
+			break;
+		default:
+			usage(cmd_inspect_min_dev_size_usage);
+		}
+	}
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_inspect_min_dev_size_usage);
+
+	fd = btrfs_open_dir(argv[optind], &dirstream, 1);
+	if (fd < 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = print_min_dev_size(fd, devid);
+	close_file_or_dir(fd, dirstream);
+out:
+	return !!ret;
+}
+
+static const char inspect_cmd_group_info[] = /* one-line description stored in inspect_cmd_group below */
+"query various internal information";
+
+const struct cmd_group inspect_cmd_group = { /* subcommand table dispatched by cmd_inspect() */
+ inspect_cmd_group_usage, inspect_cmd_group_info, {
+ { "inode-resolve", cmd_inspect_inode_resolve,
+ cmd_inspect_inode_resolve_usage, NULL, 0 },
+ { "logical-resolve", cmd_inspect_logical_resolve,
+ cmd_inspect_logical_resolve_usage, NULL, 0 },
+ { "subvolid-resolve", cmd_inspect_subvolid_resolve,
+ cmd_inspect_subvolid_resolve_usage, NULL, 0 },
+ { "rootid", cmd_inspect_rootid, cmd_inspect_rootid_usage, NULL,
+ 0 },
+ { "min-dev-size", cmd_inspect_min_dev_size,
+ cmd_inspect_min_dev_size_usage, NULL, 0 },
+ NULL_CMD_STRUCT
+ }
+};
+
+int cmd_inspect(int argc, char **argv) /* entry point of 'btrfs inspect-internal' */
+{
+ return handle_command_group(&inspect_cmd_group, argc, argv); /* dispatch through the table above; its result is returned unchanged */
+}
diff --git a/cmds-property.c b/cmds-property.c
new file mode 100644
index 00000000..b7b24841
--- /dev/null
+++ b/cmds-property.c
@@ -0,0 +1,478 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include "commands.h"
+#include "props.h"
+#include "ctree.h"
+#include "utils.h"
+
+static const char * const property_cmd_group_usage[] = { /* NULL-terminated synopsis stored in property_cmd_group */
+ "btrfs property get/set/list [-t <type>] <object> [<name>] [value]",
+ NULL
+};
+
+/*
+ * Look up the handler named @arg in @props, an array terminated by an entry
+ * whose name is NULL.
+ *
+ * On a match *prop_ret is set to the entry and 0 is returned; otherwise -1 is
+ * returned and *prop_ret is left untouched.
+ */
+static int parse_prop(const char *arg, const struct prop_handler *props,
+		const struct prop_handler **prop_ret)
+{
+	const struct prop_handler *cur;
+
+	for (cur = props; cur->name != NULL; cur++) {
+		if (strcmp(cur->name, arg) != 0)
+			continue;
+
+		*prop_ret = cur;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Read the filesystem id of @path with BTRFS_IOC_FS_INFO and copy
+ * BTRFS_FSID_SIZE bytes of it into @fsid.
+ *
+ * Returns 0 on success or a negative errno.  An open() failure is reported
+ * with error() unless @silent is set; an ioctl failure is never reported here.
+ */
+static int get_fsid(const char *path, u8 *fsid, int silent)
+{
+	struct btrfs_ioctl_fs_info_args args;
+	int fd;
+	int ret;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		ret = -errno;
+		if (!silent)
+			error("failed to open %s: %s", path,
+				strerror(-ret));
+		return ret;
+	}
+
+	if (ioctl(fd, BTRFS_IOC_FS_INFO, &args) < 0) {
+		/* capture errno before close() can change it */
+		ret = -errno;
+	} else {
+		memcpy(fsid, args.fsid, BTRFS_FSID_SIZE);
+		ret = 0;
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Return 1 when get_fsid() succeeds on @object and 0 when it fails.
+ * get_fsid() is called non-silently, so an open failure is reported.
+ */
+static int check_btrfs_object(const char *object)
+{
+	u8 fsid[BTRFS_FSID_SIZE];
+
+	return get_fsid(object, fsid, 0) < 0 ? 0 : 1;
+}
+
+/*
+ * Compare the filesystem id of @object with the one of "@object/..".
+ *
+ * Returns a negative errno on failure, otherwise:
+ *   0 - @object resolves to "/", or the parent is reported as -ENOTTY by
+ *       get_fsid(), or the two filesystem ids differ;
+ *   1 - the parent lookup failed with -ENOTDIR, or both ids are equal.
+ */
+static int check_is_root(const char *object)
+{
+	u8 own_fsid[BTRFS_FSID_SIZE];
+	u8 parent_fsid[BTRFS_FSID_SIZE];
+	char *parent = NULL;
+	char *resolved;
+	int ret;
+
+	resolved = realpath(object, NULL);
+	if (!resolved) {
+		ret = -errno;
+		goto out;
+	}
+	if (strcmp(resolved, "/") == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/* room for @object plus an optional '/', ".." and the terminator */
+	parent = malloc(strlen(object) + 5);
+	if (!parent) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	strcpy(parent, object);
+	if (parent[strlen(parent) - 1] != '/')
+		strcat(parent, "/");
+	strcat(parent, "..");
+
+	ret = get_fsid(object, own_fsid, 0);
+	if (ret < 0) {
+		error("get_fsid for %s failed: %s", object, strerror(-ret));
+		goto out;
+	}
+
+	ret = get_fsid(parent, parent_fsid, 1);
+	switch (ret) {
+	case -ENOTTY:
+		ret = 0;
+		goto out;
+	case -ENOTDIR:
+		ret = 1;
+		goto out;
+	default:
+		break;
+	}
+	if (ret < 0) {
+		error("get_fsid for %s failed: %s", parent, strerror(-ret));
+		goto out;
+	}
+
+	ret = memcmp(own_fsid, parent_fsid, BTRFS_FSID_SIZE) ? 0 : 1;
+
+out:
+	free(parent);
+	free(resolved);
+	return ret;
+}
+
+/* Return the number of bits set in @v, treating it as an unsigned value. */
+static int count_bits(int v)
+{
+	unsigned int bits;
+	int cnt = 0;
+
+	for (bits = (unsigned int)v; bits != 0; bits >>= 1)
+		cnt += (int)(bits & 1);
+
+	return cnt;
+}
+
+/*
+ * Work out which prop_object_* types @object can be and store the resulting
+ * bit mask in *types_out.
+ *
+ * Returns 0 on success or a negative errno, in which case *types_out is not
+ * written.
+ */
+static int autodetect_object_types(const char *object, int *types_out)
+{
+	struct stat st;
+	int types = 0;
+	int is_btrfs_object;
+	int ret;
+
+	/* probed before lstat(), so its open error message comes first */
+	is_btrfs_object = check_btrfs_object(object);
+
+	if (lstat(object, &st) < 0)
+		return -errno;
+
+	if (is_btrfs_object) {
+		types |= prop_object_inode;
+		if (st.st_ino == BTRFS_FIRST_FREE_OBJECTID)
+			types |= prop_object_subvol;
+
+		ret = check_is_root(object);
+		if (ret < 0)
+			return ret;
+		if (ret == 0)
+			types |= prop_object_root;
+	}
+
+	if (S_ISBLK(st.st_mode))
+		types |= prop_object_dev;
+
+	*types_out = types;
+	return 0;
+}
+
+/*
+ * Print the name of @prop, left-aligned in a 20 character column, followed by
+ * its description.  Always returns 0.
+ */
+static int print_prop_help(const struct prop_handler *prop)
+{
+	printf("%-20s%s\n", prop->name, prop->desc);
+
+	return 0;
+}
+
+/*
+ * Handle one (property, object type) pair.
+ *
+ * Nothing happens, and 0 is returned, unless @type is set both in @types and
+ * in the types supported by @prop.  Otherwise either the help line is printed
+ * (@name_and_help set) or the property handler is called with a NULL value.
+ */
+static int dump_prop(const struct prop_handler *prop,
+		const char *object,
+		int types,
+		int type,
+		int name_and_help)
+{
+	if (!(types & type) || !(prop->types & type))
+		return 0;
+
+	if (name_and_help)
+		return print_prop_help(prop);
+
+	return prop->handler(type, object, prop->name, NULL);
+}
+
+/*
+ * Run dump_prop() for every entry of prop_handlers against every single-bit
+ * object type below __prop_object_max.
+ *
+ * Returns 0 on success, or 50 as soon as one dump_prop() call fails.
+ */
+static int dump_props(int types, const char *object, int name_and_help)
+{
+	int idx;
+	int type;
+
+	for (idx = 0; prop_handlers[idx].name; idx++) {
+		for (type = 1; type < __prop_object_max; type <<= 1) {
+			if (dump_prop(&prop_handlers[idx], object, types,
+					type, name_and_help) < 0)
+				return 50;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Get (@value == NULL) or set property @name on @object.
+ *
+ * @types is narrowed to the types the property supports and must end up with
+ * exactly one bit set.
+ *
+ * Returns 0 on success, or one of the codes used below: 40 (unknown
+ * property), 47 (incompatible object), 48 (ambiguous type), 51 (read-only
+ * property) or 50 (handler failure).
+ */
+static int setget_prop(int types, const char *object,
+		const char *name, const char *value)
+{
+	const struct prop_handler *prop = NULL;
+
+	if (parse_prop(name, prop_handlers, &prop) == -1) {
+		error("unknown property: %s", name);
+		return 40;
+	}
+
+	types &= prop->types;
+	if (types == 0) {
+		error("object is not compatible with property: %s", prop->name);
+		return 47;
+	}
+
+	if (count_bits(types) > 1) {
+		error("type of object is ambiguous, please use option -t");
+		return 48;
+	}
+
+	if (value != NULL && prop->read_only) {
+		error("property is read-only property: %s",
+			prop->name);
+		return 51;
+	}
+
+	if (prop->handler(types, object, name, value) < 0)
+		return 50;
+
+	return 0;
+}
+
+/*
+ * Shared command line parser for the 'btrfs property' subcommands.
+ *
+ * Handles '-t <type>' and then assigns the remaining positional arguments, in
+ * order, to those of @object, @name and @value that are not NULL.  Leftover
+ * arguments are an error.  When no type was given and an object is present,
+ * the possible types are detected with autodetect_object_types().
+ *
+ * Every error is reported and followed by usage(@usage_str).
+ */
+static void parse_args(int argc, char **argv,
+		const char * const *usage_str,
+		int *types, char **object,
+		char **name, char **value)
+{
+	int ret;
+	char *type_str = NULL;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "t:");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 't':
+			type_str = optarg;
+			break;
+		default:
+			usage(usage_str);
+		}
+	}
+
+	*types = 0;
+	if (type_str) {
+		if (!strcmp(type_str, "s") || !strcmp(type_str, "subvol")) {
+			*types = prop_object_subvol;
+		} else if (!strcmp(type_str, "f") ||
+			   !strcmp(type_str, "filesystem")) {
+			*types = prop_object_root;
+		} else if (!strcmp(type_str, "i") ||
+			   !strcmp(type_str, "inode")) {
+			*types = prop_object_inode;
+		} else if (!strcmp(type_str, "d") ||
+			   !strcmp(type_str, "device")) {
+			*types = prop_object_dev;
+		} else {
+			error("invalid object type: %s", type_str);
+			usage(usage_str);
+		}
+	}
+
+	if (object && optind < argc)
+		*object = argv[optind++];
+	if (name && optind < argc)
+		*name = argv[optind++];
+	if (value && optind < argc)
+		*value = argv[optind++];
+
+	if (optind != argc) {
+		error("unexpected arguments found");
+		usage(usage_str);
+	}
+
+	/* an explicit -t always wins over autodetection */
+	if (!*types && object && *object) {
+		ret = autodetect_object_types(*object, types);
+		if (ret < 0) {
+			error("failed to detect object type: %s",
+				strerror(-ret));
+			usage(usage_str);
+		}
+		if (!*types) {
+			error("object is not a btrfs object: %s", *object);
+			usage(usage_str);
+		}
+	}
+}
+
+/* NULL-terminated help text handed to usage() by cmd_property_get(). */
+static const char * const cmd_property_get_usage[] = {
+	"btrfs property get [-t <type>] <object> [<name>]",
+	"Gets a property from a btrfs object.",
+	"If no name is specified, all properties for the given object are",
+	"printed.",
+	"A filesystem object can be the filesystem itself, a subvolume,",
+	"an inode or a device. The '-t <type>' option can be used to explicitly",
+	"specify what type of object you meant. This is only needed when a",
+	"property could be set for more than one object type. Possible types",
+	"are s[ubvol], f[ilesystem], i[node] and d[evice].",
+	NULL
+};
+
+/*
+ * 'btrfs property get [-t <type>] <object> [<name>]': print one property via
+ * setget_prop() when <name> is given, otherwise all of them via dump_props().
+ * The helper's return value is returned unchanged.
+ */
+static int cmd_property_get(int argc, char **argv)
+{
+	char *object = NULL;
+	char *name = NULL;
+	int types = 0;
+
+	if (check_argc_min(argc, 2) || check_argc_max(argc, 5))
+		usage(cmd_property_get_usage);
+
+	parse_args(argc, argv, cmd_property_get_usage, &types, &object, &name,
+			NULL);
+	if (object == NULL) {
+		error("invalid arguments");
+		usage(cmd_property_get_usage);
+	}
+
+	if (name == NULL)
+		return dump_props(types, object, 0);
+
+	return setget_prop(types, object, name, NULL);
+}
+
+static const char * const cmd_property_set_usage[] = { /* NULL-terminated help text handed to usage() by cmd_property_set() */
+ "btrfs property set [-t <type>] <object> <name> <value>",
+ "Sets a property on a btrfs object.",
+ "Please see the help of 'btrfs property get' for a description of",
+ "objects and object types.",
+ NULL
+};
+
+/*
+ * 'btrfs property set [-t <type>] <object> <name> <value>': all three
+ * positional arguments are required; the work is done by setget_prop(), whose
+ * return value is returned unchanged.
+ */
+static int cmd_property_set(int argc, char **argv)
+{
+	char *object = NULL;
+	char *name = NULL;
+	char *value = NULL;
+	int types = 0;
+
+	if (check_argc_min(argc, 4) || check_argc_max(argc, 6))
+		usage(cmd_property_set_usage);
+
+	parse_args(argc, argv, cmd_property_set_usage, &types,
+			&object, &name, &value);
+	if (object == NULL || name == NULL || value == NULL) {
+		error("invalid arguments");
+		usage(cmd_property_set_usage);
+	}
+
+	return setget_prop(types, object, name, value);
+}
+
+static const char * const cmd_property_list_usage[] = { /* NULL-terminated help text handed to usage() by cmd_property_list() */
+ "btrfs property list [-t <type>] <object>",
+ "Lists available properties with their descriptions for the given object.",
+ "Please see the help of 'btrfs property get' for a description of",
+ "objects and object types.",
+ NULL
+};
+
+/*
+ * 'btrfs property list [-t <type>] <object>': print name and description of
+ * every property applicable to <object>, via dump_props() in help mode.  Its
+ * return value is returned unchanged.
+ */
+static int cmd_property_list(int argc, char **argv)
+{
+	char *object = NULL;
+	int types = 0;
+
+	if (check_argc_min(argc, 2) || check_argc_max(argc, 4))
+		usage(cmd_property_list_usage);
+
+	parse_args(argc, argv, cmd_property_list_usage,
+			&types, &object, NULL, NULL);
+	if (object == NULL) {
+		error("invalid arguments");
+		usage(cmd_property_list_usage);
+	}
+
+	return dump_props(types, object, 1);
+}
+
+static const char property_cmd_group_info[] = /* one-line description stored in property_cmd_group below */
+"modify properties of filesystem objects";
+
+const struct cmd_group property_cmd_group = { /* subcommand table dispatched by cmd_property() */
+ property_cmd_group_usage, property_cmd_group_info, {
+ { "get", cmd_property_get,
+ cmd_property_get_usage, NULL, 0 },
+ { "set", cmd_property_set,
+ cmd_property_set_usage, NULL, 0 },
+ { "list", cmd_property_list,
+ cmd_property_list_usage, NULL, 0 },
+ NULL_CMD_STRUCT
+ }
+};
+
+int cmd_property(int argc, char **argv) /* entry point of 'btrfs property' */
+{
+ return handle_command_group(&property_cmd_group, argc, argv); /* dispatch through the table above; its result is returned unchanged */
+}
diff --git a/cmds-qgroup.c b/cmds-qgroup.c
new file mode 100644
index 00000000..db5ee21a
--- /dev/null
+++ b/cmds-qgroup.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright (C) 2012 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <getopt.h>
+
+#include "ctree.h"
+#include "ioctl.h"
+
+#include "commands.h"
+#include "qgroup.h"
+#include "utils.h"
+
+static const char * const qgroup_cmd_group_usage[] = {
+	"btrfs qgroup <command> [options] <path>",
+	NULL
+};
+
+/*
+ * Shared implementation of 'btrfs qgroup assign' (assign != 0) and
+ * 'btrfs qgroup remove' (assign == 0).
+ *
+ * Expected command line: [--rescan|--no-rescan] <src> <dst> <path>
+ *
+ * Return value:
+ *   < 0  the command line could not be parsed; the caller prints the usage
+ *   0    the relation was changed (and a requested rescan was started)
+ *   > 0  a runtime failure, or the positive value returned by the assign
+ *        ioctl when quota data became inconsistent and no rescan was
+ *        requested (a warning is printed in that case)
+ */
+static int qgroup_assign(int assign, int argc, char **argv)
+{
+	int ret = 0;
+	int fd;
+	int rescan = 0;
+	char *path;
+	struct btrfs_ioctl_qgroup_assign_args args;
+	DIR *dirstream = NULL;
+
+	/* Restart option scanning, like the other commands in this file */
+	optind = 1;
+	while (1) {
+		enum { GETOPT_VAL_RESCAN = 256, GETOPT_VAL_NO_RESCAN };
+		static const struct option long_options[] = {
+			{ "rescan", no_argument, NULL, GETOPT_VAL_RESCAN },
+			{ "no-rescan", no_argument, NULL,
+				GETOPT_VAL_NO_RESCAN },
+			{ NULL, 0, NULL, 0 }
+		};
+		int c = getopt_long(argc, argv, "", long_options, NULL);
+
+		if (c < 0)
+			break;
+		switch (c) {
+		case GETOPT_VAL_RESCAN:
+			rescan = 1;
+			break;
+		case GETOPT_VAL_NO_RESCAN:
+			/* Listed in the assign usage text; was rejected before */
+			rescan = 0;
+			break;
+		default:
+			/* Usage printed by the caller */
+			return -1;
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 3))
+		return -1;
+
+	memset(&args, 0, sizeof(args));
+	args.assign = assign;
+	args.src = parse_qgroupid(argv[optind]);
+	args.dst = parse_qgroupid(argv[optind + 1]);
+
+	path = argv[optind + 2];
+
+	/*
+	 * FIXME src should accept subvol path
+	 */
+	if (btrfs_qgroup_level(args.src) >= btrfs_qgroup_level(args.dst)) {
+		error("bad relation requested: %s", path);
+		return 1;
+	}
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	ret = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
+	if (ret < 0) {
+		error("unable to assign quota group: %s", strerror(errno));
+		close_file_or_dir(fd, dirstream);
+		return 1;
+	}
+
+	/*
+	 * If ret > 0, it means assign caused qgroup data inconsistent state.
+	 * Schedule a quota rescan if requested.
+	 *
+	 * The return value change only happens in newer kernel. But will not
+	 * cause problem since old kernel has a bug that will never clear
+	 * INCONSISTENT bit.
+	 */
+	if (ret > 0) {
+		if (rescan) {
+			struct btrfs_ioctl_quota_rescan_args qargs;
+
+			printf("Quota data changed, rescan scheduled\n");
+			memset(&qargs, 0, sizeof(qargs));
+			ret = ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &qargs);
+			if (ret < 0) {
+				error("quota rescan failed: %s",
+					strerror(errno));
+				/*
+				 * A negative value would make the callers
+				 * print the usage text; this is a runtime
+				 * failure, not a command line error.
+				 */
+				ret = 1;
+			}
+		} else {
+			warning("quotas may be inconsistent, rescan needed");
+		}
+	}
+	close_file_or_dir(fd, dirstream);
+	return ret;
+}
+
+/*
+ * Shared implementation of 'btrfs qgroup create' (create != 0) and
+ * 'btrfs qgroup destroy' (create == 0).
+ *
+ * Expects exactly argv[1] = <qgroupid> and argv[2] = <path>.
+ *
+ * Returns -1 if the argument count is wrong (the caller prints the usage),
+ * 1 if the path cannot be opened or the ioctl fails, 0 on success.
+ */
+static int qgroup_create(int create, int argc, char **argv)
+{
+	int ret = 0;
+	int fd;
+	int e;
+	char *path;
+	struct btrfs_ioctl_qgroup_create_args args;
+	DIR *dirstream = NULL;
+
+	if (check_argc_exact(argc, 3))
+		return -1;
+
+	/* Only touch argv[2] once the argument count has been validated */
+	path = argv[2];
+
+	memset(&args, 0, sizeof(args));
+	args.create = create;
+	args.qgroupid = parse_qgroupid(argv[1]);
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	ret = ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args);
+	/* close_file_or_dir() may clobber errno, so save it first */
+	e = errno;
+	close_file_or_dir(fd, dirstream);
+	if (ret < 0) {
+		error("unable to %s quota group: %s",
+			create ? "create":"destroy", strerror(e));
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Parse a qgroup limit given on the command line.
+ *
+ * Accepted forms are the word "none" (any case), which stores the all-ones
+ * value used to clear a limit, or a non-negative decimal number optionally
+ * followed by exactly one binary suffix: K, M, G or T (any case).
+ *
+ * @p: string to parse
+ * @s: receives the size in bytes; left untouched on failure
+ *
+ * Returns 1 on success and 0 if the string is malformed, starts with '-',
+ * has trailing characters, or does not fit into an unsigned long long
+ * (either while reading the digits or after applying the suffix).
+ */
+static int parse_limit(const char *p, unsigned long long *s)
+{
+	const unsigned long long max = ~0ULL;
+	unsigned long long size;
+	unsigned int shift;
+	char *endptr;
+
+	if (strcasecmp(p, "none") == 0) {
+		*s = max;
+		return 1;
+	}
+
+	if (p[0] == '-')
+		return 0;
+
+	/* strtoull() reports overflow only through errno */
+	errno = 0;
+	size = strtoull(p, &endptr, 10);
+	if (p == endptr || errno == ERANGE)
+		return 0;
+
+	switch (*endptr) {
+	case 'T':
+	case 't':
+		shift = 40;
+		break;
+	case 'G':
+	case 'g':
+		shift = 30;
+		break;
+	case 'M':
+	case 'm':
+		shift = 20;
+		break;
+	case 'K':
+	case 'k':
+		shift = 10;
+		break;
+	case 0:
+		shift = 0;
+		break;
+	default:
+		return 0;
+	}
+
+	if (shift) {
+		/* Nothing may follow the unit suffix */
+		if (endptr[1])
+			return 0;
+		/* Refuse values that would wrap around when scaled */
+		if (size > (max >> shift))
+			return 0;
+		size <<= shift;
+	}
+
+	*s = size;
+
+	return 1;
+}
+
+static const char * const cmd_qgroup_assign_usage[] = {
+	"btrfs qgroup assign [options] <src> <dst> <path>",
+	"Assign SRC as the child qgroup of DST",
+	"",
+	"--rescan schedule quota rescan if needed",
+	"--no-rescan ",
+	NULL
+};
+
+/*
+ * Handler for 'btrfs qgroup assign': runs qgroup_assign() in assign mode.
+ * A negative result means the command line was rejected, so the usage text
+ * is printed; the result of qgroup_assign() is returned either way.
+ */
+static int cmd_qgroup_assign(int argc, char **argv)
+{
+	int ret = qgroup_assign(1, argc, argv);
+
+	if (ret < 0)
+		usage(cmd_qgroup_assign_usage);
+	return ret;
+}
+
+static const char * const cmd_qgroup_remove_usage[] = {
+	"btrfs qgroup remove <src> <dst> <path>",
+	"Remove a child qgroup SRC from DST.",
+	NULL
+};
+
+/*
+ * Handler for 'btrfs qgroup remove': runs qgroup_assign() with assign == 0.
+ * The usage text is printed when qgroup_assign() reports a negative value;
+ * its result is returned unchanged.
+ */
+static int cmd_qgroup_remove(int argc, char **argv)
+{
+	const int status = qgroup_assign(0, argc, argv);
+
+	if (status >= 0)
+		return status;
+
+	usage(cmd_qgroup_remove_usage);
+	return status;
+}
+
+static const char * const cmd_qgroup_create_usage[] = {
+	"btrfs qgroup create <qgroupid> <path>",
+	"Create a subvolume quota group.",
+	NULL
+};
+
+/*
+ * Handler for 'btrfs qgroup create': runs qgroup_create() with create == 1.
+ * The usage text is printed when qgroup_create() reports a negative value;
+ * its result is returned unchanged.
+ */
+static int cmd_qgroup_create(int argc, char **argv)
+{
+	const int status = qgroup_create(1, argc, argv);
+
+	if (status >= 0)
+		return status;
+
+	usage(cmd_qgroup_create_usage);
+	return status;
+}
+
+static const char * const cmd_qgroup_destroy_usage[] = {
+	"btrfs qgroup destroy <qgroupid> <path>",
+	"Destroy a quota group.",
+	NULL
+};
+
+/*
+ * Handler for 'btrfs qgroup destroy': runs qgroup_create() with create == 0.
+ * The usage text is printed when qgroup_create() reports a negative value;
+ * its result is returned unchanged.
+ */
+static int cmd_qgroup_destroy(int argc, char **argv)
+{
+	const int status = qgroup_create(0, argc, argv);
+
+	if (status >= 0)
+		return status;
+
+	usage(cmd_qgroup_destroy_usage);
+	return status;
+}
+
+static const char * const cmd_qgroup_show_usage[] = {
+ "btrfs qgroup show -pcreFf "
+ "[--sort=qgroupid,rfer,excl,max_rfer,max_excl] <path>",
+ "Show subvolume quota groups.",
+ "-p print parent qgroup id",
+ "-c print child qgroup id",
+ "-r print limit of referenced size of qgroup",
+ "-e print limit of exclusive size of qgroup",
+ "-F list all qgroups which impact the given path",
+ " (including ancestral qgroups)",
+ "-f list all qgroups which impact the given path",
+ " (excluding ancestral qgroups)",
+ HELPINFO_UNITS_LONG,
+ "--sort=qgroupid,rfer,excl,max_rfer,max_excl",
+ " list qgroups sorted by specified items",
+ " you can use '+' or '-' in front of each item.",
+ " (+:ascending, -:descending, ascending default)",
+ NULL
+};
+
+/*
+ * Handler for 'btrfs qgroup show'.
+ *
+ * Parses the column (-p/-c/-r/-e), filter (-F/-f) and --sort options, opens
+ * the single <path> argument and calls btrfs_show_qgroups() with the filter
+ * and comparer sets built from the options.
+ *
+ * Returns 1 if the path cannot be opened, otherwise 0 when
+ * btrfs_show_qgroups() returned 0 and 1 for any other result.
+ */
+static int cmd_qgroup_show(int argc, char **argv)
+{
+ char *path;
+ int ret = 0;
+ int fd;
+ int e;
+ DIR *dirstream = NULL;
+ u64 qgroupid;
+ /* bit 0x1 is set by -F, bit 0x2 by -f */
+ int filter_flag = 0;
+ unsigned unit_mode;
+
+ struct btrfs_qgroup_comparer_set *comparer_set;
+ struct btrfs_qgroup_filter_set *filter_set;
+ /*
+  * NOTE(review): the allocation results are not checked here, and the
+  * sets are only freed explicitly when opening the path fails; whether
+  * btrfs_show_qgroups() releases them is not visible from this file.
+  */
+ filter_set = btrfs_qgroup_alloc_filter_set();
+ comparer_set = btrfs_qgroup_alloc_comparer_set();
+
+ /* takes &argc, so it presumably strips the unit options - TODO confirm */
+ unit_mode = get_unit_mode_from_arg(&argc, argv, 0);
+
+ optind = 1;
+ while (1) {
+ int c;
+ static const struct option long_options[] = {
+ {"sort", required_argument, NULL, 'S'},
+ { NULL, 0, NULL, 0 }
+ };
+
+ c = getopt_long(argc, argv, "pcreFf", long_options, NULL);
+ if (c < 0)
+ break;
+ switch (c) {
+ case 'p':
+ btrfs_qgroup_setup_print_column(
+ BTRFS_QGROUP_PARENT);
+ break;
+ case 'c':
+ btrfs_qgroup_setup_print_column(
+ BTRFS_QGROUP_CHILD);
+ break;
+ case 'r':
+ btrfs_qgroup_setup_print_column(
+ BTRFS_QGROUP_MAX_RFER);
+ break;
+ case 'e':
+ btrfs_qgroup_setup_print_column(
+ BTRFS_QGROUP_MAX_EXCL);
+ break;
+ case 'F':
+ filter_flag |= 0x1;
+ break;
+ case 'f':
+ filter_flag |= 0x2;
+ break;
+ case 'S':
+ /* --sort has no short form; 'S' is only its getopt value */
+ ret = btrfs_qgroup_parse_sort_string(optarg,
+ &comparer_set);
+ if (ret)
+ usage(cmd_qgroup_show_usage);
+ break;
+ default:
+ usage(cmd_qgroup_show_usage);
+ }
+ }
+ btrfs_qgroup_setup_units(unit_mode);
+
+ if (check_argc_exact(argc - optind, 1))
+ usage(cmd_qgroup_show_usage);
+
+ path = argv[optind];
+ fd = btrfs_open_dir(path, &dirstream, 1);
+ if (fd < 0) {
+ btrfs_qgroup_free_filter_set(filter_set);
+ btrfs_qgroup_free_comparer_set(comparer_set);
+ return 1;
+ }
+
+ /* The filters are keyed on the root id that <path> resolves to */
+ if (filter_flag) {
+ qgroupid = btrfs_get_path_rootid(fd);
+ if (filter_flag & 0x1)
+ btrfs_qgroup_setup_filter(&filter_set,
+ BTRFS_QGROUP_FILTER_ALL_PARENT,
+ qgroupid);
+ if (filter_flag & 0x2)
+ btrfs_qgroup_setup_filter(&filter_set,
+ BTRFS_QGROUP_FILTER_PARENT,
+ qgroupid);
+ }
+ ret = btrfs_show_qgroups(fd, filter_set, comparer_set);
+ /* save errno before close_file_or_dir() can overwrite it */
+ e = errno;
+ close_file_or_dir(fd, dirstream);
+ if (ret < 0)
+ error("can't list qgroups: %s", strerror(e));
+
+ return !!ret;
+}
+
+static const char * const cmd_qgroup_limit_usage[] = {
+	"btrfs qgroup limit [options] <size>|none [<qgroupid>] <path>",
+	"Set the limits of a subvolume quota group.",
+	"",
+	"-c limit amount of data after compression. This is the default,",
+	" it is currently not possible to turn off this option.",
+	"-e limit space exclusively assigned to this qgroup",
+	NULL
+};
+
+/*
+ * Handler for 'btrfs qgroup limit'.
+ *
+ * Command line: [-c] [-e] <size>|none [<qgroupid>] <path>
+ *
+ * With two positional arguments the limit applies to the subvolume at
+ * <path> (qgroupid stays 0 and <path> must be a subvolume); with three the
+ * qgroup is named explicitly. -e limits the exclusive size instead of the
+ * referenced size.
+ *
+ * Returns 0 on success and 1 on any failure; malformed command lines end in
+ * usage().
+ */
+static int cmd_qgroup_limit(int argc, char **argv)
+{
+	int ret = 0;
+	int fd;
+	int e;
+	char *path = NULL;
+	struct btrfs_ioctl_qgroup_limit_args args;
+	unsigned long long size;
+	int compressed = 0;
+	int exclusive = 0;
+	DIR *dirstream = NULL;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "ce");
+
+		if (c < 0)
+			break;
+		switch (c) {
+		case 'c':
+			compressed = 1;
+			break;
+		case 'e':
+			exclusive = 1;
+			break;
+		default:
+			usage(cmd_qgroup_limit_usage);
+		}
+	}
+
+	if (check_argc_min(argc - optind, 2))
+		usage(cmd_qgroup_limit_usage);
+
+	if (!parse_limit(argv[optind], &size)) {
+		error("invalid size argument: %s", argv[optind]);
+		return 1;
+	}
+
+	memset(&args, 0, sizeof(args));
+	if (compressed)
+		args.lim.flags |= BTRFS_QGROUP_LIMIT_RFER_CMPR |
+				  BTRFS_QGROUP_LIMIT_EXCL_CMPR;
+	if (exclusive) {
+		args.lim.flags |= BTRFS_QGROUP_LIMIT_MAX_EXCL;
+		args.lim.max_exclusive = size;
+	} else {
+		args.lim.flags |= BTRFS_QGROUP_LIMIT_MAX_RFER;
+		args.lim.max_referenced = size;
+	}
+
+	if (argc - optind == 2) {
+		args.qgroupid = 0;
+		path = argv[optind + 1];
+		ret = test_issubvolume(path);
+		if (ret < 0) {
+			error("cannot access '%s': %s", path, strerror(-ret));
+			return 1;
+		}
+		if (!ret) {
+			error("'%s' is not a subvolume", path);
+			return 1;
+		}
+		/*
+		 * keep qgroupid at 0, this indicates that the subvolume the
+		 * fd refers to is to be limited
+		 */
+	} else if (argc - optind == 3) {
+		args.qgroupid = parse_qgroupid(argv[optind + 1]);
+		path = argv[optind + 2];
+	} else
+		/* more than three positional arguments */
+		usage(cmd_qgroup_limit_usage);
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	ret = ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args);
+	/* close_file_or_dir() may clobber errno, so save it first */
+	e = errno;
+	close_file_or_dir(fd, dirstream);
+	if (ret < 0) {
+		error("unable to limit requested quota group: %s", strerror(e));
+		return 1;
+	}
+	return 0;
+}
+
+/* One-line summary text of the 'btrfs qgroup' command group. */
+static const char qgroup_cmd_group_info[] =
+"manage quota groups";
+
+/*
+ * Subcommand table of 'btrfs qgroup': group usage text, group summary and
+ * one entry per subcommand (token, handler, usage text), terminated by
+ * NULL_CMD_STRUCT.
+ * NOTE(review): the meaning of the trailing "NULL, 0" members comes from
+ * struct cmd_struct, which is declared outside this file.
+ */
+const struct cmd_group qgroup_cmd_group = {
+ qgroup_cmd_group_usage, qgroup_cmd_group_info, {
+ { "assign", cmd_qgroup_assign, cmd_qgroup_assign_usage,
+ NULL, 0 },
+ { "remove", cmd_qgroup_remove, cmd_qgroup_remove_usage,
+ NULL, 0 },
+ { "create", cmd_qgroup_create, cmd_qgroup_create_usage,
+ NULL, 0 },
+ { "destroy", cmd_qgroup_destroy, cmd_qgroup_destroy_usage,
+ NULL, 0 },
+ { "show", cmd_qgroup_show, cmd_qgroup_show_usage,
+ NULL, 0 },
+ { "limit", cmd_qgroup_limit, cmd_qgroup_limit_usage,
+ NULL, 0 },
+ NULL_CMD_STRUCT
+ }
+};
+
+/* Entry point of 'btrfs qgroup': hands argv to handle_command_group(). */
+int cmd_qgroup(int argc, char **argv)
+{
+ return handle_command_group(&qgroup_cmd_group, argc, argv);
+}
diff --git a/cmds-quota.c b/cmds-quota.c
new file mode 100644
index 00000000..34b8dacf
--- /dev/null
+++ b/cmds-quota.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2012 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "ctree.h"
+#include "ioctl.h"
+
+#include "commands.h"
+#include "utils.h"
+
+static const char * const quota_cmd_group_usage[] = {
+	"btrfs quota <command> [options] <path>",
+	NULL
+};
+
+/*
+ * Issue BTRFS_IOC_QUOTA_CTL with the given command on the filesystem that
+ * contains argv[1].
+ *
+ * Returns -1 if the argument count is not exactly 2 (the callers print the
+ * usage text), 1 if the path cannot be opened or the ioctl fails, and 0 on
+ * success.
+ */
+static int quota_ctl(int cmd, int argc, char **argv)
+{
+	struct btrfs_ioctl_quota_ctl_args ctl_args;
+	DIR *dirstream = NULL;
+	char *path = argv[1];
+	int saved_errno;
+	int fd;
+	int ret;
+
+	if (check_argc_exact(argc, 2))
+		return -1;
+
+	memset(&ctl_args, 0, sizeof(ctl_args));
+	ctl_args.cmd = cmd;
+
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	ret = ioctl(fd, BTRFS_IOC_QUOTA_CTL, &ctl_args);
+	/* keep the ioctl's errno; closing may overwrite it */
+	saved_errno = errno;
+	close_file_or_dir(fd, dirstream);
+	if (ret >= 0)
+		return 0;
+
+	error("quota command failed: %s", strerror(saved_errno));
+	return 1;
+}
+
+static const char * const cmd_quota_enable_usage[] = {
+	"btrfs quota enable <path>",
+	"Enable subvolume quota support for a filesystem.",
+	"Any data already present on the filesystem will not count towards",
+	"the space usage numbers. It is recommended to enable quota for a",
+	"filesystem before writing any data to it.",
+	NULL
+};
+
+/*
+ * Handler for 'btrfs quota enable': runs quota_ctl() with
+ * BTRFS_QUOTA_CTL_ENABLE. The usage text is printed when quota_ctl() reports
+ * a negative value; its result is returned unchanged.
+ */
+static int cmd_quota_enable(int argc, char **argv)
+{
+	const int status = quota_ctl(BTRFS_QUOTA_CTL_ENABLE, argc, argv);
+
+	if (status >= 0)
+		return status;
+
+	usage(cmd_quota_enable_usage);
+	return status;
+}
+
+static const char * const cmd_quota_disable_usage[] = {
+	"btrfs quota disable <path>",
+	"Disable subvolume quota support for a filesystem.",
+	NULL
+};
+
+/*
+ * Handler for 'btrfs quota disable': runs quota_ctl() with
+ * BTRFS_QUOTA_CTL_DISABLE. The usage text is printed when quota_ctl()
+ * reports a negative value; its result is returned unchanged.
+ */
+static int cmd_quota_disable(int argc, char **argv)
+{
+	const int status = quota_ctl(BTRFS_QUOTA_CTL_DISABLE, argc, argv);
+
+	if (status >= 0)
+		return status;
+
+	usage(cmd_quota_disable_usage);
+	return status;
+}
+
+static const char * const cmd_quota_rescan_usage[] = {
+	"btrfs quota rescan [-sw] <path>",
+	"Trash all qgroup numbers and scan the metadata again with the current config.",
+	"",
+	"-s show status of a running rescan operation",
+	"-w wait for rescan operation to finish (can be already in progress)",
+	NULL
+};
+
+/*
+ * Handler for 'btrfs quota rescan'.
+ *
+ * Without options a rescan is started (BTRFS_IOC_QUOTA_RESCAN). With -s the
+ * status is queried instead (BTRFS_IOC_QUOTA_RESCAN_STATUS). With -w the
+ * command additionally waits (BTRFS_IOC_QUOTA_RESCAN_WAIT) when the rescan
+ * was started or was already in progress. -s and -w are mutually exclusive.
+ *
+ * Returns 0 on success and 1 on any failure, including a failed status
+ * query; malformed command lines end in usage().
+ */
+static int cmd_quota_rescan(int argc, char **argv)
+{
+	int ret = 0;
+	int fd;
+	int e;
+	char *path = NULL;
+	struct btrfs_ioctl_quota_rescan_args args;
+	unsigned long ioctlnum = BTRFS_IOC_QUOTA_RESCAN;
+	DIR *dirstream = NULL;
+	int wait_for_completion = 0;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "sw");
+
+		if (c < 0)
+			break;
+		switch (c) {
+		case 's':
+			ioctlnum = BTRFS_IOC_QUOTA_RESCAN_STATUS;
+			break;
+		case 'w':
+			wait_for_completion = 1;
+			break;
+		default:
+			usage(cmd_quota_rescan_usage);
+		}
+	}
+
+	if (ioctlnum != BTRFS_IOC_QUOTA_RESCAN && wait_for_completion) {
+		error("switch -w cannot be used with -s");
+		return 1;
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_quota_rescan_usage);
+
+	memset(&args, 0, sizeof(args));
+
+	path = argv[optind];
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	ret = ioctl(fd, ioctlnum, &args);
+	e = errno;
+
+	/* Wait both for a rescan we started and for one already running */
+	if (wait_for_completion && (ret == 0 || e == EINPROGRESS)) {
+		ret = ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT, &args);
+		e = errno;
+	}
+	close_file_or_dir(fd, dirstream);
+
+	if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN) {
+		if (ret < 0) {
+			error("quota rescan failed: %s", strerror(e));
+			return 1;
+		} else {
+			printf("quota rescan started\n");
+		}
+	} else {
+		/*
+		 * A failed status query leaves args zeroed; report the error
+		 * instead of claiming that no rescan is in progress.
+		 */
+		if (ret < 0) {
+			error("could not obtain quota rescan status: %s",
+				strerror(e));
+			return 1;
+		}
+		if (!args.flags) {
+			printf("no rescan operation in progress\n");
+		} else {
+			printf("rescan operation running (current key %lld)\n",
+				args.progress);
+		}
+	}
+
+	return 0;
+}
+
+/* One-line summary text of the 'btrfs quota' command group. */
+static const char quota_cmd_group_info[] =
+"manage filesystem quota settings";
+
+/*
+ * Subcommand table of 'btrfs quota': group usage text, group summary and
+ * one entry per subcommand (token, handler, usage text), terminated by
+ * NULL_CMD_STRUCT.
+ * NOTE(review): the meaning of the trailing "NULL, 0" members comes from
+ * struct cmd_struct, which is declared outside this file.
+ */
+const struct cmd_group quota_cmd_group = {
+ quota_cmd_group_usage, quota_cmd_group_info, {
+ { "enable", cmd_quota_enable, cmd_quota_enable_usage, NULL, 0 },
+ { "disable", cmd_quota_disable, cmd_quota_disable_usage,
+ NULL, 0 },
+ { "rescan", cmd_quota_rescan, cmd_quota_rescan_usage, NULL, 0 },
+ NULL_CMD_STRUCT
+ }
+};
+
+/* Entry point of 'btrfs quota': hands argv to handle_command_group(). */
+int cmd_quota(int argc, char **argv)
+{
+ return handle_command_group(&quota_cmd_group, argc, argv);
+}
diff --git a/cmds-receive.c b/cmds-receive.c
new file mode 100644
index 00000000..cbb16421
--- /dev/null
+++ b/cmds-receive.c
@@ -0,0 +1,1306 @@
+/*
+ * Copyright (C) 2012 Alexander Block. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "kerncompat.h"
+#include "androidcompat.h"
+
+#include <unistd.h>
+#include <stdint.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <math.h>
+#include <ftw.h>
+#include <sys/wait.h>
+#include <assert.h>
+#include <getopt.h>
+#include <limits.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <uuid/uuid.h>
+
+#include "ctree.h"
+#include "ioctl.h"
+#include "commands.h"
+#include "utils.h"
+#include "list.h"
+#include "btrfs-list.h"
+
+#include "send.h"
+#include "send-stream.h"
+#include "send-utils.h"
+
+/*
+ * Verbosity level: the code in this file prints per-subvolume details when
+ * it is >= 1 and one line per processed command when it is >= 2.
+ */
+static int g_verbose = 0;
+
+/* State shared by the process_*() handlers while receiving. */
+struct btrfs_receive
+{
+ /* base fd for openat() on subvolume paths */
+ int mnt_fd;
+ /* fd the subvolume/snapshot creation ioctls are issued on */
+ int dest_dir_fd;
+
+ /* fd and path of the file last opened for writing; fd is -1 if none */
+ int write_fd;
+ char write_path[PATH_MAX];
+
+ char *root_path;
+ char *dest_dir_path; /* relative to root_path */
+ /* root_path + dest_dir_path + name of the subvolume being received */
+ char full_subvol_path[PATH_MAX];
+ char *full_root_path;
+ /* NOTE(review): not used in the visible part of the file */
+ int dest_dir_chroot;
+
+ struct subvol_info cur_subvol;
+ /*
+ * Substitute for cur_subvol::path which is a pointer and we cannot
+ * change it to an array as it's a public API.
+ */
+ char cur_subvol_path[PATH_MAX];
+
+ /* handle passed to subvol_uuid_search() */
+ struct subvol_uuid_search sus;
+
+ /* NOTE(review): not used in the visible part of the file */
+ int honor_end_cmd;
+
+ /*
+ * Buffer to store capabilities from security.capabilities xattr,
+ * usually 20 bytes, but make some room for potentially larger
+ * encodings. Must be set only once per file, denoted by length > 0.
+ */
+ char cached_capabilities[64];
+ int cached_capabilities_len;
+};
+
+/*
+ * Finalize the subvolume recorded in r->cur_subvol_path, if any.
+ *
+ * Opens the subvolume relative to r->mnt_fd, stores the received uuid and
+ * stransid with BTRFS_IOC_SET_RECEIVED_SUBVOL, remembers the returned
+ * rtransid and finally sets BTRFS_SUBVOL_RDONLY on it.
+ *
+ * r->cur_subvol_path is cleared on every exit path, successful or not.
+ *
+ * Returns 0 on success (or when there is nothing to finish) and a negative
+ * errno value on failure.
+ */
+static int finish_subvol(struct btrfs_receive *r)
+{
+	int ret;
+	int subvol_fd = -1;
+	struct btrfs_ioctl_received_subvol_args rs_args;
+	char uuid_str[BTRFS_UUID_UNPARSED_SIZE];
+	u64 flags;
+
+	if (r->cur_subvol_path[0] == 0)
+		return 0;
+
+	subvol_fd = openat(r->mnt_fd, r->cur_subvol_path,
+			O_RDONLY | O_NOATIME);
+	if (subvol_fd < 0) {
+		ret = -errno;
+		/* no trailing newline, like the other error() calls here */
+		error("cannot open %s: %s",
+			r->cur_subvol_path, strerror(-ret));
+		goto out;
+	}
+
+	memset(&rs_args, 0, sizeof(rs_args));
+	memcpy(rs_args.uuid, r->cur_subvol.received_uuid, BTRFS_UUID_SIZE);
+	rs_args.stransid = r->cur_subvol.stransid;
+
+	if (g_verbose >= 1) {
+		uuid_unparse((u8*)rs_args.uuid, uuid_str);
+		fprintf(stderr, "BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=%s, "
+				"stransid=%llu\n", uuid_str, rs_args.stransid);
+	}
+
+	ret = ioctl(subvol_fd, BTRFS_IOC_SET_RECEIVED_SUBVOL, &rs_args);
+	if (ret < 0) {
+		ret = -errno;
+		error("ioctl BTRFS_IOC_SET_RECEIVED_SUBVOL failed: %s",
+			strerror(-ret));
+		goto out;
+	}
+	r->cur_subvol.rtransid = rs_args.rtransid;
+
+	/* Read-modify-write of the subvolume flags to add the RDONLY bit */
+	ret = ioctl(subvol_fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
+	if (ret < 0) {
+		ret = -errno;
+		error("ioctl BTRFS_IOC_SUBVOL_GETFLAGS failed: %s",
+			strerror(-ret));
+		goto out;
+	}
+
+	flags |= BTRFS_SUBVOL_RDONLY;
+
+	ret = ioctl(subvol_fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags);
+	if (ret < 0) {
+		ret = -errno;
+		error("failed to make subvolume read only: %s",
+			strerror(-ret));
+		goto out;
+	}
+
+	ret = 0;
+
+out:
+	r->cur_subvol_path[0] = 0;
+	if (subvol_fd != -1)
+		close(subvol_fd);
+	return ret;
+}
+
+/*
+ * Start receiving a new subvolume named @path.
+ *
+ * Finishes the previously received subvolume first, records the new
+ * subvolume's paths, received uuid and stransid in the receive state @user
+ * (a struct btrfs_receive) and creates the subvolume through
+ * BTRFS_IOC_SUBVOL_CREATE on r->dest_dir_fd.
+ *
+ * Returns the non-negative ioctl result on success and a negative value if
+ * finishing the previous subvolume, building a path or the ioctl fails.
+ */
+static int process_subvol(const char *path, const u8 *uuid, u64 ctransid,
+			  void *user)
+{
+	int ret;
+	struct btrfs_receive *r = user;
+	struct btrfs_ioctl_vol_args args_v1;
+	char uuid_str[BTRFS_UUID_UNPARSED_SIZE];
+
+	ret = finish_subvol(r);
+	if (ret < 0)
+		goto out;
+
+	BUG_ON(r->cur_subvol.path);
+	BUG_ON(r->cur_subvol_path[0]);
+
+	/* cur_subvol_path is dest_dir_path/path, or just path if that is empty */
+	if (*r->dest_dir_path == 0) {
+		strncpy_null(r->cur_subvol_path, path);
+	} else {
+		ret = path_cat_out(r->cur_subvol_path, r->dest_dir_path, path);
+		if (ret < 0) {
+			/* no trailing newline, like the other error() calls */
+			error("subvol: path invalid: %s", path);
+			goto out;
+		}
+	}
+	ret = path_cat3_out(r->full_subvol_path, r->root_path,
+			r->dest_dir_path, path);
+	if (ret < 0) {
+		error("subvol: path invalid: %s", path);
+		goto out;
+	}
+
+	fprintf(stderr, "At subvol %s\n", path);
+
+	memcpy(r->cur_subvol.received_uuid, uuid, BTRFS_UUID_SIZE);
+	r->cur_subvol.stransid = ctransid;
+
+	if (g_verbose) {
+		uuid_unparse((u8*)r->cur_subvol.received_uuid, uuid_str);
+		fprintf(stderr, "receiving subvol %s uuid=%s, stransid=%llu\n",
+				path, uuid_str,
+				r->cur_subvol.stransid);
+	}
+
+	memset(&args_v1, 0, sizeof(args_v1));
+	strncpy_null(args_v1.name, path);
+	ret = ioctl(r->dest_dir_fd, BTRFS_IOC_SUBVOL_CREATE, &args_v1);
+	if (ret < 0) {
+		ret = -errno;
+		error("creating subvolume %s failed: %s", path, strerror(-ret));
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Start receiving a snapshot named @path whose parent is identified by
+ * @parent_uuid / @parent_ctransid.
+ *
+ * Finishes the previously received subvolume, records the new paths, uuid
+ * and stransid in the receive state @user (a struct btrfs_receive), looks
+ * the parent up (first by received uuid, then by uuid), rewrites the
+ * parent's path to be relative to r->full_root_path when that is set, and
+ * creates the snapshot with BTRFS_IOC_SNAP_CREATE_V2 on r->dest_dir_fd.
+ *
+ * Returns the non-negative ioctl result on success and a negative errno
+ * value on failure (-ENOENT if the parent cannot be found or lies outside
+ * of r->full_root_path).
+ */
+static int process_snapshot(const char *path, const u8 *uuid, u64 ctransid,
+			    const u8 *parent_uuid, u64 parent_ctransid,
+			    void *user)
+{
+	int ret;
+	struct btrfs_receive *r = user;
+	char uuid_str[BTRFS_UUID_UNPARSED_SIZE];
+	struct btrfs_ioctl_vol_args_v2 args_v2;
+	struct subvol_info *parent_subvol = NULL;
+
+	ret = finish_subvol(r);
+	if (ret < 0)
+		goto out;
+
+	BUG_ON(r->cur_subvol.path);
+	BUG_ON(r->cur_subvol_path[0]);
+
+	if (*r->dest_dir_path == 0) {
+		strncpy_null(r->cur_subvol_path, path);
+	} else {
+		ret = path_cat_out(r->cur_subvol_path, r->dest_dir_path, path);
+		if (ret < 0) {
+			error("snapshot: path invalid: %s", path);
+			goto out;
+		}
+	}
+	ret = path_cat3_out(r->full_subvol_path, r->root_path,
+			r->dest_dir_path, path);
+	if (ret < 0) {
+		error("snapshot: path invalid: %s", path);
+		goto out;
+	}
+
+	fprintf(stdout, "At snapshot %s\n", path);
+
+	memcpy(r->cur_subvol.received_uuid, uuid, BTRFS_UUID_SIZE);
+	r->cur_subvol.stransid = ctransid;
+
+	if (g_verbose) {
+		uuid_unparse((u8*)r->cur_subvol.received_uuid, uuid_str);
+		fprintf(stderr, "receiving snapshot %s uuid=%s, "
+				"ctransid=%llu ", path, uuid_str,
+				r->cur_subvol.stransid);
+		uuid_unparse(parent_uuid, uuid_str);
+		fprintf(stderr, "parent_uuid=%s, parent_ctransid=%llu\n",
+				uuid_str, parent_ctransid);
+	}
+
+	memset(&args_v2, 0, sizeof(args_v2));
+	strncpy_null(args_v2.name, path);
+
+	parent_subvol = subvol_uuid_search(&r->sus, 0, parent_uuid,
+			parent_ctransid, NULL, subvol_search_by_received_uuid);
+	if (!parent_subvol) {
+		parent_subvol = subvol_uuid_search(&r->sus, 0, parent_uuid,
+				parent_ctransid, NULL, subvol_search_by_uuid);
+	}
+	if (!parent_subvol) {
+		ret = -ENOENT;
+		error("cannot find parent subvolume");
+		goto out;
+	}
+
+	/*
+	 * The path is resolved from the root subvol, but we could be in some
+	 * subvolume under the root subvolume, so try and adjust the path to be
+	 * relative to our root path.
+	 */
+	if (r->full_root_path) {
+		size_t root_len;
+		size_t sub_len;
+
+		root_len = strlen(r->full_root_path);
+		sub_len = strlen(parent_subvol->path);
+
+		/*
+		 * First make sure the parent subvol is actually in our path.
+		 * The root path has to be a prefix that ends on a path
+		 * component boundary: the code below strips root_len + 1
+		 * leading bytes, so a match somewhere in the middle of the
+		 * parent path must not be accepted.
+		 */
+		if (sub_len < root_len ||
+		    strncmp(parent_subvol->path, r->full_root_path,
+			    root_len) != 0 ||
+		    (sub_len > root_len &&
+		     parent_subvol->path[root_len] != '/')) {
+			error(
+		"parent subvol is not reachable from inside the root subvol");
+			ret = -ENOENT;
+			goto out;
+		}
+
+		if (sub_len == root_len) {
+			parent_subvol->path[0] = '/';
+			parent_subvol->path[1] = '\0';
+		} else {
+			/*
+			 * root path is foo/bar
+			 * subvol path is foo/bar/baz
+			 *
+			 * we need to have baz be the path, so we need to move
+			 * the bit after foo/bar/, so path + root_len + 1, and
+			 * move the part we care about, so sub_len - root_len -
+			 * 1.
+			 */
+			memmove(parent_subvol->path,
+				parent_subvol->path + root_len + 1,
+				sub_len - root_len - 1);
+			parent_subvol->path[sub_len - root_len - 1] = '\0';
+		}
+	}
+	/*
+	 * TODO: detect a parent that was modified after it was received
+	 * (ctransid > rtransid) and refuse or warn about it.
+	 */
+
+	if (*parent_subvol->path == 0)
+		args_v2.fd = dup(r->mnt_fd);
+	else
+		args_v2.fd = openat(r->mnt_fd, parent_subvol->path,
+				O_RDONLY | O_NOATIME);
+	if (args_v2.fd < 0) {
+		ret = -errno;
+		if (errno != ENOENT)
+			error("cannot open %s: %s",
+				parent_subvol->path, strerror(-ret));
+		else
+			fprintf(stderr,
+				"It seems that you have changed your default "
+				"subvolume or you specify other subvolume to\n"
+				"mount btrfs, try to remount this btrfs filesystem "
+				"with fs tree, and run btrfs receive again!\n");
+		goto out;
+	}
+
+	ret = ioctl(r->dest_dir_fd, BTRFS_IOC_SNAP_CREATE_V2, &args_v2);
+	close(args_v2.fd);
+	if (ret < 0) {
+		ret = -errno;
+		error("creating snapshot %s -> %s failed: %s",
+			parent_subvol->path, path, strerror(-ret));
+		goto out;
+	}
+
+out:
+	/* the search result and its path are owned by this function */
+	if (parent_subvol) {
+		free(parent_subvol->path);
+		free(parent_subvol);
+	}
+	return ret;
+}
+
+/*
+ * Create the empty regular file @path (mode 0600) below
+ * r->full_subvol_path. @user is the struct btrfs_receive state.
+ *
+ * Returns 0 on success, the negative path_cat_out() result if the full path
+ * cannot be built, or a negative errno value if creat() fails.
+ */
+static int process_mkfile(const char *path, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int fd;
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("mkfile: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "mkfile %s\n", path);
+
+	fd = creat(target, 0600);
+	if (fd < 0) {
+		err = -errno;
+		error("mkfile %s failed: %s", path, strerror(-err));
+		return err;
+	}
+
+	/* only the file's existence matters here, not the descriptor */
+	close(fd);
+	return 0;
+}
+
+/*
+ * Create the directory @path (mode 0700) below r->full_subvol_path.
+ * @user is the struct btrfs_receive state.
+ *
+ * Returns the mkdir() result on success, the negative path_cat_out() result
+ * if the full path cannot be built, or a negative errno value if mkdir()
+ * fails.
+ */
+static int process_mkdir(const char *path, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("mkdir: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "mkdir %s\n", path);
+
+	err = mkdir(target, 0700);
+	if (err >= 0)
+		return err;
+
+	err = -errno;
+	error("mkdir %s failed: %s", path, strerror(-err));
+	return err;
+}
+
+/*
+ * Create the special file @path below r->full_subvol_path with mknod().
+ * Only the file type bits of @mode (mode & S_IFMT) are passed on, together
+ * with @dev. @user is the struct btrfs_receive state.
+ *
+ * Returns the mknod() result on success, the negative path_cat_out() result
+ * if the full path cannot be built, or a negative errno value if mknod()
+ * fails.
+ */
+static int process_mknod(const char *path, u64 mode, u64 dev, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("mknod: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "mknod %s mode=%llu, dev=%llu\n",
+			path, mode, dev);
+
+	err = mknod(target, mode & S_IFMT, dev);
+	if (err >= 0)
+		return err;
+
+	err = -errno;
+	error("mknod %s failed: %s", path, strerror(-err));
+	return err;
+}
+
+/*
+ * Create the FIFO @path (mode 0600) below r->full_subvol_path.
+ * @user is the struct btrfs_receive state.
+ *
+ * Returns the mkfifo() result on success, the negative path_cat_out()
+ * result if the full path cannot be built, or a negative errno value if
+ * mkfifo() fails.
+ */
+static int process_mkfifo(const char *path, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("mkfifo: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "mkfifo %s\n", path);
+
+	err = mkfifo(target, 0600);
+	if (err >= 0)
+		return err;
+
+	err = -errno;
+	error("mkfifo %s failed: %s", path, strerror(-err));
+	return err;
+}
+
+/*
+ * Create the socket node @path (mode 0600 | S_IFSOCK) below
+ * r->full_subvol_path using mknod(). @user is the struct btrfs_receive
+ * state.
+ *
+ * Returns the mknod() result on success, the negative path_cat_out() result
+ * if the full path cannot be built, or a negative errno value if mknod()
+ * fails.
+ */
+static int process_mksock(const char *path, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("mksock: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "mksock %s\n", path);
+
+	err = mknod(target, 0600 | S_IFSOCK, 0);
+	if (err >= 0)
+		return err;
+
+	err = -errno;
+	error("mknod %s failed: %s", path, strerror(-err));
+	return err;
+}
+
+/*
+ * Create the symbolic link @path below r->full_subvol_path with @lnk as its
+ * target; @lnk is stored as given. @user is the struct btrfs_receive state.
+ *
+ * Returns the symlink() result on success, the negative path_cat_out()
+ * result if the full path cannot be built, or a negative errno value if
+ * symlink() fails.
+ */
+static int process_symlink(const char *path, const char *lnk, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char link_path[PATH_MAX];
+	int err;
+
+	err = path_cat_out(link_path, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("symlink: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "symlink %s -> %s\n", path, lnk);
+
+	err = symlink(lnk, link_path);
+	if (err >= 0)
+		return err;
+
+	err = -errno;
+	error("symlink %s -> %s failed: %s", path,
+	      lnk, strerror(-err));
+	return err;
+}
+
+/*
+ * Rename @from to @to; both names are resolved below r->full_subvol_path.
+ * @user is the struct btrfs_receive state.
+ *
+ * Returns the rename() result on success, the negative path_cat_out()
+ * result if either full path cannot be built, or a negative errno value if
+ * rename() fails.
+ */
+static int process_rename(const char *from, const char *to, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char src[PATH_MAX];
+	char dst[PATH_MAX];
+	int err;
+
+	err = path_cat_out(src, rctx->full_subvol_path, from);
+	if (err < 0) {
+		error("rename: source path invalid: %s", from);
+		return err;
+	}
+
+	err = path_cat_out(dst, rctx->full_subvol_path, to);
+	if (err < 0) {
+		error("rename: target path invalid: %s", to);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "rename %s -> %s\n", from, to);
+
+	err = rename(src, dst);
+	if (err >= 0)
+		return err;
+
+	err = -errno;
+	error("rename %s -> %s failed: %s", from,
+	      to, strerror(-err));
+	return err;
+}
+
+/*
+ * Create the hard link @path that refers to the existing file @lnk; both
+ * names are resolved below r->full_subvol_path. @user is the struct
+ * btrfs_receive state.
+ *
+ * Returns the link() result on success, the negative path_cat_out() result
+ * if either full path cannot be built, or a negative errno value if link()
+ * fails.
+ */
+static int process_link(const char *path, const char *lnk, void *user)
+{
+	int ret;
+	struct btrfs_receive *r = user;
+	char full_path[PATH_MAX];
+	char full_link_path[PATH_MAX];
+
+	ret = path_cat_out(full_path, r->full_subvol_path, path);
+	if (ret < 0) {
+		/*
+		 * Report the name we were given, as the sibling handlers do:
+		 * the output buffer may not hold anything meaningful after a
+		 * failed path_cat_out().
+		 */
+		error("link: source path invalid: %s", path);
+		goto out;
+	}
+
+	ret = path_cat_out(full_link_path, r->full_subvol_path, lnk);
+	if (ret < 0) {
+		error("link: target path invalid: %s", lnk);
+		goto out;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "link %s -> %s\n", path, lnk);
+
+	/* link(existing, new): @lnk is the existing file, @path the new name */
+	ret = link(full_link_path, full_path);
+	if (ret < 0) {
+		ret = -errno;
+		error("link %s -> %s failed: %s", path, lnk, strerror(-ret));
+	}
+
+out:
+	return ret;
+}
+
+
+/*
+ * Remove the file @path below r->full_subvol_path with unlink().
+ * @user is the struct btrfs_receive state.
+ *
+ * Returns the unlink() result on success, the negative path_cat_out()
+ * result if the full path cannot be built, or a negative errno value if
+ * unlink() fails.
+ */
+static int process_unlink(const char *path, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("unlink: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "unlink %s\n", path);
+
+	err = unlink(target);
+	if (err >= 0)
+		return err;
+
+	err = -errno;
+	error("unlink %s failed. %s", path, strerror(-err));
+	return err;
+}
+
+/*
+ * Remove the directory @path below r->full_subvol_path with rmdir().
+ * @user is the struct btrfs_receive state.
+ *
+ * Returns the rmdir() result on success, the negative path_cat_out() result
+ * if the full path cannot be built, or a negative errno value if rmdir()
+ * fails.
+ */
+static int process_rmdir(const char *path, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("rmdir: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "rmdir %s\n", path);
+
+	err = rmdir(target);
+	if (err >= 0)
+		return err;
+
+	err = -errno;
+	error("rmdir %s failed: %s", path, strerror(-err));
+	return err;
+}
+
+/*
+ * Make r->write_fd refer to @path, opened O_RDWR.
+ *
+ * The descriptor is cached together with its path in r->write_path: if the
+ * cached path equals @path nothing is done, otherwise the old descriptor is
+ * closed and @path is opened in its place.
+ *
+ * Returns 0 on success and a negative errno value if open() fails.
+ */
+static int open_inode_for_write(struct btrfs_receive *r, const char *path)
+{
+	int ret = 0;
+
+	if (r->write_fd != -1) {
+		if (strcmp(r->write_path, path) == 0)
+			goto out;
+		close(r->write_fd);
+		r->write_fd = -1;
+	}
+
+	r->write_fd = open(path, O_RDWR);
+	if (r->write_fd < 0) {
+		ret = -errno;
+		error("cannot open %s: %s", path, strerror(-ret));
+		goto out;
+	}
+	strncpy_null(r->write_path, path);
+
+out:
+	return ret;
+}
+
+/*
+ * Close the cached write descriptor, if one is open, and reset both
+ * r->write_fd (to -1) and r->write_path (to the empty string).
+ */
+static void close_inode_for_write(struct btrfs_receive *r)
+{
+	if (r->write_fd != -1) {
+		close(r->write_fd);
+		r->write_fd = -1;
+		r->write_path[0] = 0;
+	}
+}
+
+/*
+ * Write @len bytes from @data at @offset into the file @path below
+ * r->full_subvol_path. @user is the struct btrfs_receive state.
+ *
+ * The file is opened through open_inode_for_write(), so consecutive writes
+ * to the same file reuse one descriptor. Short writes are continued until
+ * all @len bytes are written.
+ *
+ * Returns 0 on success (non-negative in general, as the results of
+ * path_cat_out() and open_inode_for_write() are passed through), or a
+ * negative value if building the path, opening the file or pwrite() fails.
+ */
+static int process_write(const char *path, const void *data, u64 offset,
+			 u64 len, void *user)
+{
+	int ret = 0;
+	struct btrfs_receive *r = user;
+	char full_path[PATH_MAX];
+	u64 pos = 0;
+	/* pwrite() returns ssize_t; an int would truncate large results */
+	ssize_t w;
+
+	ret = path_cat_out(full_path, r->full_subvol_path, path);
+	if (ret < 0) {
+		error("write: path invalid: %s", path);
+		goto out;
+	}
+
+	ret = open_inode_for_write(r, full_path);
+	if (ret < 0)
+		goto out;
+
+	while (pos < len) {
+		w = pwrite(r->write_fd, (const char *)data + pos, len - pos,
+				offset + pos);
+		if (w < 0) {
+			ret = -errno;
+			/* no trailing newline, like the other error() calls */
+			error("writing to %s failed: %s",
+				path, strerror(-ret));
+			goto out;
+		}
+		pos += w;
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Send-stream "clone" callback: clone @len bytes starting at @clone_offset
+ * of @clone_path (inside the subvolume identified by @clone_uuid and
+ * @clone_ctransid) into @path at @offset, using BTRFS_IOC_CLONE_RANGE.
+ *
+ * The subvolume holding the clone source is looked up by received UUID; if
+ * it is not found but @clone_uuid matches the subvolume currently being
+ * received, r->cur_subvol_path is used instead.
+ *
+ * Returns 0 on success or a negative errno-style value on failure
+ * (-ENOENT when the source subvolume is unknown, -ENOMEM when the
+ * subvolume path cannot be duplicated).
+ */
+static int process_clone(const char *path, u64 offset, u64 len,
+			 const u8 *clone_uuid, u64 clone_ctransid,
+			 const char *clone_path, u64 clone_offset,
+			 void *user)
+{
+	int ret;
+	struct btrfs_receive *r = user;
+	struct btrfs_ioctl_clone_range_args clone_args;
+	struct subvol_info *si = NULL;
+	char full_path[PATH_MAX];
+	char *subvol_path = NULL;
+	char full_clone_path[PATH_MAX];
+	int clone_fd = -1;
+
+	ret = path_cat_out(full_path, r->full_subvol_path, path);
+	if (ret < 0) {
+		error("clone: source path invalid: %s", path);
+		goto out;
+	}
+
+	ret = open_inode_for_write(r, full_path);
+	if (ret < 0)
+		goto out;
+
+	si = subvol_uuid_search(&r->sus, 0, clone_uuid, clone_ctransid, NULL,
+				subvol_search_by_received_uuid);
+	if (!si) {
+		if (memcmp(clone_uuid, r->cur_subvol.received_uuid,
+			   BTRFS_UUID_SIZE) == 0) {
+			/* TODO check generation of extent */
+			subvol_path = strdup(r->cur_subvol_path);
+		} else {
+			ret = -ENOENT;
+			error("clone: did not find source subvol");
+			goto out;
+		}
+	} else {
+		/*
+		 * TODO: a check that the source subvolume was not modified
+		 * after it was received (ctransid > rtransid; error unless
+		 * forced, warning otherwise) used to sit here and is
+		 * currently disabled.
+		 */
+		subvol_path = strdup(si->path);
+	}
+
+	/* strdup() can fail; do not hand a NULL base path to path_cat_out() */
+	if (!subvol_path) {
+		ret = -ENOMEM;
+		error("clone: cannot allocate memory for subvolume path");
+		goto out;
+	}
+
+	ret = path_cat_out(full_clone_path, subvol_path, clone_path);
+	if (ret < 0) {
+		error("clone: target path invalid: %s", clone_path);
+		goto out;
+	}
+
+	clone_fd = openat(r->mnt_fd, full_clone_path, O_RDONLY | O_NOATIME);
+	if (clone_fd < 0) {
+		ret = -errno;
+		error("cannot open %s: %s", full_clone_path, strerror(-ret));
+		goto out;
+	}
+
+	clone_args.src_fd = clone_fd;
+	clone_args.src_offset = clone_offset;
+	clone_args.src_length = len;
+	clone_args.dest_offset = offset;
+	ret = ioctl(r->write_fd, BTRFS_IOC_CLONE_RANGE, &clone_args);
+	if (ret < 0) {
+		ret = -errno;
+		error("failed to clone extents to %s\n%s\n",
+		      path, strerror(-ret));
+		goto out;
+	}
+
+out:
+	if (si) {
+		free(si->path);
+		free(si);
+	}
+	free(subvol_path);
+	if (clone_fd != -1)
+		close(clone_fd);
+	return ret;
+}
+
+
+/*
+ * Send-stream "set_xattr" callback: set extended attribute @name to the
+ * @len bytes at @data on the file built from r->full_subvol_path and @path
+ * (symlinks are not followed, lsetxattr() is used).
+ *
+ * A "security.capability" value is additionally copied into
+ * r->cached_capabilities so process_chown() can re-apply it later.
+ *
+ * Returns 0 on success, the negative path_cat_out() result, -E2BIG when
+ * the capability value does not fit the cache, or -errno from lsetxattr().
+ */
+static int process_set_xattr(const char *path, const char *name,
+			     const void *data, int len, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("set_xattr: path invalid: %s", path);
+		return err;
+	}
+
+	if (!strcmp("security.capability", name)) {
+		if (g_verbose >= 3)
+			fprintf(stderr, "set_xattr: cache capabilities\n");
+		if (rctx->cached_capabilities_len)
+			warning("capabilities set multiple times per file: %s",
+				target);
+		if (len > sizeof(rctx->cached_capabilities)) {
+			error("capabilities encoded to %d bytes, buffer too small",
+			      len);
+			return -E2BIG;
+		}
+		rctx->cached_capabilities_len = len;
+		memcpy(rctx->cached_capabilities, data, len);
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr,
+			"set_xattr %s - name=%s data_len=%d data=%.*s\n",
+			path, name, len, len, (char *)data);
+
+	err = lsetxattr(target, name, data, len, 0);
+	if (err < 0) {
+		err = -errno;
+		error("lsetxattr %s %s=%.*s failed: %s",
+		      path, name, len, (char *)data, strerror(-err));
+	}
+
+	return err;
+}
+
+/*
+ * Send-stream "remove_xattr" callback: delete extended attribute @name
+ * from the file built from r->full_subvol_path and @path, without
+ * following symlinks.
+ *
+ * Returns 0 on success, the negative path_cat_out() result, or -errno
+ * from lremovexattr().
+ */
+static int process_remove_xattr(const char *path, const char *name, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("remove_xattr: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "remove_xattr %s - name=%s\n", path, name);
+
+	err = lremovexattr(target, name);
+	if (err < 0) {
+		err = -errno;
+		error("lremovexattr %s %s failed: %s",
+		      path, name, strerror(-err));
+	}
+
+	return err;
+}
+
+/*
+ * Send-stream "truncate" callback: set the length of the file built from
+ * r->full_subvol_path and @path to @size bytes.
+ *
+ * Returns 0 on success, the negative path_cat_out() result, or -errno
+ * from truncate().
+ */
+static int process_truncate(const char *path, u64 size, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("truncate: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "truncate %s size=%llu\n", path, size);
+
+	err = truncate(target, size);
+	if (err < 0) {
+		err = -errno;
+		error("truncate %s failed: %s", path, strerror(-err));
+	}
+
+	return err;
+}
+
+/*
+ * Send-stream "chmod" callback: apply permission bits @mode to the file
+ * built from r->full_subvol_path and @path.
+ *
+ * Returns 0 on success, the negative path_cat_out() result, or -errno
+ * from chmod().
+ */
+static int process_chmod(const char *path, u64 mode, void *user)
+{
+	struct btrfs_receive *rctx = user;
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("chmod: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "chmod %s - mode=0%o\n", path, (int)mode);
+
+	err = chmod(target, mode);
+	if (err < 0) {
+		err = -errno;
+		error("chmod %s failed: %s", path, strerror(-err));
+	}
+
+	return err;
+}
+
+/*
+ * Send-stream "chown" callback: set owner @uid and group @gid on the file
+ * built from r->full_subvol_path and @path, without following symlinks.
+ *
+ * If a "security.capability" value was cached in @r (see
+ * r->cached_capabilities_len), it is written back after the ownership
+ * change and the cache is cleared, whether or not the write succeeds.
+ *
+ * Returns 0 on success, the negative path_cat_out() result, or -errno
+ * from lchown()/lsetxattr().
+ */
+static int process_chown(const char *path, u64 uid, u64 gid, void *user)
+{
+	int ret = 0;
+	struct btrfs_receive *r = user;
+	char full_path[PATH_MAX];
+
+	ret = path_cat_out(full_path, r->full_subvol_path, path);
+	if (ret < 0) {
+		error("chown: path invalid: %s", path);
+		goto out;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "chown %s - uid=%llu, gid=%llu\n", path,
+			uid, gid);
+
+	ret = lchown(full_path, uid, gid);
+	if (ret < 0) {
+		ret = -errno;
+		error("chown %s failed: %s", path, strerror(-ret));
+		goto out;
+	}
+
+	if (r->cached_capabilities_len) {
+		if (g_verbose >= 2)
+			fprintf(stderr, "chown: restore capabilities\n");
+		ret = lsetxattr(full_path, "security.capability",
+				r->cached_capabilities,
+				r->cached_capabilities_len, 0);
+		/* Capture errno before any other library call can touch it. */
+		if (ret < 0)
+			ret = -errno;
+		memset(r->cached_capabilities, 0,
+		       sizeof(r->cached_capabilities));
+		r->cached_capabilities_len = 0;
+		if (ret < 0) {
+			error("restoring capabilities %s: %s",
+			      path, strerror(-ret));
+			goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Send-stream "utimes" callback: set access time @at and modification
+ * time @mt on the file built from r->full_subvol_path and @path, without
+ * following symlinks. @ct is accepted but not used by this function.
+ *
+ * Returns 0 on success, the negative path_cat_out() result, or -errno
+ * from utimensat().
+ */
+static int process_utimes(const char *path, struct timespec *at,
+			  struct timespec *mt, struct timespec *ct,
+			  void *user)
+{
+	struct btrfs_receive *rctx = user;
+	struct timespec stamps[2];
+	char target[PATH_MAX];
+	int err;
+
+	err = path_cat_out(target, rctx->full_subvol_path, path);
+	if (err < 0) {
+		error("utimes: path invalid: %s", path);
+		return err;
+	}
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "utimes %s\n", path);
+
+	stamps[0] = *at;
+	stamps[1] = *mt;
+	err = utimensat(AT_FDCWD, target, stamps, AT_SYMLINK_NOFOLLOW);
+	if (err < 0) {
+		err = -errno;
+		error("utimes %s failed: %s", path, strerror(-err));
+	}
+
+	return err;
+}
+
+/*
+ * Send-stream "update_extent" callback. Such commands are sent with
+ * BTRFS_SEND_FLAG_NO_FILE_DATA, so there is nothing to apply; the command
+ * is only logged at verbosity 2 and above. Always returns 0.
+ */
+static int process_update_extent(const char *path, u64 offset, u64 len,
+				 void *user)
+{
+	unsigned long long off = (unsigned long long)offset;
+	unsigned long long length = (unsigned long long)len;
+
+	if (g_verbose >= 2)
+		fprintf(stderr, "update_extent %s: offset=%llu, len=%llu\n",
+			path, off, length);
+
+	return 0;
+}
+
+/*
+ * Callback table passed to btrfs_read_and_process_send_stream() by
+ * do_receive(): one process_*() handler per send-stream command.
+ */
+static struct btrfs_send_ops send_ops = {
+ .subvol = process_subvol,
+ .snapshot = process_snapshot,
+ .mkfile = process_mkfile,
+ .mkdir = process_mkdir,
+ .mknod = process_mknod,
+ .mkfifo = process_mkfifo,
+ .mksock = process_mksock,
+ .symlink = process_symlink,
+ .rename = process_rename,
+ .link = process_link,
+ .unlink = process_unlink,
+ .rmdir = process_rmdir,
+ .write = process_write,
+ .clone = process_clone,
+ .set_xattr = process_set_xattr,
+ .remove_xattr = process_remove_xattr,
+ .truncate = process_truncate,
+ .chmod = process_chmod,
+ .chown = process_chown,
+ .utimes = process_utimes,
+ .update_extent = process_update_extent,
+};
+
+/*
+ * Receive one or more subvolumes from the send stream on @r_fd into the
+ * directory @tomnt.
+ *
+ * @r:          receive context; its descriptors and path fields are set up
+ *              here and released again before returning
+ * @tomnt:      destination directory as given by the user
+ * @realmnt:    mount point supplied by the user, or an empty string to have
+ *              it determined with find_mount_root()
+ * @r_fd:       descriptor the send stream is read from
+ * @max_errors: forwarded to btrfs_read_and_process_send_stream()
+ *
+ * Returns 0 on success or a negative errno-style value on failure.
+ */
+static int do_receive(struct btrfs_receive *r, const char *tomnt,
+		      char *realmnt, int r_fd, u64 max_errors)
+{
+	u64 subvol_id;
+	int ret;
+	char *dest_dir_full_path;
+	char root_subvol_path[PATH_MAX];
+	int end = 0;
+
+	dest_dir_full_path = realpath(tomnt, NULL);
+	if (!dest_dir_full_path) {
+		ret = -errno;
+		error("realpath(%s) failed: %s", tomnt, strerror(-ret));
+		goto out;
+	}
+	r->dest_dir_fd = open(dest_dir_full_path, O_RDONLY | O_NOATIME);
+	if (r->dest_dir_fd < 0) {
+		ret = -errno;
+		error("cannot open destination directory %s: %s",
+		      dest_dir_full_path, strerror(-ret));
+		goto out;
+	}
+
+	if (realmnt[0]) {
+		r->root_path = realmnt;
+	} else {
+		ret = find_mount_root(dest_dir_full_path, &r->root_path);
+		if (ret < 0) {
+			error("failed to determine mount point for %s: %s",
+			      dest_dir_full_path, strerror(-ret));
+			ret = -EINVAL;
+			goto out;
+		}
+		if (ret > 0) {
+			error("%s doesn't belong to btrfs mount point",
+			      dest_dir_full_path);
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+	r->mnt_fd = open(r->root_path, O_RDONLY | O_NOATIME);
+	if (r->mnt_fd < 0) {
+		ret = -errno;
+		error("cannot open %s: %s", r->root_path, strerror(-ret));
+		goto out;
+	}
+
+	/*
+	 * If we use -m or a default subvol we want to resolve the path to the
+	 * subvolume we're sitting in so that we can adjust the paths of any
+	 * subvols we want to receive in.
+	 */
+	ret = btrfs_list_get_path_rootid(r->mnt_fd, &subvol_id);
+	if (ret) {
+		error("cannot resolve our subvolid: %d",
+		      ret);
+		goto out;
+	}
+
+	root_subvol_path[0] = 0;
+	ret = btrfs_subvolid_resolve(r->mnt_fd, root_subvol_path,
+				     PATH_MAX, subvol_id);
+	if (ret) {
+		error("cannot resolve our subvol path");
+		goto out;
+	}
+
+	/*
+	 * Ok we're inside of a subvol off of the root subvol, we need to
+	 * actually set full_root_path. It points into this function's stack
+	 * frame and is cleared again at "out".
+	 */
+	if (*root_subvol_path)
+		r->full_root_path = root_subvol_path;
+
+	if (r->dest_dir_chroot) {
+		if (chroot(dest_dir_full_path)) {
+			ret = -errno;
+			error("failed to chroot to %s: %s",
+			      dest_dir_full_path, strerror(-ret));
+			goto out;
+		}
+		if (chdir("/")) {
+			ret = -errno;
+			error("failed to chdir to / after chroot: %s",
+			      strerror(-ret));
+			goto out;
+		}
+		fprintf(stderr, "Chroot to %s\n", dest_dir_full_path);
+		/*
+		 * Release the pre-chroot root path if we own it (i.e. it is
+		 * not the caller's realmnt buffer) before replacing it.
+		 */
+		if (r->root_path != realmnt)
+			free(r->root_path);
+		r->root_path = strdup("/");
+		if (!r->root_path) {
+			ret = -ENOMEM;
+			error("cannot allocate memory for root path");
+			goto out;
+		}
+		r->dest_dir_path = r->root_path;
+	} else {
+		/*
+		 * find_mount_root returns a root_path that is a subpath of
+		 * dest_dir_full_path. Now get the other part of root_path,
+		 * which is the destination dir relative to root_path.
+		 */
+		r->dest_dir_path = dest_dir_full_path + strlen(r->root_path);
+		while (r->dest_dir_path[0] == '/')
+			r->dest_dir_path++;
+	}
+
+	ret = subvol_uuid_search_init(r->mnt_fd, &r->sus);
+	if (ret < 0)
+		goto out;
+
+	while (!end) {
+		/* Cached capabilities never carry over to the next stream. */
+		if (r->cached_capabilities_len) {
+			if (g_verbose >= 3)
+				fprintf(stderr, "clear cached capabilities\n");
+			memset(r->cached_capabilities, 0,
+			       sizeof(r->cached_capabilities));
+			r->cached_capabilities_len = 0;
+		}
+
+		ret = btrfs_read_and_process_send_stream(r_fd, &send_ops, r,
+							 r->honor_end_cmd,
+							 max_errors);
+		if (ret < 0)
+			goto out;
+		/* A positive result ends the loop after this subvolume. */
+		if (ret)
+			end = 1;
+
+		close_inode_for_write(r);
+		ret = finish_subvol(r);
+		if (ret < 0)
+			goto out;
+	}
+	ret = 0;
+
+out:
+	if (r->write_fd != -1) {
+		close(r->write_fd);
+		r->write_fd = -1;
+	}
+
+	if (r->root_path != realmnt)
+		free(r->root_path);
+	r->root_path = NULL;
+	r->dest_dir_path = NULL;
+	/* root_subvol_path goes out of scope: do not leave it dangling. */
+	r->full_root_path = NULL;
+	free(dest_dir_full_path);
+	subvol_uuid_search_finit(&r->sus);
+	if (r->mnt_fd != -1) {
+		close(r->mnt_fd);
+		r->mnt_fd = -1;
+	}
+	if (r->dest_dir_fd != -1) {
+		close(r->dest_dir_fd);
+		r->dest_dir_fd = -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Entry point of "btrfs receive": parse the command line, open the input
+ * stream (stdin unless -f <infile> is given) and hand over to do_receive().
+ *
+ * Returns 0 on success and 1 on any failure.
+ */
+int cmd_receive(int argc, char **argv)
+{
+	char *tomnt = NULL;
+	char fromfile[PATH_MAX];
+	char realmnt[PATH_MAX];
+	struct btrfs_receive r;
+	int receive_fd = fileno(stdin);
+	u64 max_errors = 1;
+	int ret = 0;
+
+	memset(&r, 0, sizeof(r));
+	r.mnt_fd = -1;
+	r.write_fd = -1;
+	r.dest_dir_fd = -1;
+	r.dest_dir_chroot = 0;
+	realmnt[0] = 0;
+	fromfile[0] = 0;
+
+	while (1) {
+		int c;
+		static const struct option long_opts[] = {
+			{ "max-errors", required_argument, NULL, 'E' },
+			{ "chroot", no_argument, NULL, 'C' },
+			{ NULL, 0, NULL, 0 }
+		};
+
+		c = getopt_long(argc, argv, "Cevf:m:", long_opts, NULL);
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'v':
+			g_verbose++;
+			break;
+		case 'f':
+			if (arg_copy_path(fromfile, optarg, sizeof(fromfile))) {
+				error("input file path too long (%zu)",
+				      strlen(optarg));
+				ret = 1;
+				goto out;
+			}
+			break;
+		case 'e':
+			r.honor_end_cmd = 1;
+			break;
+		case 'C':
+			r.dest_dir_chroot = 1;
+			break;
+		case 'E':
+			max_errors = arg_strtou64(optarg);
+			break;
+		case 'm':
+			if (arg_copy_path(realmnt, optarg, sizeof(realmnt))) {
+				error("mount point path too long (%zu)",
+				      strlen(optarg));
+				ret = 1;
+				goto out;
+			}
+			break;
+		case '?':
+		default:
+			error("receive args invalid");
+			return 1;
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_receive_usage);
+
+	tomnt = argv[optind];
+
+	if (fromfile[0]) {
+		receive_fd = open(fromfile, O_RDONLY | O_NOATIME);
+		if (receive_fd < 0) {
+			error("cannot open %s: %s", fromfile, strerror(errno));
+			/* Without this the command would exit with status 0. */
+			ret = 1;
+			goto out;
+		}
+	}
+
+	ret = do_receive(&r, tomnt, realmnt, receive_fd, max_errors);
+	if (receive_fd != fileno(stdin))
+		close(receive_fd);
+
+out:
+
+	return !!ret;
+}
+
+/*
+ * Help text for "btrfs receive", passed to usage() by cmd_receive().
+ * NULL-terminated; the first entry is the command synopsis.
+ *
+ * NOTE(review): the synopsis does not mention -C/--chroot or -m although
+ * both are parsed by cmd_receive() and described further down.
+ */
+const char * const cmd_receive_usage[] = {
+ "btrfs receive [-ve] [-f <infile>] [--max-errors <N>] <mount>",
+ "Receive subvolumes from stdin.",
+ "Receives one or more subvolumes that were previously",
+ "sent with btrfs send. The received subvolumes are stored",
+ "into <mount>.",
+ "btrfs receive will fail in case a receiving subvolume",
+ "already exists. It will also fail in case a previously",
+ "received subvolume was changed after it was received.",
+ "After receiving a subvolume, it is immediately set to",
+ "read only.\n",
+ "-v Enable verbose debug output. Each",
+ " occurrence of this option increases the",
+ " verbose level more.",
+ "-f <infile> By default, btrfs receive uses stdin",
+ " to receive the subvolumes. Use this",
+ " option to specify a file to use instead.",
+ "-e Terminate after receiving an <end cmd>",
+ " in the data stream. Without this option,",
+ " the receiver terminates only if an error",
+ " is recognized or on EOF.",
+ "-C|--chroot confine the process to <mount> using chroot",
+ "--max-errors <N> Terminate as soon as N errors happened while",
+ " processing commands from the send stream.",
+ " Default value is 1. A value of 0 means no limit.",
+ "-m <mountpoint> The root mount point of the destination fs.",
+ " If you do not have /proc use this to tell us where ",
+ " this file system is mounted.",
+ NULL
+};
diff --git a/cmds-replace.c b/cmds-replace.c
new file mode 100644
index 00000000..6036e2f9
--- /dev/null
+++ b/cmds-replace.c
@@ -0,0 +1,561 @@
+/*
+ * Copyright (C) 2012 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <sys/wait.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+#include "volumes.h"
+#include "disk-io.h"
+
+#include "commands.h"
+
+
+/* Forward declarations of helpers that are defined further down this file. */
+static int print_replace_status(int fd, const char *path, int once);
+static char *time2string(char *buf, size_t s, __u64 t);
+static char *progress2string(char *buf, size_t s, int progress_1000);
+
+
+/*
+ * Translate the result code of a BTRFS_IOC_DEV_REPLACE ioctl into a short
+ * human-readable string. Unknown codes yield "<illegal result value>".
+ * The returned string is a literal and must not be freed.
+ */
+static const char *replace_dev_result2string(__u64 result)
+{
+	const char *text;
+
+	switch (result) {
+	case BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR:
+		text = "no error";
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED:
+		text = "not started";
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED:
+		text = "already started";
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS:
+		text = "scrub is in progress";
+		break;
+	default:
+		text = "<illegal result value>";
+		break;
+	}
+
+	return text;
+}
+
+/* Synopsis of the "btrfs replace" command group; NULL-terminated. */
+static const char * const replace_cmd_group_usage[] = {
+ "btrfs replace <command> [<args>]",
+ NULL
+};
+
+/* Descriptor the SIGINT handler sends the cancel ioctl to; -1 when unset. */
+static int dev_replace_cancel_fd = -1;
+
+/*
+ * SIGINT handler installed by dev_replace_handle_sigint(): asks the kernel
+ * to cancel the device replace running on dev_replace_cancel_fd.
+ *
+ * errno is saved and restored so the code that was interrupted still sees
+ * its own value.
+ *
+ * NOTE(review): perror() is not on the POSIX async-signal-safe list; kept
+ * so the reported message stays unchanged.
+ */
+static void dev_replace_sigint_handler(int signal)
+{
+	int ret;
+	int saved_errno = errno;
+	struct btrfs_ioctl_dev_replace_args args = {0};
+
+	args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL;
+	ret = ioctl(dev_replace_cancel_fd, BTRFS_IOC_DEV_REPLACE, &args);
+	if (ret < 0)
+		perror("Device replace cancel failed");
+	errno = saved_errno;
+}
+
+/*
+ * Choose what SIGINT does: with @fd == -1 the default disposition is
+ * restored, otherwise dev_replace_sigint_handler() is installed. @fd is
+ * stored in dev_replace_cancel_fd either way.
+ *
+ * Returns the result of sigaction().
+ */
+static int dev_replace_handle_sigint(int fd)
+{
+	struct sigaction action = {
+		.sa_handler = SIG_DFL
+	};
+
+	if (fd != -1)
+		action.sa_handler = dev_replace_sigint_handler;
+
+	dev_replace_cancel_fd = fd;
+	return sigaction(SIGINT, &action, NULL);
+}
+
+/*
+ * Help text for "btrfs replace start", passed to usage() by
+ * cmd_replace_start(). NULL-terminated; the first entry is the synopsis.
+ */
+static const char *const cmd_replace_start_usage[] = {
+ "btrfs replace start [-Bfr] <srcdev>|<devid> <targetdev> <mount_point>",
+ "Replace device of a btrfs filesystem.",
+ "On a live filesystem, duplicate the data to the target device which",
+ "is currently stored on the source device. If the source device is not",
+ "available anymore, or if the -r option is set, the data is built",
+ "only using the RAID redundancy mechanisms. After completion of the",
+ "operation, the source device is removed from the filesystem.",
+ "If the <srcdev> is a numerical value, it is assumed to be the device id",
+ "of the filesystem which is mounted at <mount_point>, otherwise it is",
+ "the path to the source device. If the source device is disconnected,",
+ "from the system, you have to use the <devid> parameter format.",
+ "The <targetdev> needs to be same size or larger than the <srcdev>.",
+ "",
+ "-r only read from <srcdev> if no other zero-defect mirror exists",
+ " (enable this if your drive has lots of read errors, the access",
+ " would be very slow)",
+ "-f force using and overwriting <targetdev> even if it looks like",
+ " containing a valid btrfs filesystem. A valid filesystem is",
+ " assumed if a btrfs superblock is found which contains a",
+ " correct checksum. Devices which are currently mounted are",
+ " never allowed to be used as the <targetdev>",
+ "-B do not background",
+ NULL
+};
+
+/*
+ * Implements "btrfs replace start": validate the arguments and both
+ * devices, prepare the target device, then issue the DEV_REPLACE_START
+ * ioctl on the mounted filesystem.
+ *
+ * Unless -B is given the process is put in the background with daemon()
+ * before the ioctl is issued; in that case the ioctl result is not
+ * reported. SIGINT is redirected so that it cancels the running replace.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_replace_start(int argc, char **argv)
+{
+	struct btrfs_ioctl_dev_replace_args start_args = {0};
+	struct btrfs_ioctl_dev_replace_args status_args = {0};
+	int ret;
+	int i;
+	int c;
+	int fdmnt = -1;
+	int fddstdev = -1;
+	char *path;
+	char *srcdev;
+	char *dstdev = NULL;
+	int avoid_reading_from_srcdev = 0;
+	int force_using_targetdev = 0;
+	u64 dstdev_block_count;
+	int do_not_background = 0;
+	DIR *dirstream = NULL;
+	u64 srcdev_size;
+	u64 dstdev_size;
+
+	while ((c = getopt(argc, argv, "Brf")) != -1) {
+		switch (c) {
+		case 'B':
+			do_not_background = 1;
+			break;
+		case 'r':
+			avoid_reading_from_srcdev = 1;
+			break;
+		case 'f':
+			force_using_targetdev = 1;
+			break;
+		case '?':
+		default:
+			usage(cmd_replace_start_usage);
+		}
+	}
+
+	start_args.start.cont_reading_from_srcdev_mode =
+		avoid_reading_from_srcdev ?
+		 BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID :
+		 BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS;
+	if (check_argc_exact(argc - optind, 3))
+		usage(cmd_replace_start_usage);
+	path = argv[optind + 2];
+
+	fdmnt = open_path_or_dev_mnt(path, &dirstream, 1);
+	if (fdmnt < 0)
+		goto leave_with_error;
+
+	/* check for possible errors before backgrounding */
+	status_args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS;
+	status_args.result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT;
+	ret = ioctl(fdmnt, BTRFS_IOC_DEV_REPLACE, &status_args);
+	if (ret < 0) {
+		fprintf(stderr,
+			"ERROR: ioctl(DEV_REPLACE_STATUS) failed on \"%s\": %s",
+			path, strerror(errno));
+		if (status_args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT)
+			fprintf(stderr, ", %s\n",
+				replace_dev_result2string(status_args.result));
+		else
+			fprintf(stderr, "\n");
+		goto leave_with_error;
+	}
+
+	if (status_args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) {
+		error("ioctl(DEV_REPLACE_STATUS) on '%s' returns error: %s",
+		      path, replace_dev_result2string(status_args.result));
+		goto leave_with_error;
+	}
+
+	if (status_args.status.replace_state ==
+	    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
+		error("device replace on '%s' already started", path);
+		goto leave_with_error;
+	}
+
+	srcdev = argv[optind];
+	dstdev = canonicalize_path(argv[optind + 1]);
+	if (!dstdev) {
+		error("cannot canonicalize path '%s': %s",
+		      argv[optind + 1], strerror(errno));
+		goto leave_with_error;
+	}
+
+	if (string_is_numerical(srcdev)) {
+		struct btrfs_ioctl_fs_info_args fi_args;
+		struct btrfs_ioctl_dev_info_args *di_args = NULL;
+
+		start_args.start.srcdevid = arg_strtou64(srcdev);
+
+		ret = get_fs_info(path, &fi_args, &di_args);
+		if (ret) {
+			error("failed to get device info: %s", strerror(-ret));
+			free(di_args);
+			goto leave_with_error;
+		}
+		if (!fi_args.num_devices) {
+			error("no devices found");
+			free(di_args);
+			goto leave_with_error;
+		}
+
+		for (i = 0; i < fi_args.num_devices; i++)
+			if (start_args.start.srcdevid == di_args[i].devid)
+				break;
+		/*
+		 * Check for "not found" before touching di_args[i]: with
+		 * i == num_devices the index is past the scanned entries.
+		 */
+		if (i == fi_args.num_devices) {
+			error("'%s' is not a valid devid for filesystem '%s'",
+			      srcdev, path);
+			free(di_args);
+			goto leave_with_error;
+		}
+		srcdev_size = di_args[i].total_bytes;
+		free(di_args);
+	} else if (is_block_device(srcdev) > 0) {
+		strncpy((char *)start_args.start.srcdev_name, srcdev,
+			BTRFS_DEVICE_PATH_NAME_MAX);
+		start_args.start.srcdevid = 0;
+		srcdev_size = get_partition_size(srcdev);
+	} else {
+		error("source device must be a block device or a devid");
+		goto leave_with_error;
+	}
+
+	ret = test_dev_for_mkfs(dstdev, force_using_targetdev);
+	if (ret)
+		goto leave_with_error;
+
+	dstdev_size = get_partition_size(dstdev);
+	if (srcdev_size > dstdev_size) {
+		error("target device smaller than source device (required %llu bytes)",
+		      srcdev_size);
+		goto leave_with_error;
+	}
+
+	fddstdev = open(dstdev, O_RDWR);
+	if (fddstdev < 0) {
+		error("unable to open %s: %s", dstdev, strerror(errno));
+		goto leave_with_error;
+	}
+	strncpy((char *)start_args.start.tgtdev_name, dstdev,
+		BTRFS_DEVICE_PATH_NAME_MAX);
+	ret = btrfs_prepare_device(fddstdev, dstdev, 1, &dstdev_block_count, 0,
+				   0);
+	if (ret)
+		goto leave_with_error;
+
+	close(fddstdev);
+	fddstdev = -1;
+	free(dstdev);
+	dstdev = NULL;
+
+	dev_replace_handle_sigint(fdmnt);
+	if (!do_not_background) {
+		if (daemon(0, 0) < 0) {
+			error("backgrounding failed: %s", strerror(errno));
+			goto leave_with_error;
+		}
+	}
+
+	start_args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_START;
+	start_args.result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT;
+	ret = ioctl(fdmnt, BTRFS_IOC_DEV_REPLACE, &start_args);
+	/* Results are only reported when we stayed in the foreground. */
+	if (do_not_background) {
+		if (ret < 0) {
+			/* Keep the ioctl's errno; fprintf() may overwrite it. */
+			int e = errno;
+
+			fprintf(stderr,
+				"ERROR: ioctl(DEV_REPLACE_START) failed on \"%s\": %s",
+				path, strerror(e));
+			if (start_args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT)
+				fprintf(stderr, ", %s\n",
+					replace_dev_result2string(start_args.result));
+			else
+				fprintf(stderr, "\n");
+
+			if (e == EOPNOTSUPP)
+				warning("device replace of RAID5/6 not supported with this kernel");
+
+			goto leave_with_error;
+		}
+
+		if (start_args.result !=
+		    BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) {
+			error("ioctl(DEV_REPLACE_START) on '%s' returns error: %s",
+			      path,
+			      replace_dev_result2string(start_args.result));
+			goto leave_with_error;
+		}
+	}
+	close_file_or_dir(fdmnt, dirstream);
+	return 0;
+
+leave_with_error:
+	/*
+	 * NOTE(review): unlike the success path this closes fdmnt directly
+	 * and never releases dirstream; confirm whether close_file_or_dir()
+	 * should be used here as well.
+	 */
+	free(dstdev);
+	if (fdmnt != -1)
+		close(fdmnt);
+	if (fddstdev != -1)
+		close(fddstdev);
+	return 1;
+}
+
+/*
+ * Help text for "btrfs replace status", passed to usage() by
+ * cmd_replace_status(). NULL-terminated; the first entry is the synopsis.
+ */
+static const char *const cmd_replace_status_usage[] = {
+ "btrfs replace status [-1] <mount_point>",
+ "Print status and progress information of a running device replace",
+ "operation",
+ "",
+ "-1 print once instead of print continuously until the replace",
+ " operation finishes (or is canceled)",
+ NULL
+};
+
+/*
+ * Implements "btrfs replace status [-1] <mount_point>": open the mount
+ * point and print the replace status through print_replace_status().
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_replace_status(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	const char *mnt;
+	int print_once = 0;
+	int opt;
+	int mnt_fd;
+	int err;
+
+	while ((opt = getopt(argc, argv, "1")) != -1) {
+		if (opt == '1')
+			print_once = 1;
+		else
+			usage(cmd_replace_status_usage);
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_replace_status_usage);
+
+	mnt = argv[optind];
+	mnt_fd = btrfs_open_dir(mnt, &dirstream, 1);
+	if (mnt_fd < 0)
+		return 1;
+
+	err = print_replace_status(mnt_fd, mnt, print_once);
+	close_file_or_dir(mnt_fd, dirstream);
+
+	return err ? 1 : 0;
+}
+
+/*
+ * Query the device replace status of the filesystem open on @fd and print
+ * it to stdout.
+ *
+ * @fd:   descriptor the DEV_REPLACE status ioctl is issued on
+ * @path: used in error messages only
+ * @once: when non-zero, print a single status line and return
+ *
+ * While a replace is running and @once is zero, the status is polled once
+ * per second and the previous progress text is overwritten using
+ * backspaces. Every other state is printed once.
+ *
+ * Returns 0 on success, the negative ioctl() result, -1 when the ioctl
+ * reports an error result, or -EINVAL for an unknown replace state.
+ */
+static int print_replace_status(int fd, const char *path, int once)
+{
+	struct btrfs_ioctl_dev_replace_args args = {0};
+	struct btrfs_ioctl_dev_replace_status_params *status;
+	int ret;
+	int prevent_loop = 0;
+	int skip_stats;
+	int num_chars;
+	char string1[80];
+	char string2[80];
+	char string3[80];
+
+	for (;;) {
+		args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS;
+		args.result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT;
+		ret = ioctl(fd, BTRFS_IOC_DEV_REPLACE, &args);
+		if (ret < 0) {
+			fprintf(stderr, "ERROR: ioctl(DEV_REPLACE_STATUS) failed on \"%s\": %s",
+				path, strerror(errno));
+			if (args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT)
+				fprintf(stderr, ", %s\n",
+					replace_dev_result2string(args.result));
+			else
+				fprintf(stderr, "\n");
+			return ret;
+		}
+
+		if (args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) {
+			error("ioctl(DEV_REPLACE_STATUS) on '%s' returns error: %s",
+			      path,
+			      replace_dev_result2string(args.result));
+			return -1;
+		}
+
+		status = &args.status;
+
+		skip_stats = 0;
+		/* Only the "running" line is erased and redrawn. */
+		num_chars = 0;
+		switch (status->replace_state) {
+		case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+			num_chars =
+				printf("%s done",
+				       progress2string(string3,
+						       sizeof(string3),
+						       status->progress_1000));
+			break;
+		case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
+			prevent_loop = 1;
+			printf("Started on %s, finished on %s",
+			       time2string(string1, sizeof(string1),
+					   status->time_started),
+			       time2string(string2, sizeof(string2),
+					   status->time_stopped));
+			break;
+		case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
+			prevent_loop = 1;
+			printf("Started on %s, canceled on %s at %s",
+			       time2string(string1, sizeof(string1),
+					   status->time_started),
+			       time2string(string2, sizeof(string2),
+					   status->time_stopped),
+			       progress2string(string3, sizeof(string3),
+					       status->progress_1000));
+			break;
+		case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+			prevent_loop = 1;
+			printf("Started on %s, suspended on %s at %s",
+			       time2string(string1, sizeof(string1),
+					   status->time_started),
+			       time2string(string2, sizeof(string2),
+					   status->time_stopped),
+			       progress2string(string3, sizeof(string3),
+					       status->progress_1000));
+			break;
+		case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
+			prevent_loop = 1;
+			skip_stats = 1;
+			printf("Never started");
+			break;
+		default:
+			/* cast matches %llu; no newline, as in other error() calls */
+			error("unknown status from ioctl DEV_REPLACE_STATUS on '%s': %llu",
+			      path,
+			      (unsigned long long)status->replace_state);
+			return -EINVAL;
+		}
+
+		if (!skip_stats)
+			num_chars += printf(
+				", %llu write errs, %llu uncorr. read errs",
+				(unsigned long long)status->num_write_errors,
+				(unsigned long long)
+				 status->num_uncorrectable_read_errors);
+		if (once || prevent_loop) {
+			printf("\n");
+			break;
+		}
+
+		fflush(stdout);
+		sleep(1);
+		/* Move the cursor back over the line just printed. */
+		while (num_chars > 0) {
+			putchar('\b');
+			num_chars--;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Format the timestamp @t (seconds, converted with localtime_r()) as
+ * "<day>.<month abbrev> <HH:MM:SS>" into @buf, which holds @s bytes.
+ *
+ * If the time cannot be converted or the text does not fit, @buf is set
+ * to the empty string (when @s > 0) instead of being left indeterminate.
+ *
+ * Returns @buf.
+ */
+static char *
+time2string(char *buf, size_t s, __u64 t)
+{
+	struct tm t_tm;
+	time_t t_time_t;
+
+	t_time_t = (time_t)t;
+	/* The value must survive the round trip through time_t. */
+	assert((__u64)t_time_t == t);
+	if (!localtime_r(&t_time_t, &t_tm) ||
+	    strftime(buf, s, "%e.%b %T", &t_tm) == 0) {
+		/* strftime() leaves buf unspecified when it returns 0 */
+		if (s > 0)
+			buf[0] = '\0';
+	}
+	return buf;
+}
+
+/*
+ * Format @progress_1000 (progress in tenths of a percent) as a percentage
+ * with one decimal, e.g. 123 -> "12.3%", into @buf, which holds @s bytes.
+ * The text is truncated if it does not fit and is always NUL-terminated.
+ *
+ * @s must be greater than zero. Returns @buf.
+ */
+static char *
+progress2string(char *buf, size_t s, int progress_1000)
+{
+	/* Validate the size before the buffer is touched, not after. */
+	assert(s > 0);
+	/* With NDEBUG the assert is gone: never index buf[s - 1] for s == 0. */
+	if (s == 0)
+		return buf;
+
+	/* snprintf() NUL-terminates whenever s > 0 */
+	snprintf(buf, s, "%d.%01d%%", progress_1000 / 10, progress_1000 % 10);
+	return buf;
+}
+
+/*
+ * Help text for "btrfs replace cancel", passed to usage() by
+ * cmd_replace_cancel(). NULL-terminated; the first entry is the synopsis.
+ */
+static const char *const cmd_replace_cancel_usage[] = {
+ "btrfs replace cancel <mount_point>",
+ "Cancel a running device replace operation.",
+ NULL
+};
+
+/*
+ * Implements "btrfs replace cancel <mount_point>": issue the
+ * DEV_REPLACE_CANCEL ioctl on the mounted filesystem.
+ *
+ * Returns 0 when the cancel succeeded, 1 on error and 2 when the kernel
+ * reports that no replace operation was started.
+ */
+static int cmd_replace_cancel(int argc, char **argv)
+{
+	struct btrfs_ioctl_dev_replace_args cancel = {0};
+	DIR *dirstream = NULL;
+	const char *mnt;
+	int mnt_fd;
+	int saved_errno;
+	int rc;
+
+	/* No options are accepted: anything getopt() reports is a usage error. */
+	while (getopt(argc, argv, "") != -1)
+		usage(cmd_replace_cancel_usage);
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_replace_cancel_usage);
+
+	mnt = argv[optind];
+	mnt_fd = btrfs_open_dir(mnt, &dirstream, 1);
+	if (mnt_fd < 0)
+		return 1;
+
+	cancel.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL;
+	cancel.result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT;
+	rc = ioctl(mnt_fd, BTRFS_IOC_DEV_REPLACE, &cancel);
+	/* Remember errno before closing, which may overwrite it. */
+	saved_errno = errno;
+	close_file_or_dir(mnt_fd, dirstream);
+
+	if (rc < 0) {
+		fprintf(stderr, "ERROR: ioctl(DEV_REPLACE_CANCEL) failed on \"%s\": %s",
+			mnt, strerror(saved_errno));
+		if (cancel.result == BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT)
+			fprintf(stderr, "\n");
+		else
+			fprintf(stderr, ", %s\n",
+				replace_dev_result2string(cancel.result));
+		return 1;
+	}
+
+	if (cancel.result == BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED) {
+		printf("INFO: ioctl(DEV_REPLACE_CANCEL)\"%s\": %s\n",
+		       mnt, replace_dev_result2string(cancel.result));
+		return 2;
+	}
+
+	return 0;
+}
+
+/* One-line description of the "btrfs replace" command group. */
+static const char replace_cmd_group_info[] =
+"replace a device in the filesystem";
+
+/*
+ * Descriptor of the "btrfs replace" command group: group usage text,
+ * group description and the start/status/cancel sub-command table, which
+ * ends with NULL_CMD_STRUCT. cmd_replace() hands it to
+ * handle_command_group().
+ */
+const struct cmd_group replace_cmd_group = {
+ replace_cmd_group_usage, replace_cmd_group_info, {
+ { "start", cmd_replace_start, cmd_replace_start_usage, NULL,
+ 0 },
+ { "status", cmd_replace_status, cmd_replace_status_usage, NULL,
+ 0 },
+ { "cancel", cmd_replace_cancel, cmd_replace_cancel_usage, NULL,
+ 0 },
+ NULL_CMD_STRUCT
+ }
+};
+
+/*
+ * Entry point of "btrfs replace": dispatches to the sub-command selected
+ * on the command line and returns whatever handle_command_group() returns.
+ */
+int cmd_replace(int argc, char **argv)
+{
+	int status;
+
+	status = handle_command_group(&replace_cmd_group, argc, argv);
+	return status;
+}
diff --git a/cmds-rescue.c b/cmds-rescue.c
new file mode 100644
index 00000000..98954254
--- /dev/null
+++ b/cmds-rescue.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013 SUSE. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "kerncompat.h"
+
+#include <getopt.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "commands.h"
+#include "utils.h"
+
+/* Synopsis shown for the "btrfs rescue" command group as a whole. */
+static const char * const rescue_cmd_group_usage[] = {
+ "btrfs rescue <command> [options] <path>",
+ NULL
+};
+
+/*
+ * Recovery back ends called by the handlers below.  They are declared here
+ * rather than pulled in from a header; their definitions are not part of
+ * this file.
+ */
+int btrfs_recover_chunk_tree(char *path, int verbose, int yes);
+int btrfs_recover_superblocks(char *path, int verbose, int yes);
+
+/*
+ * Help text for "btrfs rescue chunk-recover".  The option letters listed
+ * here match the getopt() string "yvh" used by cmd_rescue_chunk_recover().
+ */
+static const char * const cmd_rescue_chunk_recover_usage[] = {
+ "btrfs rescue chunk-recover [options] <device>",
+ "Recover the chunk tree by scanning the devices one by one.",
+ "",
+ "-y Assume an answer of `yes' to all questions",
+ "-v Verbose mode",
+ "-h Help",
+ NULL
+};
+
+/*
+ * "btrfs rescue chunk-recover [options] <device>"
+ *
+ * Parses -y / -v (anything else, including -h, prints usage), refuses to
+ * run when the device is mounted or its mount status cannot be determined,
+ * then calls btrfs_recover_chunk_tree().
+ *
+ * Returns 1 for the mount-check failures, 0 when recovery succeeded or was
+ * aborted (positive result from btrfs_recover_chunk_tree()), and the
+ * negative result of btrfs_recover_chunk_tree() when recovery failed.
+ */
+static int cmd_rescue_chunk_recover(int argc, char *argv[])
+{
+	char *file;
+	int verbose = 0;
+	int yes = 0;
+	int opt;
+	int ret;
+
+	while ((opt = getopt(argc, argv, "yvh")) >= 0) {
+		if (opt == 'y')
+			yes = 1;
+		else if (opt == 'v')
+			verbose = 1;
+		else
+			usage(cmd_rescue_chunk_recover_usage);
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_rescue_chunk_recover_usage);
+
+	file = argv[optind];
+
+	ret = check_mounted(file);
+	if (ret < 0) {
+		error("could not check mount status: %s", strerror(-ret));
+		return 1;
+	}
+	if (ret) {
+		error("the device is busy");
+		return 1;
+	}
+
+	ret = btrfs_recover_chunk_tree(file, verbose, yes);
+	if (ret < 0) {
+		fprintf(stdout, "Chunk tree recovery failed\n");
+		return ret;
+	}
+	if (ret > 0) {
+		/* A positive result means the recovery was aborted; not an error. */
+		fprintf(stdout, "Chunk tree recovery aborted\n");
+		return 0;
+	}
+
+	fprintf(stdout, "Chunk tree recovered successfully\n");
+	return 0;
+}
+
+/*
+ * Help text for "btrfs rescue super-recover".  The option letters listed
+ * here match the getopt() string "vy" used by cmd_rescue_super_recover().
+ */
+static const char * const cmd_rescue_super_recover_usage[] = {
+ "btrfs rescue super-recover [options] <device>",
+ "Recover bad superblocks from good copies",
+ "",
+ "-y Assume an answer of `yes' to all questions",
+ "-v Verbose mode",
+ NULL
+};
+
+/*
+ * "btrfs rescue super-recover [options] <device>"
+ *
+ * Parses -v / -y, refuses to run when the device is mounted or its mount
+ * status cannot be determined, then returns whatever
+ * btrfs_recover_superblocks() returns.
+ *
+ * return codes:
+ *   0 : All superblocks are valid, no need to recover
+ *   1 : Usage or syntax error (also used here for the mount-check failures)
+ *   2 : Recovered all bad superblocks successfully
+ *   3 : Failed to recover bad superblocks
+ *   4 : Recovery of bad superblocks was aborted
+ */
+static int cmd_rescue_super_recover(int argc, char **argv)
+{
+	char *dname;
+	int yes = 0;
+	int verbose = 0;
+	int opt;
+	int ret;
+
+	while ((opt = getopt(argc, argv, "vy")) >= 0) {
+		if (opt == 'v')
+			verbose = 1;
+		else if (opt == 'y')
+			yes = 1;
+		else
+			usage(cmd_rescue_super_recover_usage);
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_rescue_super_recover_usage);
+
+	dname = argv[optind];
+
+	ret = check_mounted(dname);
+	if (ret < 0) {
+		error("could not check mount status: %s", strerror(-ret));
+		return 1;
+	}
+	if (ret) {
+		error("the device is busy");
+		return 1;
+	}
+
+	return btrfs_recover_superblocks(dname, verbose, yes);
+}
+
+/* Help text for "btrfs rescue zero-log"; the command takes no options. */
+static const char * const cmd_rescue_zero_log_usage[] = {
+ "btrfs rescue zero-log <device>",
+ "Clear the tree log. Usable if it's corrupted and prevents mount.",
+ "",
+ NULL
+};
+
+/*
+ * "btrfs rescue zero-log <device>"
+ *
+ * Opens the filesystem on <device> for writing, sets log_root and
+ * log_root_level in the in-memory super block copy to 0 and commits a
+ * transaction so the change reaches disk.
+ *
+ * The device must not be mounted: if it is, or if the mount status cannot
+ * be determined, nothing is opened or written.
+ *
+ * Returns 0 on success and 1 on any failure.
+ */
+static int cmd_rescue_zero_log(int argc, char **argv)
+{
+	struct btrfs_root *root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_super_block *sb;
+	char *devname;
+	int ret;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_rescue_zero_log_usage);
+
+	devname = argv[optind];
+	ret = check_mounted(devname);
+	if (ret < 0) {
+		error("could not check mount status: %s", strerror(-ret));
+		goto out;
+	} else if (ret) {
+		error("%s is currently mounted", devname);
+		ret = -EBUSY;
+		/* Never rewrite the super block underneath a live mount. */
+		goto out;
+	}
+
+	root = open_ctree(devname, 0, OPEN_CTREE_WRITES | OPEN_CTREE_PARTIAL);
+	if (!root) {
+		error("could not open ctree");
+		return 1;
+	}
+
+	sb = root->fs_info->super_copy;
+	printf("Clearing log on %s, previous log_root %llu, level %u\n",
+	       devname,
+	       (unsigned long long)btrfs_super_log_root(sb),
+	       (unsigned)btrfs_super_log_root_level(sb));
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_super_log_root(sb, 0);
+	btrfs_set_super_log_root_level(sb, 0);
+	btrfs_commit_transaction(trans, root);
+	close_ctree(root);
+
+out:
+	/* Collapse any non-zero status (including negative errno) to 1. */
+	return !!ret;
+}
+
+/* One-line description of the "rescue" command group. */
+static const char rescue_cmd_group_info[] =
+"toolbox for specific rescue operations";
+
+/*
+ * Dispatch table for "btrfs rescue": maps each subcommand name to its
+ * handler and usage text.  The list is terminated by NULL_CMD_STRUCT.
+ * NOTE(review): the meaning of the trailing "NULL, 0" members of each entry
+ * is defined by struct cmd_struct, which is not visible here.
+ */
+const struct cmd_group rescue_cmd_group = {
+ rescue_cmd_group_usage, rescue_cmd_group_info, {
+ { "chunk-recover", cmd_rescue_chunk_recover,
+ cmd_rescue_chunk_recover_usage, NULL, 0},
+ { "super-recover", cmd_rescue_super_recover,
+ cmd_rescue_super_recover_usage, NULL, 0},
+ { "zero-log", cmd_rescue_zero_log, cmd_rescue_zero_log_usage, NULL, 0},
+ NULL_CMD_STRUCT
+ }
+};
+
+/*
+ * Entry point for "btrfs rescue": hands argc/argv to
+ * handle_command_group() together with rescue_cmd_group and returns its
+ * result.
+ */
+int cmd_rescue(int argc, char **argv)
+{
+ return handle_command_group(&rescue_cmd_group, argc, argv);
+}
diff --git a/cmds-restore.c b/cmds-restore.c
new file mode 100644
index 00000000..dd0b2427
--- /dev/null
+++ b/cmds-restore.c
@@ -0,0 +1,1612 @@
+/*
+ * Copyright (C) 2011 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#include "kerncompat.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <lzo/lzoconf.h>
+#include <lzo/lzo1x.h>
+#include <zlib.h>
+#include <regex.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "volumes.h"
+#include "utils.h"
+#include "commands.h"
+
+/*
+ * Scratch path buffers shared by the restore helpers:
+ *   fs_name        - path of the current entry relative to the root of the
+ *                    btrfs being restored
+ *   path_name      - the same entry under the output directory
+ *   symlink_target - target string of the symlink being restored
+ */
+static char fs_name[PATH_MAX];
+static char path_name[PATH_MAX];
+static char symlink_target[PATH_MAX];
+/*
+ * Behaviour switches consulted by the helpers below.
+ * NOTE(review): they are presumably set from command line options by code
+ * outside this section - confirm against the option parser.
+ */
+static int get_snaps = 0; /* 0: snapshots found while walking are skipped */
+static int verbose = 0; /* non-zero: print progress; > 1 adds search details */
+static int restore_metadata = 0; /* restore owner, mode and times */
+static int restore_symlinks = 0; /* restore symlinks instead of ignoring them */
+static int ignore_errors = 0; /* keep going after a failed entry */
+static int overwrite = 0; /* existing destination files may be replaced */
+static int get_xattrs = 0; /* restore extended attributes of regular files */
+static int dry_run = 0; /* walk and report, but do not create files, directories or symlinks */
+
+/* Size in bytes of the little-endian length headers inside LZO data. */
+#define LZO_LEN 4
+/* Page size assumed for the layout of LZO-compressed data. */
+#define PAGE_CACHE_SIZE 4096
+/* Output capacity offered to the LZO decoder for one segment of x bytes. */
+#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3)
+
+/*
+ * Inflate a zlib stream in one call.
+ *
+ * @inbuf:          compressed input
+ * @outbuf:         destination buffer
+ * @compress_len:   number of input bytes offered to zlib
+ * @decompress_len: capacity of @outbuf offered to zlib
+ *
+ * Returns 0 when inflate() reports Z_STREAM_END, -1 otherwise (a message
+ * with the zlib code is written to stderr).
+ */
+static int decompress_zlib(char *inbuf, char *outbuf, u64 compress_len,
+			   u64 decompress_len)
+{
+	z_stream strm;
+	int zret;
+
+	memset(&strm, 0, sizeof(strm));
+	zret = inflateInit(&strm);
+	if (zret != Z_OK) {
+		fprintf(stderr, "inflate init returnd %d\n", zret);
+		return -1;
+	}
+
+	strm.next_in = (unsigned char *)inbuf;
+	strm.avail_in = compress_len;
+	strm.next_out = (unsigned char *)outbuf;
+	strm.avail_out = decompress_len;
+
+	/* The stream state is released whatever inflate() reports. */
+	zret = inflate(&strm, Z_NO_FLUSH);
+	(void)inflateEnd(&strm);
+
+	if (zret == Z_STREAM_END)
+		return 0;
+
+	fprintf(stderr, "failed to inflate: %d\n", zret);
+	return -1;
+}
+/*
+ * Read the LZO_LEN-byte little-endian length stored at @buf and return it
+ * in CPU byte order.  memcpy() is used so @buf need not be aligned.
+ */
+static inline size_t read_compress_length(unsigned char *buf)
+{
+	__le32 raw_len;
+
+	memcpy(&raw_len, buf, LZO_LEN);
+
+	return le32_to_cpu(raw_len);
+}
+
+/*
+ * Decompress LZO-compressed extent data.
+ *
+ * Layout consumed from @inbuf: a 4-byte little-endian total length, then a
+ * sequence of segments, each a 4-byte little-endian length followed by that
+ * many bytes of LZO1X data.  A segment header never straddles a
+ * PAGE_CACHE_SIZE boundary: when fewer than LZO_LEN bytes remain in the
+ * current page the input position skips to the start of the next page.
+ *
+ * @inbuf:          compressed input
+ * @outbuf:         destination buffer
+ * @compress_len:   size of the compressed input (not consulted here)
+ * @decompress_len: in:  capacity of @outbuf in bytes
+ *                  out: number of bytes produced (written on success only)
+ *
+ * Every segment is decoded with the output limit set to the space still
+ * left in @outbuf, so damaged data that would expand beyond the buffer is
+ * rejected by the decoder instead of overrunning the allocation.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int decompress_lzo(unsigned char *inbuf, char *outbuf, u64 compress_len,
+			  u64 *decompress_len)
+{
+	size_t out_cap = *decompress_len;
+	size_t new_len;
+	size_t in_len;
+	size_t out_len = 0;
+	size_t tot_len;
+	size_t tot_in;
+	int ret;
+
+	ret = lzo_init();
+	if (ret != LZO_E_OK) {
+		fprintf(stderr, "lzo init returned %d\n", ret);
+		return -1;
+	}
+
+	tot_len = read_compress_length(inbuf);
+	inbuf += LZO_LEN;
+	tot_in = LZO_LEN;
+
+	while (tot_in < tot_len) {
+		size_t mod_page;
+		size_t rem_page;
+		size_t out_left;
+
+		in_len = read_compress_length(inbuf);
+
+		if ((tot_in + LZO_LEN + in_len) > tot_len) {
+			fprintf(stderr, "bad compress length %lu\n",
+				(unsigned long)in_len);
+			return -1;
+		}
+
+		inbuf += LZO_LEN;
+		tot_in += LZO_LEN;
+
+		/*
+		 * out_len never exceeds out_cap because each decode below is
+		 * limited to what is left, so this cannot wrap.
+		 */
+		out_left = out_cap - out_len;
+		new_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
+		if (new_len > out_left)
+			new_len = out_left;
+
+		/* new_len: in = output space available, out = bytes written */
+		ret = lzo1x_decompress_safe((const unsigned char *)inbuf, in_len,
+					    (unsigned char *)outbuf,
+					    (void *)&new_len, NULL);
+		if (ret != LZO_E_OK) {
+			fprintf(stderr, "failed to inflate: %d\n", ret);
+			return -1;
+		}
+		out_len += new_len;
+		outbuf += new_len;
+		inbuf += in_len;
+		tot_in += in_len;
+
+		/*
+		 * If the 4 byte header does not fit to the rest of the page we
+		 * have to move to the next one, unless we read some garbage
+		 */
+		mod_page = tot_in % PAGE_CACHE_SIZE;
+		rem_page = PAGE_CACHE_SIZE - mod_page;
+		if (rem_page < LZO_LEN) {
+			inbuf += rem_page;
+			tot_in += rem_page;
+		}
+	}
+
+	*decompress_len = out_len;
+
+	return 0;
+}
+
+/*
+ * Dispatch to the decompressor matching @compress.
+ *
+ * For zlib, *@decompress_len is passed by value as the output capacity; for
+ * LZO the pointer itself is handed on so the callee can update it.
+ *
+ * Returns the decompressor's result, or -1 (with a message on stderr) when
+ * @compress is neither BTRFS_COMPRESS_ZLIB nor BTRFS_COMPRESS_LZO.
+ */
+static int decompress(char *inbuf, char *outbuf, u64 compress_len,
+		      u64 *decompress_len, int compress)
+{
+	if (compress == BTRFS_COMPRESS_ZLIB)
+		return decompress_zlib(inbuf, outbuf, compress_len,
+				       *decompress_len);
+
+	if (compress == BTRFS_COMPRESS_LZO)
+		return decompress_lzo((unsigned char *)inbuf, outbuf,
+				      compress_len, decompress_len);
+
+	fprintf(stderr, "invalid compression type: %d\n", compress);
+	return -1;
+}
+
+/*
+ * Advance @path to the next leaf that can be read, skipping over children
+ * whose extent buffer is not up to date after read_node_slot().
+ *
+ * Returns 0 when path->nodes[0] / path->slots[0] point at the first slot of
+ * a new leaf, and 1 when no further leaf could be reached.  No negative
+ * value is returned.
+ */
+static int next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ int slot;
+ int level = 1;
+ int offset = 1;
+ struct extent_buffer *c;
+ struct extent_buffer *next = NULL;
+
+again:
+ /* Find the lowest level, starting at the current one, that has a node. */
+ for (; level < BTRFS_MAX_LEVEL; level++) {
+ if (path->nodes[level])
+ break;
+ }
+
+ if (level >= BTRFS_MAX_LEVEL)
+ return 1;
+
+ /* This value is recomputed at the top of the loop below before use. */
+ slot = path->slots[level] + 1;
+
+ /*
+ * Look for a readable child to the right of the current slot; when a
+ * node is exhausted, move one level up and start again one slot to the
+ * right of that level's current position.
+ */
+ while(level < BTRFS_MAX_LEVEL) {
+ if (!path->nodes[level])
+ return 1;
+
+ slot = path->slots[level] + offset;
+ c = path->nodes[level];
+ if (slot >= btrfs_header_nritems(c)) {
+ level++;
+ if (level == BTRFS_MAX_LEVEL)
+ return 1;
+ offset = 1;
+ continue;
+ }
+
+ if (path->reada)
+ reada_for_search(root, path, level, slot, 0);
+
+ next = read_node_slot(root, c, slot);
+ if (extent_buffer_uptodate(next))
+ break;
+ /* Child not usable: try the next slot of the same node. */
+ offset++;
+ }
+ path->slots[level] = slot;
+ /*
+ * Walk back down along the leftmost children, replacing the buffers held
+ * in @path level by level until level 0 is reached.
+ */
+ while(1) {
+ level--;
+ c = path->nodes[level];
+ free_extent_buffer(c);
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ if (!level)
+ break;
+ if (path->reada)
+ reada_for_search(root, path, level, 0, 0);
+ next = read_node_slot(root, next, 0);
+ /* An unreadable child on the way down restarts the search. */
+ if (!extent_buffer_uptodate(next))
+ goto again;
+ }
+ return 0;
+}
+
+/*
+ * Write the inline file extent at path->slots[0] of path->nodes[0] to @fd
+ * at file offset @pos, decompressing it first when the extent is marked as
+ * compressed.
+ *
+ * The item payload is staged in a fixed stack buffer, so an item whose
+ * inline length does not fit is rejected instead of being copied.  A failed
+ * or short pwrite() is reported as an error for both the plain and the
+ * compressed case.
+ *
+ * Returns 0 on success, -ENOMEM when the output buffer cannot be allocated,
+ * the decompressor's error when decompression fails, and -1 for a bad
+ * length or a failed/short write.
+ */
+static int copy_one_inline(int fd, struct btrfs_path *path, u64 pos)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_file_extent_item *fi;
+	char buf[4096];
+	char *outbuf;
+	u64 ram_size;
+	ssize_t done;
+	unsigned long ptr;
+	int ret;
+	int len;
+	int inline_item_len;
+	int compress;
+	int write_errno;
+
+	fi = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+	ptr = btrfs_file_extent_inline_start(fi);
+	len = btrfs_file_extent_inline_len(leaf, path->slots[0], fi);
+	inline_item_len = btrfs_file_extent_inline_item_len(leaf,
+					btrfs_item_nr(path->slots[0]));
+
+	/* A damaged item must not be allowed to overrun the stack buffer. */
+	if (inline_item_len < 0 || (size_t)inline_item_len > sizeof(buf)) {
+		fprintf(stderr, "Bad inline extent item length %d\n",
+			inline_item_len);
+		return -1;
+	}
+	read_extent_buffer(leaf, buf, ptr, inline_item_len);
+
+	compress = btrfs_file_extent_compression(leaf, fi);
+	if (compress == BTRFS_COMPRESS_NONE) {
+		/* Only bytes that were actually read from the item may be written. */
+		if (len < 0 || len > inline_item_len) {
+			fprintf(stderr, "Bad inline extent length %d\n", len);
+			return -1;
+		}
+		done = pwrite(fd, buf, len, pos);
+		if (done < len) {
+			fprintf(stderr, "Short inline write, wanted %d, did "
+				"%zd: %d\n", len, done, errno);
+			return -1;
+		}
+		return 0;
+	}
+
+	ram_size = btrfs_file_extent_ram_bytes(leaf, fi);
+	outbuf = calloc(1, ram_size);
+	if (!outbuf) {
+		fprintf(stderr, "No memory\n");
+		return -ENOMEM;
+	}
+
+	ret = decompress(buf, outbuf, len, &ram_size, compress);
+	if (ret) {
+		free(outbuf);
+		return ret;
+	}
+
+	done = pwrite(fd, outbuf, ram_size, pos);
+	write_errno = errno;
+	free(outbuf);
+	/*
+	 * Need both checks: comparing a negative ssize_t directly with the
+	 * u64 size converts it to a huge unsigned value and hides the error.
+	 */
+	if (done < 0 || (u64)done < ram_size) {
+		fprintf(stderr, "Short compressed inline write, wanted %Lu, "
+			"did %zd: %d\n", ram_size, done, write_errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy one regular (non-inline) file extent described by @fi in @leaf to
+ * @fd at file offset @pos.
+ *
+ * The on-disk bytes are read piece by piece as btrfs_map_block() maps them,
+ * trying the next mirror when a piece cannot be read completely.  Plain
+ * extents are written straight from the read buffer; compressed extents are
+ * decompressed as a whole and num_bytes bytes starting at the extent offset
+ * are written.
+ *
+ * When decompression fails, the whole extent is read again starting from
+ * the next mirror, until the number of copies is exhausted.
+ *
+ * The extent's offset and num_bytes are checked against the buffer they
+ * index so that a damaged item cannot cause reads outside the buffers.
+ *
+ * Returns 0 on success (including for holes, i.e. disk_num_bytes == 0),
+ * -ENOMEM on allocation failure, the btrfs_map_block() error when mapping
+ * fails, and -1 for all other failures.
+ */
+static int copy_one_extent(struct btrfs_root *root, int fd,
+			   struct extent_buffer *leaf,
+			   struct btrfs_file_extent_item *fi, u64 pos)
+{
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	char *inbuf, *outbuf = NULL;
+	ssize_t done, total = 0;
+	u64 bytenr;
+	u64 extent_start;
+	u64 ram_size;
+	u64 disk_size;
+	u64 read_size;
+	u64 num_bytes;
+	u64 length;
+	u64 size_left;
+	u64 dev_bytenr;
+	u64 offset;
+	u64 count;
+	int compress;
+	int ret;
+	int dev_fd;
+	/* Mirror each piece is tried on first; bumped after a failed decompress. */
+	int first_mirror = 1;
+	int mirror_num;
+	int num_copies;
+
+	compress = btrfs_file_extent_compression(leaf, fi);
+	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+	disk_size = btrfs_file_extent_disk_num_bytes(leaf, fi);
+	ram_size = btrfs_file_extent_ram_bytes(leaf, fi);
+	offset = btrfs_file_extent_offset(leaf, fi);
+	num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+
+	if (verbose && offset)
+		printf("offset is %Lu\n", offset);
+	/* we found a hole */
+	if (disk_size == 0)
+		return 0;
+
+	if (compress == BTRFS_COMPRESS_NONE) {
+		/* The referenced range must lie inside the on-disk extent. */
+		if (offset >= disk_size || num_bytes > disk_size - offset) {
+			fprintf(stderr,
+				"Invalid extent: offset %Lu, num_bytes %Lu, disk size %Lu\n",
+				offset, num_bytes, disk_size);
+			return -1;
+		}
+		/* Start at the referenced data and stop at the extent end. */
+		bytenr += offset;
+		read_size = disk_size - offset;
+	} else {
+		/* The referenced range must lie inside the decompressed data. */
+		if (offset > ram_size || num_bytes > ram_size - offset) {
+			fprintf(stderr,
+				"Invalid extent: offset %Lu, num_bytes %Lu, ram size %Lu\n",
+				offset, num_bytes, ram_size);
+			return -1;
+		}
+		read_size = disk_size;
+	}
+
+	inbuf = malloc(read_size);
+	if (!inbuf) {
+		fprintf(stderr, "No memory\n");
+		return -ENOMEM;
+	}
+
+	if (compress != BTRFS_COMPRESS_NONE) {
+		outbuf = calloc(1, ram_size);
+		if (!outbuf) {
+			fprintf(stderr, "No memory\n");
+			free(inbuf);
+			return -ENOMEM;
+		}
+	}
+
+	extent_start = bytenr;
+restart:
+	/* (Re)read the whole range, preferring mirror first_mirror. */
+	bytenr = extent_start;
+	size_left = read_size;
+	count = 0;
+	mirror_num = first_mirror;
+again:
+	length = size_left;
+	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+			      bytenr, &length, &multi, mirror_num, NULL);
+	if (ret) {
+		fprintf(stderr, "Error mapping block %d\n", ret);
+		goto out;
+	}
+	device = multi->stripes[0].dev;
+	dev_fd = device->fd;
+	device->total_ios++;
+	dev_bytenr = multi->stripes[0].physical;
+	kfree(multi);
+	multi = NULL;
+
+	if (size_left < length)
+		length = size_left;
+
+	done = pread(dev_fd, inbuf+count, length, dev_bytenr);
+	/* Need both checks, or we miss negative values due to u64 conversion */
+	if (done < 0 || done < length) {
+		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+					      bytenr, length);
+		mirror_num++;
+		/* mirror_num is 1-indexed, so num_copies is a valid mirror. */
+		if (mirror_num > num_copies) {
+			ret = -1;
+			fprintf(stderr, "Exhausted mirrors trying to read\n");
+			goto out;
+		}
+		fprintf(stderr, "Trying another mirror\n");
+		goto again;
+	}
+
+	mirror_num = first_mirror;
+	size_left -= length;
+	count += length;
+	bytenr += length;
+	if (size_left)
+		goto again;
+
+	if (compress == BTRFS_COMPRESS_NONE) {
+		while (total < num_bytes) {
+			done = pwrite(fd, inbuf+total, num_bytes-total,
+				      pos+total);
+			if (done < 0) {
+				ret = -1;
+				fprintf(stderr, "Error writing: %d %s\n", errno, strerror(errno));
+				goto out;
+			}
+			total += done;
+		}
+		ret = 0;
+		goto out;
+	}
+
+	ret = decompress(inbuf, outbuf, disk_size, &ram_size, compress);
+	if (ret) {
+		/*
+		 * The data read so far does not decompress.  Read the whole
+		 * extent again, this time starting with the next mirror.
+		 */
+		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+					      extent_start, disk_size);
+		first_mirror++;
+		/* Mirrors are 1-indexed, so num_copies is still a valid one. */
+		if (first_mirror > num_copies) {
+			ret = -1;
+			goto out;
+		}
+		fprintf(stderr, "Trying another mirror\n");
+		goto restart;
+	}
+
+	while (total < num_bytes) {
+		done = pwrite(fd, outbuf + offset + total,
+			      num_bytes - total,
+			      pos + total);
+		if (done < 0) {
+			ret = -1;
+			goto out;
+		}
+		total += done;
+	}
+out:
+	free(inbuf);
+	free(outbuf);
+	return ret;
+}
+
+/* Answers ask_to_continue() can return. */
+enum loop_response {
+	LOOP_STOP,	/* stop working on the current file */
+	LOOP_CONTINUE,	/* keep going, ask again later */
+	LOOP_DONTASK	/* keep going and never ask again */
+};
+
+/*
+ * Ask on stdout whether to keep going on @file and read the answer from
+ * stdin, one character at a time.
+ *
+ * 'n' or an empty line (the default) gives LOOP_STOP, 'a' gives
+ * LOOP_DONTASK and 'y' gives LOOP_CONTINUE; case is ignored.  Any other
+ * character prompts again.  End of input or a read error on stdin is
+ * treated like the default answer and gives LOOP_STOP.
+ */
+static enum loop_response ask_to_continue(const char *file)
+{
+	char buf[2];
+	char *ret;
+	int answer;
+
+	printf("We seem to be looping a lot on %s, do you want to keep going "
+	       "on ? (y/N/a): ", file);
+again:
+	ret = fgets(buf, 2, stdin);
+	/* fgets() returns NULL on EOF/error; there is nothing to inspect then. */
+	if (!ret)
+		return LOOP_STOP;
+	if (*ret == '\n')
+		return LOOP_STOP;
+
+	/* <ctype.h> functions need a value representable as unsigned char. */
+	answer = tolower((unsigned char)*ret);
+	if (answer == 'n')
+		return LOOP_STOP;
+	if (answer == 'a')
+		return LOOP_DONTASK;
+	if (answer != 'y') {
+		printf("Please enter one of 'y', 'n', or 'a': ");
+		goto again;
+	}
+
+	return LOOP_CONTINUE;
+}
+
+
+/*
+ * Apply the extended attributes stored for inode number @inode in @root to
+ * the open file @fd.
+ *
+ * Walks every BTRFS_XATTR_ITEM_KEY item of the inode; each item may hold
+ * several name/value entries laid out back to back.  A failing fsetxattr()
+ * is reported on stderr (using @file_name in the message) but does not stop
+ * the walk.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the negative
+ * result of the tree search.
+ */
+static int set_file_xattrs(struct btrfs_root *root, u64 inode,
+ int fd, const char *file_name)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_dir_item *di;
+ u32 name_len = 0;
+ u32 data_len = 0;
+ u32 len = 0;
+ u32 cur, total_len;
+ char *name = NULL;
+ char *data = NULL;
+ int ret = 0;
+
+ key.objectid = inode;
+ key.type = BTRFS_XATTR_ITEM_KEY;
+ key.offset = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ /*
+  * NOTE(review): unlike copy_file() and search_dir(), this does not check
+  * whether path->nodes[0] is NULL before it is used below - confirm the
+  * search cannot leave it unset here.
+  */
+ leaf = path->nodes[0];
+ while (1) {
+ /* Past the last item of this leaf: move on to the next readable leaf. */
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ do {
+ ret = next_leaf(root, path);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Error searching for extended attributes: %d\n",
+ ret);
+ goto out;
+ } else if (ret) {
+ /* No more leaves to search */
+ ret = 0;
+ goto out;
+ }
+ leaf = path->nodes[0];
+ } while (!leaf);
+ continue;
+ }
+
+ /* Stop at the first item that is not an xattr item of this inode. */
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.type != BTRFS_XATTR_ITEM_KEY || key.objectid != inode)
+ break;
+ cur = 0;
+ total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+ di = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_dir_item);
+
+ /*
+  * Each entry is a struct btrfs_dir_item header followed by the name and
+  * then the value.
+  * NOTE(review): the lengths read from the item are not checked against
+  * total_len before they are used.
+  */
+ while (cur < total_len) {
+ len = btrfs_dir_name_len(leaf, di);
+ /*
+  * name_len holds the length of the previous name; the buffer is
+  * reallocated whenever the new name is longer than that.
+  */
+ if (len > name_len) {
+ free(name);
+ name = (char *) malloc(len + 1);
+ if (!name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ read_extent_buffer(leaf, name,
+ (unsigned long)(di + 1), len);
+ name[len] = '\0';
+ name_len = len;
+
+ len = btrfs_dir_data_len(leaf, di);
+ /* Same growth scheme as for the name buffer. */
+ if (len > data_len) {
+ free(data);
+ data = (char *) malloc(len);
+ if (!data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ read_extent_buffer(leaf, data,
+ (unsigned long)(di + 1) + name_len,
+ len);
+ data_len = len;
+
+ /* Best effort: report the failure and carry on with the next entry. */
+ if (fsetxattr(fd, name, data, data_len, 0))
+ fprintf(stderr,
+ "Error setting extended attribute %s on file %s: %s\n",
+ name, file_name, strerror(errno));
+
+ len = sizeof(*di) + name_len + data_len;
+ cur += len;
+ di = (struct btrfs_dir_item *)((char *)di + len);
+ }
+ path->slots[0]++;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ free(name);
+ free(data);
+
+ return ret;
+}
+
+/*
+ * Apply owner, mode and access/modification times of the inode found via
+ * @key in @root to the open file descriptor @fd.
+ *
+ * Returns -ENOMEM when no path can be allocated, the non-zero result of
+ * btrfs_lookup_inode() when the inode is not found (nothing is changed in
+ * that case), the failing call's result when fchown(), fchmod() or
+ * futimens() fails (an error is printed), and 0 on success.
+ */
+static int copy_metadata(struct btrfs_root *root, int fd,
+			 struct btrfs_key *key)
+{
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_timespec *bts;
+	struct extent_buffer *eb;
+	struct btrfs_path *path;
+	struct timespec times[2];
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "ERROR: Ran out of memory\n");
+		return -ENOMEM;
+	}
+
+	ret = btrfs_lookup_inode(NULL, root, path, key, 0);
+	if (ret != 0)
+		goto out;
+
+	eb = path->nodes[0];
+	inode_item = btrfs_item_ptr(eb, path->slots[0],
+				    struct btrfs_inode_item);
+
+	ret = fchown(fd, btrfs_inode_uid(eb, inode_item),
+		     btrfs_inode_gid(eb, inode_item));
+	if (ret) {
+		fprintf(stderr, "ERROR: Failed to change owner: %s\n",
+			strerror(errno));
+		goto out;
+	}
+
+	ret = fchmod(fd, btrfs_inode_mode(eb, inode_item));
+	if (ret) {
+		fprintf(stderr, "ERROR: Failed to change mode: %s\n",
+			strerror(errno));
+		goto out;
+	}
+
+	/* times[0] is the access time, times[1] the modification time. */
+	bts = btrfs_inode_atime(inode_item);
+	times[0].tv_sec = btrfs_timespec_sec(eb, bts);
+	times[0].tv_nsec = btrfs_timespec_nsec(eb, bts);
+
+	bts = btrfs_inode_mtime(inode_item);
+	times[1].tv_sec = btrfs_timespec_sec(eb, bts);
+	times[1].tv_nsec = btrfs_timespec_nsec(eb, bts);
+
+	ret = futimens(fd, times);
+	if (ret)
+		fprintf(stderr, "ERROR: Failed to set times: %s\n",
+			strerror(errno));
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Restore the contents of one regular file into the open descriptor @fd.
+ *
+ * @key identifies the inode in @root; its offset and type are overwritten
+ * here to search for the file's extent items.  @file is only used in the
+ * "looping a lot" prompt and in xattr error messages.
+ *
+ * Steps: read the inode item for the file size (and, with restore_metadata,
+ * apply owner/mode and remember the times), copy every inline and regular
+ * extent, truncate the output to the recorded size, then optionally restore
+ * xattrs and finally the times.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+static int copy_file(struct btrfs_root *root, int fd, struct btrfs_key *key,
+ const char *file)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_path *path;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_inode_item *inode_item;
+ struct btrfs_timespec *bts;
+ struct btrfs_key found_key;
+ int ret;
+ int extent_type;
+ int compression;
+ int loops = 0;
+ u64 found_size = 0;
+ struct timespec times[2];
+ int times_ok = 0;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ fprintf(stderr, "Ran out of memory\n");
+ return -ENOMEM;
+ }
+
+ /*
+  * A failed inode lookup is not treated as an error: the copy proceeds
+  * with found_size == 0 and without any metadata.
+  */
+ ret = btrfs_lookup_inode(NULL, root, path, key, 0);
+ if (ret == 0) {
+ inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_item);
+ found_size = btrfs_inode_size(path->nodes[0], inode_item);
+
+ if (restore_metadata) {
+ /*
+ * Change the ownership and mode now, set times when
+ * copyout is finished.
+ */
+
+ ret = fchown(fd, btrfs_inode_uid(path->nodes[0], inode_item),
+ btrfs_inode_gid(path->nodes[0], inode_item));
+ if (ret && !ignore_errors)
+ goto out;
+
+ ret = fchmod(fd, btrfs_inode_mode(path->nodes[0], inode_item));
+ if (ret && !ignore_errors)
+ goto out;
+
+ bts = btrfs_inode_atime(inode_item);
+ times[0].tv_sec = btrfs_timespec_sec(path->nodes[0], bts);
+ times[0].tv_nsec = btrfs_timespec_nsec(path->nodes[0], bts);
+
+ bts = btrfs_inode_mtime(inode_item);
+ times[1].tv_sec = btrfs_timespec_sec(path->nodes[0], bts);
+ times[1].tv_nsec = btrfs_timespec_nsec(path->nodes[0], bts);
+ times_ok = 1;
+ }
+ }
+ btrfs_release_path(path);
+
+ /* Reuse the caller's key to look for the first extent item of the inode. */
+ key->offset = 0;
+ key->type = BTRFS_EXTENT_DATA_KEY;
+
+ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Error searching %d\n", ret);
+ goto out;
+ }
+
+ /* The search may leave no leaf in the path; look for the next readable one. */
+ leaf = path->nodes[0];
+ while (!leaf) {
+ ret = next_leaf(root, path);
+ if (ret < 0) {
+ fprintf(stderr, "Error getting next leaf %d\n",
+ ret);
+ goto out;
+ } else if (ret > 0) {
+ /* No more leaves to search */
+ ret = 0;
+ goto out;
+ }
+ leaf = path->nodes[0];
+ }
+
+ while (1) {
+ /*
+  * Every 1024 iterations ask the user whether to continue; loops is set
+  * to -1 once the user chose not to be asked again.
+  */
+ if (loops >= 0 && loops++ >= 1024) {
+ enum loop_response resp;
+
+ resp = ask_to_continue(file);
+ if (resp == LOOP_STOP)
+ break;
+ else if (resp == LOOP_CONTINUE)
+ loops = 0;
+ else if (resp == LOOP_DONTASK)
+ loops = -1;
+ }
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ do {
+ ret = next_leaf(root, path);
+ if (ret < 0) {
+ fprintf(stderr, "Error searching %d\n", ret);
+ goto out;
+ } else if (ret) {
+ /* No more leaves to search */
+ btrfs_free_path(path);
+ goto set_size;
+ }
+ leaf = path->nodes[0];
+ } while (!leaf);
+ continue;
+ }
+ /* Stop at the first item that is not an extent item of this inode. */
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != key->objectid)
+ break;
+ if (found_key.type != key->type)
+ break;
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(leaf, fi);
+ compression = btrfs_file_extent_compression(leaf, fi);
+ if (compression >= BTRFS_COMPRESS_LAST) {
+ fprintf(stderr, "Don't support compression yet %d\n",
+ compression);
+ ret = -1;
+ goto out;
+ }
+
+ /* Preallocated extents are skipped; nothing is copied for them. */
+ if (extent_type == BTRFS_FILE_EXTENT_PREALLOC)
+ goto next;
+ /* found_key.offset is the position of the extent within the file. */
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ ret = copy_one_inline(fd, path, found_key.offset);
+ if (ret)
+ goto out;
+ } else if (extent_type == BTRFS_FILE_EXTENT_REG) {
+ ret = copy_one_extent(root, fd, leaf, fi,
+ found_key.offset);
+ if (ret)
+ goto out;
+ } else {
+ printf("Weird extent type %d\n", extent_type);
+ }
+next:
+ path->slots[0]++;
+ }
+
+ btrfs_free_path(path);
+set_size:
+ /* The path has already been freed on every route that reaches this label. */
+ if (found_size) {
+ ret = ftruncate(fd, (loff_t)found_size);
+ if (ret)
+ return ret;
+ }
+ if (get_xattrs) {
+ ret = set_file_xattrs(root, key->objectid, fd, file);
+ if (ret)
+ return ret;
+ }
+ /* Times go last so the writes above do not change them again. */
+ if (restore_metadata && times_ok) {
+ ret = futimens(fd, times);
+ if (ret)
+ return ret;
+ }
+ return 0;
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Decide whether the restore may create or replace @path.
+ *
+ * The path is examined with fstatat() relative to the current working
+ * directory and without following a final symlink, so both absolute and
+ * relative destination paths work and a dangling symlink still counts as an
+ * existing entry.  Any fstatat() failure is treated as "does not exist".
+ *
+ * When an existing entry is skipped a message is printed; the hint about -o
+ * is printed only the first time.
+ *
+ * returns:
+ * 0 if the file exists and should be skipped.
+ * 1 if the file does NOT exist
+ * 2 if the file exists but is OK to overwrite
+ */
+static int overwrite_ok(const char * path)
+{
+	static int warn = 0;
+	struct stat st;
+	int ret;
+
+	/* don't be fooled by symlinks */
+	ret = fstatat(AT_FDCWD, path, &st, AT_SYMLINK_NOFOLLOW);
+
+	if (!ret) {
+		if (overwrite)
+			return 2;
+
+		if (verbose || !warn)
+			printf("Skipping existing file"
+			       " %s\n", path);
+		if (!warn)
+			printf("If you wish to overwrite use -o\n");
+		warn = 1;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Restore one symbolic link.
+ *
+ * @key identifies the inode in @root; its type and offset are overwritten
+ * here.  The link is created at the global path_name, and @file is the path
+ * used for the metadata calls and messages.
+ *
+ * The target is read from the inline data of the inode's first extent item
+ * into symlink_target.  An existing destination is skipped or, when
+ * overwriting is allowed, unlinked first.  With restore_metadata the
+ * owner and times are applied to the link itself (not to its target).
+ *
+ * In dry-run mode nothing on disk is touched: the existing destination is
+ * left alone, no link is created and no metadata is set; only the SYMLINK
+ * line is printed.
+ *
+ * Returns 0 on success or when the link is skipped, -ENOMEM when no path can
+ * be allocated, and otherwise the non-zero result of the failing step.
+ */
+static int copy_symlink(struct btrfs_root *root, struct btrfs_key *key,
+			const char *file)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *extent_item;
+	struct btrfs_inode_item *inode_item;
+	u32 len;
+	u32 name_offset;
+	int ret;
+	struct btrfs_timespec *bts;
+	struct timespec times[2];
+
+	ret = overwrite_ok(path_name);
+	if (ret == 0)
+		return 0; /* skip this file */
+
+	/* symlink() can't overwrite, so unlink first (never in a dry run) */
+	if (ret == 2 && !dry_run) {
+		ret = unlink(path_name);
+		if (ret) {
+			fprintf(stderr, "failed to unlink '%s' for overwrite\n",
+				path_name);
+			return ret;
+		}
+	}
+
+	key->type = BTRFS_EXTENT_DATA_KEY;
+	key->offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	leaf = path->nodes[0];
+	if (!leaf) {
+		fprintf(stderr, "Error getting leaf for symlink '%s'\n", file);
+		ret = -1;
+		goto out;
+	}
+
+	extent_item = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_file_extent_item);
+
+	len = btrfs_file_extent_inline_item_len(leaf,
+					btrfs_item_nr(path->slots[0]));
+	/* symlink_target holds PATH_MAX bytes including the terminator. */
+	if (len >= PATH_MAX) {
+		fprintf(stderr, "Symlink '%s' target length %u is longer than PATH_MAX\n",
+			fs_name, len);
+		ret = -1;
+		goto out;
+	}
+
+	/* The target string is stored where the disk_bytenr member would start. */
+	name_offset = (unsigned long) extent_item
+			+ offsetof(struct btrfs_file_extent_item, disk_bytenr);
+	read_extent_buffer(leaf, symlink_target, name_offset, len);
+
+	symlink_target[len] = 0;
+
+	if (!dry_run) {
+		ret = symlink(symlink_target, path_name);
+		if (ret < 0) {
+			fprintf(stderr, "Failed to restore symlink '%s': %s\n",
+				path_name, strerror(errno));
+			goto out;
+		}
+	}
+	printf("SYMLINK: '%s' => '%s'\n", path_name, symlink_target);
+
+	ret = 0;
+	/* In a dry run no link was created, so there is nothing to set. */
+	if (!restore_metadata || dry_run)
+		goto out;
+
+	/*
+	 * Symlink metadata operates differently than files/directories, so do
+	 * our own work here.
+	 */
+	key->type = BTRFS_INODE_ITEM_KEY;
+	key->offset = 0;
+
+	btrfs_release_path(path);
+
+	ret = btrfs_lookup_inode(NULL, root, path, key, 0);
+	if (ret) {
+		fprintf(stderr, "Failed to lookup inode for '%s'\n", file);
+		goto out;
+	}
+
+	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_inode_item);
+
+	/* AT_FDCWD makes relative paths resolve against the working directory. */
+	ret = fchownat(AT_FDCWD, file,
+		       btrfs_inode_uid(path->nodes[0], inode_item),
+		       btrfs_inode_gid(path->nodes[0], inode_item),
+		       AT_SYMLINK_NOFOLLOW);
+	if (ret) {
+		fprintf(stderr, "Failed to change owner: %s\n",
+			strerror(errno));
+		goto out;
+	}
+
+	bts = btrfs_inode_atime(inode_item);
+	times[0].tv_sec = btrfs_timespec_sec(path->nodes[0], bts);
+	times[0].tv_nsec = btrfs_timespec_nsec(path->nodes[0], bts);
+
+	bts = btrfs_inode_mtime(inode_item);
+	times[1].tv_sec = btrfs_timespec_sec(path->nodes[0], bts);
+	times[1].tv_nsec = btrfs_timespec_nsec(path->nodes[0], bts);
+
+	ret = utimensat(AT_FDCWD, file, times, AT_SYMLINK_NOFOLLOW);
+	if (ret)
+		fprintf(stderr, "Failed to set times: %s\n", strerror(errno));
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Restore the directory identified by @key in @root, recursing into
+ * subdirectories and subvolumes.
+ *
+ * @key:            objectid of the directory; offset and type are
+ *                  overwritten here
+ * @output_rootdir: destination prefix prepended to every restored path
+ * @in_dir:         path of this directory relative to the restored
+ *                  filesystem root
+ * @mreg:           optional compiled regex; entries whose path does not
+ *                  match are skipped (NULL restores everything)
+ *
+ * Walks the directory's BTRFS_DIR_INDEX_KEY items.  For each entry the
+ * global fs_name / path_name buffers are rebuilt and the entry is restored
+ * according to its type: regular files via copy_file(), directories via a
+ * recursive call, symlinks via copy_symlink() when restore_symlinks is set.
+ * Other entry types are ignored.  With ignore_errors set, a failing entry is
+ * skipped instead of aborting the walk.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+static int search_dir(struct btrfs_root *root, struct btrfs_key *key,
+ const char *output_rootdir, const char *in_dir,
+ const regex_t *mreg)
+{
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_dir_item *dir_item;
+ struct btrfs_key found_key, location;
+ char filename[BTRFS_NAME_LEN + 1];
+ unsigned long name_ptr;
+ int name_len;
+ int ret = 0;
+ int fd;
+ int loops = 0;
+ u8 type;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ fprintf(stderr, "Ran out of memory\n");
+ return -ENOMEM;
+ }
+
+ key->offset = 0;
+ key->type = BTRFS_DIR_INDEX_KEY;
+
+ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Error searching %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+ /* The search may leave no leaf in the path; look for the next readable one. */
+ leaf = path->nodes[0];
+ while (!leaf) {
+ if (verbose > 1)
+ printf("No leaf after search, looking for the next "
+ "leaf\n");
+ ret = next_leaf(root, path);
+ if (ret < 0) {
+ fprintf(stderr, "Error getting next leaf %d\n",
+ ret);
+ goto out;
+ } else if (ret > 0) {
+ /* No more leaves to search */
+ if (verbose)
+ printf("Reached the end of the tree looking "
+ "for the directory\n");
+ ret = 0;
+ goto out;
+ }
+ leaf = path->nodes[0];
+ }
+
+ while (leaf) {
+ /*
+  * loops is reset to 0 whenever a regular file or a directory gets
+  * restored, so this fires after 1024 consecutive iterations without one.
+  */
+ if (loops++ >= 1024) {
+ printf("We have looped trying to restore files in %s "
+ "too many times to be making progress, "
+ "stopping\n", in_dir);
+ break;
+ }
+
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ do {
+ ret = next_leaf(root, path);
+ if (ret < 0) {
+ fprintf(stderr, "Error searching %d\n",
+ ret);
+ goto out;
+ } else if (ret > 0) {
+ /* No more leaves to search */
+ if (verbose)
+ printf("Reached the end of "
+ "the tree searching the"
+ " directory\n");
+ ret = 0;
+ goto out;
+ }
+ leaf = path->nodes[0];
+ } while (!leaf);
+ continue;
+ }
+ /* Stop at the first item that is not a dir index item of this directory. */
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != key->objectid) {
+ if (verbose > 1)
+ printf("Found objectid=%Lu, key=%Lu\n",
+ found_key.objectid, key->objectid);
+ break;
+ }
+ if (found_key.type != key->type) {
+ if (verbose > 1)
+ printf("Found type=%u, want=%u\n",
+ found_key.type, key->type);
+ break;
+ }
+ dir_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_dir_item);
+ /* The entry name is stored right after the dir item header. */
+ name_ptr = (unsigned long)(dir_item + 1);
+ name_len = btrfs_dir_name_len(leaf, dir_item);
+ /*
+  * NOTE(review): name_len comes from the item and is not checked against
+  * BTRFS_NAME_LEN before filename is filled - confirm a damaged item
+  * cannot overrun the buffer.
+  */
+ read_extent_buffer(leaf, filename, name_ptr, name_len);
+ filename[name_len] = '\0';
+ type = btrfs_dir_type(leaf, dir_item);
+ btrfs_dir_item_key_to_cpu(leaf, dir_item, &location);
+
+ /* full path from root of btrfs being restored */
+ snprintf(fs_name, PATH_MAX, "%s/%s", in_dir, filename);
+
+ if (mreg && REG_NOMATCH == regexec(mreg, fs_name, 0, NULL, 0))
+ goto next;
+
+ /* full path from system root */
+ snprintf(path_name, PATH_MAX, "%s%s", output_rootdir, fs_name);
+
+ /*
+ * Restore directories, files, symlinks and metadata.
+ */
+ if (type == BTRFS_FT_REG_FILE) {
+ if (!overwrite_ok(path_name))
+ goto next;
+
+ if (verbose)
+ printf("Restoring %s\n", path_name);
+ if (dry_run)
+ goto next;
+ fd = open(path_name, O_CREAT|O_WRONLY, 0644);
+ if (fd < 0) {
+ fprintf(stderr, "Error creating %s: %d\n",
+ path_name, errno);
+ if (ignore_errors)
+ goto next;
+ ret = -1;
+ goto out;
+ }
+ loops = 0;
+ ret = copy_file(root, fd, &location, path_name);
+ close(fd);
+ if (ret) {
+ fprintf(stderr, "Error copying data for %s\n",
+ path_name);
+ if (ignore_errors)
+ goto next;
+ goto out;
+ }
+ } else if (type == BTRFS_FT_DIR) {
+ struct btrfs_root *search_root = root;
+ /*
+  * fs_name is a global that the recursive call overwrites, so the
+  * directory path is duplicated before descending.
+  */
+ char *dir = strdup(fs_name);
+
+ if (!dir) {
+ fprintf(stderr, "Ran out of memory\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* The entry refers to another tree root (subvolume or snapshot). */
+ if (location.type == BTRFS_ROOT_ITEM_KEY) {
+ /*
+ * If we are a snapshot and this is the index
+ * object to ourselves just skip it.
+ */
+ if (location.objectid ==
+ root->root_key.objectid) {
+ free(dir);
+ goto next;
+ }
+
+ location.offset = (u64)-1;
+ search_root = btrfs_read_fs_root(root->fs_info,
+ &location);
+ if (IS_ERR(search_root)) {
+ free(dir);
+ fprintf(stderr, "Error reading "
+ "subvolume %s: %lu\n",
+ path_name,
+ PTR_ERR(search_root));
+ if (ignore_errors)
+ goto next;
+ ret = PTR_ERR(search_root);
+ goto out;
+ }
+
+ /*
+ * A subvolume will have a key.offset of 0, a
+ * snapshot will have key.offset of a transid.
+ */
+ if (search_root->root_key.offset != 0 &&
+ get_snaps == 0) {
+ free(dir);
+ printf("Skipping snapshot %s\n",
+ filename);
+ goto next;
+ }
+ /* Continue with the top directory of that tree. */
+ location.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ }
+
+ if (verbose)
+ printf("Restoring %s\n", path_name);
+
+ /* An already existing directory (EEXIST) is not an error. */
+ errno = 0;
+ if (dry_run)
+ ret = 0;
+ else
+ ret = mkdir(path_name, 0755);
+ if (ret && errno != EEXIST) {
+ free(dir);
+ fprintf(stderr, "Error mkdiring %s: %d\n",
+ path_name, errno);
+ if (ignore_errors)
+ goto next;
+ ret = -1;
+ goto out;
+ }
+ loops = 0;
+ ret = search_dir(search_root, &location,
+ output_rootdir, dir, mreg);
+ free(dir);
+ if (ret) {
+ fprintf(stderr, "Error searching %s\n",
+ path_name);
+ if (ignore_errors)
+ goto next;
+ goto out;
+ }
+ } else if (type == BTRFS_FT_SYMLINK) {
+ /*
+  * NOTE(review): only negative results are treated as failures here;
+  * a positive value from copy_symlink() is carried along in ret.
+  */
+ if (restore_symlinks)
+ ret = copy_symlink(root, &location, path_name);
+ if (ret < 0) {
+ if (ignore_errors)
+ goto next;
+ btrfs_free_path(path);
+ return ret;
+ }
+ }
+next:
+ path->slots[0]++;
+ }
+
+ /* Directory metadata is applied last, after all entries have been restored. */
+ if (restore_metadata) {
+ snprintf(path_name, PATH_MAX, "%s%s", output_rootdir, in_dir);
+ fd = open(path_name, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "ERROR: Failed to access %s to restore metadata\n",
+ path_name);
+ if (!ignore_errors) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ /*
+ * Set owner/mode/time on the directory as well
+ */
+ key->type = BTRFS_INODE_ITEM_KEY;
+ ret = copy_metadata(root, fd, key);
+ close(fd);
+ if (ret && !ignore_errors)
+ goto out;
+ }
+ }
+
+ if (verbose)
+ printf("Done searching %s\n", in_dir);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Print every ROOT_ITEM found in the tree of tree roots.
+ *
+ * @root is only used to reach fs_info->tree_root. For each root item the
+ * key, the root's bytenr and its level are printed on one line.
+ *
+ * Returns 0 once the walk ends (including when btrfs_next_leaf() stops it),
+ * -ENOMEM if no path could be allocated, -1 if the initial search failed.
+ */
+static int do_list_roots(struct btrfs_root *root)
+{
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
+	struct btrfs_key first;
+	struct btrfs_path *path;
+	struct extent_buffer *eb;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Failed to alloc path\n");
+		return -ENOMEM;
+	}
+
+	/* Start at the smallest possible ROOT_ITEM key. */
+	first.objectid = 0;
+	first.type = BTRFS_ROOT_ITEM_KEY;
+	first.offset = 0;
+
+	ret = btrfs_search_slot(NULL, tree_root, &first, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to do search %d\n", ret);
+		btrfs_free_path(path);
+		return -1;
+	}
+
+	eb = path->nodes[0];
+
+	/* The slot is advanced after every item, printed or not. */
+	for (;; path->slots[0]++) {
+		struct btrfs_disk_key disk_key;
+		struct btrfs_key cpu_key;
+		struct btrfs_root_item item;
+		int slot = path->slots[0];
+
+		if (slot >= btrfs_header_nritems(eb)) {
+			/* Leaf exhausted: any non-zero result ends the walk. */
+			if (btrfs_next_leaf(tree_root, path))
+				break;
+			eb = path->nodes[0];
+			slot = path->slots[0];
+		}
+
+		btrfs_item_key(eb, &disk_key, slot);
+		btrfs_disk_key_to_cpu(&cpu_key, &disk_key);
+		if (btrfs_key_type(&cpu_key) != BTRFS_ROOT_ITEM_KEY)
+			continue;
+
+		read_extent_buffer(eb, &item, btrfs_item_ptr_offset(eb, slot),
+				   sizeof(item));
+		printf(" tree ");
+		btrfs_print_key(&disk_key);
+		printf(" %Lu level %d\n", btrfs_root_bytenr(&item),
+		       btrfs_root_level(&item));
+	}
+
+	btrfs_free_path(path);
+	return 0;
+}
+
+/*
+ * Open the filesystem on @dev for restore, tolerating damage.
+ *
+ * Superblock mirrors are tried in order, starting at @super_mirror, until
+ * open_ctree_fs_info() succeeds with OPEN_CTREE_PARTIAL. If the tree root
+ * node is not up to date afterwards it is read by hand from @root_location
+ * (or, when that is 0, from the location stored in the superblock). Unless
+ * @list_roots is set, a missing fs root is looked up as well.
+ *
+ * With @list_roots set the tree roots are printed through do_list_roots().
+ *
+ * Returns the fs root on success. In @list_roots mode, when no fs root is
+ * available, the tree root is returned instead: the caller only hands the
+ * result to close_ctree() in that mode, and returning NULL would both report
+ * a successful listing as a failure and leave the filesystem open.
+ * Returns NULL on failure, after closing whatever had been opened.
+ */
+static struct btrfs_root *open_fs(const char *dev, u64 root_location,
+				  int super_mirror, int list_roots)
+{
+	struct btrfs_fs_info *fs_info = NULL;
+	struct btrfs_root *root = NULL;
+	u64 bytenr;
+	int i;
+
+	for (i = super_mirror; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+		bytenr = btrfs_sb_offset(i);
+		fs_info = open_ctree_fs_info(dev, bytenr, root_location,
+					     OPEN_CTREE_PARTIAL);
+		if (fs_info)
+			break;
+		fprintf(stderr, "Could not open root, trying backup super\n");
+	}
+
+	if (!fs_info)
+		return NULL;
+
+	/*
+	 * All we really need to succeed is reading the chunk tree, everything
+	 * else we can do by hand, since we only need to read the tree root and
+	 * the fs_root.
+	 */
+	if (!extent_buffer_uptodate(fs_info->tree_root->node)) {
+		u64 generation;
+
+		root = fs_info->tree_root;
+		if (!root_location)
+			root_location = btrfs_super_root(fs_info->super_copy);
+		generation = btrfs_super_generation(fs_info->super_copy);
+		root->node = read_tree_block(root, root_location,
+					     root->leafsize, generation);
+		if (!extent_buffer_uptodate(root->node)) {
+			fprintf(stderr, "Error opening tree root\n");
+			close_ctree(root);
+			return NULL;
+		}
+	}
+
+	if (!list_roots && !fs_info->fs_root) {
+		struct btrfs_key key;
+
+		key.objectid = BTRFS_FS_TREE_OBJECTID;
+		key.type = BTRFS_ROOT_ITEM_KEY;
+		key.offset = (u64)-1;
+		fs_info->fs_root = btrfs_read_fs_root_no_cache(fs_info, &key);
+		if (IS_ERR(fs_info->fs_root)) {
+			fprintf(stderr, "Couldn't read fs root: %ld\n",
+				PTR_ERR(fs_info->fs_root));
+			close_ctree(fs_info->tree_root);
+			return NULL;
+		}
+	}
+
+	if (list_roots) {
+		if (do_list_roots(fs_info->tree_root)) {
+			close_ctree(fs_info->tree_root);
+			return NULL;
+		}
+		/* Listing worked; never hand back NULL for a live fs_info. */
+		if (!fs_info->fs_root)
+			return fs_info->tree_root;
+	}
+
+	return fs_info->fs_root;
+}
+
+/*
+ * Find the first DIR_INDEX item in @root and store its objectid in
+ * @objectid, for use as the starting directory of a restore.
+ *
+ * Returns 0 when an item was found and *objectid was written. Any other
+ * value means failure: -1 when no path could be allocated or the search
+ * yielded no leaf, a negative value from btrfs_search_slot()/next_leaf(),
+ * or a positive value when the leaves ran out without a match.
+ */
+static int find_first_dir(struct btrfs_root *root, u64 *objectid)
+{
+	struct btrfs_path *path;
+	struct btrfs_key found_key;
+	struct btrfs_key key;
+	int ret = -1;
+	int i;
+
+	key.objectid = 0;
+	key.type = BTRFS_DIR_INDEX_KEY;
+	key.offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Ran out of memory\n");
+		return ret;
+	}
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching %d\n", ret);
+		goto out;
+	}
+
+	if (!path->nodes[0]) {
+		fprintf(stderr, "No leaf!\n");
+		/*
+		 * ret still holds the non-negative search result here; make
+		 * sure this can never be mistaken for success.
+		 */
+		ret = -1;
+		goto out;
+	}
+
+	while (1) {
+		/* Scan the rest of the current leaf for a DIR_INDEX item. */
+		for (i = path->slots[0];
+		     i < btrfs_header_nritems(path->nodes[0]); i++) {
+			btrfs_item_key_to_cpu(path->nodes[0], &found_key, i);
+			if (found_key.type != key.type)
+				continue;
+
+			printf("Using objectid %Lu for first dir\n",
+			       found_key.objectid);
+			*objectid = found_key.objectid;
+			ret = 0;
+			goto out;
+		}
+
+		/* Advance until a leaf is available or the tree ends. */
+		do {
+			ret = next_leaf(root, path);
+			if (ret < 0) {
+				fprintf(stderr, "Error getting next leaf %d\n",
+					ret);
+				goto out;
+			} else if (ret > 0) {
+				fprintf(stderr, "No more leaves\n");
+				goto out;
+			}
+		} while (!path->nodes[0]);
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Usage text for "btrfs restore": synopsis, one-line summary, an empty
+ * separator string and then one entry per option parsed by cmd_restore().
+ * The array is NULL-terminated and is what cmd_restore() passes to usage().
+ */
+const char * const cmd_restore_usage[] = {
+ "btrfs restore [options] <device> <path> | -l <device>",
+ "Try to restore files from a damaged filesystem (unmounted)",
+ "",
+ "-s|--snapshots get snapshots",
+ "-x|--xattr get extended attributes",
+ "-m|--metadata restore owner, mode and times",
+ "-S|--symlinks restore symbolic links",
+ "-v|--verbose verbose",
+ "-i|--ignore-errors ignore errors",
+ "-o|--overwrite overwrite",
+ "-t <bytenr> tree location",
+ "-f <bytenr> filesystem location",
+ "-u|--super <mirror> super mirror",
+ "-r|--root <rootid> root objectid",
+ "-d find dir",
+ "-l|--list-roots list tree roots",
+ "-D|--dry-run dry run (only list files that would be recovered)",
+ "--path-regex <regex>",
+ " restore only filenames matching regex,",
+ " you have to use following syntax (possibly quoted):",
+ " ^/(|home(|/username(|/Desktop(|/.*))))$",
+ "-c ignore case (--path-regex only)",
+ NULL
+};
+
+/*
+ * Entry point of "btrfs restore".
+ *
+ * Parses the options described in cmd_restore_usage, refuses to work on a
+ * mounted device, opens the filesystem through open_fs() and then either
+ * lists the tree roots (-l) or walks the chosen directory with search_dir(),
+ * writing what it finds below the <path> argument.
+ *
+ * Returns 0 on success and 1 on failure. Invalid option values terminate
+ * the process through exit(1); unknown options go to usage().
+ */
+int cmd_restore(int argc, char **argv)
+{
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	char dir_name[PATH_MAX];
+	u64 tree_location = 0;
+	u64 fs_location = 0;
+	u64 root_objectid = 0;
+	int len;
+	int ret;
+	int super_mirror = 0;
+	int find_dir = 0;
+	int list_roots = 0;
+	const char *match_regstr = NULL;
+	int match_cflags = REG_EXTENDED | REG_NOSUB | REG_NEWLINE;
+	regex_t match_reg, *mreg = NULL;
+	char reg_err[256];
+
+	while (1) {
+		int opt;
+		u64 mirror;
+		static const struct option long_options[] = {
+			{ "path-regex", required_argument, NULL, 256},
+			{ "dry-run", no_argument, NULL, 'D'},
+			{ "metadata", no_argument, NULL, 'm'},
+			{ "symlinks", no_argument, NULL, 'S'},
+			{ "snapshots", no_argument, NULL, 's'},
+			{ "xattr", no_argument, NULL, 'x'},
+			{ "verbose", no_argument, NULL, 'v'},
+			{ "ignore-errors", no_argument, NULL, 'i'},
+			{ "overwrite", no_argument, NULL, 'o'},
+			{ "super", required_argument, NULL, 'u'},
+			{ "root", required_argument, NULL, 'r'},
+			{ "list-roots", no_argument, NULL, 'l'},
+			{ NULL, 0, NULL, 0}
+		};
+
+		opt = getopt_long(argc, argv, "sSxviot:u:dmf:r:lDc", long_options,
+				  NULL);
+		if (opt < 0)
+			break;
+
+		switch (opt) {
+		case 's':
+			get_snaps = 1;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case 'i':
+			ignore_errors = 1;
+			break;
+		case 'o':
+			overwrite = 1;
+			break;
+		case 't':
+			tree_location = arg_strtou64(optarg);
+			break;
+		case 'f':
+			fs_location = arg_strtou64(optarg);
+			break;
+		case 'u':
+			/*
+			 * Range-check the full 64-bit value before narrowing
+			 * it: assigning to the int first would let huge
+			 * values wrap to something that passes the check.
+			 */
+			mirror = arg_strtou64(optarg);
+			if (mirror >= BTRFS_SUPER_MIRROR_MAX) {
+				fprintf(stderr, "Super mirror not valid\n");
+				exit(1);
+			}
+			super_mirror = mirror;
+			break;
+		case 'd':
+			find_dir = 1;
+			break;
+		case 'r':
+			root_objectid = arg_strtou64(optarg);
+			if (!is_fstree(root_objectid)) {
+				fprintf(stderr, "objectid %llu is not a valid fs/file tree\n",
+					root_objectid);
+				exit(1);
+			}
+			break;
+		case 'l':
+			list_roots = 1;
+			break;
+		case 'm':
+			restore_metadata = 1;
+			break;
+		case 'S':
+			restore_symlinks = 1;
+			break;
+		case 'D':
+			dry_run = 1;
+			break;
+		case 'c':
+			match_cflags |= REG_ICASE;
+			break;
+		/* long option without single letter alternative */
+		case 256:
+			match_regstr = optarg;
+			break;
+		case 'x':
+			get_xattrs = 1;
+			break;
+		default:
+			usage(cmd_restore_usage);
+		}
+	}
+
+	/* A restore needs <device> and <path>; listing only needs <device>. */
+	if (!list_roots && check_argc_min(argc - optind, 2))
+		usage(cmd_restore_usage);
+	else if (list_roots && check_argc_min(argc - optind, 1))
+		usage(cmd_restore_usage);
+
+	if (fs_location && root_objectid) {
+		fprintf(stderr, "don't use -f and -r at the same time.\n");
+		return 1;
+	}
+
+	if ((ret = check_mounted(argv[optind])) < 0) {
+		fprintf(stderr, "Could not check mount status: %s\n",
+			strerror(-ret));
+		return 1;
+	} else if (ret) {
+		fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
+		return 1;
+	}
+
+	root = open_fs(argv[optind], tree_location, super_mirror, list_roots);
+	if (root == NULL)
+		return 1;
+
+	/* ret is 0 here (from check_mounted), so -l exits successfully. */
+	if (list_roots)
+		goto out;
+
+	if (fs_location != 0) {
+		/* Replace the fs root node with the block given by -f. */
+		free_extent_buffer(root->node);
+		root->node = read_tree_block(root, fs_location, root->leafsize, 0);
+		if (!extent_buffer_uptodate(root->node)) {
+			fprintf(stderr, "Failed to read fs location\n");
+			ret = 1;
+			goto out;
+		}
+	}
+
+	memset(path_name, 0, PATH_MAX);
+
+	if (strlen(argv[optind + 1]) >= PATH_MAX) {
+		fprintf(stderr, "ERROR: path too long\n");
+		ret = 1;
+		goto out;
+	}
+	strncpy(dir_name, argv[optind + 1], sizeof dir_name);
+	dir_name[sizeof dir_name - 1] = 0;
+
+	/* Strip the trailing / on the dir name */
+	len = strlen(dir_name);
+	while (len && dir_name[--len] == '/') {
+		dir_name[len] = '\0';
+	}
+
+	if (root_objectid != 0) {
+		struct btrfs_root *orig_root = root;
+
+		key.objectid = root_objectid;
+		key.type = BTRFS_ROOT_ITEM_KEY;
+		key.offset = (u64)-1;
+		root = btrfs_read_fs_root(orig_root->fs_info, &key);
+		if (IS_ERR(root)) {
+			fprintf(stderr, "fail to read root %llu: %s\n",
+				root_objectid, strerror(-PTR_ERR(root)));
+			/* Keep a valid root so close_ctree() below works. */
+			root = orig_root;
+			ret = 1;
+			goto out;
+		}
+		key.type = 0;
+		key.offset = 0;
+	}
+
+	if (find_dir) {
+		ret = find_first_dir(root, &key.objectid);
+		if (ret)
+			goto out;
+	} else {
+		key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	}
+
+	if (match_regstr) {
+		ret = regcomp(&match_reg, match_regstr, match_cflags);
+		if (ret) {
+			regerror(ret, &match_reg, reg_err, sizeof(reg_err));
+			fprintf(stderr, "Regex compile failed: %s\n", reg_err);
+			goto out;
+		}
+		/* Only a successfully compiled regex is freed at out:. */
+		mreg = &match_reg;
+	}
+
+	if (dry_run)
+		printf("This is a dry-run, no files are going to be restored\n");
+
+	ret = search_dir(root, &key, dir_name, "", mreg);
+
+out:
+	if (mreg)
+		regfree(mreg);
+	close_ctree(root);
+	return !!ret;
+}
diff --git a/cmds-scrub.c b/cmds-scrub.c
new file mode 100644
index 00000000..da614f2f
--- /dev/null
+++ b/cmds-scrub.c
@@ -0,0 +1,1803 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "kerncompat.h"
+#include "androidcompat.h"
+
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/syscall.h>
+#include <poll.h>
+#include <sys/file.h>
+#include <uuid/uuid.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <ctype.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <limits.h>
+
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+#include "volumes.h"
+#include "disk-io.h"
+
+#include "commands.h"
+
+/* NULL-terminated synopsis for the "btrfs scrub" command group. */
+static const char * const scrub_cmd_group_usage[] = {
+ "btrfs scrub <command> [options] <path>|<device>",
+ NULL
+};
+
+/*
+ * SCRUB_DATA_FILE is the base name of the per-filesystem status file;
+ * scrub_datafile() appends ".<fsid>" (and "_<tmp>" for temporary files).
+ * The status file starts with the line
+ * SCRUB_FILE_VERSION_PREFIX ":" SCRUB_FILE_VERSION, which scrub_write_file()
+ * emits and scrub_read_file() verifies.
+ * SCRUB_PROGRESS_SOCKET_PATH: presumably the base path of the unix socket
+ * used to query progress -- its use is outside this excerpt, TODO confirm.
+ */
+#define SCRUB_DATA_FILE "/var/lib/btrfs/scrub.status"
+#define SCRUB_PROGRESS_SOCKET_PATH "/var/lib/btrfs/scrub.progress"
+#define SCRUB_FILE_VERSION_PREFIX "scrub status"
+#define SCRUB_FILE_VERSION "1"
+
+/*
+ * Timing and completion state of one scrub run. All fields except
+ * in_progress are stored in and read back from the status file.
+ */
+struct scrub_stats {
+ /* start time; 0 makes _print_scrub_ss() report "no stats available" */
+ time_t t_start;
+ /* when non-zero, printed as the resume time instead of t_start */
+ time_t t_resumed;
+ /* run time in seconds (scrub_one_dev: tv_sec - t_start) */
+ u64 duration;
+ /* set to 1 by scrub_one_dev() once the scrub ioctl has returned */
+ u64 finished;
+ /* non-zero when the scrub ioctl returned an error */
+ u64 canceled;
+ /* non-zero makes _print_scrub_ss() report the scrub as running */
+ int in_progress;
+};
+
+/* TBD: replace with #include "linux/ioprio.h" in some years */
+/*
+ * Local fallback definitions, compiled only when IOPRIO_H is not defined.
+ * scrub_one_dev() passes IOPRIO_WHO_PROCESS and an IOPRIO_PRIO_VALUE() to
+ * the ioprio_set system call. IOPRIO_PRIO_VALUE() packs the class above
+ * IOPRIO_CLASS_SHIFT bits of class data.
+ */
+#if !defined (IOPRIO_H)
+#define IOPRIO_WHO_PROCESS 1
+#define IOPRIO_CLASS_SHIFT 13
+#define IOPRIO_PRIO_VALUE(class, data) \
+ (((class) << IOPRIO_CLASS_SHIFT) | (data))
+#define IOPRIO_CLASS_IDLE 3
+#endif
+
+/* Per-device scrub state shared between the worker and reporting code. */
+struct scrub_progress {
+ /* argument block handed to BTRFS_IOC_SCRUB / BTRFS_IOC_SCRUB_PROGRESS */
+ struct btrfs_ioctl_scrub_args scrub_args;
+ /* descriptor the scrub ioctls are issued on */
+ int fd;
+ /* return value of the last scrub ioctl */
+ int ret;
+ /* when set, scrub_resumed_stats() does not merge in @resumed */
+ int skip;
+ struct scrub_stats stats;
+ /* record of an earlier run whose counters are added on; may be NULL */
+ struct scrub_file_record *resumed;
+ /* errno value saved after the scrub ioctl */
+ int ioctl_errno;
+ /* held by scrub_one_dev() while it sets stats.finished */
+ pthread_mutex_t progress_mutex;
+ /* class and class data combined by IOPRIO_PRIO_VALUE() for ioprio_set */
+ int ioprio_class;
+ int ioprio_classdata;
+};
+
+/* One device line of the status file, as parsed by scrub_read_file(). */
+struct scrub_file_record {
+ /* binary filesystem uuid; all zeroes marks a record not yet filled in */
+ u8 fsid[BTRFS_FSID_SIZE];
+ u64 devid;
+ struct scrub_stats stats;
+ /* counters read from the "key:value" pairs of the line */
+ struct btrfs_scrub_progress p;
+};
+
+/*
+ * Context handed to the scrub_progress_cycle() thread. Only part of that
+ * function is visible here, so several notes below are assumptions.
+ */
+struct scrub_progress_cycle {
+ /* presumably the descriptor of the mounted filesystem -- TODO confirm */
+ int fdmnt;
+ /* polled for POLLIN to accept incoming connections */
+ int prg_fd;
+ /* presumably enables writing the status file -- TODO confirm */
+ int do_record;
+ /* filesystem info; num_devices gives the number of devices handled */
+ struct btrfs_ioctl_fs_info_args *fi;
+ /* NOTE(review): roles of progress/shared_progress not visible here */
+ struct scrub_progress *progress;
+ struct scrub_progress *shared_progress;
+ /* presumably the mutex passed to scrub_write_progress() -- TODO confirm */
+ pthread_mutex_t *write_mutex;
+};
+
+/*
+ * Filesystem-wide totals built by add_to_fs_stat() from per-device data;
+ * must be prepared with init_fs_stat() first.
+ */
+struct scrub_fs_stat {
+ /* summed counters of all devices */
+ struct btrfs_scrub_progress p;
+ /* combined timing/completion state of all devices */
+ struct scrub_stats s;
+ /* NOTE(review): not used in the visible code; purpose to be confirmed */
+ int i;
+};
+
+/*
+ * Print every counter of @sp, one "name: value" line each (raw output).
+ *
+ * The counters are unsigned 64-bit values, so they are printed with %llu,
+ * matching print_scrub_summary(); %lld would show large values as negative.
+ */
+static void print_scrub_full(struct btrfs_scrub_progress *sp)
+{
+	printf("\tdata_extents_scrubbed: %llu\n", sp->data_extents_scrubbed);
+	printf("\ttree_extents_scrubbed: %llu\n", sp->tree_extents_scrubbed);
+	printf("\tdata_bytes_scrubbed: %llu\n", sp->data_bytes_scrubbed);
+	printf("\ttree_bytes_scrubbed: %llu\n", sp->tree_bytes_scrubbed);
+	printf("\tread_errors: %llu\n", sp->read_errors);
+	printf("\tcsum_errors: %llu\n", sp->csum_errors);
+	printf("\tverify_errors: %llu\n", sp->verify_errors);
+	printf("\tno_csum: %llu\n", sp->no_csum);
+	printf("\tcsum_discards: %llu\n", sp->csum_discards);
+	printf("\tsuper_errors: %llu\n", sp->super_errors);
+	printf("\tmalloc_errors: %llu\n", sp->malloc_errors);
+	printf("\tuncorrectable_errors: %llu\n", sp->uncorrectable_errors);
+	printf("\tunverified_errors: %llu\n", sp->unverified_errors);
+	printf("\tcorrected_errors: %llu\n", sp->corrected_errors);
+	printf("\tlast_physical: %llu\n", sp->last_physical);
+}
+
+/* Print " <desc>=<count>" when the counter @test is non-zero. */
+#define PRINT_SCRUB_ERROR(test, desc) do {	\
+	if (test)				\
+		printf(" %s=%llu", desc, test);	\
+} while (0)
+
+/*
+ * Print the condensed report for @p: total bytes scrubbed with an error
+ * count, and, if any error was counted, a per-type breakdown followed by
+ * the corrected/uncorrectable/unverified totals.
+ */
+static void print_scrub_summary(struct btrfs_scrub_progress *p)
+{
+	/* errors counted by kind of failure ... */
+	u64 by_kind = p->read_errors + p->csum_errors +
+		      p->verify_errors + p->super_errors;
+	/* ... and counted by outcome; the larger of the two is reported */
+	u64 by_outcome = p->corrected_errors + p->uncorrectable_errors;
+
+	if (p->malloc_errors)
+		printf("*** WARNING: memory allocation failed while scrubbing. "
+		       "results may be inaccurate\n");
+
+	printf("\ttotal bytes scrubbed: %s with %llu errors\n",
+	       pretty_size(p->data_bytes_scrubbed + p->tree_bytes_scrubbed),
+	       max(by_kind, by_outcome));
+
+	if (!by_kind && !by_outcome)
+		return;
+
+	printf("\terror details:");
+	PRINT_SCRUB_ERROR(p->read_errors, "read");
+	PRINT_SCRUB_ERROR(p->super_errors, "super");
+	PRINT_SCRUB_ERROR(p->verify_errors, "verify");
+	PRINT_SCRUB_ERROR(p->csum_errors, "csum");
+	printf("\n");
+	printf("\tcorrected errors: %llu, uncorrectable errors: %llu, "
+	       "unverified errors: %llu\n", p->corrected_errors,
+	       p->uncorrectable_errors, p->unverified_errors);
+}
+
+/*
+ * Add counter @name of @src to the filesystem totals. The parameter is not
+ * called "p" on purpose: that would also replace the member name in
+ * "fs_stat->p" and only work when the argument happens to be spelled "p".
+ */
+#define _SCRUB_FS_STAT(src, name, fs_stat) do {		\
+	(fs_stat)->p.name += (src)->name;		\
+} while (0)
+
+/* Keep the smaller of the stored and the new value. */
+#define _SCRUB_FS_STAT_MIN(ss, name, fs_stat)		\
+do {							\
+	if ((fs_stat)->s.name > (ss)->name) {		\
+		(fs_stat)->s.name = (ss)->name;		\
+	}						\
+} while (0)
+
+/* Like _SCRUB_FS_STAT_MIN, but a stored 0 counts as "not set yet". */
+#define _SCRUB_FS_STAT_ZMIN(ss, name, fs_stat)				\
+do {									\
+	if (!(fs_stat)->s.name || (fs_stat)->s.name > (ss)->name) {	\
+		(fs_stat)->s.name = (ss)->name;				\
+	}								\
+} while (0)
+
+/* Keep the larger value; a stored 0 counts as "not set yet". */
+#define _SCRUB_FS_STAT_ZMAX(ss, name, fs_stat)				\
+do {									\
+	if (!(fs_stat)->s.name || (fs_stat)->s.name < (ss)->name) {	\
+		(fs_stat)->s.name = (ss)->name;				\
+	}								\
+} while (0)
+
+/*
+ * Fold the counters @p and stats @ss of one device into @fs_stat.
+ *
+ * Counters are summed. For the stats the earliest start/resume time, the
+ * longest duration and the largest canceled value are kept, and finished
+ * only stays set if it is set for every device.
+ */
+static void add_to_fs_stat(struct btrfs_scrub_progress *p,
+			   struct scrub_stats *ss,
+			   struct scrub_fs_stat *fs_stat)
+{
+	_SCRUB_FS_STAT(p, data_extents_scrubbed, fs_stat);
+	_SCRUB_FS_STAT(p, tree_extents_scrubbed, fs_stat);
+	_SCRUB_FS_STAT(p, data_bytes_scrubbed, fs_stat);
+	_SCRUB_FS_STAT(p, tree_bytes_scrubbed, fs_stat);
+	_SCRUB_FS_STAT(p, read_errors, fs_stat);
+	_SCRUB_FS_STAT(p, csum_errors, fs_stat);
+	_SCRUB_FS_STAT(p, verify_errors, fs_stat);
+	_SCRUB_FS_STAT(p, no_csum, fs_stat);
+	_SCRUB_FS_STAT(p, csum_discards, fs_stat);
+	_SCRUB_FS_STAT(p, super_errors, fs_stat);
+	_SCRUB_FS_STAT(p, malloc_errors, fs_stat);
+	_SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat);
+	/* printed by both report functions, so it has to be totalled too */
+	_SCRUB_FS_STAT(p, unverified_errors, fs_stat);
+	_SCRUB_FS_STAT(p, corrected_errors, fs_stat);
+	_SCRUB_FS_STAT(p, last_physical, fs_stat);
+	_SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat);
+	_SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat);
+	_SCRUB_FS_STAT_ZMAX(ss, duration, fs_stat);
+	_SCRUB_FS_STAT_ZMAX(ss, canceled, fs_stat);
+	_SCRUB_FS_STAT_MIN(ss, finished, fs_stat);
+}
+
+/*
+ * Reset @fs_stat to all zeroes with finished preset to 1, so that the
+ * minimum taken in add_to_fs_stat() can clear it for unfinished devices.
+ */
+static void init_fs_stat(struct scrub_fs_stat *fs_stat)
+{
+	memset(fs_stat, 0, sizeof(struct scrub_fs_stat));
+	(*fs_stat).s.finished = 1;
+}
+
+/*
+ * Print the status line of a scrub: when it was started or resumed, and
+ * how long it ran together with its current state (running, aborted,
+ * finished or interrupted). Without usable stats a placeholder is printed.
+ */
+static void _print_scrub_ss(struct scrub_stats *ss)
+{
+	char stamp[4096];
+	struct tm parts;
+	time_t elapsed;
+	unsigned hours;
+	int resumed;
+
+	if (ss == NULL || ss->t_start == 0) {
+		printf("\tno stats available\n");
+		return;
+	}
+
+	/* A resume time, when present, takes precedence over the start. */
+	resumed = ss->t_resumed != 0;
+	localtime_r(resumed ? &ss->t_resumed : &ss->t_start, &parts);
+	strftime(stamp, sizeof(stamp), "%c", &parts);
+	stamp[sizeof(stamp) - 1] = '\0';
+	if (resumed)
+		printf("\tscrub resumed at %s", stamp);
+	else
+		printf("\tscrub started at %s", stamp);
+
+	/* Hours are computed by hand; strftime only supplies "MM:SS". */
+	elapsed = ss->duration;
+	hours = ss->duration / (60 * 60);
+	gmtime_r(&elapsed, &parts);
+	strftime(stamp, sizeof(stamp), "%M:%S", &parts);
+
+	if (ss->in_progress) {
+		printf(", running for %02u:%s\n", hours, stamp);
+		return;
+	}
+	if (ss->canceled) {
+		printf(" and was aborted after %02u:%s\n", hours, stamp);
+		return;
+	}
+	if (ss->finished) {
+		printf(" and finished after %02u:%s\n", hours, stamp);
+		return;
+	}
+	printf(", interrupted after %02u:%s, not running\n",
+	       hours, stamp);
+}
+
+/*
+ * Print the report for one device: a header naming the device (followed by
+ * @append if given), the status line for @ss and, when @p is not NULL, its
+ * counters in full (@raw set) or condensed form.
+ */
+static void print_scrub_dev(struct btrfs_ioctl_dev_info_args *di,
+			    struct btrfs_scrub_progress *p, int raw,
+			    const char *append, struct scrub_stats *ss)
+{
+	const char *suffix = append ? append : "";
+
+	printf("scrub device %s (id %llu) %s\n", di->path, di->devid,
+	       suffix);
+
+	_print_scrub_ss(ss);
+
+	if (!p)
+		return;
+
+	if (raw)
+		print_scrub_full(p);
+	else
+		print_scrub_summary(p);
+}
+
+/*
+ * Print the filesystem-wide report: status line first, then the summed
+ * counters in full (@raw set) or condensed form.
+ */
+static void print_fs_stat(struct scrub_fs_stat *fs_stat, int raw)
+{
+	struct btrfs_scrub_progress *totals = &fs_stat->p;
+
+	_print_scrub_ss(&fs_stat->s);
+
+	if (!raw) {
+		print_scrub_summary(totals);
+		return;
+	}
+	print_scrub_full(totals);
+}
+
+/*
+ * Free a NULL-terminated record array as returned by scrub_read_file(),
+ * including the records. NULL and ERR_PTR values are ignored.
+ */
+static void free_history(struct scrub_file_record **last_scrubs)
+{
+	struct scrub_file_record **cur;
+
+	if (last_scrubs == NULL || IS_ERR(last_scrubs))
+		return;
+
+	for (cur = last_scrubs; *cur != NULL; cur++)
+		free(*cur);
+	free(last_scrubs);
+}
+
+/*
+ * SIGINT handler: asks the kernel to cancel the scrub running on cancel_fd,
+ * so that the master process can record the progress made so far before
+ * exiting. cancel_fd is set by scrub_handle_sigint_child().
+ */
+static int cancel_fd = -1;
+static void scrub_sigint_record_progress(int signal)
+{
+	if (ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL) < 0)
+		perror("Scrub cancel failed");
+}
+
+/*
+ * Make the calling process ignore SIGINT (with SA_RESTART set).
+ * Returns the result of sigaction().
+ */
+static int scrub_handle_sigint_parent(void)
+{
+	struct sigaction ignore;
+
+	memset(&ignore, 0, sizeof(ignore));
+	ignore.sa_handler = SIG_IGN;
+	ignore.sa_flags = SA_RESTART;
+
+	return sigaction(SIGINT, &ignore, NULL);
+}
+
+/*
+ * Install the SIGINT disposition for the scrubbing process: with a valid
+ * @fd, SIGINT cancels the scrub on that descriptor; with @fd == -1 the
+ * default action is restored. Returns the result of sigaction().
+ */
+static int scrub_handle_sigint_child(int fd)
+{
+	struct sigaction act;
+
+	memset(&act, 0, sizeof(act));
+	if (fd == -1)
+		act.sa_handler = SIG_DFL;
+	else
+		act.sa_handler = scrub_sigint_record_progress;
+
+	/* The handler reads cancel_fd, so publish it before installing. */
+	cancel_fd = fd;
+	return sigaction(SIGINT, &act, NULL);
+}
+
+/*
+ * Build the name of a scrub status file in @datafile (capacity @size):
+ *   "<fn_base>.<fn_local>"           when @fn_tmp is NULL
+ *   "<fn_base>.<fn_local>_<fn_tmp>"  otherwise
+ *
+ * Returns 0 on success and -EOVERFLOW when the complete name, including
+ * its terminating NUL, does not fit. A name is never silently truncated:
+ * snprintf() reports the full length, which is compared against @size,
+ * instead of running strlen() over a partially initialized buffer.
+ */
+static int scrub_datafile(const char *fn_base, const char *fn_local,
+			  const char *fn_tmp, char *datafile, int size)
+{
+	int len;
+
+	if (size <= 0)
+		return -EOVERFLOW;
+
+	if (fn_tmp)
+		len = snprintf(datafile, size, "%s.%s_%s",
+			       fn_base, fn_local, fn_tmp);
+	else
+		len = snprintf(datafile, size, "%s.%s", fn_base, fn_local);
+
+	if (len < 0 || len >= size)
+		return -EOVERFLOW;
+
+	return 0;
+}
+
+/*
+ * Open @datafile with open(2) flags @m (mode 0600 if created) and take an
+ * exclusive, non-blocking flock on it.
+ *
+ * Returns the descriptor, or a negative errno if opening or locking fails;
+ * the descriptor is closed again when the lock cannot be taken.
+ */
+static int scrub_open_file(const char *datafile, int m)
+{
+	int lock_errno;
+	int fd = open(datafile, m, 0600);
+
+	if (fd < 0)
+		return -errno;
+
+	if (!flock(fd, LOCK_EX|LOCK_NB))
+		return fd;
+
+	/* close() may change errno, so remember the flock error first. */
+	lock_errno = errno;
+	close(fd);
+	return -lock_errno;
+}
+
+/*
+ * Open the status file "<fn_base>.<fn_local>" read-only and locked.
+ * Returns a descriptor or a negative errno.
+ */
+static int scrub_open_file_r(const char *fn_base, const char *fn_local)
+{
+	char name[PATH_MAX];
+	int err = scrub_datafile(fn_base, fn_local, NULL,
+				 name, sizeof(name));
+
+	return err < 0 ? err : scrub_open_file(name, O_RDONLY);
+}
+
+/*
+ * Open (creating it if needed) the status file "<fn_base>.<fn_local>_<tmp>"
+ * for writing, locked. Returns a descriptor or a negative errno.
+ */
+static int scrub_open_file_w(const char *fn_base, const char *fn_local,
+			     const char *tmp)
+{
+	char name[PATH_MAX];
+	int err = scrub_datafile(fn_base, fn_local, tmp,
+				 name, sizeof(name));
+
+	return err < 0 ? err : scrub_open_file(name, O_WRONLY|O_CREAT);
+}
+
+/*
+ * Rename the temporary status file "<fn_base>.<fn_local>_<tmp>" to its
+ * final name "<fn_base>.<fn_local>".
+ * Returns 0 on success or a negative errno.
+ */
+static int scrub_rename_file(const char *fn_base, const char *fn_local,
+			     const char *tmp)
+{
+	char from[PATH_MAX];
+	char to[PATH_MAX];
+	int ret;
+
+	ret = scrub_datafile(fn_base, fn_local, tmp, from, sizeof(from));
+	if (ret >= 0)
+		ret = scrub_datafile(fn_base, fn_local, NULL,
+				     to, sizeof(to));
+	if (ret < 0)
+		return ret;
+
+	if (rename(from, to))
+		return -errno;
+	return 0;
+}
+
+/*
+ * Try to read "<name>:<number>" into dest.name, but only while no earlier
+ * attempt in the same chain has matched or failed (ret == 0).
+ */
+#define _SCRUB_KVREAD(ret, i, name, avail, l, dest) if (ret == 0) { \
+	ret = scrub_kvread(i, sizeof(#name), avail, l, #name, dest.name); \
+}
+
+/*
+ * Parse "<key>:<digits>" at position *i of @buf, which holds @avail bytes.
+ * @len is sizeof(key), i.e. it includes the terminating NUL.
+ *
+ * The digits are accumulated as an unsigned 64-bit value, and every buffer
+ * access is bounds-checked before it is made. An empty digit run yields 0.
+ *
+ * returns 0 if the key did not match (nothing was read)
+ *         1 if the key did match (success); *dest is set and *i points
+ *           at the first byte after the number
+ *        -1 if the key did match and an error occurred
+ */
+static int scrub_kvread(int *i, int len, int avail, const char *buf,
+			const char *key, u64 *dest)
+{
+	u64 val = 0;
+	int j;
+
+	if (*i + len + 1 >= avail || strncmp(&buf[*i], key, len - 1) != 0)
+		return 0;
+
+	*i += len - 1;
+	if (buf[*i] != ':')
+		return -1;
+	*i += 1;
+
+	for (j = 0; *i + j < avail &&
+	     isdigit((unsigned char)buf[*i + j]); ++j)
+		val = val * 10 + (u64)(buf[*i + j] - '0');
+
+	/* The number must be followed by at least one more byte. */
+	if (*i + j >= avail)
+		return -1;
+
+	*dest = val;
+	*i += j;
+	return 1;
+}
+
+/*
+ * Report a parse error (if requested) and skip the rest of the line.
+ * Relies on the locals and the "skip" label of scrub_read_file().
+ */
+#define _SCRUB_INVALID do {						\
+	if (report_errors)						\
+		warning("invalid data on line %d pos "			\
+			"%d state %d (near \"%.*s\") at %s:%d",		\
+			lineno, i, state, 20 > avail ? avail : 20,	\
+			l + i,	__FILE__, __LINE__);			\
+	goto skip;							\
+} while (0)
+
+/*
+ * Parse a scrub status file from @fd.
+ *
+ * The file starts with a "<prefix>:<version>" line, followed by one line
+ * per device: "<fsid>:<devid>" and then "|<key>:<value>" pairs. Lines that
+ * cannot be parsed are skipped, with a warning if @report_errors is set.
+ *
+ * The input is processed in chunks by a small state machine:
+ *   0 file header, 1 start of line (allocate record), 2 skip white space,
+ *   3 fsid, 4 devid, 5 key/value pair, 6 separator after a number,
+ *   99 skip the rest of a broken line.
+ *
+ * Returns a NULL-terminated array of records to be released with
+ * free_history(), or ERR_PTR(-ENODATA) when no record line was started,
+ * ERR_PTR(-ENOTSUP) for an unknown file version, or ERR_PTR(-errno) when
+ * reading or allocating fails.
+ */
+static struct scrub_file_record **scrub_read_file(int fd, int report_errors)
+{
+	int avail = 0;
+	int old_avail = 0;
+	char l[16 * 1024];
+	int state = 0;
+	int curr = -1;
+	int i = 0;
+	int j;
+	int ret;
+	int eof = 0;
+	int lineno = 0;
+	u64 version;
+	char empty_uuid[BTRFS_FSID_SIZE] = {0};
+	struct scrub_file_record **p = NULL;
+
+again:
+	/* Move the unparsed remainder to the front and refill the buffer. */
+	old_avail = avail - i;
+	BUG_ON(old_avail < 0);
+	if (old_avail)
+		memmove(l, l + i, old_avail);
+	avail = read(fd, l + old_avail, sizeof(l) - old_avail);
+	if (avail == 0)
+		eof = 1;
+	if (avail == 0 && old_avail == 0) {
+		if (curr >= 0 &&
+		    memcmp(p[curr]->fsid, empty_uuid, BTRFS_FSID_SIZE) == 0) {
+			/*
+			 * The last record was allocated for a line that never
+			 * came; release it instead of just dropping the
+			 * pointer.
+			 */
+			free(p[curr]);
+			p[curr] = NULL;
+		} else if (curr == -1) {
+			p = ERR_PTR(-ENODATA);
+		}
+		return p;
+	}
+	if (avail == -1) {
+		free_history(p);
+		return ERR_PTR(-errno);
+	}
+	avail += old_avail;
+
+	i = 0;
+	while (i < avail) {
+		void *tmp;
+
+		switch (state) {
+		case 0: /* start of file */
+			ret = scrub_kvread(&i,
+				sizeof(SCRUB_FILE_VERSION_PREFIX), avail, l,
+				SCRUB_FILE_VERSION_PREFIX, &version);
+			if (ret != 1)
+				_SCRUB_INVALID;
+			if (version != atoll(SCRUB_FILE_VERSION))
+				return ERR_PTR(-ENOTSUP);
+			state = 6;
+			continue;
+		case 1: /* start of line, alloc */
+			/*
+			 * this state makes sure we have a complete line in
+			 * further processing, so we don't need wrap-tracking
+			 * everywhere.
+			 */
+			if (!eof && !memchr(l + i, '\n', avail - i))
+				goto again;
+			++lineno;
+			/* Reuse a record that a skipped line left empty. */
+			if (curr > -1 && memcmp(p[curr]->fsid, empty_uuid,
+						BTRFS_FSID_SIZE) == 0) {
+				state = 2;
+				continue;
+			}
+			++curr;
+			tmp = p;
+			p = realloc(p, (curr + 2) * sizeof(*p));
+			if (!p) {
+				free_history(tmp);
+				return ERR_PTR(-errno);
+			}
+			p[curr] = malloc(sizeof(**p));
+			if (!p[curr]) {
+				free_history(p);
+				return ERR_PTR(-errno);
+			}
+			memset(p[curr], 0, sizeof(**p));
+			p[curr + 1] = NULL;
+			++state;
+			/* fall through */
+		case 2: /* start of line, skip space */
+			/* bounds check first: l[avail] may not exist */
+			while (i < avail && isspace((unsigned char)l[i])) {
+				if (l[i] == '\n')
+					++lineno;
+				++i;
+			}
+			if (i >= avail ||
+			    (!eof && !memchr(l + i, '\n', avail - i)))
+				goto again;
+			++state;
+			/* fall through */
+		case 3: /* read fsid */
+			if (i == avail)
+				continue;
+			for (j = 0; i + j < avail && l[i + j] != ':'; ++j)
+				;
+			if (i + j + 1 >= avail)
+				_SCRUB_INVALID;
+			if (j != BTRFS_UUID_UNPARSED_SIZE - 1)
+				_SCRUB_INVALID;
+			l[i + j] = '\0';
+			ret = uuid_parse(l + i, p[curr]->fsid);
+			if (ret)
+				_SCRUB_INVALID;
+			i += j + 1;
+			++state;
+			/* fall through */
+		case 4: /* read dev id */
+			for (j = 0; i + j < avail &&
+			     isdigit((unsigned char)l[i + j]); ++j)
+				;
+			if (j == 0 || i + j + 1 >= avail)
+				_SCRUB_INVALID;
+			p[curr]->devid = atoll(&l[i]);
+			i += j + 1;
+			++state;
+			/* fall through */
+		case 5: /* read key/value pair */
+			/* The first matching key wins; the rest are skipped. */
+			ret = 0;
+			_SCRUB_KVREAD(ret, &i, data_extents_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, tree_extents_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, data_bytes_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, tree_bytes_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, read_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, csum_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, verify_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, no_csum, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, csum_discards, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, super_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, malloc_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, uncorrectable_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, corrected_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, last_physical, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, finished, avail, l,
+					&p[curr]->stats);
+			_SCRUB_KVREAD(ret, &i, t_start, avail, l,
+					(u64 *)&p[curr]->stats);
+			_SCRUB_KVREAD(ret, &i, t_resumed, avail, l,
+					(u64 *)&p[curr]->stats);
+			_SCRUB_KVREAD(ret, &i, duration, avail, l,
+					(u64 *)&p[curr]->stats);
+			_SCRUB_KVREAD(ret, &i, canceled, avail, l,
+					&p[curr]->stats);
+			if (ret != 1)
+				_SCRUB_INVALID;
+			++state;
+			/* fall through */
+		case 6: /* after number */
+			if (l[i] == '|')
+				state = 5;
+			else if (l[i] == '\n')
+				state = 1;
+			else
+				_SCRUB_INVALID;
+			++i;
+			continue;
+		case 99: /* skip rest of line */
+skip:
+			state = 99;
+			do {
+				++i;
+				if (l[i - 1] == '\n') {
+					state = 1;
+					break;
+				}
+			} while (i < avail);
+			continue;
+		}
+		BUG();
+	}
+	goto again;
+}
+
+/*
+ * Write all @len bytes of @data to @fd.
+ *
+ * write(2) may transfer fewer bytes than requested or be interrupted by a
+ * signal; both cases are retried rather than reported as errors.
+ *
+ * Returns 0 once everything has been written and a non-zero (negative)
+ * value on failure, including a negative @len.
+ */
+static int scrub_write_buf(int fd, const void *data, int len)
+{
+	const char *pos = data;
+	int left = len;
+
+	if (len < 0)
+		return -1;
+
+	while (left > 0) {
+		ssize_t n = write(fd, pos, left);
+
+		if (n < 0) {
+			if (errno == EINTR)
+				continue;
+			return -1;
+		}
+		/* No progress without an error: give up instead of spinning. */
+		if (n == 0)
+			return -1;
+		pos += n;
+		left -= n;
+	}
+
+	return 0;
+}
+
+static int scrub_writev(int fd, char *buf, int max, const char *fmt, ...)
+	__attribute__ ((format (printf, 4, 5)));
+/*
+ * Format @fmt and its arguments into @buf (capacity @max) and write the
+ * result to @fd.
+ *
+ * Returns 0 on success and non-zero on failure. Output that does not fit
+ * is a failure and nothing is written: vsnprintf() returning exactly @max
+ * already means the last character was cut off, so that case must not be
+ * reported as success. A formatting error is a failure as well.
+ */
+static int scrub_writev(int fd, char *buf, int max, const char *fmt, ...)
+{
+	int ret;
+	va_list args;
+
+	va_start(args, fmt);
+	ret = vsnprintf(buf, max, fmt, args);
+	va_end(args);
+	if (ret < 0 || ret >= max)
+		return -EOVERFLOW;
+	return scrub_write_buf(fd, buf, ret);
+}
+
+/* dest counter = counter of the earlier run + counter of the current run */
+#define _SCRUB_SUM(dest, data, name) dest->scrub_args.progress.name =	\
+			data->resumed->p.name + data->scrub_args.progress.name
+
+/*
+ * Combine the progress of the current run in @data with the record of the
+ * run it resumed.
+ *
+ * If @data has no resumed record, or is marked to be skipped, @data itself
+ * is returned and @dest is left untouched. Otherwise the summed counters,
+ * the merged stats and the devid are stored in @dest and @dest is returned.
+ * No other member of @dest is written.
+ */
+static struct scrub_progress *scrub_resumed_stats(struct scrub_progress *data,
+						  struct scrub_progress *dest)
+{
+	if (!data->resumed || data->skip)
+		return data;
+
+	_SCRUB_SUM(dest, data, data_extents_scrubbed);
+	_SCRUB_SUM(dest, data, tree_extents_scrubbed);
+	_SCRUB_SUM(dest, data, data_bytes_scrubbed);
+	_SCRUB_SUM(dest, data, tree_bytes_scrubbed);
+	_SCRUB_SUM(dest, data, read_errors);
+	_SCRUB_SUM(dest, data, csum_errors);
+	_SCRUB_SUM(dest, data, verify_errors);
+	_SCRUB_SUM(dest, data, no_csum);
+	_SCRUB_SUM(dest, data, csum_discards);
+	_SCRUB_SUM(dest, data, super_errors);
+	_SCRUB_SUM(dest, data, malloc_errors);
+	_SCRUB_SUM(dest, data, uncorrectable_errors);
+	/*
+	 * Both report functions print this counter, so it must not be left
+	 * at whatever value @dest happened to contain.
+	 */
+	_SCRUB_SUM(dest, data, unverified_errors);
+	_SCRUB_SUM(dest, data, corrected_errors);
+	_SCRUB_SUM(dest, data, last_physical);
+	dest->stats.canceled = data->stats.canceled;
+	dest->stats.finished = data->stats.finished;
+	/* The current run's start is the resume time of the combined run. */
+	dest->stats.t_resumed = data->stats.t_start;
+	dest->stats.t_start = data->resumed->stats.t_start;
+	dest->stats.duration = data->resumed->stats.duration +
+				data->stats.duration;
+	dest->scrub_args.devid = data->scrub_args.devid;
+	return dest;
+}
+
+/* Write "|<name>:<value>" for a counter of use->scrub_args.progress. */
+#define _SCRUB_KVWRITE(fd, buf, name, use)		\
+	scrub_kvwrite(fd, buf, sizeof(buf), #name,	\
+			use->scrub_args.progress.name)
+
+/* Write "|<name>:<value>" for a member of use->stats. */
+#define _SCRUB_KVWRITE_STATS(fd, buf, name, use)	\
+	scrub_kvwrite(fd, buf, sizeof(buf), #name,	\
+			use->stats.name)
+
+/*
+ * Write one "|<key>:<val>" pair to @fd, using @buf (capacity @max) as
+ * scratch space. Returns 0 on success, non-zero on failure.
+ *
+ * The value is printed unsigned: a leading '-' would be rejected by
+ * scrub_kvread(), which accepts digits only.
+ */
+static int scrub_kvwrite(int fd, char *buf, int max, const char *key, u64 val)
+{
+	return scrub_writev(fd, buf, max, "|%s:%llu", key,
+			    (unsigned long long)val);
+}
+
+/*
+ * Write a complete status file to @fd: the version line, then for each of
+ * the @n entries of @data one line "<fsid>:<devid>|key:value|...".
+ * Entries that resumed an earlier run are written with the combined
+ * values computed by scrub_resumed_stats().
+ *
+ * Returns 0 on success, -EINVAL if @n < 1 and -EOVERFLOW if any part
+ * could not be written.
+ */
+static int scrub_write_file(int fd, const char *fsid,
+			    struct scrub_progress *data, int n)
+{
+	int ret = 0;
+	int i;
+	char buf[1024];
+	struct scrub_progress local;
+	struct scrub_progress *use;
+
+	if (n < 1)
+		return -EINVAL;
+
+	/* each -1 is to subtract one \0 byte, the + 2 is for ':' and '\n' */
+	ret = scrub_write_buf(fd, SCRUB_FILE_VERSION_PREFIX ":"
+				SCRUB_FILE_VERSION "\n",
+				(sizeof(SCRUB_FILE_VERSION_PREFIX) - 1) +
+				(sizeof(SCRUB_FILE_VERSION) - 1) + 2);
+	if (ret)
+		return -EOVERFLOW;
+
+	for (i = 0; i < n; ++i) {
+		use = scrub_resumed_stats(&data[i], &local);
+		/*
+		 * scrub_writev() formats and writes the devid in one step;
+		 * no separate write of buf is needed afterwards.
+		 */
+		if (scrub_write_buf(fd, fsid, strlen(fsid)) ||
+		    scrub_write_buf(fd, ":", 1) ||
+		    scrub_writev(fd, buf, sizeof(buf), "%llu",
+				 (unsigned long long)use->scrub_args.devid) ||
+		    _SCRUB_KVWRITE(fd, buf, data_extents_scrubbed, use) ||
+		    _SCRUB_KVWRITE(fd, buf, tree_extents_scrubbed, use) ||
+		    _SCRUB_KVWRITE(fd, buf, data_bytes_scrubbed, use) ||
+		    _SCRUB_KVWRITE(fd, buf, tree_bytes_scrubbed, use) ||
+		    _SCRUB_KVWRITE(fd, buf, read_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, csum_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, verify_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, no_csum, use) ||
+		    _SCRUB_KVWRITE(fd, buf, csum_discards, use) ||
+		    _SCRUB_KVWRITE(fd, buf, super_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, malloc_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, uncorrectable_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, corrected_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, last_physical, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, t_start, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, t_resumed, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, duration, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, canceled, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, finished, use) ||
+		    scrub_write_buf(fd, "\n", 1)) {
+			return -EOVERFLOW;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Store the progress of the @n devices in @data in the status file of the
+ * filesystem @fsid.
+ *
+ * The data is written to a temporary file which is then renamed over the
+ * final name. The whole operation runs with thread cancellation disabled
+ * and with @m held. The caller's cancel state is restored afterwards
+ * rather than being forced to "enabled".
+ *
+ * Returns 0 on success or a negative error code. The first error wins; a
+ * later failure while cleaning up does not replace it.
+ */
+static int scrub_write_progress(pthread_mutex_t *m, const char *fsid,
+				struct scrub_progress *data, int n)
+{
+	int ret;
+	int err;
+	int fd;
+	int old;
+	int unused;
+
+	ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old);
+	if (ret)
+		return -ret;
+
+	ret = pthread_mutex_lock(m);
+	if (ret) {
+		err = -ret;
+		goto out_cancel;
+	}
+
+	fd = scrub_open_file_w(SCRUB_DATA_FILE, fsid, "tmp");
+	if (fd < 0) {
+		err = fd;
+		goto out_unlock;
+	}
+
+	err = scrub_write_file(fd, fsid, data, n);
+	if (!err)
+		err = scrub_rename_file(SCRUB_DATA_FILE, fsid, "tmp");
+
+	if (close(fd) && !err)
+		err = -errno;
+
+out_unlock:
+	ret = pthread_mutex_unlock(m);
+	if (ret && !err)
+		err = -ret;
+
+out_cancel:
+	ret = pthread_setcancelstate(old, &unused);
+	if (ret && !err)
+		err = -ret;
+
+	return err;
+}
+
+/*
+ * Thread body: scrub a single device.
+ *
+ * @ctx is the struct scrub_progress of the device.  The function sets the
+ * I/O priority requested in @ctx (a failure is only warned about), issues
+ * BTRFS_IOC_SCRUB and records the ioctl result, its errno, the elapsed
+ * time and the canceled flag in @ctx.  The finished flag is set last,
+ * under progress_mutex.
+ *
+ * Returns NULL on success or ERR_PTR(-error) if the mutex could not be
+ * locked or unlocked.  An ioctl failure is reported through sp->ret and
+ * sp->ioctl_errno, not through the return value.
+ */
+static void *scrub_one_dev(void *ctx)
+{
+	struct scrub_progress *sp = ctx;
+	int ret;
+	int ioctl_errno;
+	struct timeval tv;
+
+	sp->stats.canceled = 0;
+	sp->stats.duration = 0;
+	sp->stats.finished = 0;
+
+	ret = syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0,
+		      IOPRIO_PRIO_VALUE(sp->ioprio_class,
+					sp->ioprio_classdata));
+	if (ret)
+		warning("setting ioprio failed: %s (ignored)",
+			strerror(errno));
+
+	ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args);
+	/* capture errno before any other library call can change it */
+	ioctl_errno = errno;
+	gettimeofday(&tv, NULL);
+	sp->ret = ret;
+	sp->stats.duration = tv.tv_sec - sp->stats.t_start;
+	sp->stats.canceled = !!ret;
+	sp->ioctl_errno = ioctl_errno;
+	ret = pthread_mutex_lock(&sp->progress_mutex);
+	if (ret)
+		return ERR_PTR(-ret);
+	sp->stats.finished = 1;
+	ret = pthread_mutex_unlock(&sp->progress_mutex);
+	if (ret)
+		return ERR_PTR(-ret);
+
+	return NULL;
+}
+
+/*
+ * Fetch the current scrub progress of one device.
+ *
+ * @ctx is a struct scrub_progress; the result of BTRFS_IOC_SCRUB_PROGRESS
+ * is stored in its ret member and the errno value seen right after the
+ * ioctl in ioctl_errno.  Always returns NULL.
+ */
+static void *progress_one_dev(void *ctx)
+{
+	struct scrub_progress *dev = ctx;
+	int rc;
+
+	rc = ioctl(dev->fd, BTRFS_IOC_SCRUB_PROGRESS, &dev->scrub_args);
+	dev->ret = rc;
+	dev->ioctl_errno = errno;
+
+	return NULL;
+}
+
+/*
+ * Progress thread body.  @ctx is a struct scrub_progress_cycle.
+ *
+ * spc->progress holds 2 * ndev entries that are used as two alternating
+ * snapshots ("this" and "last"); both are seeded from spc->shared_progress.
+ * The loop then repeats:
+ *  - wait up to 5 seconds for a connection on spc->prg_fd and accept it,
+ *  - query BTRFS_IOC_SCRUB_PROGRESS for every device not yet finished,
+ *  - if a peer connected and is writable, send it the current snapshot
+ *    with scrub_write_file() and close the connection,
+ *  - if spc->do_record is set, store the snapshot with
+ *    scrub_write_progress().
+ *
+ * The loop has no regular exit: it is only left through "goto out" on an
+ * error (scrub_start() ends the thread with pthread_cancel()).
+ *
+ * nb: returns a negative errno via ERR_PTR
+ */
+static void *scrub_progress_cycle(void *ctx)
+{
+ int ret = 0;
+ int perr = 0; /* positive / pthread error returns */
+ int old;
+ int i;
+ char fsid[BTRFS_UUID_UNPARSED_SIZE];
+ struct scrub_progress *sp;
+ struct scrub_progress *sp_last;
+ struct scrub_progress *sp_shared;
+ struct timeval tv;
+ struct scrub_progress_cycle *spc = ctx;
+ int ndev = spc->fi->num_devices;
+ /* indices (0 or 1) of the snapshot being filled and the previous one */
+ int this = 1;
+ int last = 0;
+ int peer_fd = -1;
+ struct pollfd accept_poll_fd = {
+ .fd = spc->prg_fd,
+ .events = POLLIN,
+ .revents = 0,
+ };
+ struct pollfd write_poll_fd = {
+ .events = POLLOUT,
+ .revents = 0,
+ };
+ struct sockaddr_un peer;
+ socklen_t peer_size = sizeof(peer);
+
+ perr = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ if (perr)
+ goto out;
+
+ uuid_unparse(spc->fi->fsid, fsid);
+
+ /* seed both snapshots of every device from the shared state */
+ for (i = 0; i < ndev; ++i) {
+ sp = &spc->progress[i];
+ sp_last = &spc->progress[i + ndev];
+ sp_shared = &spc->shared_progress[i];
+ sp->scrub_args.devid = sp_last->scrub_args.devid =
+ sp_shared->scrub_args.devid;
+ sp->fd = sp_last->fd = spc->fdmnt;
+ sp->stats.t_start = sp_last->stats.t_start =
+ sp_shared->stats.t_start;
+ sp->resumed = sp_last->resumed = sp_shared->resumed;
+ sp->skip = sp_last->skip = sp_shared->skip;
+ sp->stats.finished = sp_last->stats.finished =
+ sp_shared->stats.finished;
+ }
+
+ while (1) {
+ /* wake up on an incoming connection or after 5 seconds */
+ ret = poll(&accept_poll_fd, 1, 5 * 1000);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+ /*
+ * NOTE(review): peer_size is not reset to sizeof(peer) before
+ * each accept() although accept() may modify it - confirm
+ * whether that matters here, the peer address is never used.
+ */
+ if (ret)
+ peer_fd = accept(spc->prg_fd, (struct sockaddr *)&peer,
+ &peer_size);
+ gettimeofday(&tv, NULL);
+ /* swap the roles of the two snapshots */
+ this = (this + 1)%2;
+ last = (last + 1)%2;
+ for (i = 0; i < ndev; ++i) {
+ sp = &spc->progress[this * ndev + i];
+ sp_last = &spc->progress[last * ndev + i];
+ sp_shared = &spc->shared_progress[i];
+ if (sp->stats.finished)
+ continue;
+ progress_one_dev(sp);
+ sp->stats.duration = tv.tv_sec - sp->stats.t_start;
+ if (!sp->ret)
+ continue;
+ /* any error other than ENOTCONN/ENODEV ends the thread */
+ if (sp->ioctl_errno != ENOTCONN &&
+ sp->ioctl_errno != ENODEV) {
+ ret = -sp->ioctl_errno;
+ goto out;
+ }
+ /*
+ * scrub finished or device removed, check the
+ * finished flag. if unset, just use the last
+ * result we got for the current write and go
+ * on. flag should be set on next cycle, then.
+ */
+ /* no cancellation while progress_mutex is held */
+ perr = pthread_setcancelstate(
+ PTHREAD_CANCEL_DISABLE, &old);
+ if (perr)
+ goto out;
+ perr = pthread_mutex_lock(&sp_shared->progress_mutex);
+ if (perr)
+ goto out;
+ if (!sp_shared->stats.finished) {
+ perr = pthread_mutex_unlock(
+ &sp_shared->progress_mutex);
+ if (perr)
+ goto out;
+ perr = pthread_setcancelstate(
+ PTHREAD_CANCEL_ENABLE, &old);
+ if (perr)
+ goto out;
+ memcpy(sp, sp_last, sizeof(*sp));
+ continue;
+ }
+ perr = pthread_mutex_unlock(&sp_shared->progress_mutex);
+ if (perr)
+ goto out;
+ perr = pthread_setcancelstate(
+ PTHREAD_CANCEL_ENABLE, &old);
+ if (perr)
+ goto out;
+ /* finished: take over the final result in both snapshots */
+ memcpy(sp, sp_shared, sizeof(*sp));
+ memcpy(sp_last, sp_shared, sizeof(*sp));
+ }
+ if (peer_fd != -1) {
+ /* zero timeout: only answer a peer that is writable right now */
+ write_poll_fd.fd = peer_fd;
+ ret = poll(&write_poll_fd, 1, 0);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+ if (ret) {
+ ret = scrub_write_file(
+ peer_fd, fsid,
+ &spc->progress[this * ndev], ndev);
+ if (ret)
+ goto out;
+ }
+ close(peer_fd);
+ peer_fd = -1;
+ }
+ if (!spc->do_record)
+ continue;
+ ret = scrub_write_progress(spc->write_mutex, fsid,
+ &spc->progress[this * ndev], ndev);
+ if (ret)
+ goto out;
+ }
+out:
+ if (peer_fd != -1)
+ close(peer_fd);
+ /* a pthread error (positive) takes precedence over ret */
+ if (perr)
+ ret = -perr;
+ return ERR_PTR(ret);
+}
+
+/*
+ * Find the record of device @devid in @past_scrubs, an array of record
+ * pointers terminated by a NULL entry.
+ *
+ * Returns the first record whose devid matches, or NULL if there is none
+ * or if @past_scrubs is NULL or an error pointer.
+ */
+static struct scrub_file_record *last_dev_scrub(
+		struct scrub_file_record *const *const past_scrubs, u64 devid)
+{
+	struct scrub_file_record *const *cur;
+
+	if (!past_scrubs || IS_ERR(past_scrubs))
+		return NULL;
+
+	for (cur = past_scrubs; *cur; ++cur) {
+		if ((*cur)->devid == devid)
+			return *cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create every directory named by a '/'-terminated prefix of @path
+ * (mode 0777), ignoring directories that already exist.  The component
+ * after the last '/' is not created.
+ *
+ * @path is modified temporarily while walking it.  On failure it is left
+ * cut off right after the directory that could not be created and -errno
+ * is returned; on success it is restored completely and 0 is returned.
+ */
+static int mkdir_p(char *path)
+{
+	/* the length is the same at every loop test, compute it once */
+	const size_t len = strlen(path);
+	size_t i;
+
+	/* start at 1: a leading '/' names the root, nothing to create */
+	for (i = 1; i < len; ++i) {
+		if (path[i] != '/')
+			continue;
+		path[i] = '\0';
+		if (mkdir(path, 0777) && errno != EEXIST)
+			return -errno;
+		path[i] = '/';
+	}
+
+	return 0;
+}
+
+/*
+ * Check the recorded scrub history for a scrub that looks unfinished.
+ *
+ * Returns 1 if, for any of the fi_args->num_devices devices in @di_args,
+ * the record found in @past_scrubs is neither finished nor canceled.
+ * Returns 0 otherwise, including when any argument is NULL.
+ */
+static int is_scrub_running_on_fs(struct btrfs_ioctl_fs_info_args *fi_args,
+				  struct btrfs_ioctl_dev_info_args *di_args,
+				  struct scrub_file_record **past_scrubs)
+{
+	int idx;
+
+	if (!(fi_args && di_args && past_scrubs))
+		return 0;
+
+	for (idx = 0; idx < fi_args->num_devices; idx++) {
+		const struct scrub_file_record *rec;
+
+		rec = last_dev_scrub(past_scrubs, di_args[idx].devid);
+		if (rec && !rec->stats.finished && !rec->stats.canceled)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Ask the kernel whether a scrub is running.
+ *
+ * Issues BTRFS_IOC_SCRUB_PROGRESS on @fd for the first @max_devices
+ * entries of @di_args and returns 1 as soon as one of the ioctls succeeds,
+ * 0 if none does.
+ */
+static int is_scrub_running_in_kernel(int fd,
+		struct btrfs_ioctl_dev_info_args *di_args, u64 max_devices)
+{
+	struct scrub_progress probe;
+	int idx;
+
+	for (idx = 0; idx < max_devices; idx++) {
+		memset(&probe, 0, sizeof(probe));
+		probe.scrub_args.devid = di_args[idx].devid;
+		if (ioctl(fd, BTRFS_IOC_SCRUB_PROGRESS,
+			  &probe.scrub_args) == 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Forward declarations: scrub_start() below refers to both usage tables,
+ * which are defined further down next to their commands.
+ */
+static const char * const cmd_scrub_start_usage[];
+static const char * const cmd_scrub_resume_usage[];
+
+/*
+ * Common implementation of "btrfs scrub start" (@resume == 0) and
+ * "btrfs scrub resume" (@resume != 0).
+ *
+ * Parses the options in @argv, collects the devices of the filesystem,
+ * reads the previous status file, sets up the progress socket and the
+ * status file, optionally forks into the background, and then runs one
+ * scrub_one_dev() thread per device plus one scrub_progress_cycle() thread.
+ *
+ * Returns:
+ *   0  success (also returned by the parent after forking when it does
+ *      not wait, and when only corrected errors were found)
+ *   1  failure
+ *   2  resume was requested but there was nothing to resume
+ *   3  uncorrectable errors were found
+ */
+static int scrub_start(int argc, char **argv, int resume)
+{
+ int fdmnt;
+ int prg_fd = -1;
+ int fdres = -1;
+ int ret;
+ pid_t pid;
+ int c;
+ int i;
+ int err = 0;
+ int e_uncorrectable = 0;
+ int e_correctable = 0;
+ int print_raw = 0;
+ char *path;
+ int do_background = 1;
+ int do_wait = 0;
+ int do_print = 0;
+ int do_quiet = 0;
+ int do_record = 1;
+ int readonly = 0;
+ int do_stats_per_dev = 0;
+ int ioprio_class = IOPRIO_CLASS_IDLE;
+ int ioprio_classdata = 0;
+ int n_start = 0;
+ int n_skip = 0;
+ int n_resume = 0;
+ struct btrfs_ioctl_fs_info_args fi_args;
+ struct btrfs_ioctl_dev_info_args *di_args = NULL;
+ struct scrub_progress *sp = NULL;
+ struct scrub_fs_stat fs_stat;
+ struct timeval tv;
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ pthread_t *t_devs = NULL;
+ pthread_t t_prog;
+ struct scrub_file_record **past_scrubs = NULL;
+ struct scrub_file_record *last_scrub = NULL;
+ char *datafile = strdup(SCRUB_DATA_FILE);
+ char fsid[BTRFS_UUID_UNPARSED_SIZE];
+ char sock_path[PATH_MAX] = "";
+ struct scrub_progress_cycle spc;
+ pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER;
+ /*
+ * Must start out as NULL: pthread_join() below is skipped when
+ * pthread_cancel() fails, but terr is examined afterwards anyway.
+ */
+ void *terr = NULL;
+ u64 devid;
+ DIR *dirstream = NULL;
+ int force = 0;
+ int nothing_to_resume = 0;
+
+ optind = 1;
+ while ((c = getopt(argc, argv, "BdqrRc:n:f")) != -1) {
+ switch (c) {
+ case 'B':
+ do_background = 0;
+ do_wait = 1;
+ do_print = 1;
+ break;
+ case 'd':
+ do_stats_per_dev = 1;
+ break;
+ case 'q':
+ do_quiet = 1;
+ break;
+ case 'r':
+ readonly = 1;
+ break;
+ case 'R':
+ print_raw = 1;
+ break;
+ case 'c':
+ ioprio_class = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'n':
+ ioprio_classdata = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'f':
+ force = 1;
+ break;
+ case '?':
+ default:
+ usage(resume ? cmd_scrub_resume_usage :
+ cmd_scrub_start_usage);
+ }
+ }
+
+ /* try to catch most error cases before forking */
+
+ if (check_argc_exact(argc - optind, 1)) {
+ usage(resume ? cmd_scrub_resume_usage :
+ cmd_scrub_start_usage);
+ }
+
+ spc.progress = NULL;
+ if (do_quiet && do_print)
+ do_print = 0;
+
+ /*
+ * mkdir_p() leaves datafile cut off at the directory it failed on,
+ * so the warning names that directory.
+ */
+ if (mkdir_p(datafile)) {
+ warning_on(!do_quiet,
+ "cannot create scrub data file, mkdir %s failed: %s. Status recording disabled",
+ datafile, strerror(errno));
+ do_record = 0;
+ }
+ free(datafile);
+
+ path = argv[optind];
+
+ fdmnt = open_path_or_dev_mnt(path, &dirstream, !do_quiet);
+ if (fdmnt < 0)
+ return 1;
+
+ ret = get_fs_info(path, &fi_args, &di_args);
+ if (ret) {
+ error_on(!do_quiet,
+ "getting dev info for scrub failed: %s",
+ strerror(-ret));
+ err = 1;
+ goto out;
+ }
+ if (!fi_args.num_devices) {
+ error_on(!do_quiet, "no devices found");
+ err = 1;
+ goto out;
+ }
+
+ /* load the history of previous scrubs, a missing file is fine */
+ uuid_unparse(fi_args.fsid, fsid);
+ fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
+ if (fdres < 0 && fdres != -ENOENT) {
+ warning_on(!do_quiet, "failed to open status file: %s",
+ strerror(-fdres));
+ } else if (fdres >= 0) {
+ past_scrubs = scrub_read_file(fdres, !do_quiet);
+ if (IS_ERR(past_scrubs))
+ warning_on(!do_quiet, "failed to read status file: %s",
+ strerror(-PTR_ERR(past_scrubs)));
+ close(fdres);
+ }
+
+ /*
+ * Check for stale information in the status file, ie. if it's
+ * canceled=0, finished=0 but no scrub is running.
+ */
+ if (!is_scrub_running_in_kernel(fdmnt, di_args, fi_args.num_devices))
+ force = 1;
+
+ /*
+ * check whether any involved device is already busy running a
+ * scrub. This would cause damaged status messages and the state
+ * "aborted" without the explanation that a scrub was already
+ * running. Therefore check it first, prevent it and give some
+ * feedback to the user if scrub is already running.
+ * Note that if scrub is started with a block device as the
+ * parameter, only that particular block device is checked. It
+ * is a normal mode of operation to start scrub on multiple
+ * single devices, there is no reason to prevent this.
+ */
+ if (!force && is_scrub_running_on_fs(&fi_args, di_args, past_scrubs)) {
+ error_on(!do_quiet,
+ "Scrub is already running.\n"
+ "To cancel use 'btrfs scrub cancel %s'.\n"
+ "To see the status use 'btrfs scrub status [-d] %s'",
+ path, path);
+ err = 1;
+ goto out;
+ }
+
+ /* spc.progress needs two entries per device, see scrub_progress_cycle() */
+ t_devs = malloc(fi_args.num_devices * sizeof(*t_devs));
+ sp = calloc(fi_args.num_devices, sizeof(*sp));
+ spc.progress = calloc(fi_args.num_devices * 2, sizeof(*spc.progress));
+
+ if (!t_devs || !sp || !spc.progress) {
+ error_on(!do_quiet, "scrub failed: %s", strerror(errno));
+ err = 1;
+ goto out;
+ }
+
+ /* decide per device whether to start, resume or skip it */
+ for (i = 0; i < fi_args.num_devices; ++i) {
+ devid = di_args[i].devid;
+ ret = pthread_mutex_init(&sp[i].progress_mutex, NULL);
+ if (ret) {
+ error_on(!do_quiet, "pthread_mutex_init failed: %s",
+ strerror(ret));
+ err = 1;
+ goto out;
+ }
+ last_scrub = last_dev_scrub(past_scrubs, devid);
+ sp[i].scrub_args.devid = devid;
+ sp[i].fd = fdmnt;
+ if (resume && last_scrub && (last_scrub->stats.canceled ||
+ !last_scrub->stats.finished)) {
+ ++n_resume;
+ sp[i].scrub_args.start = last_scrub->p.last_physical;
+ sp[i].resumed = last_scrub;
+ } else if (resume) {
+ ++n_skip;
+ sp[i].skip = 1;
+ sp[i].resumed = last_scrub;
+ continue;
+ } else {
+ ++n_start;
+ sp[i].scrub_args.start = 0ll;
+ sp[i].resumed = NULL;
+ }
+ sp[i].skip = 0;
+ sp[i].scrub_args.end = (u64)-1ll;
+ sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0;
+ sp[i].ioprio_class = ioprio_class;
+ sp[i].ioprio_classdata = ioprio_classdata;
+ }
+
+ if (!n_start && !n_resume) {
+ if (!do_quiet)
+ printf("scrub: nothing to resume for %s, fsid %s\n",
+ path, fsid);
+ nothing_to_resume = 1;
+ goto out;
+ }
+
+ /* set up the unix socket that "scrub status" connects to */
+ ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ while (ret != -1) {
+ ret = scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, NULL,
+ sock_path, sizeof(sock_path));
+ /* ignore EOVERFLOW, try using a shorter path for the socket */
+ addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
+ strncpy(addr.sun_path, sock_path, sizeof(addr.sun_path) - 1);
+ ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (ret != -1 || errno != EADDRINUSE)
+ break;
+ /*
+ * bind failed with EADDRINUSE. so let's see if anyone answers
+ * when we make a call to the socket ...
+ */
+ ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (!ret || errno != ECONNREFUSED) {
+ /* ... yes, so scrub must be running. error out */
+ /*
+ * NOTE(review): err is not set on this path, so the
+ * function still returns 0 after printing the error -
+ * confirm whether that is intended.
+ */
+ error("scrub already running");
+ close(prg_fd);
+ prg_fd = -1;
+ goto out;
+ }
+ /*
+ * ... no, this means someone left us alone with an unused
+ * socket in the file system. remove it and try again.
+ */
+ ret = unlink(sock_path);
+ }
+ if (ret != -1)
+ ret = listen(prg_fd, 100);
+ if (ret == -1) {
+ warning_on(!do_quiet,
+ "failed to open the progress status socket at %s: %s. Progress cannot be queried",
+ sock_path[0] ? sock_path :
+ SCRUB_PROGRESS_SOCKET_PATH, strerror(errno));
+ if (prg_fd != -1) {
+ close(prg_fd);
+ prg_fd = -1;
+ if (sock_path[0])
+ unlink(sock_path);
+ }
+ }
+
+ if (do_record) {
+ /* write all-zero progress file for a start */
+ ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
+ fi_args.num_devices);
+ if (ret) {
+ warning_on(!do_quiet,
+ "failed to write the progress status file: %s. Status recording disabled",
+ strerror(-ret));
+ do_record = 0;
+ }
+ }
+
+ if (do_background) {
+ pid = fork();
+ if (pid == -1) {
+ error_on(!do_quiet, "cannot scrub, fork failed: %s",
+ strerror(errno));
+ err = 1;
+ goto out;
+ }
+
+ /* parent: report the child and either return or wait for it */
+ if (pid) {
+ int stat;
+ scrub_handle_sigint_parent();
+ if (!do_quiet)
+ printf("scrub %s on %s, fsid %s (pid=%d)\n",
+ n_start ? "started" : "resumed",
+ path, fsid, pid);
+ if (!do_wait) {
+ err = 0;
+ goto out;
+ }
+ ret = wait(&stat);
+ if (ret != pid) {
+ error_on(!do_quiet, "wait failed (ret=%d): %s",
+ ret, strerror(errno));
+ err = 1;
+ goto out;
+ }
+ if (!WIFEXITED(stat) || WEXITSTATUS(stat)) {
+ error_on(!do_quiet, "scrub process failed");
+ err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1;
+ goto out;
+ }
+ err = 0;
+ goto out;
+ }
+ }
+
+ /* from here on: the child, or the only process when not forking */
+ scrub_handle_sigint_child(fdmnt);
+
+ for (i = 0; i < fi_args.num_devices; ++i) {
+ if (sp[i].skip) {
+ /* skipped device: report its recorded result as finished */
+ sp[i].scrub_args.progress = sp[i].resumed->p;
+ sp[i].stats = sp[i].resumed->stats;
+ sp[i].ret = 0;
+ sp[i].stats.finished = 1;
+ continue;
+ }
+ devid = di_args[i].devid;
+ gettimeofday(&tv, NULL);
+ sp[i].stats.t_start = tv.tv_sec;
+ ret = pthread_create(&t_devs[i], NULL,
+ scrub_one_dev, &sp[i]);
+ if (ret) {
+ if (do_print)
+ error("creating scrub_one_dev[%llu] thread failed: %s",
+ devid, strerror(ret));
+ err = 1;
+ goto out;
+ }
+ }
+
+ spc.fdmnt = fdmnt;
+ spc.prg_fd = prg_fd;
+ spc.do_record = do_record;
+ spc.write_mutex = &spc_write_mutex;
+ spc.shared_progress = sp;
+ spc.fi = &fi_args;
+ ret = pthread_create(&t_prog, NULL, scrub_progress_cycle, &spc);
+ if (ret) {
+ if (do_print)
+ error("creating progress thread failed: %s",
+ strerror(ret));
+ err = 1;
+ goto out;
+ }
+
+ /* wait for all device threads and count failures and error classes */
+ err = 0;
+ for (i = 0; i < fi_args.num_devices; ++i) {
+ if (sp[i].skip)
+ continue;
+ devid = di_args[i].devid;
+ ret = pthread_join(t_devs[i], NULL);
+ if (ret) {
+ if (do_print)
+ error("pthread_join failed for scrub_one_dev[%llu]: %s",
+ devid, strerror(ret));
+ ++err;
+ continue;
+ }
+ if (sp[i].ret) {
+ switch (sp[i].ioctl_errno) {
+ case ENODEV:
+ if (do_print)
+ warning("device %lld not present",
+ devid);
+ continue;
+ case ECANCELED:
+ ++err;
+ break;
+ default:
+ if (do_print)
+ error("scrubbing %s failed for device id %lld: ret=%d, errno=%d (%s)",
+ path, devid,
+ sp[i].ret, sp[i].ioctl_errno,
+ strerror(sp[i].ioctl_errno));
+ ++err;
+ continue;
+ }
+ }
+ if (sp[i].scrub_args.progress.uncorrectable_errors > 0)
+ e_uncorrectable++;
+ if (sp[i].scrub_args.progress.corrected_errors > 0
+ || sp[i].scrub_args.progress.unverified_errors > 0)
+ e_correctable++;
+ }
+
+ if (do_print) {
+ const char *append = "done";
+ if (!do_stats_per_dev)
+ init_fs_stat(&fs_stat);
+ for (i = 0; i < fi_args.num_devices; ++i) {
+ if (do_stats_per_dev) {
+ print_scrub_dev(&di_args[i],
+ &sp[i].scrub_args.progress,
+ print_raw,
+ sp[i].ret ? "canceled" : "done",
+ &sp[i].stats);
+ } else {
+ if (sp[i].ret)
+ append = "canceled";
+ add_to_fs_stat(&sp[i].scrub_args.progress,
+ &sp[i].stats, &fs_stat);
+ }
+ }
+ if (!do_stats_per_dev) {
+ printf("scrub %s for %s\n", append, fsid);
+ print_fs_stat(&fs_stat, print_raw);
+ }
+ }
+
+ /* the progress thread never ends on its own, cancel and reap it */
+ ret = pthread_cancel(t_prog);
+ if (!ret)
+ ret = pthread_join(t_prog, &terr);
+
+ /* check for errors from the handling of the progress thread */
+ if (do_print && ret) {
+ error("progress thread handling failed: %s",
+ strerror(ret));
+ }
+
+ /* check for errors returned from the progress thread itself */
+ if (do_print && terr && terr != PTHREAD_CANCELED)
+ error("recording progress failed: %s",
+ strerror(-PTR_ERR(terr)));
+
+ /* store the final result of all devices */
+ if (do_record) {
+ ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
+ fi_args.num_devices);
+ if (ret && do_print)
+ error("failed to record the result: %s",
+ strerror(-ret));
+ }
+
+ scrub_handle_sigint_child(-1);
+
+out:
+ free_history(past_scrubs);
+ free(di_args);
+ free(t_devs);
+ free(sp);
+ free(spc.progress);
+ if (prg_fd > -1) {
+ close(prg_fd);
+ if (sock_path[0])
+ unlink(sock_path);
+ }
+ close_file_or_dir(fdmnt, dirstream);
+
+ if (err)
+ return 1;
+ if (nothing_to_resume)
+ return 2;
+ if (e_uncorrectable) {
+ error_on(!do_quiet, "there are uncorrectable errors");
+ return 3;
+ }
+ if (e_correctable)
+ warning_on(!do_quiet,
+ "errors detected during scrubbing, corrected");
+
+ return 0;
+}
+
+/*
+ * Usage text of "btrfs scrub start", one string per line, NULL terminated.
+ * The options listed here are parsed in scrub_start().
+ */
+static const char * const cmd_scrub_start_usage[] = {
+ "btrfs scrub start [-BdqrRf] [-c ioprio_class -n ioprio_classdata] <path>|<device>",
+ "Start a new scrub. If a scrub is already running, the new one fails.",
+ "",
+ "-B do not background",
+ "-d stats per device (-B only)",
+ "-q be quiet",
+ "-r read only mode",
+ "-R raw print mode, print full data instead of summary",
+ "-c set ioprio class (see ionice(1) manpage)",
+ "-n set ioprio classdata (see ionice(1) manpage)",
+ "-f force starting new scrub even if a scrub is already running",
+ " this is useful when scrub stats record file is damaged",
+ NULL
+};
+
+/* "btrfs scrub start": run scrub_start() with resume switched off. */
+static int cmd_scrub_start(int argc, char **argv)
+{
+	const int resume = 0;
+
+	return scrub_start(argc, argv, resume);
+}
+
+/* Usage text of "btrfs scrub cancel", NULL terminated. */
+static const char * const cmd_scrub_cancel_usage[] = {
+ "btrfs scrub cancel <path>|<device>",
+ "Cancel a running scrub",
+ NULL
+};
+
+/*
+ * "btrfs scrub cancel <path>|<device>": cancel a running scrub with
+ * BTRFS_IOC_SCRUB_CANCEL.
+ *
+ * Returns 0 if the scrub was cancelled, 2 if the ioctl failed with
+ * ENOTCONN (reported as "not running"), and 1 on any other failure.
+ */
+static int cmd_scrub_cancel(int argc, char **argv)
+{
+	char *path;
+	int ret;
+	int ioctl_errno;
+	int fdmnt = -1;
+	DIR *dirstream = NULL;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_scrub_cancel_usage);
+
+	path = argv[1];
+
+	fdmnt = open_path_or_dev_mnt(path, &dirstream, 1);
+	if (fdmnt < 0) {
+		ret = 1;
+		goto out;
+	}
+
+	ret = ioctl(fdmnt, BTRFS_IOC_SCRUB_CANCEL, NULL);
+
+	if (ret < 0) {
+		/*
+		 * Save errno first: printing the message may change it,
+		 * and the exit status is derived from it afterwards.
+		 */
+		ioctl_errno = errno;
+		error("scrub cancel failed on %s: %s", path,
+			ioctl_errno == ENOTCONN ? "not running" :
+			strerror(ioctl_errno));
+		ret = (ioctl_errno == ENOTCONN) ? 2 : 1;
+		goto out;
+	}
+
+	ret = 0;
+	printf("scrub cancelled\n");
+
+out:
+	close_file_or_dir(fdmnt, dirstream);
+	return ret;
+}
+
+/*
+ * Usage text of "btrfs scrub resume", one string per line, NULL terminated.
+ * The options listed here are parsed in scrub_start().
+ */
+static const char * const cmd_scrub_resume_usage[] = {
+ "btrfs scrub resume [-BdqrR] [-c ioprio_class -n ioprio_classdata] <path>|<device>",
+ "Resume previously canceled or interrupted scrub",
+ "",
+ "-B do not background",
+ "-d stats per device (-B only)",
+ "-q be quiet",
+ "-r read only mode",
+ "-R raw print mode, print full data instead of summary",
+ "-c set ioprio class (see ionice(1) manpage)",
+ "-n set ioprio classdata (see ionice(1) manpage)",
+ NULL
+};
+
+/* "btrfs scrub resume": run scrub_start() with resume switched on. */
+static int cmd_scrub_resume(int argc, char **argv)
+{
+	const int resume = 1;
+
+	return scrub_start(argc, argv, resume);
+}
+
+/* Usage text of "btrfs scrub status", NULL terminated. */
+static const char * const cmd_scrub_status_usage[] = {
+ "btrfs scrub status [-dR] <path>|<device>",
+ "Show status of running or finished scrub",
+ "",
+ "-d stats per device",
+ "-R print raw stats",
+ NULL
+};
+
+/*
+ * "btrfs scrub status [-dR] <path>|<device>".
+ *
+ * The status records are read from the progress socket when a connection
+ * to it succeeds, otherwise from the status file.  They are printed per
+ * device (-d) or summed up for the whole filesystem; -R selects raw
+ * output.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_scrub_status(int argc, char **argv)
+{
+	struct btrfs_ioctl_fs_info_args fi_args;
+	struct btrfs_ioctl_dev_info_args *di_args = NULL;
+	struct scrub_file_record **records = NULL;
+	struct scrub_file_record *rec;
+	struct scrub_fs_stat total;
+	struct sockaddr_un addr = {
+		.sun_family = AF_UNIX,
+	};
+	DIR *dirstream = NULL;
+	char fsid[BTRFS_UUID_UNPARSED_SIZE];
+	char *path;
+	int per_dev = 0;
+	int raw = 0;
+	int src_fd = -1;
+	int failed = 0;
+	int running;
+	int fdmnt;
+	int opt;
+	int ret;
+	int i;
+
+	optind = 1;
+	for (;;) {
+		opt = getopt(argc, argv, "dR");
+		if (opt == -1)
+			break;
+		if (opt == 'd')
+			per_dev = 1;
+		else if (opt == 'R')
+			raw = 1;
+		else
+			usage(cmd_scrub_status_usage);
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_scrub_status_usage);
+
+	path = argv[optind];
+
+	fdmnt = open_path_or_dev_mnt(path, &dirstream, 1);
+	if (fdmnt < 0)
+		return 1;
+
+	ret = get_fs_info(path, &fi_args, &di_args);
+	if (ret) {
+		error("getting dev info for scrub failed: %s",
+			strerror(-ret));
+		failed = 1;
+		goto out;
+	}
+	if (!fi_args.num_devices) {
+		error("no devices found");
+		failed = 1;
+		goto out;
+	}
+
+	uuid_unparse(fi_args.fsid, fsid);
+
+	/* first choice: the progress socket */
+	src_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (src_fd == -1) {
+		error("failed to create socket to receive progress information: %s",
+			strerror(errno));
+		failed = 1;
+		goto out;
+	}
+	scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid,
+			NULL, addr.sun_path, sizeof(addr.sun_path));
+	/* ignore EOVERFLOW, just use shorter name and hope for the best */
+	addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
+	ret = connect(src_fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret == -1) {
+		/* nobody is listening: fall back to the status file */
+		close(src_fd);
+		src_fd = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
+		if (src_fd < 0 && src_fd != -ENOENT) {
+			warning("failed to open status file: %s",
+				strerror(-src_fd));
+			failed = 1;
+			goto out;
+		}
+	}
+
+	if (src_fd >= 0) {
+		records = scrub_read_file(src_fd, 1);
+		if (IS_ERR(records))
+			warning("failed to read status: %s",
+				strerror(-PTR_ERR(records)));
+	}
+	running = is_scrub_running_in_kernel(fdmnt, di_args,
+					     fi_args.num_devices);
+
+	printf("scrub status for %s\n", fsid);
+
+	if (!per_dev) {
+		init_fs_stat(&total);
+		total.s.in_progress = running;
+	}
+	for (i = 0; i < fi_args.num_devices; ++i) {
+		rec = last_dev_scrub(records, di_args[i].devid);
+		if (!per_dev) {
+			/* summary mode: devices without a record add nothing */
+			if (rec)
+				add_to_fs_stat(&rec->p, &rec->stats, &total);
+			continue;
+		}
+		if (!rec) {
+			print_scrub_dev(&di_args[i], NULL, raw, NULL, NULL);
+			continue;
+		}
+		rec->stats.in_progress = running;
+		print_scrub_dev(&di_args[i], &rec->p, raw,
+				rec->stats.finished ? "history" : "status",
+				&rec->stats);
+	}
+	if (!per_dev)
+		print_fs_stat(&total, raw);
+
+out:
+	free_history(records);
+	free(di_args);
+	if (src_fd > -1)
+		close(src_fd);
+	close_file_or_dir(fdmnt, dirstream);
+
+	return !!failed;
+}
+
+/* One-line description of the scrub command group, used in scrub_cmd_group. */
+static const char scrub_cmd_group_info[] =
+"verify checksums of data and metadata";
+
+/*
+ * Command table of "btrfs scrub": maps each subcommand name to its handler
+ * and usage text.  cmd_scrub() passes it to handle_command_group().
+ */
+const struct cmd_group scrub_cmd_group = {
+ scrub_cmd_group_usage, scrub_cmd_group_info, {
+ { "start", cmd_scrub_start, cmd_scrub_start_usage, NULL, 0 },
+ { "cancel", cmd_scrub_cancel, cmd_scrub_cancel_usage, NULL, 0 },
+ { "resume", cmd_scrub_resume, cmd_scrub_resume_usage, NULL, 0 },
+ { "status", cmd_scrub_status, cmd_scrub_status_usage, NULL, 0 },
+ NULL_CMD_STRUCT
+ }
+};
+
+/*
+ * Entry point of "btrfs scrub": hand the arguments to
+ * handle_command_group() together with the scrub command table and return
+ * its result.
+ */
+int cmd_scrub(int argc, char **argv)
+{
+	const struct cmd_group *group = &scrub_cmd_group;
+
+	return handle_command_group(group, argc, argv);
+}
diff --git a/cmds-send.c b/cmds-send.c
new file mode 100644
index 00000000..3e34d75b
--- /dev/null
+++ b/cmds-send.c
@@ -0,0 +1,741 @@
+/*
+ * Copyright (C) 2012 Alexander Block. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#include "kerncompat.h"
+
+#include <unistd.h>
+#include <stdint.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <math.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <libgen.h>
+#include <mntent.h>
+#include <assert.h>
+#include <getopt.h>
+#include <uuid/uuid.h>
+#include <limits.h>
+
+#include "ctree.h"
+#include "ioctl.h"
+#include "commands.h"
+#include "list.h"
+#include "utils.h"
+
+#include "send.h"
+#include "send-utils.h"
+
+/*
+ * Verbosity level: incremented for every -v given to cmd_send(); values
+ * above 0 make do_send() print diagnostics to stderr.
+ */
+static int g_verbose = 0;
+
+/* State shared by the helpers of the send command. */
+struct btrfs_send {
+ /* read end of the pipe the kernel writes the stream to (set in do_send) */
+ int send_fd;
+ /* fd the stream is copied to by dump_thread (cmd_send starts with stdout) */
+ int dump_fd;
+ /* fd opened on root_path (see init_root_path) */
+ int mnt_fd;
+
+ /* root ids added with add_clone_source(), and how many there are */
+ u64 *clone_sources;
+ u64 clone_sources_count;
+
+ /* mount root as returned by find_mount_root() (see init_root_path) */
+ char *root_path;
+ /* search state set up by subvol_uuid_search_init() */
+ struct subvol_uuid_search sus;
+};
+
+/*
+ * Look up the subvolume at @path (searched with subvol_search_by_path)
+ * and store its root id in @root_id.
+ *
+ * Returns 0 on success, -ENOENT if the search finds nothing.
+ */
+static int get_root_id(struct btrfs_send *s, const char *path, u64 *root_id)
+{
+	struct subvol_info *info;
+
+	info = subvol_uuid_search(&s->sus, 0, NULL, 0, path,
+				  subvol_search_by_path);
+	if (info == NULL)
+		return -ENOENT;
+
+	*root_id = info->root_id;
+
+	/* the search result and its path belong to us, release both */
+	free(info->path);
+	free(info);
+
+	return 0;
+}
+
+/*
+ * Find the subvolume whose uuid equals the parent_uuid of the subvolume
+ * with root id @root_id.
+ *
+ * Returns the search result (the callers in this file release it with
+ * free() on ->path and on the struct), or NULL if either lookup fails.
+ */
+static struct subvol_info *get_parent(struct btrfs_send *s, u64 root_id)
+{
+	struct subvol_info *child;
+	struct subvol_info *parent = NULL;
+
+	child = subvol_uuid_search(&s->sus, root_id, NULL, 0, NULL,
+				   subvol_search_by_root_id);
+	if (child) {
+		parent = subvol_uuid_search(&s->sus, 0, child->parent_uuid,
+					    0, NULL, subvol_search_by_uuid);
+		free(child->path);
+		free(child);
+	}
+
+	return parent;
+}
+
+/* Release a subvol_info search result together with its path; NULL is ok. */
+static void release_subvol_info(struct subvol_info *si)
+{
+	if (!si)
+		return;
+	free(si->path);
+	free(si);
+}
+
+/*
+ * Choose, among the clone sources of @s, a parent for sending the
+ * subvolume with root id @root_id and store its root id in @found.
+ *
+ * If the parent of @root_id is itself a clone source it is used directly.
+ * Otherwise every clone source that has the same parent is considered and
+ * the one whose ctransid is closest to the parent's ctransid wins (the
+ * first one on a tie).
+ *
+ * Returns 0 on success or -ENOENT if no suitable subvolume is found or a
+ * lookup fails.
+ */
+static int find_good_parent(struct btrfs_send *s, u64 root_id, u64 *found)
+{
+	struct subvol_info *parent;
+	struct subvol_info *candidate;
+	struct subvol_info *best = NULL;
+	u64 best_diff = (u64)-1;
+	__s64 delta;
+	int ret;
+	int i;
+
+	parent = get_parent(s, root_id);
+	if (!parent) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/* the parent itself is a clone source: nothing can be better */
+	for (i = 0; i < s->clone_sources_count; i++) {
+		if (s->clone_sources[i] != parent->root_id)
+			continue;
+		best = parent;
+		parent = NULL;
+		goto out_found;
+	}
+
+	for (i = 0; i < s->clone_sources_count; i++) {
+		int same_parent;
+
+		/* only clone sources sharing our parent are of interest */
+		candidate = get_parent(s, s->clone_sources[i]);
+		if (!candidate)
+			continue;
+		same_parent = (candidate->root_id == parent->root_id);
+		release_subvol_info(candidate);
+		if (!same_parent)
+			continue;
+
+		candidate = subvol_uuid_search(&s->sus, s->clone_sources[i],
+					       NULL, 0, NULL,
+					       subvol_search_by_root_id);
+		if (!candidate) {
+			ret = -ENOENT;
+			goto out;
+		}
+
+		/* distance in transaction ids, as an absolute value */
+		delta = candidate->ctransid - parent->ctransid;
+		if (delta < 0)
+			delta *= -1;
+
+		if (delta < best_diff) {
+			release_subvol_info(best);
+			best = candidate;
+			best_diff = delta;
+		} else {
+			release_subvol_info(candidate);
+		}
+	}
+
+	if (!best) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+out_found:
+	*found = best->root_id;
+	ret = 0;
+
+out:
+	release_subvol_info(parent);
+	release_subvol_info(best);
+	return ret;
+}
+
+/*
+ * Append @root_id to the clone source array of @s.
+ *
+ * Returns 0 on success.  If the array cannot be grown, -ENOMEM is
+ * returned and the existing array and count are left unchanged, so they
+ * stay consistent with each other and the owner of @s can still free
+ * s->clone_sources as usual.
+ */
+static int add_clone_source(struct btrfs_send *s, u64 root_id)
+{
+	u64 *grown;
+
+	/* grow through a temporary so a failure does not lose the array */
+	grown = realloc(s->clone_sources,
+		sizeof(*s->clone_sources) * (s->clone_sources_count + 1));
+	if (!grown)
+		return -ENOMEM;
+
+	s->clone_sources = grown;
+	s->clone_sources[s->clone_sources_count++] = root_id;
+
+	return 0;
+}
+
+/*
+ * Write all @size bytes of @buf to @fd, continuing after short writes and
+ * retrying writes that were interrupted by a signal (EINTR).
+ *
+ * Returns 0 once everything is written.  On failure an error message is
+ * printed and a negative errno is returned; a write() that returns 0 is
+ * reported as -EIO.
+ */
+static int write_buf(int fd, const void *buf, int size)
+{
+	const char *data = buf;
+	int ret;
+	int pos = 0;
+
+	while (pos < size) {
+		ret = write(fd, data + pos, size - pos);
+		if (ret < 0) {
+			/* nothing was written, simply try again */
+			if (errno == EINTR)
+				continue;
+			ret = -errno;
+			error("failed to dump stream: %s", strerror(-ret));
+			goto out;
+		}
+		if (!ret) {
+			ret = -EIO;
+			error("failed to dump stream: %s", strerror(-ret));
+			goto out;
+		}
+		pos += ret;
+	}
+	ret = 0;
+
+out:
+	return ret;
+}
+
+/*
+ * Thread body: copy the send stream from s->send_fd to s->dump_fd until
+ * end of file.
+ *
+ * @arg_ is the struct btrfs_send.  On a read or write error the whole
+ * process is terminated with exit(-ret); otherwise the thread returns
+ * ERR_PTR(0) when the read side reports end of file.
+ */
+static void *dump_thread(void *arg_)
+{
+	int ret;
+	struct btrfs_send *s = (struct btrfs_send*)arg_;
+	char buf[4096];
+	int readed;
+
+	while (1) {
+		readed = read(s->send_fd, buf, sizeof(buf));
+		if (readed < 0) {
+			ret = -errno;
+			/* no trailing newline, like every other error() call */
+			error("failed to read stream from kernel: %s",
+				strerror(-ret));
+			goto out;
+		}
+		if (!readed) {
+			ret = 0;
+			goto out;
+		}
+		ret = write_buf(s->dump_fd, buf, readed);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	if (ret < 0) {
+		exit(-ret);
+	}
+
+	return ERR_PTR(ret);
+}
+
+/*
+ * Send one subvolume.
+ *
+ * Opens @subvol relative to send->mnt_fd, creates a pipe, starts
+ * dump_thread() on the read end and issues BTRFS_IOC_SEND with the write
+ * end as its send_fd.  After the ioctl the write end is closed and the
+ * thread is joined.
+ *
+ * @parent_root_id and @flags are passed on to the ioctl together with the
+ * clone sources of @send.  BTRFS_SEND_FLAG_OMIT_STREAM_HEADER is added
+ * unless @is_first_subvol is set, BTRFS_SEND_FLAG_OMIT_END_CMD unless
+ * @is_last_subvol is set.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int do_send(struct btrfs_send *send, u64 parent_root_id,
+		   int is_first_subvol, int is_last_subvol, char *subvol,
+		   u64 flags)
+{
+	struct btrfs_ioctl_send_args args;
+	pthread_t reader;
+	void *reader_ret = NULL;
+	int pipefd[2] = {-1, -1};
+	int subvol_fd = -1;
+	int ret;
+
+	subvol_fd = openat(send->mnt_fd, subvol, O_RDONLY | O_NOATIME);
+	if (subvol_fd < 0) {
+		ret = -errno;
+		error("cannot open %s: %s", subvol, strerror(-ret));
+		goto out;
+	}
+
+	if (pipe(pipefd) < 0) {
+		ret = -errno;
+		error("pipe failed: %s", strerror(-ret));
+		goto out;
+	}
+
+	/* the kernel writes to pipefd[1], the reader thread drains pipefd[0] */
+	memset(&args, 0, sizeof(args));
+	args.send_fd = pipefd[1];
+	args.flags = flags;
+	args.clone_sources = (__u64*)send->clone_sources;
+	args.clone_sources_count = send->clone_sources_count;
+	args.parent_root = parent_root_id;
+	if (!is_first_subvol)
+		args.flags |= BTRFS_SEND_FLAG_OMIT_STREAM_HEADER;
+	if (!is_last_subvol)
+		args.flags |= BTRFS_SEND_FLAG_OMIT_END_CMD;
+	send->send_fd = pipefd[0];
+
+	ret = pthread_create(&reader, NULL, dump_thread, send);
+	if (ret) {
+		ret = -ret;
+		error("thread setup failed: %s", strerror(-ret));
+		goto out;
+	}
+
+	ret = ioctl(subvol_fd, BTRFS_IOC_SEND, &args);
+	if (ret < 0) {
+		ret = -errno;
+		error("send ioctl failed with %d: %s", ret, strerror(-ret));
+		if (ret == -EINVAL && (!is_first_subvol || !is_last_subvol))
+			fprintf(stderr,
+				"Try upgrading your kernel or don't use -e.\n");
+		goto out;
+	}
+	if (g_verbose > 0) {
+		fprintf(stderr, "BTRFS_IOC_SEND returned %d\n", ret);
+		fprintf(stderr, "joining genl thread\n");
+	}
+
+	/* close the write end before joining the reader thread */
+	close(pipefd[1]);
+	pipefd[1] = -1;
+
+	ret = pthread_join(reader, &reader_ret);
+	if (ret) {
+		ret = -ret;
+		error("pthread_join failed: %s", strerror(-ret));
+		goto out;
+	}
+	if (reader_ret) {
+		ret = (long int)reader_ret;
+		error("failed to process send stream, ret=%ld (%s)",
+			(long int)reader_ret, strerror(-ret));
+		goto out;
+	}
+
+	ret = 0;
+
+out:
+	if (subvol_fd != -1)
+		close(subvol_fd);
+	if (pipefd[0] != -1)
+		close(pipefd[0]);
+	if (pipefd[1] != -1)
+		close(pipefd[1]);
+	return ret;
+}
+
+/*
+ * Return the part of @full_path that follows the mount point @mnt.
+ *
+ * The result points into @full_path, strlen(mnt) bytes in, plus one more
+ * byte for the separating '/' when @mnt does not already end in one.  An
+ * empty @mnt yields @full_path itself.  No check is made that @full_path
+ * really starts with @mnt or is long enough.
+ */
+char *get_subvol_name(char *mnt, char *full_path)
+{
+	int skip = strlen(mnt);
+
+	if (skip == 0)
+		return full_path;
+
+	/* step over the '/' between mount point and subvolume path */
+	if (mnt[skip - 1] != '/')
+		skip++;
+
+	return &full_path[skip];
+}
+
+/*
+ * Set up the per-mount state of @s for the filesystem containing @subvol:
+ * s->root_path (from find_mount_root()), s->mnt_fd (opened on that path)
+ * and the subvolume search state s->sus.
+ *
+ * Does nothing and returns 0 if s->root_path is already set.  Otherwise
+ * returns the result of subvol_uuid_search_init() on success, -EINVAL if
+ * the mount point cannot be determined or is not btrfs, or a negative
+ * errno if opening or the search initialisation fails.
+ */
+static int init_root_path(struct btrfs_send *s, const char *subvol)
+{
+	int ret;
+
+	if (s->root_path)
+		return 0;
+
+	ret = find_mount_root(subvol, &s->root_path);
+	if (ret < 0) {
+		error("failed to determine mount point for %s: %s",
+			subvol, strerror(-ret));
+		return -EINVAL;
+	}
+	if (ret > 0) {
+		error("%s doesn't belong to btrfs mount point", subvol);
+		return -EINVAL;
+	}
+
+	s->mnt_fd = open(s->root_path, O_RDONLY | O_NOATIME);
+	if (s->mnt_fd < 0) {
+		ret = -errno;
+		error("cannot open '%s': %s", s->root_path, strerror(-ret));
+		return ret;
+	}
+
+	ret = subvol_uuid_search_init(s->mnt_fd, &s->sus);
+	if (ret < 0)
+		error("failed to initialize subvol search: %s",
+			strerror(-ret));
+
+	return ret;
+}
+
+/*
+ * Tell whether @subvol (opened relative to s->mnt_fd) is read-only.
+ *
+ * Returns 1 if BTRFS_SUBVOL_RDONLY is set in the subvolume flags, 0 if it
+ * is not, or a negative errno if the subvolume cannot be opened or its
+ * flags cannot be read.
+ */
+static int is_subvol_ro(struct btrfs_send *s, char *subvol)
+{
+	u64 flags;
+	int ret;
+	int fd;
+
+	fd = openat(s->mnt_fd, subvol, O_RDONLY | O_NOATIME);
+	if (fd < 0) {
+		ret = -errno;
+		error("cannot open %s: %s", subvol, strerror(-ret));
+		return ret;
+	}
+
+	if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0) {
+		ret = -errno;
+		error("failed to get flags for subvolume %s: %s",
+			subvol, strerror(-ret));
+	} else {
+		ret = (flags & BTRFS_SUBVOL_RDONLY) ? 1 : 0;
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Entry point of 'btrfs send'.
+ *
+ * Parses the options (-v, -e, -c <clone-src>, -f <outfile>, -p <parent>,
+ * --no-data), checks that every subvolume involved is read-only and that all
+ * subvolumes to be sent share the mount root of the first one, then calls
+ * do_send() once per <subvol> argument.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int cmd_send(int argc, char **argv)
+{
+	char *subvol = NULL;
+	int ret;
+	char outname[PATH_MAX];
+	struct btrfs_send send;
+	int i;
+	char *mount_root = NULL;
+	char *snapshot_parent = NULL;
+	u64 root_id = 0;
+	u64 parent_root_id = 0;
+	int full_send = 1;
+	int new_end_cmd_semantic = 0;
+	u64 send_flags = 0;
+
+	memset(&send, 0, sizeof(send));
+	/*
+	 * 0 is a valid file descriptor.  Mark the mount fd as not opened so
+	 * that the cleanup at 'out' does not close fd 0 when we fail before
+	 * init_root_path() had a chance to open the mount point.
+	 */
+	send.mnt_fd = -1;
+	send.dump_fd = fileno(stdout);
+	outname[0] = 0;
+
+	while (1) {
+		enum { GETOPT_VAL_SEND_NO_DATA = 256 };
+		static const struct option long_options[] = {
+			{ "no-data", no_argument, NULL, GETOPT_VAL_SEND_NO_DATA },
+			/* getopt_long() needs an all-zero terminating entry */
+			{ NULL, 0, NULL, 0 }
+		};
+		int c = getopt_long(argc, argv, "vec:f:i:p:", long_options, NULL);
+
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'v':
+			g_verbose++;
+			break;
+		case 'e':
+			new_end_cmd_semantic = 1;
+			break;
+		case 'c':
+			subvol = realpath(optarg, NULL);
+			if (!subvol) {
+				ret = -errno;
+				error("realpath %s failed: %s", optarg, strerror(-ret));
+				goto out;
+			}
+
+			ret = init_root_path(&send, subvol);
+			if (ret < 0)
+				goto out;
+
+			ret = get_root_id(&send, get_subvol_name(send.root_path, subvol),
+					&root_id);
+			if (ret < 0) {
+				error("cannot resolve rootid for %s", subvol);
+				goto out;
+			}
+
+			ret = is_subvol_ro(&send, subvol);
+			if (ret < 0)
+				goto out;
+			if (!ret) {
+				ret = -EINVAL;
+				error("cloned subvolume %s is not read-only", subvol);
+				goto out;
+			}
+
+			ret = add_clone_source(&send, root_id);
+			if (ret < 0) {
+				error("not enough memory");
+				goto out;
+			}
+			/*
+			 * Drop the per-mount state again, it is set up from
+			 * scratch for the next -c and for the subvolumes to
+			 * send.
+			 */
+			subvol_uuid_search_finit(&send.sus);
+			free(subvol);
+			subvol = NULL;
+			if (send.mnt_fd >= 0) {
+				close(send.mnt_fd);
+				send.mnt_fd = -1;
+			}
+			free(send.root_path);
+			send.root_path = NULL;
+			full_send = 0;
+			break;
+		case 'f':
+			if (arg_copy_path(outname, optarg, sizeof(outname))) {
+				error("output file path too long (%zu)", strlen(optarg));
+				ret = 1;
+				goto out;
+			}
+			break;
+		case 'p':
+			if (snapshot_parent) {
+				error("you cannot have more than one parent (-p)");
+				ret = 1;
+				goto out;
+			}
+			snapshot_parent = realpath(optarg, NULL);
+			if (!snapshot_parent) {
+				ret = -errno;
+				error("realpath %s failed: %s", optarg, strerror(-ret));
+				goto out;
+			}
+
+			/*
+			 * The mount fd is not open yet at this point; this
+			 * works because realpath() returned an absolute path.
+			 */
+			ret = is_subvol_ro(&send, snapshot_parent);
+			if (ret < 0)
+				goto out;
+			if (!ret) {
+				ret = -EINVAL;
+				error("parent subvolume %s is not read-only",
+					snapshot_parent);
+				goto out;
+			}
+
+			full_send = 0;
+			break;
+		case 'i':
+			error("option -i was removed, use -c instead");
+			ret = 1;
+			goto out;
+		case GETOPT_VAL_SEND_NO_DATA:
+			send_flags |= BTRFS_SEND_FLAG_NO_FILE_DATA;
+			break;
+		case '?':
+		default:
+			error("send arguments invalid");
+			ret = 1;
+			goto out;
+		}
+	}
+
+	if (check_argc_min(argc - optind, 1))
+		usage(cmd_send_usage);
+
+	if (outname[0]) {
+		send.dump_fd = creat(outname, 0600);
+		if (send.dump_fd == -1) {
+			ret = -errno;
+			error("cannot create '%s': %s", outname, strerror(-ret));
+			goto out;
+		}
+	}
+
+	if (isatty(send.dump_fd)) {
+		error(
+	    "not dumping send stream into a terminal, redirect it into a file");
+		ret = 1;
+		goto out;
+	}
+
+	/* use first send subvol to determine mount_root */
+	subvol = realpath(argv[optind], NULL);
+	if (!subvol) {
+		ret = -errno;
+		error("unable to resolve %s", argv[optind]);
+		goto out;
+	}
+
+	ret = init_root_path(&send, subvol);
+	if (ret < 0)
+		goto out;
+
+	if (snapshot_parent != NULL) {
+		ret = get_root_id(&send,
+				get_subvol_name(send.root_path, snapshot_parent),
+				&parent_root_id);
+		if (ret < 0) {
+			error("could not resolve rootid for %s", snapshot_parent);
+			goto out;
+		}
+
+		ret = add_clone_source(&send, parent_root_id);
+		if (ret < 0) {
+			error("not enough memory");
+			goto out;
+		}
+	}
+
+	/* first pass: validate all subvolumes before anything is sent */
+	for (i = optind; i < argc; i++) {
+		free(subvol);
+		subvol = realpath(argv[i], NULL);
+		if (!subvol) {
+			ret = -errno;
+			error("unable to resolve %s", argv[i]);
+			goto out;
+		}
+
+		ret = find_mount_root(subvol, &mount_root);
+		if (ret < 0) {
+			error("find_mount_root failed on %s: %s", subvol,
+				strerror(-ret));
+			goto out;
+		}
+		if (ret > 0) {
+			error("%s does not belong to btrfs mount point",
+				subvol);
+			ret = -EINVAL;
+			goto out;
+		}
+		if (strcmp(send.root_path, mount_root) != 0) {
+			ret = -EINVAL;
+			error("all subvolumes must be from the same filesystem");
+			goto out;
+		}
+		/* reset so the cleanup at 'out' never frees it twice */
+		free(mount_root);
+		mount_root = NULL;
+
+		ret = is_subvol_ro(&send, subvol);
+		if (ret < 0)
+			goto out;
+		if (!ret) {
+			ret = -EINVAL;
+			error("subvolume %s is not read-only", subvol);
+			goto out;
+		}
+	}
+
+	if (send_flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
+		printf("Mode NO_FILE_DATA enabled\n");
+
+	/* second pass: send the subvolumes one by one */
+	for (i = optind; i < argc; i++) {
+		int is_first_subvol;
+		int is_last_subvol;
+
+		free(subvol);
+		subvol = NULL;
+
+		fprintf(stderr, "At subvol %s\n", argv[i]);
+
+		subvol = realpath(argv[i], NULL);
+		if (!subvol) {
+			ret = -errno;
+			error("realpath %s failed: %s", argv[i], strerror(-ret));
+			goto out;
+		}
+
+		/*
+		 * NOTE(review): root_id still holds whatever the last -c
+		 * option stored (or 0); it is not looked up for the subvolume
+		 * being sent here.  Confirm this is intended for
+		 * find_good_parent() and add_clone_source() below.
+		 */
+		if (!full_send && !parent_root_id) {
+			ret = find_good_parent(&send, root_id, &parent_root_id);
+			if (ret < 0) {
+				error("parent determination failed for %llu",
+					(unsigned long long)root_id);
+				goto out;
+			}
+		}
+
+		ret = is_subvol_ro(&send, subvol);
+		if (ret < 0)
+			goto out;
+		if (!ret) {
+			ret = -EINVAL;
+			error("subvolume %s is not read-only", subvol);
+			goto out;
+		}
+
+		if (new_end_cmd_semantic) {
+			/* require new kernel */
+			is_first_subvol = (i == optind);
+			is_last_subvol = (i == argc - 1);
+		} else {
+			/* be compatible to old and new kernel */
+			is_first_subvol = 1;
+			is_last_subvol = 1;
+		}
+		ret = do_send(&send, parent_root_id, is_first_subvol,
+			      is_last_subvol, subvol, send_flags);
+		if (ret < 0)
+			goto out;
+
+		/* done with this subvol, so add it to the clone sources */
+		ret = add_clone_source(&send, root_id);
+		if (ret < 0) {
+			error("not enough memory");
+			goto out;
+		}
+
+		parent_root_id = 0;
+		full_send = 0;
+	}
+
+	ret = 0;
+
+out:
+	free(subvol);
+	free(mount_root);
+	free(snapshot_parent);
+	free(send.clone_sources);
+	if (send.mnt_fd >= 0)
+		close(send.mnt_fd);
+	free(send.root_path);
+	subvol_uuid_search_finit(&send.sus);
+	return !!ret;
+}
+
+/*
+ * Help text of 'btrfs send', handed to usage() by cmd_send().
+ * NULL-terminated array: synopsis first, then description and option lines.
+ */
+const char * const cmd_send_usage[] = {
+ "btrfs send [-ve] [-p <parent>] [-c <clone-src>] [-f <outfile>] <subvol> [<subvol>...]",
+ "Send the subvolume(s) to stdout.",
+ "Sends the subvolume(s) specified by <subvol> to stdout.",
+ "<subvol> should be read-only here.",
+ "By default, this will send the whole subvolume. To do an incremental",
+ "send, use '-p <parent>'. If you want to allow btrfs to clone from",
+ "any additional local snapshots, use '-c <clone-src>' (multiple times",
+ "where applicable). You must not specify clone sources unless you",
+ "guarantee that these snapshots are exactly in the same state on both",
+ "sides, the sender and the receiver. It is allowed to omit the",
+ "'-p <parent>' option when '-c <clone-src>' options are given, in",
+ "which case 'btrfs send' will determine a suitable parent among the",
+ "clone sources itself.",
+ "\n",
+ "-v Enable verbose debug output. Each occurrence of",
+ " this option increases the verbose level more.",
+ "-e If sending multiple subvols at once, use the new",
+ " format and omit the end-cmd between the subvols.",
+ "-p <parent> Send an incremental stream from <parent> to",
+ " <subvol>.",
+ "-c <clone-src> Use this snapshot as a clone source for an ",
+ " incremental send (multiple allowed)",
+ "-f <outfile> Output is normally written to stdout. To write to",
+ " a file, use this option. An alternative would be to",
+ " use pipes.",
+ "--no-data send in NO_FILE_DATA mode, Note: the output stream",
+ " does not contain any file data and thus cannot be used",
+ " to transfer changes. This mode is faster and useful to",
+ " show the differences in metadata.",
+ NULL
+};
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
new file mode 100644
index 00000000..9d9b0af7
--- /dev/null
+++ b/cmds-subvolume.c
@@ -0,0 +1,1347 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <libgen.h>
+#include <limits.h>
+#include <getopt.h>
+#include <uuid/uuid.h>
+#include <linux/magic.h>
+
+#include "kerncompat.h"
+#include "ioctl.h"
+#include "qgroup.h"
+
+#include "ctree.h"
+#include "commands.h"
+#include "utils.h"
+#include "btrfs-list.h"
+#include "utils.h"
+
+/*
+ * Look for the ROOT_ITEM of @subvolid in the root tree of the filesystem
+ * opened as @fd.
+ *
+ * Returns 1 if the search finds no item (the subvolume is gone), 0 if an
+ * item is still found, and a negative errno value if the search ioctl fails.
+ */
+static int is_subvolume_cleaned(int fd, u64 subvolid)
+{
+	struct btrfs_ioctl_search_args args;
+
+	/* match exactly one key: (subvolid, ROOT_ITEM, any offset) */
+	args.key.tree_id = BTRFS_ROOT_TREE_OBJECTID;
+	args.key.min_objectid = subvolid;
+	args.key.max_objectid = subvolid;
+	args.key.min_type = BTRFS_ROOT_ITEM_KEY;
+	args.key.max_type = BTRFS_ROOT_ITEM_KEY;
+	args.key.min_offset = 0;
+	args.key.max_offset = (u64)-1;
+	args.key.min_transid = 0;
+	args.key.max_transid = (u64)-1;
+	args.key.nr_items = 1;
+
+	if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+		return -errno;
+
+	/* nr_items is read back after the ioctl to see if anything matched */
+	return args.key.nr_items == 0 ? 1 : 0;
+}
+
+/*
+ * Poll until none of the @count subvolume ids in @ids is found any more by
+ * is_subvolume_cleaned().
+ *
+ * Entries that are 0 are skipped; an entry is set to 0 (and a message is
+ * printed) once its subvolume is gone.  Between two polling rounds the
+ * function sleeps @sleep_interval seconds.
+ *
+ * Returns 0 when all ids are gone, or the negative error returned by
+ * is_subvolume_cleaned().
+ */
+static int wait_for_subvolume_cleaning(int fd, int count, u64 *ids,
+		int sleep_interval)
+{
+	for (;;) {
+		int pending = 0;
+		int idx;
+
+		for (idx = 0; idx < count; idx++) {
+			int status;
+
+			if (ids[idx] == 0)
+				continue;
+
+			status = is_subvolume_cleaned(fd, ids[idx]);
+			if (status < 0) {
+				error(
+			    "cannot read status of dead subvolume %llu: %s",
+					(unsigned long long)ids[idx], strerror(-status));
+				return status;
+			}
+			if (status == 0) {
+				/* still there, another round is needed */
+				pending++;
+				continue;
+			}
+			printf("Subvolume id %llu is gone\n", ids[idx]);
+			ids[idx] = 0;
+		}
+
+		if (!pending)
+			return 0;
+
+		sleep(sleep_interval);
+	}
+}
+
+/* Synopsis line of the 'btrfs subvolume' command group, NULL-terminated. */
+static const char * const subvolume_cmd_group_usage[] = {
+ "btrfs subvolume <command> <args>",
+ NULL
+};
+
+/*
+ * Help text of 'btrfs subvolume create', handed to usage() by
+ * cmd_subvol_create().  NULL-terminated.
+ */
+static const char * const cmd_subvol_create_usage[] = {
+ "btrfs subvolume create [-i <qgroupid>] [<dest>/]<name>",
+ "Create a subvolume",
+ "Create a subvolume <name> in <dest>. If <dest> is not given",
+ "subvolume <name> will be created in the current directory.",
+ "",
+ "-i <qgroupid> add the newly created subvolume to a qgroup. This",
+ " option can be given multiple times.",
+ NULL
+};
+
+/*
+ * Implementation of 'btrfs subvolume create'.
+ *
+ * Accepts -c <arg> and -i <qgroupid> (both feed the qgroup inherit
+ * structure) followed by exactly one destination path.  The path must not
+ * exist yet; its last component becomes the subvolume name and the rest is
+ * the directory the subvolume is created in.
+ *
+ * Returns 0 on success, 1 on failure, or the value returned by the qgroup
+ * inherit helpers when parsing -c/-i fails.
+ */
+static int cmd_subvol_create(int argc, char **argv)
+{
+	int retval, res, len;
+	int fddst = -1;
+	char *dupname = NULL;
+	char *dupdir = NULL;
+	char *newname;
+	char *dstdir;
+	char *dst;
+	struct btrfs_qgroup_inherit *inherit = NULL;
+	DIR *dirstream = NULL;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "c:i:v");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'c':
+			res = qgroup_inherit_add_copy(&inherit, optarg, 0);
+			if (res) {
+				retval = res;
+				goto out;
+			}
+			break;
+		case 'i':
+			res = qgroup_inherit_add_group(&inherit, optarg);
+			if (res) {
+				retval = res;
+				goto out;
+			}
+			break;
+		default:
+			usage(cmd_subvol_create_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_subvol_create_usage);
+
+	dst = argv[optind];
+
+	retval = 1;	/* failure */
+	res = test_isdir(dst);
+	if (res < 0 && res != -ENOENT) {
+		error("cannot access %s: %s", dst, strerror(-res));
+		goto out;
+	}
+	if (res >= 0) {
+		error("target path already exists: %s", dst);
+		goto out;
+	}
+
+	/*
+	 * basename() and dirname() may modify their argument, so each one
+	 * works on its own copy of the destination path.
+	 */
+	dupname = strdup(dst);
+	dupdir = strdup(dst);
+	if (!dupname || !dupdir) {
+		error("not enough memory");
+		goto out;
+	}
+	newname = basename(dupname);
+	dstdir = dirname(dupdir);
+
+	if (!test_issubvolname(newname)) {
+		error("invalid subvolume name: %s", newname);
+		goto out;
+	}
+
+	len = strlen(newname);
+	if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+		error("subvolume name too long: %s", newname);
+		goto out;
+	}
+
+	fddst = btrfs_open_dir(dstdir, &dirstream, 1);
+	if (fddst < 0)
+		goto out;
+
+	printf("Create subvolume '%s/%s'\n", dstdir, newname);
+	if (inherit) {
+		/* qgroup inheritance is only available in the v2 ioctl */
+		struct btrfs_ioctl_vol_args_v2	args;
+
+		memset(&args, 0, sizeof(args));
+		strncpy_null(args.name, newname);
+		args.flags |= BTRFS_SUBVOL_QGROUP_INHERIT;
+		args.size = qgroup_inherit_size(inherit);
+		args.qgroup_inherit = inherit;
+
+		res = ioctl(fddst, BTRFS_IOC_SUBVOL_CREATE_V2, &args);
+	} else {
+		struct btrfs_ioctl_vol_args	args;
+
+		memset(&args, 0, sizeof(args));
+		strncpy_null(args.name, newname);
+
+		res = ioctl(fddst, BTRFS_IOC_SUBVOL_CREATE, &args);
+	}
+
+	if (res < 0) {
+		error("cannot create subvolume: %s", strerror(errno));
+		goto out;
+	}
+
+	retval = 0;	/* success */
+out:
+	close_file_or_dir(fddst, dirstream);
+	free(inherit);
+	free(dupname);
+	free(dupdir);
+
+	return retval;
+}
+
+/*
+ * Check whether @path names a btrfs subvolume.
+ *
+ * Return values:
+ *   1   - path exists and is a subvolume
+ *   0   - path exists but is not a subvolume
+ *   < 0 - negative errno from stat() or statfs()
+ */
+int test_issubvolume(const char *path)
+{
+	struct stat st;
+	struct statfs stfs;
+
+	if (stat(path, &st) < 0)
+		return -errno;
+
+	/* must be a directory whose inode number is the first free objectid */
+	if (!S_ISDIR(st.st_mode) || st.st_ino != BTRFS_FIRST_FREE_OBJECTID)
+		return 0;
+
+	/* and it has to live on a filesystem with the btrfs magic */
+	if (statfs(path, &stfs) < 0)
+		return -errno;
+
+	return (int)stfs.f_type == BTRFS_SUPER_MAGIC;
+}
+
+/*
+ * Issue BTRFS_IOC_START_SYNC on @fd and, if that worked, BTRFS_IOC_WAIT_SYNC.
+ *
+ * Returns the (negative) result of the first ioctl that fails, otherwise the
+ * result of the wait ioctl.
+ */
+static int wait_for_commit(int fd)
+{
+	int res = ioctl(fd, BTRFS_IOC_START_SYNC, NULL);
+
+	if (res >= 0)
+		res = ioctl(fd, BTRFS_IOC_WAIT_SYNC, NULL);
+
+	return res;
+}
+
+/*
+ * Help text of 'btrfs subvolume delete', handed to usage() by
+ * cmd_subvol_delete().  NULL-terminated.
+ */
+static const char * const cmd_subvol_delete_usage[] = {
+ "btrfs subvolume delete [options] <subvolume> [<subvolume>...]",
+ "Delete subvolume(s)",
+ "Delete subvolumes from the filesystem. The corresponding directory",
+ "is removed instantly but the data blocks are removed later.",
+ "The deletion does not involve full commit by default due to",
+ "performance reasons (as a consequence, the subvolume may appear again",
+ "after a crash). Use one of the --commit options to wait until the",
+ "operation is safely stored on the media.",
+ "",
+ "-c|--commit-after wait for transaction commit at the end of the operation",
+ "-C|--commit-each wait for transaction commit after deleting each subvolume",
+ NULL
+};
+
+/*
+ * Implementation of 'btrfs subvolume delete'.
+ *
+ * Every remaining argument is checked with test_issubvolume() and deleted
+ * with BTRFS_IOC_SNAP_DESTROY issued on its parent directory.  A failure on
+ * one subvolume is reported and the remaining ones are still processed.
+ *
+ * commit_mode: 0 - no commit (default)
+ *              1 - -c/--commit-after: one commit after the last deletion
+ *              2 - -C/--commit-each:  commit after every deletion
+ *
+ * Returns 0 if everything succeeded and 1 otherwise.
+ */
+static int cmd_subvol_delete(int argc, char **argv)
+{
+	int res, ret = 0;
+	int cnt;
+	int fd = -1;
+	struct btrfs_ioctl_vol_args	args;
+	char *dname, *vname, *cpath;
+	char *dupdname = NULL;
+	char *dupvname = NULL;
+	char *path;
+	DIR *dirstream = NULL;
+	int verbose = 0;
+	int commit_mode = 0;
+
+	optind = 1;
+	while (1) {
+		int c;
+		static const struct option long_options[] = {
+			{"commit-after", no_argument, NULL, 'c'},  /* commit mode 1 */
+			{"commit-each", no_argument, NULL, 'C'},  /* commit mode 2 */
+			{NULL, 0, NULL, 0}
+		};
+
+		c = getopt_long(argc, argv, "cC", long_options, NULL);
+		if (c < 0)
+			break;
+
+		switch(c) {
+		case 'c':
+			commit_mode = 1;
+			break;
+		case 'C':
+			commit_mode = 2;
+			break;
+		/*
+		 * NOTE(review): 'v' is neither in the option string nor in
+		 * long_options, so this case cannot be reached and verbose
+		 * stays 0.
+		 */
+		case 'v':
+			verbose++;
+			break;
+		default:
+			usage(cmd_subvol_delete_usage);
+		}
+	}
+
+	if (check_argc_min(argc - optind, 1))
+		usage(cmd_subvol_delete_usage);
+
+	if (verbose > 0) {
+		printf("Transaction commit: %s\n",
+			!commit_mode ? "none (default)" :
+			commit_mode == 1 ? "at the end" : "after each");
+	}
+
+	cnt = optind;
+
+again:
+	path = argv[cnt];
+
+	res = test_issubvolume(path);
+	if (res < 0) {
+		error("cannot access subvolume %s: %s", path, strerror(-res));
+		ret = 1;
+		goto out;
+	}
+	if (!res) {
+		error("not a subvolume: %s", path);
+		ret = 1;
+		goto out;
+	}
+
+	cpath = realpath(path, NULL);
+	if (!cpath) {
+		error("cannot find real path for '%s': %s",
+			path, strerror(errno));
+		/* the exit status is 0 or 1, never a raw errno value */
+		ret = 1;
+		goto out;
+	}
+	/* dirname() and basename() may modify their argument, use copies */
+	dupdname = strdup(cpath);
+	dupvname = strdup(cpath);
+	free(cpath);
+	if (!dupdname || !dupvname) {
+		error("not enough memory");
+		ret = 1;
+		goto out;
+	}
+	dname = dirname(dupdname);
+	vname = basename(dupvname);
+
+	fd = btrfs_open_dir(dname, &dirstream, 1);
+	if (fd < 0) {
+		ret = 1;
+		goto out;
+	}
+
+	printf("Delete subvolume (%s): '%s/%s'\n",
+		commit_mode == 2 || (commit_mode == 1 && cnt + 1 == argc)
+		? "commit" : "no-commit", dname, vname);
+	memset(&args, 0, sizeof(args));
+	strncpy_null(args.name, vname);
+	res = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &args);
+	if(res < 0 ){
+		error("cannot delete '%s/%s': %s", dname, vname,
+			strerror(errno));
+		ret = 1;
+		goto out;
+	}
+
+	/* --commit-each: wait for the commit right after this deletion */
+	if (commit_mode == 2) {
+		res = wait_for_commit(fd);
+		if (res < 0) {
+			error("unable to wait for commit after '%s': %s",
+				path, strerror(errno));
+			ret = 1;
+		}
+	}
+
+out:
+	free(dupdname);
+	free(dupvname);
+	dupdname = NULL;
+	dupvname = NULL;
+	cnt++;
+	if (cnt < argc) {
+		close_file_or_dir(fd, dirstream);
+		/* avoid double free */
+		fd = -1;
+		dirstream = NULL;
+		goto again;
+	}
+
+	/*
+	 * --commit-after: a single commit once all subvolumes are processed.
+	 * fd refers to the directory opened for the last argument and is -1
+	 * if that one could not be opened.
+	 */
+	if (commit_mode == 1 && fd != -1) {
+		res = wait_for_commit(fd);
+		if (res < 0) {
+			error("unable to do final sync after deletion: %s",
+				strerror(errno));
+			ret = 1;
+		}
+	}
+	close_file_or_dir(fd, dirstream);
+
+	return ret;
+}
+
+/*
+ * Help text of 'btrfs subvolume list', handed to usage() by
+ * cmd_subvol_list().  NULL-terminated.
+ *
+ * Naming of options:
+ * - uppercase for filters and sort options
+ * - lowercase for enabling specific items in the output
+ */
+static const char * const cmd_subvol_list_usage[] = {
+ "btrfs subvolume list [options] [-G [+|-]value] [-C [+|-]value] "
+ "[--sort=gen,ogen,rootid,path] <path>",
+ "List subvolumes (and snapshots)",
+ "",
+ "-p print parent ID",
+ "-a print all the subvolumes in the filesystem and",
+ " distinguish absolute and relative path with respect",
+ " to the given <path>",
+ "-c print the ogeneration of the subvolume",
+ "-g print the generation of the subvolume",
+ "-o print only subvolumes below specified path",
+ "-u print the uuid of subvolumes (and snapshots)",
+ "-q print the parent uuid of the snapshots",
+ "-R print the uuid of the received snapshots",
+ "-t print the result as a table",
+ "-s list snapshots only in the filesystem",
+ "-r list readonly subvolumes (including snapshots)",
+ "-d list deleted subvolumes that are not yet cleaned",
+ "-G [+|-]value",
+ " filter the subvolumes by generation",
+ " (+value: >= value; -value: <= value; value: = value)",
+ "-C [+|-]value",
+ " filter the subvolumes by ogeneration",
+ " (+value: >= value; -value: <= value; value: = value)",
+ "--sort=gen,ogen,rootid,path",
+ " list the subvolume in order of gen, ogen, rootid or path",
+ " you also can add '+' or '-' in front of each items.",
+ " (+:ascending, -:descending, ascending default)",
+ NULL,
+};
+
+/*
+ * Implementation of 'btrfs subvolume list'.
+ *
+ * The options select extra output columns, filters and the sort order; the
+ * single positional argument is the path whose filesystem is listed.
+ * Invalid options or a wrong number of arguments end in usage().
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_subvol_list(int argc, char **argv)
+{
+	struct btrfs_list_filter_set *filter_set;
+	struct btrfs_list_comparer_set *comparer_set;
+	DIR *dirstream = NULL;
+	char *path;
+	u64 root_flags = 0;
+	u64 top_id;
+	int fd = -1;
+	int ret = -1;
+	int uerr = 0;
+	int table_layout = 0;
+	int list_all = 0;
+	int only_below_path = 0;
+
+	filter_set = btrfs_list_alloc_filter_set();
+	comparer_set = btrfs_list_alloc_comparer_set();
+
+	optind = 1;
+	for (;;) {
+		static const struct option long_options[] = {
+			{"sort", required_argument, NULL, 'S'},
+			{NULL, 0, NULL, 0}
+		};
+		int c = getopt_long(argc, argv,
+				    "acdgopqsurRG:C:t", long_options, NULL);
+
+		if (c < 0)
+			break;
+
+		switch (c) {
+		/* plain switches, evaluated after option parsing */
+		case 'a':
+			list_all = 1;
+			break;
+		case 'o':
+			only_below_path = 1;
+			break;
+		case 't':
+			table_layout = 1;
+			break;
+		case 'r':
+			root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
+			break;
+
+		/* additional output columns */
+		case 'p':
+			btrfs_list_setup_print_column(BTRFS_LIST_PARENT);
+			break;
+		case 'c':
+			btrfs_list_setup_print_column(BTRFS_LIST_OGENERATION);
+			break;
+		case 'g':
+			btrfs_list_setup_print_column(BTRFS_LIST_GENERATION);
+			break;
+		case 'u':
+			btrfs_list_setup_print_column(BTRFS_LIST_UUID);
+			break;
+		case 'q':
+			btrfs_list_setup_print_column(BTRFS_LIST_PUUID);
+			break;
+		case 'R':
+			btrfs_list_setup_print_column(BTRFS_LIST_RUUID);
+			break;
+
+		/* filters */
+		case 'd':
+			btrfs_list_setup_filter(&filter_set,
+						BTRFS_LIST_FILTER_DELETED,
+						0);
+			break;
+		case 's':
+			btrfs_list_setup_filter(&filter_set,
+						BTRFS_LIST_FILTER_SNAPSHOT_ONLY,
+						0);
+			btrfs_list_setup_print_column(BTRFS_LIST_OGENERATION);
+			btrfs_list_setup_print_column(BTRFS_LIST_OTIME);
+			break;
+		case 'G':
+			btrfs_list_setup_print_column(BTRFS_LIST_GENERATION);
+			ret = btrfs_list_parse_filter_string(optarg,
+							&filter_set,
+							BTRFS_LIST_FILTER_GEN);
+			if (ret) {
+				uerr = 1;
+				goto out;
+			}
+			break;
+		case 'C':
+			btrfs_list_setup_print_column(BTRFS_LIST_OGENERATION);
+			ret = btrfs_list_parse_filter_string(optarg,
+							&filter_set,
+							BTRFS_LIST_FILTER_CGEN);
+			if (ret) {
+				uerr = 1;
+				goto out;
+			}
+			break;
+
+		/* sort order */
+		case 'S':
+			ret = btrfs_list_parse_sort_string(optarg,
+							   &comparer_set);
+			if (ret) {
+				uerr = 1;
+				goto out;
+			}
+			break;
+
+		default:
+			uerr = 1;
+			goto out;
+		}
+	}
+
+	if (root_flags)
+		btrfs_list_setup_filter(&filter_set, BTRFS_LIST_FILTER_FLAGS,
+					root_flags);
+
+	if (check_argc_exact(argc - optind, 1)) {
+		uerr = 1;
+		goto out;
+	}
+
+	path = argv[optind];
+	fd = btrfs_open_dir(path, &dirstream, 1);
+	if (fd < 0) {
+		ret = -1;
+		error("can't access '%s'", path);
+		goto out;
+	}
+
+	ret = btrfs_list_get_path_rootid(fd, &top_id);
+	if (ret) {
+		error("can't get rootid for '%s'", path);
+		goto out;
+	}
+
+	/* -a takes precedence over -o when both are given */
+	if (list_all)
+		btrfs_list_setup_filter(&filter_set,
+					BTRFS_LIST_FILTER_FULL_PATH,
+					top_id);
+	else if (only_below_path)
+		btrfs_list_setup_filter(&filter_set,
+					BTRFS_LIST_FILTER_TOPID_EQUAL,
+					top_id);
+
+	/* by default we shall print the following columns*/
+	btrfs_list_setup_print_column(BTRFS_LIST_OBJECTID);
+	btrfs_list_setup_print_column(BTRFS_LIST_GENERATION);
+	btrfs_list_setup_print_column(BTRFS_LIST_TOP_LEVEL);
+	btrfs_list_setup_print_column(BTRFS_LIST_PATH);
+
+	ret = btrfs_list_subvols_print(fd, filter_set, comparer_set,
+			table_layout ? BTRFS_LIST_LAYOUT_TABLE :
+				       BTRFS_LIST_LAYOUT_DEFAULT,
+			!list_all && !only_below_path, NULL);
+
+out:
+	close_file_or_dir(fd, dirstream);
+	if (filter_set)
+		btrfs_list_free_filter_set(filter_set);
+	if (comparer_set)
+		btrfs_list_free_comparer_set(comparer_set);
+	if (uerr)
+		usage(cmd_subvol_list_usage);
+	return !!ret;
+}
+
+/*
+ * Help text of 'btrfs subvolume snapshot', handed to usage() by
+ * cmd_subvol_snapshot().  NULL-terminated.
+ */
+static const char * const cmd_subvol_snapshot_usage[] = {
+ "btrfs subvolume snapshot [-r] [-i <qgroupid>] <source> <dest>|[<dest>/]<name>",
+ "Create a snapshot of the subvolume",
+ "Create a writable/readonly snapshot of the subvolume <source> with",
+ "the name <name> in the <dest> directory. If only <dest> is given,",
+ "the subvolume will be named the basename of <source>.",
+ "",
+ "-r create a readonly snapshot",
+ "-i <qgroupid> add the newly created snapshot to a qgroup. This",
+ " option can be given multiple times.",
+ NULL
+};
+
+/*
+ * Implementation of 'btrfs subvolume snapshot'.
+ *
+ * Takes the options -r (read-only snapshot), -i <qgroupid> and -c <arg>
+ * (both feed the qgroup inherit structure) and two positional arguments,
+ * <source> and the destination.  If the destination is an existing
+ * directory, the snapshot is created inside it under the basename of
+ * <source>; if it does not exist, its last component is the snapshot name
+ * and the rest is the directory to create it in.
+ *
+ * Returns 0 on success, 1 on failure, or the value returned by the qgroup
+ * inherit helpers when option parsing fails.
+ */
+static int cmd_subvol_snapshot(int argc, char **argv)
+{
+	char *subvol, *dst;
+	int res, retval;
+	int fd = -1, fddst = -1;
+	int len, readonly = 0;
+	char *dupname = NULL;
+	char *dupdir = NULL;
+	char *newname;
+	char *dstdir;
+	struct btrfs_ioctl_vol_args_v2	args;
+	struct btrfs_qgroup_inherit *inherit = NULL;
+	DIR *dirstream1 = NULL, *dirstream2 = NULL;
+
+	optind = 1;
+	memset(&args, 0, sizeof(args));
+	while (1) {
+		int c = getopt(argc, argv, "c:i:r");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'c':
+			res = qgroup_inherit_add_copy(&inherit, optarg, 0);
+			if (res) {
+				retval = res;
+				goto out;
+			}
+			break;
+		case 'i':
+			res = qgroup_inherit_add_group(&inherit, optarg);
+			if (res) {
+				retval = res;
+				goto out;
+			}
+			break;
+		case 'r':
+			readonly = 1;
+			break;
+		/*
+		 * NOTE(review): 'x' is not part of the option string above,
+		 * so getopt() never returns it and this case is unreachable.
+		 */
+		case 'x':
+			res = qgroup_inherit_add_copy(&inherit, optarg, 1);
+			if (res) {
+				retval = res;
+				goto out;
+			}
+			break;
+		default:
+			usage(cmd_subvol_snapshot_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 2))
+		usage(cmd_subvol_snapshot_usage);
+
+	subvol = argv[optind];
+	dst = argv[optind + 1];
+
+	retval = 1;	/* failure */
+	res = test_issubvolume(subvol);
+	if (res < 0) {
+		error("cannot access subvolume %s: %s", subvol, strerror(-res));
+		goto out;
+	}
+	if (!res) {
+		error("not a subvolume: %s", subvol);
+		goto out;
+	}
+
+	res = test_isdir(dst);
+	if (res < 0 && res != -ENOENT) {
+		error("cannot access %s: %s", dst, strerror(-res));
+		goto out;
+	}
+	if (res == 0) {
+		error("'%s' exists and it is not a directory", dst);
+		goto out;
+	}
+
+	/*
+	 * basename() and dirname() may modify their argument, so they only
+	 * ever see private copies of the paths.
+	 */
+	if (res > 0) {
+		/* existing directory: keep the name of the source */
+		dupname = strdup(subvol);
+		if (!dupname) {
+			error("not enough memory");
+			goto out;
+		}
+		newname = basename(dupname);
+		dstdir = dst;
+	} else {
+		/* -ENOENT: last path component is the new name */
+		dupname = strdup(dst);
+		dupdir = strdup(dst);
+		if (!dupname || !dupdir) {
+			error("not enough memory");
+			goto out;
+		}
+		newname = basename(dupname);
+		dstdir = dirname(dupdir);
+	}
+
+	if (!test_issubvolname(newname)) {
+		error("invalid snapshot name '%s'", newname);
+		goto out;
+	}
+
+	len = strlen(newname);
+	if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+		error("snapshot name too long '%s'", newname);
+		goto out;
+	}
+
+	fddst = btrfs_open_dir(dstdir, &dirstream1, 1);
+	if (fddst < 0)
+		goto out;
+
+	fd = btrfs_open_dir(subvol, &dirstream2, 1);
+	if (fd < 0)
+		goto out;
+
+	if (readonly) {
+		args.flags |= BTRFS_SUBVOL_RDONLY;
+		printf("Create a readonly snapshot of '%s' in '%s/%s'\n",
+		       subvol, dstdir, newname);
+	} else {
+		printf("Create a snapshot of '%s' in '%s/%s'\n",
+		       subvol, dstdir, newname);
+	}
+
+	/* the ioctl goes to the destination dir, the source is passed as fd */
+	args.fd = fd;
+	if (inherit) {
+		args.flags |= BTRFS_SUBVOL_QGROUP_INHERIT;
+		args.size = qgroup_inherit_size(inherit);
+		args.qgroup_inherit = inherit;
+	}
+	strncpy_null(args.name, newname);
+
+	res = ioctl(fddst, BTRFS_IOC_SNAP_CREATE_V2, &args);
+
+	if (res < 0) {
+		error("cannot snapshot '%s': %s", subvol, strerror(errno));
+		goto out;
+	}
+
+	retval = 0;	/* success */
+
+out:
+	close_file_or_dir(fddst, dirstream1);
+	close_file_or_dir(fd, dirstream2);
+	free(inherit);
+	free(dupname);
+	free(dupdir);
+
+	return retval;
+}
+
+/*
+ * Help text of 'btrfs subvolume get-default', handed to usage() by
+ * cmd_subvol_get_default().  NULL-terminated.
+ */
+static const char * const cmd_subvol_get_default_usage[] = {
+ "btrfs subvolume get-default <path>",
+ "Get the default subvolume of a filesystem",
+ NULL
+};
+
+/*
+ * Implementation of 'btrfs subvolume get-default'.
+ *
+ * Looks up the id of the default subvolume of the filesystem at argv[1].
+ * For the FS_TREE a fixed line is printed, for any other id the matching
+ * subvolume is printed through btrfs_list_subvols_print().
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_subvol_get_default(int argc, char **argv)
+{
+	struct btrfs_list_filter_set *filter_set;
+	DIR *dirstream = NULL;
+	u64 default_id;
+	int fd;
+	int ret;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_subvol_get_default_usage);
+
+	fd = btrfs_open_dir(argv[1], &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	ret = btrfs_list_get_default_subvolume(fd, &default_id);
+	if (ret) {
+		error("failed to look up default subvolume: %s",
+			strerror(errno));
+	} else if (default_id == 0) {
+		error("'default' dir item not found");
+		ret = 1;
+	} else if (default_id == BTRFS_FS_TREE_OBJECTID) {
+		/* no need to resolve roots if FS_TREE is default */
+		printf("ID 5 (FS_TREE)\n");
+	} else {
+		filter_set = btrfs_list_alloc_filter_set();
+		btrfs_list_setup_filter(&filter_set, BTRFS_LIST_FILTER_ROOTID,
+					default_id);
+
+		/* by default we shall print the following columns*/
+		btrfs_list_setup_print_column(BTRFS_LIST_OBJECTID);
+		btrfs_list_setup_print_column(BTRFS_LIST_GENERATION);
+		btrfs_list_setup_print_column(BTRFS_LIST_TOP_LEVEL);
+		btrfs_list_setup_print_column(BTRFS_LIST_PATH);
+
+		ret = btrfs_list_subvols_print(fd, filter_set, NULL,
+			BTRFS_LIST_LAYOUT_DEFAULT, 1, NULL);
+
+		if (filter_set)
+			btrfs_list_free_filter_set(filter_set);
+	}
+
+	close_file_or_dir(fd, dirstream);
+	return !!ret;
+}
+
+/*
+ * Help text of 'btrfs subvolume set-default', handed to usage() by
+ * cmd_subvol_set_default().  NULL-terminated.
+ */
+static const char * const cmd_subvol_set_default_usage[] = {
+ "btrfs subvolume set-default <subvolid> <path>",
+ "Set the default subvolume of a filesystem",
+ NULL
+};
+
+/*
+ * Implementation of 'btrfs subvolume set-default'.
+ *
+ * argv[1] is the subvolume id, parsed with arg_strtou64(); argv[2] is a path
+ * on the filesystem that receives the BTRFS_IOC_DEFAULT_SUBVOL ioctl.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_subvol_set_default(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	u64 objectid;
+	int fd;
+	int res;
+	int saved_errno;
+
+	if (check_argc_exact(argc, 3))
+		usage(cmd_subvol_set_default_usage);
+
+	objectid = arg_strtou64(argv[1]);
+
+	fd = btrfs_open_dir(argv[2], &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	res = ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &objectid);
+	/* keep errno of the ioctl, closing the directory may change it */
+	saved_errno = errno;
+	close_file_or_dir(fd, dirstream);
+
+	if (res >= 0)
+		return 0;
+
+	error("unable to set a new default subvolume: %s",
+		strerror(saved_errno));
+	return 1;
+}
+
+/*
+ * Help text of 'btrfs subvolume find-new', handed to usage() by
+ * cmd_subvol_find_new().  NULL-terminated.
+ */
+static const char * const cmd_subvol_find_new_usage[] = {
+ "btrfs subvolume find-new <path> <lastgen>",
+ "List the recently modified files in a filesystem",
+ NULL
+};
+
+/*
+ * Implementation of 'btrfs subvolume find-new'.
+ *
+ * argv[1] must be a subvolume, argv[2] is the generation passed on to
+ * btrfs_list_find_updated_files().  A BTRFS_IOC_SYNC is issued on the
+ * subvolume before the search.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_subvol_find_new(int argc, char **argv)
+{
+	DIR *dirstream = NULL;
+	char *subvol;
+	u64 last_gen;
+	int fd;
+	int ret;
+
+	if (check_argc_exact(argc, 3))
+		usage(cmd_subvol_find_new_usage);
+
+	subvol = argv[1];
+	last_gen = arg_strtou64(argv[2]);
+
+	ret = test_issubvolume(subvol);
+	if (ret <= 0) {
+		if (ret < 0)
+			error("cannot access subvolume %s: %s", subvol,
+				strerror(-ret));
+		else
+			error("not a subvolume: %s", subvol);
+		return 1;
+	}
+
+	fd = btrfs_open_dir(subvol, &dirstream, 1);
+	if (fd < 0)
+		return 1;
+
+	if (ioctl(fd, BTRFS_IOC_SYNC) < 0) {
+		error("sync ioctl failed on '%s': %s",
+			subvol, strerror(errno));
+		ret = 1;
+	} else {
+		ret = !!btrfs_list_find_updated_files(fd, 0, last_gen);
+	}
+
+	close_file_or_dir(fd, dirstream);
+	return ret;
+}
+
+/*
+ * Help text of 'btrfs subvolume show', handed to usage() by
+ * cmd_subvol_show().  NULL-terminated.
+ */
+static const char * const cmd_subvol_show_usage[] = {
+ "btrfs subvolume show <subvol-path>",
+ "Show more information of the subvolume",
+ NULL
+};
+
+/*
+ * Implementation of 'btrfs subvolume show'.
+ *
+ * Resolves the given path, verifies that it is a subvolume, and prints its
+ * name, UUIDs, creation time, ids, generation numbers and flags, followed by
+ * the paths of its snapshots.  For the toplevel subvolume only a one-line
+ * note is printed.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int cmd_subvol_show(int argc, char **argv)
+{
+	struct root_info get_ri;
+	struct btrfs_list_filter_set *filter_set;
+	char tstr[256];
+	char uuidparse[BTRFS_UUID_UNPARSED_SIZE];
+	char *fullpath = NULL, *svpath = NULL, *mnt = NULL;
+	char raw_prefix[] = "\t\t\t\t";
+	u64 sv_id;
+	int fd = -1, mntfd = -1;
+	int ret = 1;
+	DIR *dirstream1 = NULL, *dirstream2 = NULL;
+
+	clean_args_no_options(argc, argv, cmd_subvol_show_usage);
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_subvol_show_usage);
+
+	fullpath = realpath(argv[optind], NULL);
+	if (!fullpath) {
+		error("cannot find real path for '%s': %s",
+			argv[optind], strerror(errno));
+		goto out;
+	}
+
+	ret = test_issubvolume(fullpath);
+	if (ret < 0) {
+		error("cannot access subvolume %s: %s", fullpath,
+			strerror(-ret));
+		goto out;
+	}
+	if (!ret) {
+		error("not a subvolume: %s", fullpath);
+		ret = 1;
+		goto out;
+	}
+
+	ret = find_mount_root(fullpath, &mnt);
+	if (ret < 0) {
+		error("find_mount_root failed on '%s': %s",
+			fullpath, strerror(-ret));
+		goto out;
+	}
+	if (ret > 0) {
+		error("%s doesn't belong to btrfs mount point", fullpath);
+		goto out;
+	}
+	ret = 1;
+	/* svpath points into fullpath and is only used in error messages */
+	svpath = get_subvol_name(mnt, fullpath);
+
+	fd = btrfs_open_dir(fullpath, &dirstream1, 1);
+	if (fd < 0)
+		goto out;
+
+	ret = btrfs_list_get_path_rootid(fd, &sv_id);
+	if (ret) {
+		error("can't get rootid for '%s'", fullpath);
+		goto out;
+	}
+
+	mntfd = btrfs_open_dir(mnt, &dirstream2, 1);
+	if (mntfd < 0) {
+		/* ret is 0 from the call above, do not report success */
+		ret = 1;
+		goto out;
+	}
+
+	if (sv_id == BTRFS_FS_TREE_OBJECTID) {
+		printf("%s is toplevel subvolume\n", fullpath);
+		goto out;
+	}
+
+	memset(&get_ri, 0, sizeof(get_ri));
+	get_ri.root_id = sv_id;
+
+	ret = btrfs_get_subvol(mntfd, &get_ri);
+	if (ret) {
+		error("can't find '%s'", svpath);
+		goto out;
+	}
+
+	/* print the info */
+	printf("%s\n", fullpath);
+	printf("\tName: \t\t\t%s\n", get_ri.name);
+
+	/* a null uuid is shown as "-" */
+	if (uuid_is_null(get_ri.uuid))
+		strcpy(uuidparse, "-");
+	else
+		uuid_unparse(get_ri.uuid, uuidparse);
+	printf("\tUUID: \t\t\t%s\n", uuidparse);
+
+	if (uuid_is_null(get_ri.puuid))
+		strcpy(uuidparse, "-");
+	else
+		uuid_unparse(get_ri.puuid, uuidparse);
+	printf("\tParent UUID: \t\t%s\n", uuidparse);
+
+	if (uuid_is_null(get_ri.ruuid))
+		strcpy(uuidparse, "-");
+	else
+		uuid_unparse(get_ri.ruuid, uuidparse);
+	printf("\tReceived UUID: \t\t%s\n", uuidparse);
+
+	if (get_ri.otime) {
+		struct tm tm;
+
+		localtime_r(&get_ri.otime, &tm);
+		strftime(tstr, sizeof(tstr), "%Y-%m-%d %X %z", &tm);
+	} else
+		strcpy(tstr, "-");
+	printf("\tCreation time: \t\t%s\n", tstr);
+
+	printf("\tSubvolume ID: \t\t%llu\n", get_ri.root_id);
+	printf("\tGeneration: \t\t%llu\n", get_ri.gen);
+	printf("\tGen at creation: \t%llu\n", get_ri.ogen);
+	printf("\tParent ID: \t\t%llu\n", get_ri.ref_tree);
+	printf("\tTop level ID: \t\t%llu\n", get_ri.top_id);
+
+	if (get_ri.flags & BTRFS_ROOT_SUBVOL_RDONLY)
+		printf("\tFlags: \t\t\treadonly\n");
+	else
+		printf("\tFlags: \t\t\t-\n");
+
+	/* print the snapshots of the given subvol if any*/
+	printf("\tSnapshot(s):\n");
+	filter_set = btrfs_list_alloc_filter_set();
+	/* the BY_PARENT filter is handed the address of the uuid as u64 */
+	btrfs_list_setup_filter(&filter_set, BTRFS_LIST_FILTER_BY_PARENT,
+				(u64)(unsigned long)get_ri.uuid);
+	btrfs_list_setup_print_column(BTRFS_LIST_PATH);
+	btrfs_list_subvols_print(fd, filter_set, NULL, BTRFS_LIST_LAYOUT_RAW,
+			1, raw_prefix);
+
+	/* clean up */
+	free(get_ri.path);
+	free(get_ri.name);
+	free(get_ri.full_path);
+	btrfs_list_free_filter_set(filter_set);
+
+out:
+	close_file_or_dir(fd, dirstream1);
+	close_file_or_dir(mntfd, dirstream2);
+	free(mnt);
+	free(fullpath);
+	return !!ret;
+}
+
+static const char * const cmd_subvol_sync_usage[] = {
+ "btrfs subvolume sync <path> [<subvol-id>...]",
+ "Wait until given subvolume(s) are completely removed from the filesystem.",
+ "Wait until given subvolume(s) are completely removed from the filesystem",
+ "after deletion.",
+ "If no subvolume id is given, wait until all current deletion requests",
+ "are completed, but do not wait for subvolumes deleted meanwhile.",
+ "The status of subvolume ids is checked periodically.",
+ "",
+ "-s <N> sleep N seconds between checks (default: 1)",
+ NULL
+};
+
+#if 0
+/*
+ * If we're looking for any dead subvolume, take a shortcut and look
+ * for any ORPHAN_ITEMs in the tree root.
+ *
+ * Returns 1 if an orphan item with a matching root item was found, 0 if no
+ * (further) orphan item exists, or a negative errno value if the tree search
+ * ioctl fails.
+ */
+static int fs_has_dead_subvolumes(int fd)
+{
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header sh;
+	u64 min_subvolid = 0;
+
+	/* Do not hand uninitialized stack contents to the ioctl */
+	memset(&args, 0, sizeof(args));
+
+again:
+	/* Look for the first orphan item at or after min_subvolid */
+	sk->tree_id = BTRFS_ROOT_TREE_OBJECTID;
+	sk->min_objectid = BTRFS_ORPHAN_OBJECTID;
+	sk->max_objectid = BTRFS_ORPHAN_OBJECTID;
+	sk->min_type = BTRFS_ORPHAN_ITEM_KEY;
+	sk->max_type = BTRFS_ORPHAN_ITEM_KEY;
+	sk->min_offset = min_subvolid;
+	sk->max_offset = (u64)-1;
+	sk->min_transid = 0;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 1;
+
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+	if (ret < 0)
+		return -errno;
+
+	if (!sk->nr_items)
+		return 0;
+
+	/* Copy the header out of the result buffer before reusing args */
+	memcpy(&sh, args.buf, sizeof(sh));
+	min_subvolid = sh.offset;
+
+	/*
+	 * Verify that the root item is really there and we haven't hit
+	 * a stale orphan
+	 */
+	sk->tree_id = BTRFS_ROOT_TREE_OBJECTID;
+	sk->min_objectid = min_subvolid;
+	sk->max_objectid = min_subvolid;
+	sk->min_type = BTRFS_ROOT_ITEM_KEY;
+	sk->max_type = BTRFS_ROOT_ITEM_KEY;
+	sk->min_offset = 0;
+	sk->max_offset = (u64)-1;
+	sk->min_transid = 0;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 1;
+
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+	if (ret < 0)
+		return -errno;
+
+	/*
+	 * Stale orphan, try the next one
+	 */
+	if (!sk->nr_items) {
+		min_subvolid++;
+		goto again;
+	}
+
+	return 1;
+}
+#endif
+
+#define SUBVOL_ID_BATCH 1024
+
+/*
+ * Enumerate all dead subvolumes that exist in the filesystem.
+ *
+ * Searches the root tree through @fd for orphan items and stores the key
+ * offset of each one (the id of the dead subvolume) in a heap array that is
+ * grown in steps of SUBVOL_ID_BATCH entries.
+ *
+ * Returns the number of ids stored in *@ids, or a negative errno value on
+ * failure.  *@ids is NULL when nothing was stored; otherwise the caller owns
+ * the array and must free() it, also after an error return, because entries
+ * collected before the failure are left in place.
+ */
+static int enumerate_dead_subvols(int fd, u64 **ids)
+{
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	int idx = 0;
+	int count = 0;
+
+	memset(&args, 0, sizeof(args));
+
+	sk->tree_id = BTRFS_ROOT_TREE_OBJECTID;
+	sk->min_objectid = BTRFS_ORPHAN_OBJECTID;
+	sk->max_objectid = BTRFS_ORPHAN_OBJECTID;
+	sk->min_type = BTRFS_ORPHAN_ITEM_KEY;
+	sk->max_type = BTRFS_ORPHAN_ITEM_KEY;
+	sk->min_offset = 0;
+	sk->max_offset = (u64)-1;
+	sk->min_transid = 0;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 4096;
+
+	*ids = NULL;
+	while (1) {
+		struct btrfs_ioctl_search_header *sh;
+		unsigned long off;
+		int i;
+
+		ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+		if (ret < 0)
+			return -errno;
+
+		if (!sk->nr_items)
+			return idx;
+
+		off = 0;
+		for (i = 0; i < sk->nr_items; i++) {
+			sh = (struct btrfs_ioctl_search_header*)(args.buf + off);
+			off += sizeof(*sh);
+
+			if (sh->type == BTRFS_ORPHAN_ITEM_KEY) {
+				if (idx >= count) {
+					u64 *newids;
+
+					count += SUBVOL_ID_BATCH;
+					/*
+					 * @count is a number of entries, so
+					 * scale it to bytes for realloc().
+					 */
+					newids = realloc(*ids,
+							 count * sizeof(*newids));
+					if (!newids)
+						return -ENOMEM;
+					*ids = newids;
+				}
+				(*ids)[idx] = sh->offset;
+				idx++;
+			}
+			/* Skip the item payload that follows the header */
+			off += sh->len;
+
+			/* Remember the last key seen to resume the search */
+			sk->min_objectid = sh->objectid;
+			sk->min_type = sh->type;
+			sk->min_offset = sh->offset;
+		}
+		/* Continue right after the last key, unless it cannot advance */
+		if (sk->min_offset < (u64)-1)
+			sk->min_offset++;
+		else
+			break;
+		if (sk->min_type != BTRFS_ORPHAN_ITEM_KEY)
+			break;
+		if (sk->min_objectid != BTRFS_ORPHAN_OBJECTID)
+			break;
+	}
+
+	return idx;
+}
+
+/*
+ * Implementation of 'btrfs subvolume sync'.
+ *
+ * Opens the directory given as <path> and collects the subvolume ids to wait
+ * for: the ids listed on the command line or, when none are given, all ids
+ * returned by enumerate_dead_subvols().  The ids are then passed to
+ * wait_for_subvolume_cleaning() together with the sleep interval from -s.
+ *
+ * Returns 0 on success and 1 on any failure.
+ */
+static int cmd_subvol_sync(int argc, char **argv)
+{
+	int fd = -1;
+	int i;
+	int ret = 1;
+	DIR *dirstream = NULL;
+	u64 *ids = NULL;
+	int id_count;
+	int sleep_interval = 1;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "s:");
+
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 's':
+			/*
+			 * getopt() leaves the option argument in optarg;
+			 * argv[optind] is already the next unparsed
+			 * argument (or NULL at the end of the command line).
+			 */
+			sleep_interval = atoi(optarg);
+			if (sleep_interval < 1) {
+				error("invalid sleep interval %s", optarg);
+				ret = 1;
+				goto out;
+			}
+			break;
+		default:
+			usage(cmd_subvol_sync_usage);
+		}
+	}
+
+	if (check_argc_min(argc - optind, 1))
+		usage(cmd_subvol_sync_usage);
+
+	fd = btrfs_open_dir(argv[optind], &dirstream, 1);
+	if (fd < 0) {
+		ret = 1;
+		goto out;
+	}
+	optind++;
+
+	id_count = argc - optind;
+	if (!id_count) {
+		/* No ids given: wait for everything that is dead right now */
+		id_count = enumerate_dead_subvols(fd, &ids);
+		if (id_count < 0) {
+			error("can't enumerate dead subvolumes: %s",
+					strerror(-id_count));
+			ret = 1;
+			goto out;
+		}
+		if (id_count == 0) {
+			ret = 0;
+			goto out;
+		}
+	} else {
+		ids = malloc(id_count * sizeof(*ids));
+		if (!ids) {
+			error("not enough memory");
+			ret = 1;
+			goto out;
+		}
+
+		for (i = 0; i < id_count; i++) {
+			u64 id;
+			const char *arg;
+			char *end;
+
+			arg = argv[optind + i];
+			errno = 0;
+			id = strtoull(arg, &end, 10);
+			/*
+			 * errno is positive on failure; also reject empty
+			 * input and trailing garbage, which strtoull() does
+			 * not report through errno.
+			 */
+			if (errno || end == arg || *end != '\0') {
+				error("unrecognized subvolume id %s", arg);
+				ret = 1;
+				goto out;
+			}
+			if (id < BTRFS_FIRST_FREE_OBJECTID
+					|| id > BTRFS_LAST_FREE_OBJECTID) {
+				error("subvolume id %s out of range", arg);
+				ret = 1;
+				goto out;
+			}
+			ids[i] = id;
+		}
+	}
+
+	ret = wait_for_subvolume_cleaning(fd, id_count, ids, sleep_interval);
+
+out:
+	free(ids);
+	close_file_or_dir(fd, dirstream);
+
+	return !!ret;
+}
+
+/* One-line summary of the 'subvolume' command group. */
+static const char subvolume_cmd_group_info[] =
+	"manage subvolumes: create, delete, list, etc";
+
+/*
+ * Command table of the 'subvolume' group.  Each entry lists the subcommand
+ * token, its handler, its usage strings, no nested group and no flags; the
+ * table is terminated by NULL_CMD_STRUCT.
+ */
+const struct cmd_group subvolume_cmd_group = {
+	.usagestr = subvolume_cmd_group_usage,
+	.infostr = subvolume_cmd_group_info,
+	.commands = {
+		{ "create", cmd_subvol_create,
+			cmd_subvol_create_usage, NULL, 0 },
+		{ "delete", cmd_subvol_delete,
+			cmd_subvol_delete_usage, NULL, 0 },
+		{ "list", cmd_subvol_list,
+			cmd_subvol_list_usage, NULL, 0 },
+		{ "snapshot", cmd_subvol_snapshot,
+			cmd_subvol_snapshot_usage, NULL, 0 },
+		{ "get-default", cmd_subvol_get_default,
+			cmd_subvol_get_default_usage, NULL, 0 },
+		{ "set-default", cmd_subvol_set_default,
+			cmd_subvol_set_default_usage, NULL, 0 },
+		{ "find-new", cmd_subvol_find_new,
+			cmd_subvol_find_new_usage, NULL, 0 },
+		{ "show", cmd_subvol_show,
+			cmd_subvol_show_usage, NULL, 0 },
+		{ "sync", cmd_subvol_sync,
+			cmd_subvol_sync_usage, NULL, 0 },
+		NULL_CMD_STRUCT
+	}
+};
+
+/*
+ * Entry point of the 'btrfs subvolume' command group: passes the arguments
+ * and the subvolume command table to handle_command_group() and returns its
+ * result.
+ */
+int cmd_subvolume(int argc, char **argv)
+{
+	int status;
+
+	status = handle_command_group(&subvolume_cmd_group, argc, argv);
+	return status;
+}
diff --git a/commands.h b/commands.h
new file mode 100644
index 00000000..2da093bf
--- /dev/null
+++ b/commands.h
@@ -0,0 +1,134 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_COMMANDS_H__
+#define __BTRFS_COMMANDS_H__
+
+enum {
+ CMD_HIDDEN = (1 << 0), /* should not be in help listings */
+ CMD_ALIAS = (1 << 1), /* alias of next command in cmd_group */
+};
+
+struct cmd_struct {
+ const char *token;
+ int (*fn)(int, char **);
+
+ /*
+ * Usage strings
+ *
+ * A NULL-terminated array of the following format:
+ *
+ * usagestr[0] - one-line synopsis (required)
+ * usagestr[1] - one-line short description (required)
+ * usagestr[2..m] - a long (possibly multi-line) description
+ * (optional)
+ * usagestr[m + 1] - an empty line separator (required if at least one
+ * option string is given, not needed otherwise)
+ * usagestr[m + 2..n] - option strings, one option per line
+ * (optional)
+ * usagestr[n + 1] - NULL terminator
+ *
+ * Options (if present) should always (even if there is no long
+ * description) be prepended with an empty line. Supplied strings are
+ * indented but otherwise printed as-is, no automatic wrapping is done.
+ *
+ * Grep for cmd_*_usage[] for examples.
+ */
+ const char * const *usagestr;
+
+ /* should be NULL if token is not a subgroup */
+ const struct cmd_group *next;
+
+ /* CMD_* flags above */
+ int flags;
+};
+
+#define NULL_CMD_STRUCT {NULL, NULL, NULL, NULL, 0}
+
+struct cmd_group {
+ const char * const *usagestr;
+ const char *infostr;
+
+ const struct cmd_struct commands[];
+};
+
+/* btrfs.c */
+int prefixcmp(const char *str, const char *prefix);
+
+int handle_command_group(const struct cmd_group *grp, int argc,
+ char **argv);
+
+/* help.c */
+extern const char * const generic_cmd_help_usage[];
+
+void usage(const char * const *usagestr) __attribute__((noreturn));
+void usage_command(const struct cmd_struct *cmd, int full, int err);
+void usage_command_group(const struct cmd_group *grp, int all, int err);
+void usage_command_group_short(const struct cmd_group *grp);
+
+void help_unknown_token(const char *arg, const struct cmd_group *grp) __attribute__((noreturn));
+void help_ambiguous_token(const char *arg, const struct cmd_group *grp) __attribute__((noreturn));
+
+void help_command_group(const struct cmd_group *grp, int argc, char **argv);
+
+extern const struct cmd_group subvolume_cmd_group;
+extern const struct cmd_group filesystem_cmd_group;
+extern const struct cmd_group balance_cmd_group;
+extern const struct cmd_group device_cmd_group;
+extern const struct cmd_group scrub_cmd_group;
+extern const struct cmd_group inspect_cmd_group;
+extern const struct cmd_group property_cmd_group;
+extern const struct cmd_group quota_cmd_group;
+extern const struct cmd_group qgroup_cmd_group;
+extern const struct cmd_group replace_cmd_group;
+extern const struct cmd_group rescue_cmd_group;
+
+extern const char * const cmd_send_usage[];
+extern const char * const cmd_receive_usage[];
+extern const char * const cmd_check_usage[];
+extern const char * const cmd_chunk_recover_usage[];
+extern const char * const cmd_super_recover_usage[];
+extern const char * const cmd_restore_usage[];
+extern const char * const cmd_rescue_usage[];
+
+int cmd_subvolume(int argc, char **argv);
+int cmd_filesystem(int argc, char **argv);
+int cmd_balance(int argc, char **argv);
+int cmd_device(int argc, char **argv);
+int cmd_scrub(int argc, char **argv);
+int cmd_check(int argc, char **argv);
+int cmd_chunk_recover(int argc, char **argv);
+int cmd_super_recover(int argc, char **argv);
+int cmd_inspect(int argc, char **argv);
+int cmd_property(int argc, char **argv);
+int cmd_send(int argc, char **argv);
+int cmd_receive(int argc, char **argv);
+int cmd_quota(int argc, char **argv);
+int cmd_qgroup(int argc, char **argv);
+int cmd_replace(int argc, char **argv);
+int cmd_restore(int argc, char **argv);
+int cmd_select_super(int argc, char **argv);
+int cmd_dump_super(int argc, char **argv);
+int cmd_debug_tree(int argc, char **argv);
+int cmd_rescue(int argc, char **argv);
+
+/* subvolume exported functions */
+int test_issubvolume(const char *path);
+
+/* send.c */
+char *get_subvol_name(char *mnt, char *full_path);
+
+#endif
diff --git a/config.h.in b/config.h.in
new file mode 100644
index 00000000..d938e813
--- /dev/null
+++ b/config.h.in
@@ -0,0 +1,129 @@
+/* config.h.in. Generated from configure.ac by autoheader. */
+
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+
+/* disable backtrace stuff in kerncompat.h */
+#undef BTRFS_DISABLE_BACKTRACE
+
+/* Define to 1 if you have the `backtrace' function. */
+#undef HAVE_BACKTRACE
+
+/* Define to 1 if you have the `backtrace_symbols_fd' function. */
+#undef HAVE_BACKTRACE_SYMBOLS_FD
+
+/* Define to 1 if you have the <execinfo.h> header file. */
+#undef HAVE_EXECINFO_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `openat' function. */
+#undef HAVE_OPENAT
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* URL */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Enable extensions on AIX 3, Interix. */
+#ifndef _ALL_SOURCE
+# undef _ALL_SOURCE
+#endif
+/* Enable GNU extensions on systems that have them. */
+#ifndef _GNU_SOURCE
+# undef _GNU_SOURCE
+#endif
+/* Enable threading extensions on Solaris. */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# undef _POSIX_PTHREAD_SEMANTICS
+#endif
+/* Enable extensions on HP NonStop. */
+#ifndef _TANDEM_SOURCE
+# undef _TANDEM_SOURCE
+#endif
+/* Enable general extensions on Solaris. */
+#ifndef __EXTENSIONS__
+# undef __EXTENSIONS__
+#endif
+
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+ significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+# define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+# undef WORDS_BIGENDIAN
+# endif
+#endif
+
+/* Enable large inode numbers on Mac OS X 10.5. */
+#ifndef _DARWIN_USE_64_BIT_INODE
+# define _DARWIN_USE_64_BIT_INODE 1
+#endif
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+#undef _FILE_OFFSET_BITS
+
+/* Define for large files, on AIX-style hosts. */
+#undef _LARGE_FILES
+
+/* Define to 1 if on MINIX. */
+#undef _MINIX
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+ this defined. */
+#undef _POSIX_1_SOURCE
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+#undef _POSIX_SOURCE
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+
+/* Define to empty if the keyword `volatile' does not work. Warning: valid
+ code using `volatile' can become incorrect without. Disable with care. */
+#undef volatile
diff --git a/config/config.guess b/config/config.guess
new file mode 100755
index 00000000..c6fad2f5
--- /dev/null
+++ b/config/config.guess
@@ -0,0 +1,1568 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+# Copyright 1992-2013 Free Software Foundation, Inc.
+
+timestamp='2013-06-10'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program. This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+#
+# Originally written by Per Bothner.
+#
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+#
+# Please send patches with a ChangeLog entry to config-patches@gnu.org.
+
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Operation modes:
+ -h, --help print this help, then exit
+ -t, --time-stamp print date of last modification, then exit
+ -v, --version print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright 1992-2013 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+ case $1 in
+ --time-stamp | --time* | -t )
+ echo "$timestamp" ; exit ;;
+ --version | -v )
+ echo "$version" ; exit ;;
+ --help | --h* | -h )
+ echo "$usage"; exit ;;
+ -- ) # Stop option processing
+ shift; break ;;
+ - ) # Use stdin as input.
+ break ;;
+ -* )
+ echo "$me: invalid option $1$help" >&2
+ exit 1 ;;
+ * )
+ break ;;
+ esac
+done
+
+if test $# != 0; then
+ echo "$me: too many arguments$help" >&2
+ exit 1
+fi
+
+trap 'exit 1' 1 2 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,) echo "int x;" > $dummy.c ;
+ for c in cc gcc c89 c99 ; do
+ if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
+ CC_FOR_BUILD="$c"; break ;
+ fi ;
+ done ;
+ if test x"$CC_FOR_BUILD" = x ; then
+ CC_FOR_BUILD=no_compiler_found ;
+ fi
+ ;;
+ ,,*) CC_FOR_BUILD=$CC ;;
+ ,*,*) CC_FOR_BUILD=$HOST_CC ;;
+esac ; set_cc_for_build= ;'
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+ PATH=$PATH:/.attbin ; export PATH
+fi
+
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+case "${UNAME_SYSTEM}" in
+Linux|GNU|GNU/*)
+ # If the system lacks a compiler, then just pick glibc.
+ # We could probably try harder.
+ LIBC=gnu
+
+ eval $set_cc_for_build
+ cat <<-EOF > $dummy.c
+ #include <features.h>
+ #if defined(__UCLIBC__)
+ LIBC=uclibc
+ #elif defined(__dietlibc__)
+ LIBC=dietlibc
+ #else
+ LIBC=gnu
+ #endif
+ EOF
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+ ;;
+esac
+
+case "${UNAME_MACHINE}" in
+ i?86)
+ test -z "$VENDOR" && VENDOR=pc
+ ;;
+ *)
+ test -z "$VENDOR" && VENDOR=unknown
+ ;;
+esac
+test -f /etc/SuSE-release -o -f /.buildenv && VENDOR=suse
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+ *:NetBSD:*:*)
+ # NetBSD (nbsd) targets should (where applicable) match one or
+ # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+ # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
+ # switched to ELF, *-*-netbsd* would select the old
+ # object file format. This provides both forward
+ # compatibility and a consistent mechanism for selecting the
+ # object file format.
+ #
+ # Note: NetBSD doesn't particularly care about the vendor
+ # portion of the name. We always set it to "unknown".
+ sysctl="sysctl -n hw.machine_arch"
+ UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
+ /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
+ case "${UNAME_MACHINE_ARCH}" in
+ armeb) machine=armeb-unknown ;;
+ arm*) machine=arm-unknown ;;
+ sh3el) machine=shl-unknown ;;
+ sh3eb) machine=sh-unknown ;;
+ sh5el) machine=sh5le-unknown ;;
+ *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
+ esac
+ # The Operating System including object format, if it has switched
+ # to ELF recently, or will in the future.
+ case "${UNAME_MACHINE_ARCH}" in
+ arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+ eval $set_cc_for_build
+ if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ELF__
+ then
+ # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+ # Return netbsd for either. FIX?
+ os=netbsd
+ else
+ os=netbsdelf
+ fi
+ ;;
+ *)
+ os=netbsd
+ ;;
+ esac
+ # The OS release
+ # Debian GNU/NetBSD machines have a different userland, and
+ # thus, need a distinct triplet. However, they do not need
+ # kernel version information, so it can be replaced with a
+ # suitable tag, in the style of linux-gnu.
+ case "${UNAME_VERSION}" in
+ Debian*)
+ release='-gnu'
+ ;;
+ *)
+ release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ ;;
+ esac
+ # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+ # contains redundant information, the shorter form:
+ # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+ echo "${machine}-${os}${release}"
+ exit ;;
+ *:Bitrig:*:*)
+ UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+ echo ${UNAME_MACHINE_ARCH}-${VENDOR}-bitrig${UNAME_RELEASE}
+ exit ;;
+ *:OpenBSD:*:*)
+ UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+ echo ${UNAME_MACHINE_ARCH}-${VENDOR}-openbsd${UNAME_RELEASE}
+ exit ;;
+ *:ekkoBSD:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-ekkobsd${UNAME_RELEASE}
+ exit ;;
+ *:SolidBSD:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-solidbsd${UNAME_RELEASE}
+ exit ;;
+ macppc:MirBSD:*:*)
+ echo powerpc-${VENDOR}-mirbsd${UNAME_RELEASE}
+ exit ;;
+ *:MirBSD:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-mirbsd${UNAME_RELEASE}
+ exit ;;
+ alpha:OSF1:*:*)
+ case $UNAME_RELEASE in
+ *4.0)
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+ ;;
+ *5.*)
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+ ;;
+ esac
+ # According to Compaq, /usr/sbin/psrinfo has been available on
+ # OSF/1 and Tru64 systems produced since 1995. I hope that
+ # covers most systems running today. This code pipes the CPU
+ # types through head -n 1, so we only detect the type of CPU 0.
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+ case "$ALPHA_CPU_TYPE" in
+ "EV4 (21064)")
+ UNAME_MACHINE="alpha" ;;
+ "EV4.5 (21064)")
+ UNAME_MACHINE="alpha" ;;
+ "LCA4 (21066/21068)")
+ UNAME_MACHINE="alpha" ;;
+ "EV5 (21164)")
+ UNAME_MACHINE="alphaev5" ;;
+ "EV5.6 (21164A)")
+ UNAME_MACHINE="alphaev56" ;;
+ "EV5.6 (21164PC)")
+ UNAME_MACHINE="alphapca56" ;;
+ "EV5.7 (21164PC)")
+ UNAME_MACHINE="alphapca57" ;;
+ "EV6 (21264)")
+ UNAME_MACHINE="alphaev6" ;;
+ "EV6.7 (21264A)")
+ UNAME_MACHINE="alphaev67" ;;
+ "EV6.8CB (21264C)")
+ UNAME_MACHINE="alphaev68" ;;
+ "EV6.8AL (21264B)")
+ UNAME_MACHINE="alphaev68" ;;
+ "EV6.8CX (21264D)")
+ UNAME_MACHINE="alphaev68" ;;
+ "EV6.9A (21264/EV69A)")
+ UNAME_MACHINE="alphaev69" ;;
+ "EV7 (21364)")
+ UNAME_MACHINE="alphaev7" ;;
+ "EV7.9 (21364A)")
+ UNAME_MACHINE="alphaev79" ;;
+ esac
+ # A Pn.n version is a patched version.
+ # A Vn.n version is a released version.
+ # A Tn.n version is a released field test version.
+ # A Xn.n version is an unreleased experimental baselevel.
+ # 1.2 uses "1.2" for uname -r.
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ # Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+ exitcode=$?
+ trap '' 0
+ exit $exitcode ;;
+ Alpha\ *:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # Should we change UNAME_MACHINE based on the output of uname instead
+ # of the specific Alpha model?
+ echo alpha-pc-interix
+ exit ;;
+ 21064:Windows_NT:50:3)
+ echo alpha-dec-winnt3.5
+ exit ;;
+ Amiga*:UNIX_System_V:4.0:*)
+ echo m68k-${VENDOR}-sysv4
+ exit ;;
+ *:[Aa]miga[Oo][Ss]:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-amigaos
+ exit ;;
+ *:[Mm]orph[Oo][Ss]:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-morphos
+ exit ;;
+ *:OS/390:*:*)
+ echo i370-ibm-openedition
+ exit ;;
+ *:z/VM:*:*)
+ echo s390-ibm-zvmoe
+ exit ;;
+ *:OS400:*:*)
+ echo powerpc-ibm-os400
+ exit ;;
+ arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+ echo arm-acorn-riscix${UNAME_RELEASE}
+ exit ;;
+ arm*:riscos:*:*|arm*:RISCOS:*:*)
+ echo arm-${VENDOR}-riscos
+ exit ;;
+ SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+ echo hppa1.1-hitachi-hiuxmpp
+ exit ;;
+ Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+ # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then
+ echo pyramid-pyramid-sysv3
+ else
+ echo pyramid-pyramid-bsd
+ fi
+ exit ;;
+ NILE*:*:*:dcosx)
+ echo pyramid-pyramid-svr4
+ exit ;;
+ DRS?6000:unix:4.0:6*)
+ echo sparc-icl-nx6
+ exit ;;
+ DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+ case `/usr/bin/uname -p` in
+ sparc) echo sparc-icl-nx7; exit ;;
+ esac ;;
+ s390x:SunOS:*:*)
+ echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ sun4H:SunOS:5.*:*)
+ echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+ echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+ echo i386-pc-auroraux${UNAME_RELEASE}
+ exit ;;
+ i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
+ eval $set_cc_for_build
+ SUN_ARCH="i386"
+ # If there is a compiler, see if it is configured for 64-bit objects.
+ # Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
+ # This test works for both compilers.
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+ if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ SUN_ARCH="x86_64"
+ fi
+ fi
+ echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ sun4*:SunOS:6*:*)
+ # According to config.sub, this is the proper way to canonicalize
+ # SunOS6. Hard to guess exactly what SunOS6 will be like, but
+ # it's likely to be more like Solaris than SunOS4.
+ echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ sun4*:SunOS:*:*)
+ case "`/usr/bin/arch -k`" in
+ Series*|S4*)
+ UNAME_RELEASE=`uname -v`
+ ;;
+ esac
+ # Japanese Language versions have a version number like `4.1.3-JL'.
+ echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+ exit ;;
+ sun3*:SunOS:*:*)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+ exit ;;
+ sun*:*:4.2BSD:*)
+ UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+ test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+ case "`/bin/arch`" in
+ sun3)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+ ;;
+ sun4)
+ echo sparc-sun-sunos${UNAME_RELEASE}
+ ;;
+ esac
+ exit ;;
+ aushp:SunOS:*:*)
+ echo sparc-auspex-sunos${UNAME_RELEASE}
+ exit ;;
+ # The situation for MiNT is a little confusing. The machine name
+ # can be virtually everything (everything which is not
+ # "atarist" or "atariste" at least should have a processor
+ # > m68000). The system name ranges from "MiNT" over "FreeMiNT"
+ # to the lowercase version "mint" (or "freemint"). Finally
+ # the system name "TOS" denotes a system which is actually not
+ # MiNT. But MiNT is downward compatible to TOS, so this should
+ # be no problem.
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit ;;
+ atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit ;;
+ *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit ;;
+ milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+ echo m68k-milan-mint${UNAME_RELEASE}
+ exit ;;
+ hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+ echo m68k-hades-mint${UNAME_RELEASE}
+ exit ;;
+ *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+ echo m68k-${VENDOR}-mint${UNAME_RELEASE}
+ exit ;;
+ m68k:machten:*:*)
+ echo m68k-apple-machten${UNAME_RELEASE}
+ exit ;;
+ powerpc:machten:*:*)
+ echo powerpc-apple-machten${UNAME_RELEASE}
+ exit ;;
+ RISC*:Mach:*:*)
+ echo mips-dec-mach_bsd4.3
+ exit ;;
+ RISC*:ULTRIX:*:*)
+ echo mips-dec-ultrix${UNAME_RELEASE}
+ exit ;;
+ VAX*:ULTRIX*:*:*)
+ echo vax-dec-ultrix${UNAME_RELEASE}
+ exit ;;
+ 2020:CLIX:*:* | 2430:CLIX:*:*)
+ echo clipper-intergraph-clix${UNAME_RELEASE}
+ exit ;;
+ mips:*:*:UMIPS | mips:*:*:RISCos)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h> /* for printf() prototype */
+ int main (int argc, char *argv[]) {
+#else
+ int main (argc, argv) int argc; char *argv[]; {
+#endif
+ #if defined (host_mips) && defined (MIPSEB)
+ #if defined (SYSTYPE_SYSV)
+ printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+ #endif
+ #if defined (SYSTYPE_SVR4)
+ printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+ #endif
+ #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+ printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+ #endif
+ #endif
+ exit (-1);
+ }
+EOF
+ $CC_FOR_BUILD -o $dummy $dummy.c &&
+ dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+ SYSTEM_NAME=`$dummy $dummyarg` &&
+ { echo "$SYSTEM_NAME"; exit; }
+ echo mips-mips-riscos${UNAME_RELEASE}
+ exit ;;
+ Motorola:PowerMAX_OS:*:*)
+ echo powerpc-motorola-powermax
+ exit ;;
+ Motorola:*:4.3:PL8-*)
+ echo powerpc-harris-powermax
+ exit ;;
+ Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+ echo powerpc-harris-powermax
+ exit ;;
+ Night_Hawk:Power_UNIX:*:*)
+ echo powerpc-harris-powerunix
+ exit ;;
+ m88k:CX/UX:7*:*)
+ echo m88k-harris-cxux7
+ exit ;;
+ m88k:*:4*:R4*)
+ echo m88k-motorola-sysv4
+ exit ;;
+ m88k:*:3*:R3*)
+ echo m88k-motorola-sysv3
+ exit ;;
+ AViiON:dgux:*:*)
+ # DG/UX returns AViiON for all architectures
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
+ then
+ if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
+ [ ${TARGET_BINARY_INTERFACE}x = x ]
+ then
+ echo m88k-dg-dgux${UNAME_RELEASE}
+ else
+ echo m88k-dg-dguxbcs${UNAME_RELEASE}
+ fi
+ else
+ echo i586-dg-dgux${UNAME_RELEASE}
+ fi
+ exit ;;
+ M88*:DolphinOS:*:*) # DolphinOS (SVR3)
+ echo m88k-dolphin-sysv3
+ exit ;;
+ M88*:*:R3*:*)
+ # Delta 88k system running SVR3
+ echo m88k-motorola-sysv3
+ exit ;;
+ XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+ echo m88k-tektronix-sysv3
+ exit ;;
+ Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+ echo m68k-tektronix-bsd
+ exit ;;
+ *:IRIX*:*:*)
+ echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+ exit ;;
+ ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
+ exit ;; # Note that: echo "'`uname -s`'" gives 'AIX '
+ i*86:AIX:*:*)
+ echo i386-ibm-aix
+ exit ;;
+ ia64:AIX:*:*)
+ if [ -x /usr/bin/oslevel ] ; then
+ IBM_REV=`/usr/bin/oslevel`
+ else
+ IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+ fi
+ echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+ exit ;;
+ *:AIX:2:3)
+ if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #include <sys/systemcfg.h>
+
+ main()
+ {
+ if (!__power_pc())
+ exit(1);
+ puts("powerpc-ibm-aix3.2.5");
+ exit(0);
+ }
+EOF
+ if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+ then
+ echo "$SYSTEM_NAME"
+ else
+ echo rs6000-ibm-aix3.2.5
+ fi
+ elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+ echo rs6000-ibm-aix3.2.4
+ else
+ echo rs6000-ibm-aix3.2
+ fi
+ exit ;;
+ *:AIX:*:[4567])
+ IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+ if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
+ IBM_ARCH=rs6000
+ else
+ IBM_ARCH=powerpc
+ fi
+ if [ -x /usr/bin/oslevel ] ; then
+ IBM_REV=`/usr/bin/oslevel`
+ else
+ IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+ fi
+ echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+ exit ;;
+ *:AIX:*:*)
+ echo rs6000-ibm-aix
+ exit ;;
+ ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+ echo romp-ibm-bsd4.4
+ exit ;;
+ ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
+ echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
+ exit ;; # report: romp-ibm BSD 4.3
+ *:BOSX:*:*)
+ echo rs6000-bull-bosx
+ exit ;;
+ DPX/2?00:B.O.S.:*:*)
+ echo m68k-bull-sysv3
+ exit ;;
+ 9000/[34]??:4.3bsd:1.*:*)
+ echo m68k-hp-bsd
+ exit ;;
+ hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+ echo m68k-hp-bsd4.4
+ exit ;;
+ 9000/[34678]??:HP-UX:*:*)
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ case "${UNAME_MACHINE}" in
+ 9000/31? ) HP_ARCH=m68000 ;;
+ 9000/[34]?? ) HP_ARCH=m68k ;;
+ 9000/[678][0-9][0-9])
+ if [ -x /usr/bin/getconf ]; then
+ sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+ sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+ case "${sc_cpu_version}" in
+ 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+ 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+ 532) # CPU_PA_RISC2_0
+ case "${sc_kernel_bits}" in
+ 32) HP_ARCH="hppa2.0n" ;;
+ 64) HP_ARCH="hppa2.0w" ;;
+ '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
+ esac ;;
+ esac
+ fi
+ if [ "${HP_ARCH}" = "" ]; then
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+
+ #define _HPUX_SOURCE
+ #include <stdlib.h>
+ #include <unistd.h>
+
+ int main ()
+ {
+ #if defined(_SC_KERNEL_BITS)
+ long bits = sysconf(_SC_KERNEL_BITS);
+ #endif
+ long cpu = sysconf (_SC_CPU_VERSION);
+
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+ case CPU_PA_RISC2_0:
+ #if defined(_SC_KERNEL_BITS)
+ switch (bits)
+ {
+ case 64: puts ("hppa2.0w"); break;
+ case 32: puts ("hppa2.0n"); break;
+ default: puts ("hppa2.0"); break;
+ } break;
+ #else /* !defined(_SC_KERNEL_BITS) */
+ puts ("hppa2.0"); break;
+ #endif
+ default: puts ("hppa1.0"); break;
+ }
+ exit (0);
+ }
+EOF
+ (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+ test -z "$HP_ARCH" && HP_ARCH=hppa
+ fi ;;
+ esac
+ if [ ${HP_ARCH} = "hppa2.0w" ]
+ then
+ eval $set_cc_for_build
+
+ # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+ # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
+ # generating 64-bit code. GNU and HP use different nomenclature:
+ #
+ # $ CC_FOR_BUILD=cc ./config.guess
+ # => hppa2.0w-hp-hpux11.23
+ # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+ # => hppa64-hp-hpux11.23
+
+ if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+ grep -q __LP64__
+ then
+ HP_ARCH="hppa2.0w"
+ else
+ HP_ARCH="hppa64"
+ fi
+ fi
+ echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+ exit ;;
+ ia64:HP-UX:*:*)
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ echo ia64-hp-hpux${HPUX_REV}
+ exit ;;
+ 3050*:HI-UX:*:*)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #include <unistd.h>
+ int
+ main ()
+ {
+ long cpu = sysconf (_SC_CPU_VERSION);
+ /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+ true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct
+ results, however. */
+ if (CPU_IS_PA_RISC (cpu))
+ {
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+ case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+ default: puts ("hppa-hitachi-hiuxwe2"); break;
+ }
+ }
+ else if (CPU_IS_HP_MC68K (cpu))
+ puts ("m68k-hitachi-hiuxwe2");
+ else puts ("unknown-hitachi-hiuxwe2");
+ exit (0);
+ }
+EOF
+ $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
+ { echo "$SYSTEM_NAME"; exit; }
+ echo unknown-hitachi-hiuxwe2
+ exit ;;
+ 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+ echo hppa1.1-hp-bsd
+ exit ;;
+ 9000/8??:4.3bsd:*:*)
+ echo hppa1.0-hp-bsd
+ exit ;;
+ *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+ echo hppa1.0-hp-mpeix
+ exit ;;
+ hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+ echo hppa1.1-hp-osf
+ exit ;;
+ hp8??:OSF1:*:*)
+ echo hppa1.0-hp-osf
+ exit ;;
+ i*86:OSF1:*:*)
+ if [ -x /usr/sbin/sysversion ] ; then
+ echo ${UNAME_MACHINE}-${VENDOR}-osf1mk
+ else
+ echo ${UNAME_MACHINE}-${VENDOR}-osf1
+ fi
+ exit ;;
+ parisc*:Lites*:*:*)
+ echo hppa1.1-hp-lites
+ exit ;;
+ C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+ echo c1-convex-bsd
+ exit ;;
+ C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+ exit ;;
+ C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+ echo c34-convex-bsd
+ exit ;;
+ C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+ echo c38-convex-bsd
+ exit ;;
+ C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+ echo c4-convex-bsd
+ exit ;;
+ CRAY*Y-MP:*:*:*)
+ echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ CRAY*[A-Z]90:*:*:*)
+ echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+ -e 's/\.[^.]*$/.X/'
+ exit ;;
+ CRAY*TS:*:*:*)
+ echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ CRAY*T3E:*:*:*)
+ echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ CRAY*SV1:*:*:*)
+ echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ *:UNICOS/mp:*:*)
+ echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+ FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+ echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit ;;
+ 5000:UNIX_System_V:4.*:*)
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+ echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit ;;
+ i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+ echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
+ exit ;;
+ sparc*:BSD/OS:*:*)
+ echo sparc-${VENDOR}-bsdi${UNAME_RELEASE}
+ exit ;;
+ *:BSD/OS:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-bsdi${UNAME_RELEASE}
+ exit ;;
+ *:FreeBSD:*:*)
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ case ${UNAME_PROCESSOR} in
+ amd64)
+ echo x86_64-${VENDOR}-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ *)
+ echo ${UNAME_PROCESSOR}-${VENDOR}-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ esac
+ exit ;;
+ i*:CYGWIN*:*)
+ echo ${UNAME_MACHINE}-pc-cygwin
+ exit ;;
+ *:MINGW64*:*)
+ echo ${UNAME_MACHINE}-pc-mingw64
+ exit ;;
+ *:MINGW*:*)
+ echo ${UNAME_MACHINE}-pc-mingw32
+ exit ;;
+ i*:MSYS*:*)
+ echo ${UNAME_MACHINE}-pc-msys
+ exit ;;
+ i*:windows32*:*)
+ # uname -m includes "-pc" on this system.
+ echo ${UNAME_MACHINE}-mingw32
+ exit ;;
+ i*:PW*:*)
+ echo ${UNAME_MACHINE}-pc-pw32
+ exit ;;
+ *:Interix*:*)
+ case ${UNAME_MACHINE} in
+ x86)
+ echo i586-pc-interix${UNAME_RELEASE}
+ exit ;;
+ authenticamd | genuineintel | EM64T)
+ echo x86_64-${VENDOR}-interix${UNAME_RELEASE}
+ exit ;;
+ IA64)
+ echo ia64-${VENDOR}-interix${UNAME_RELEASE}
+ exit ;;
+ esac ;;
+ [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+ echo i${UNAME_MACHINE}-pc-mks
+ exit ;;
+ 8664:Windows_NT:*)
+ echo x86_64-pc-mks
+ exit ;;
+ i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+ # set UNAME_MACHINE based on the output of uname instead of i386?
+ echo i586-pc-interix
+ exit ;;
+ i*:UWIN*:*)
+ echo ${UNAME_MACHINE}-pc-uwin
+ exit ;;
+ amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+ echo x86_64-${VENDOR}-cygwin
+ exit ;;
+ p*:CYGWIN*:*)
+ echo powerpcle-${VENDOR}-cygwin
+ exit ;;
+ prep*:SunOS:5.*:*)
+ echo powerpcle-${VENDOR}-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ *:GNU:*:*)
+ # the GNU system
+ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-${VENDOR}-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+ exit ;;
+ *:GNU/*:*:*)
+ # other systems with GNU libc and userland
+ echo ${UNAME_MACHINE}-${VENDOR}-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
+ exit ;;
+ i*86:Minix:*:*)
+ echo ${UNAME_MACHINE}-pc-minix
+ exit ;;
+ aarch64:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ aarch64_be:Linux:*:*)
+ UNAME_MACHINE=aarch64_be
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ alpha:Linux:*:*)
+ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ EV5) UNAME_MACHINE=alphaev5 ;;
+ EV56) UNAME_MACHINE=alphaev56 ;;
+ PCA56) UNAME_MACHINE=alphapca56 ;;
+ PCA57) UNAME_MACHINE=alphapca56 ;;
+ EV6) UNAME_MACHINE=alphaev6 ;;
+ EV67) UNAME_MACHINE=alphaev67 ;;
+ EV68*) UNAME_MACHINE=alphaev68 ;;
+ esac
+ objdump --private-headers /bin/sh | grep -q ld.so.1
+ if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ arc:Linux:*:* | arceb:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ arm*:Linux:*:*)
+ eval $set_cc_for_build
+ if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ARM_EABI__
+ then
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ else
+ if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ARM_PCS_VFP
+ then
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}eabi
+ else
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}eabihf
+ fi
+ fi
+ exit ;;
+ avr32*:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ cris:Linux:*:*)
+ echo ${UNAME_MACHINE}-axis-linux-${LIBC}
+ exit ;;
+ crisv32:Linux:*:*)
+ echo ${UNAME_MACHINE}-axis-linux-${LIBC}
+ exit ;;
+ frv:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ hexagon:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ i*86:Linux:*:*)
+ echo ${UNAME_MACHINE}-pc-linux-${LIBC}
+ exit ;;
+ ia64:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ m32r*:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ m68*:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ mips:Linux:*:* | mips64:Linux:*:*)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #undef CPU
+ #undef ${UNAME_MACHINE}
+ #undef ${UNAME_MACHINE}el
+ #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+ CPU=${UNAME_MACHINE}el
+ #else
+ #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+ CPU=${UNAME_MACHINE}
+ #else
+ CPU=
+ #endif
+ #endif
+EOF
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
+ test x"${CPU}" != x && { echo "${CPU}-${VENDOR}-linux-${LIBC}"; exit; }
+ ;;
+ or1k:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ or32:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ padre:Linux:*:*)
+ echo sparc-${VENDOR}-linux-${LIBC}
+ exit ;;
+ parisc64:Linux:*:* | hppa64:Linux:*:*)
+ echo hppa64-${VENDOR}-linux-${LIBC}
+ exit ;;
+ parisc:Linux:*:* | hppa:Linux:*:*)
+ # Look for CPU level
+ case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+ PA7*) echo hppa1.1-${VENDOR}-linux-${LIBC} ;;
+ PA8*) echo hppa2.0-${VENDOR}-linux-${LIBC} ;;
+ *) echo hppa-${VENDOR}-linux-${LIBC} ;;
+ esac
+ exit ;;
+ ppc64:Linux:*:*)
+ echo powerpc64-${VENDOR}-linux-${LIBC}
+ exit ;;
+ ppc:Linux:*:*)
+ echo powerpc-${VENDOR}-linux-${LIBC}
+ exit ;;
+ ppc64le:Linux:*:*)
+ echo powerpc64le-${VENDOR}-linux-${LIBC}
+ exit ;;
+ ppcle:Linux:*:*)
+ echo powerpcle-${VENDOR}-linux-${LIBC}
+ exit ;;
+ s390:Linux:*:* | s390x:Linux:*:*)
+ echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
+ exit ;;
+ sh64*:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ sh*:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ sparc:Linux:*:* | sparc64:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ tile*:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ vax:Linux:*:*)
+ echo ${UNAME_MACHINE}-dec-linux-${LIBC}
+ exit ;;
+ x86_64:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ xtensa*:Linux:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-linux-${LIBC}
+ exit ;;
+ i*86:DYNIX/ptx:4*:*)
+ # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+ # earlier versions are messed up and put the nodename in both
+ # sysname and nodename.
+ echo i386-sequent-sysv4
+ exit ;;
+ i*86:UNIX_SV:4.2MP:2.*)
+ # Unixware is an offshoot of SVR4, but it has its own version
+ # number series starting with 2...
+ # I am not positive that other SVR4 systems won't match this,
+ # I just have to hope. -- rms.
+ # Use sysv4.2uw... so that sysv4* matches it.
+ echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
+ exit ;;
+ i*86:OS/2:*:*)
+ # If we were able to find `uname', then EMX Unix compatibility
+ # is probably installed.
+ echo ${UNAME_MACHINE}-pc-os2-emx
+ exit ;;
+ i*86:XTS-300:*:STOP)
+ echo ${UNAME_MACHINE}-${VENDOR}-stop
+ exit ;;
+ i*86:atheos:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-atheos
+ exit ;;
+ i*86:syllable:*:*)
+ echo ${UNAME_MACHINE}-pc-syllable
+ exit ;;
+ i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) # x86 LynxOS releases 2.*, 3.0*/3.1*, 4.0*/4.2*
+ echo i386-${VENDOR}-lynxos${UNAME_RELEASE} # was ${VENODR}: unset, so the vendor field came out empty
+ exit ;;
+ i*86:*DOS:*:*)
+ echo ${UNAME_MACHINE}-pc-msdosdjgpp
+ exit ;;
+ i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+ UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+ if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+ echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
+ else
+ echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+ fi
+ exit ;;
+ i*86:*:5:[678]*)
+ # UnixWare 7.x, OpenUNIX and OpenServer 6.
+ case `/bin/uname -X | grep "^Machine"` in
+ *486*) UNAME_MACHINE=i486 ;;
+ *Pentium) UNAME_MACHINE=i586 ;;
+ *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+ esac
+ echo ${UNAME_MACHINE}-${VENDOR}-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+ exit ;;
+ i*86:*:3.2:*)
+ if test -f /usr/options/cb.name; then
+ UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+ echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
+ elif /bin/uname -X 2>/dev/null >/dev/null ; then
+ UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+ (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+ (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+ && UNAME_MACHINE=i586
+ (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+ && UNAME_MACHINE=i686
+ (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+ && UNAME_MACHINE=i686
+ echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
+ else
+ echo ${UNAME_MACHINE}-pc-sysv32
+ fi
+ exit ;;
+ pc:*:*:*)
+ # Left here for compatibility:
+ # uname -m prints for DJGPP always 'pc', but it prints nothing about
+ # the processor, so we play safe by assuming i586.
+ # Note: whatever this is, it MUST be the same as what config.sub
+ # prints for the "djgpp" host, or else GDB configury will decide that
+ # this is a cross-build.
+ echo i586-pc-msdosdjgpp
+ exit ;;
+ Intel:Mach:3*:*)
+ echo i386-pc-mach3
+ exit ;;
+ paragon:*:*:*)
+ echo i860-intel-osf1
+ exit ;;
+ i860:*:4.*:*) # i860-SVR4
+ if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+ echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+ else # Add other i860-SVR4 vendors below as they are discovered.
+ echo i860-${VENDOR}-sysv${UNAME_RELEASE} # Unknown i860-SVR4 (was ${VENODR}, an unset name giving an empty vendor field)
+ fi
+ exit ;;
+ mini*:CTIX:SYS*5:*)
+ # "miniframe"
+ echo m68010-convergent-sysv
+ exit ;;
+ mc68k:UNIX:SYSTEM5:3.51m)
+ echo m68k-convergent-sysv
+ exit ;;
+ M680?0:D-NIX:5.3:*)
+ echo m68k-diab-dnix
+ exit ;;
+ M68*:*:R3V[5678]*:*)
+ test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+ 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
+ OS_REL=''
+ test -r /etc/.relid \
+ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+ 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && { echo i486-ncr-sysv4; exit; } ;;
+ NCR*:*:4.2:* | MPRAS*:*:4.2:*)
+ OS_REL='.3'
+ test -r /etc/.relid \
+ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
+ /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+ m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+ echo m68k-${VENDOR}-lynxos${UNAME_RELEASE}
+ exit ;;
+ mc68030:UNIX_System_V:4.*:*)
+ echo m68k-atari-sysv4
+ exit ;;
+ TSUNAMI:LynxOS:2.*:*)
+ echo sparc-${VENDOR}-lynxos${UNAME_RELEASE}
+ exit ;;
+ rs6000:LynxOS:2.*:*)
+ echo rs6000-${VENDOR}-lynxos${UNAME_RELEASE}
+ exit ;;
+ PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
+ echo powerpc-${VENDOR}-lynxos${UNAME_RELEASE}
+ exit ;;
+ SM[BE]S:UNIX_SV:*:*)
+ echo mips-dde-sysv${UNAME_RELEASE}
+ exit ;;
+ RM*:ReliantUNIX-*:*:*)
+ echo mips-sni-sysv4
+ exit ;;
+ RM*:SINIX-*:*:*)
+ echo mips-sni-sysv4
+ exit ;;
+ *:SINIX-*:*:*)
+ if uname -p 2>/dev/null >/dev/null ; then
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
+ echo ${UNAME_MACHINE}-sni-sysv4
+ else
+ echo ns32k-sni-sysv
+ fi
+ exit ;;
+ PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+ # says <Richard.M.Bartel@ccMail.Census.GOV>
+ echo i586-unisys-sysv4
+ exit ;;
+ *:UNIX_System_V:4*:FTX*)
+ # From Gerald Hewes <hewes@openmarket.com>.
+ # How about differentiating between stratus architectures? -djm
+ echo hppa1.1-stratus-sysv4
+ exit ;;
+ *:*:*:FTX*)
+ # From seanf@swdc.stratus.com.
+ echo i860-stratus-sysv4
+ exit ;;
+ i*86:VOS:*:*)
+ # From Paul.Green@stratus.com.
+ echo ${UNAME_MACHINE}-stratus-vos
+ exit ;;
+ *:VOS:*:*)
+ # From Paul.Green@stratus.com.
+ echo hppa1.1-stratus-vos
+ exit ;;
+ mc68*:A/UX:*:*)
+ echo m68k-apple-aux${UNAME_RELEASE}
+ exit ;;
+ news*:NEWS-OS:6*:*)
+ echo mips-sony-newsos6
+ exit ;;
+ R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+ if [ -d /usr/nec ]; then
+ echo mips-nec-sysv${UNAME_RELEASE}
+ else
+ echo mips-${VENDOR}-sysv${UNAME_RELEASE}
+ fi
+ exit ;;
+ BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
+ echo powerpc-be-beos
+ exit ;;
+ BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only.
+ echo powerpc-apple-beos
+ exit ;;
+ BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
+ echo i586-pc-beos
+ exit ;;
+ BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
+ echo i586-pc-haiku
+ exit ;;
+ x86_64:Haiku:*:*)
+ echo x86_64-${VENDOR}-haiku
+ exit ;;
+ SX-4:SUPER-UX:*:*)
+ echo sx4-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-5:SUPER-UX:*:*)
+ echo sx5-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-6:SUPER-UX:*:*)
+ echo sx6-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-7:SUPER-UX:*:*)
+ echo sx7-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-8:SUPER-UX:*:*)
+ echo sx8-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-8R:SUPER-UX:*:*)
+ echo sx8r-nec-superux${UNAME_RELEASE}
+ exit ;;
+ Power*:Rhapsody:*:*)
+ echo powerpc-apple-rhapsody${UNAME_RELEASE}
+ exit ;;
+ *:Rhapsody:*:*)
+ echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+ exit ;;
+ *:Darwin:*:*)
+ UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+ eval $set_cc_for_build
+ if test "$UNAME_PROCESSOR" = unknown ; then
+ UNAME_PROCESSOR=powerpc
+ fi
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ case $UNAME_PROCESSOR in
+ i386) UNAME_PROCESSOR=x86_64 ;;
+ powerpc) UNAME_PROCESSOR=powerpc64 ;;
+ esac
+ fi
+ fi
+ echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
+ exit ;;
+ *:procnto*:*:* | *:QNX:[0123456789]*:*)
+ UNAME_PROCESSOR=`uname -p`
+ if test "$UNAME_PROCESSOR" = "x86"; then
+ UNAME_PROCESSOR=i386
+ UNAME_MACHINE=pc
+ fi
+ echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+ exit ;;
+ *:QNX:*:4*)
+ echo i386-pc-qnx
+ exit ;;
+ NEO-?:NONSTOP_KERNEL:*:*)
+ echo neo-tandem-nsk${UNAME_RELEASE}
+ exit ;;
+ NSE-*:NONSTOP_KERNEL:*:*)
+ echo nse-tandem-nsk${UNAME_RELEASE}
+ exit ;;
+ NSR-?:NONSTOP_KERNEL:*:*)
+ echo nsr-tandem-nsk${UNAME_RELEASE}
+ exit ;;
+ *:NonStop-UX:*:*)
+ echo mips-compaq-nonstopux
+ exit ;;
+ BS2000:POSIX*:*:*)
+ echo bs2000-siemens-sysv
+ exit ;;
+ DS/*:UNIX_System_V:*:*)
+ echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+ exit ;;
+ *:Plan9:*:*)
+ # "uname -m" is not consistent, so use $cputype instead. 386
+ # is converted to i386 for consistency with other x86
+ # operating systems.
+ if test "$cputype" = "386"; then
+ UNAME_MACHINE=i386
+ else
+ UNAME_MACHINE="$cputype"
+ fi
+ echo ${UNAME_MACHINE}-${VENDOR}-plan9
+ exit ;;
+ *:TOPS-10:*:*)
+ echo pdp10-${VENDOR}-tops10
+ exit ;;
+ *:TENEX:*:*)
+ echo pdp10-${VENDOR}-tenex
+ exit ;;
+ KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+ echo pdp10-dec-tops20
+ exit ;;
+ XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+ echo pdp10-xkl-tops20
+ exit ;;
+ *:TOPS-20:*:*)
+ echo pdp10-${VENDOR}-tops20
+ exit ;;
+ *:ITS:*:*)
+ echo pdp10-${VENDOR}-its
+ exit ;;
+ SEI:*:*:SEIUX)
+ echo mips-sei-seiux${UNAME_RELEASE}
+ exit ;;
+ *:DragonFly:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+ exit ;;
+ *:*VMS:*:*)
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
+ case "${UNAME_MACHINE}" in
+ A*) echo alpha-dec-vms ; exit ;;
+ I*) echo ia64-dec-vms ; exit ;;
+ V*) echo vax-dec-vms ; exit ;;
+ esac ;;
+ *:XENIX:*:SysV)
+ echo i386-pc-xenix
+ exit ;;
+ i*86:skyos:*:*)
+ echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+ exit ;;
+ i*86:rdos:*:*)
+ echo ${UNAME_MACHINE}-pc-rdos
+ exit ;;
+ i*86:AROS:*:*)
+ echo ${UNAME_MACHINE}-pc-aros
+ exit ;;
+ x86_64:VMkernel:*:*)
+ echo ${UNAME_MACHINE}-${VENDOR}-esx
+ exit ;;
+esac
+
+eval $set_cc_for_build # presumably sets up $CC_FOR_BUILD and $dummy (defined outside this chunk - confirm); the unquoted heredoc below writes a C probe to $dummy.c, so the backquoted hostinfo pipeline inside it is expanded by the shell at write time
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+ /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
+ I don't know.... */
+ printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+ printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+ "4"
+#else
+ ""
+#endif
+ ); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+ printf ("arm-acorn-riscix\n"); exit (0);
+#endif
+
+#if defined (hp300) && !defined (hpux)
+ printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+ int version;
+ version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+ if (version < 4)
+ printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+ else
+ printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+ exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+ printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+ printf ("ns32k-encore-mach\n"); exit (0);
+#else
+ printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+ printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+ printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+ printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+ struct utsname un;
+
+ uname(&un);
+
+ if (strncmp(un.version, "V2", 2) == 0) {
+ printf ("i386-sequent-ptx2\n"); exit (0);
+ }
+ if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+ printf ("i386-sequent-ptx1\n"); exit (0);
+ }
+ printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+# if !defined (ultrix)
+# include <sys/param.h>
+# if defined (BSD)
+# if BSD == 43
+ printf ("vax-dec-bsd4.3\n"); exit (0);
+# else
+# if BSD == 199006
+ printf ("vax-dec-bsd4.3reno\n"); exit (0);
+# else
+ printf ("vax-dec-bsd\n"); exit (0);
+# endif
+# endif
+# else
+ printf ("vax-dec-bsd\n"); exit (0);
+# endif
+# else
+ printf ("vax-dec-ultrix\n"); exit (0);
+# endif
+#endif
+
+#if defined (alliant) && defined (i860)
+ printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+ exit (1);
+}
+EOF
+
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+ { echo "$SYSTEM_NAME"; exit; } # only when the probe compiles and runs successfully: print its output and stop; otherwise fall through
+
+# Apollos put the system type in the environment: when /usr/apollo exists, report it from $ISP and $SYSTYPE.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
+
+# Convex versions that predate uname can use getsysinfo(1); its cpu_type output is mapped to a cN-convex-bsd name.
+
+if [ -x /usr/convex/getsysinfo ] # NOTE(review): tests the absolute path, but the calls below find getsysinfo via PATH
+then
+ case `getsysinfo -f cpu_type` in
+ c1*)
+ echo c1-convex-bsd
+ exit ;;
+ c2*) # a successful scalar_acc query selects c32, otherwise c2
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+ exit ;;
+ c34*)
+ echo c34-convex-bsd
+ exit ;;
+ c38*)
+ echo c38-convex-bsd
+ exit ;;
+ c4*)
+ echo c4-convex-bsd
+ exit ;;
+ esac # an unrecognised cpu_type falls through to the failure report
+fi
+
+cat >&2 <<EOF
+$0: unable to guess system type
+
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
+
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+and
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo = `(hostinfo) 2>/dev/null`
+/bin/universe = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = ${UNAME_MACHINE}
+UNAME_RELEASE = ${UNAME_RELEASE}
+UNAME_SYSTEM = ${UNAME_SYSTEM}
+UNAME_VERSION = ${UNAME_VERSION}
+EOF
+
+exit 1 # reached only when nothing above printed a system name; the diagnostic report went to stderr
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/config/config.sub b/config/config.sub
new file mode 100755
index 00000000..8b612ab8
--- /dev/null
+++ b/config/config.sub
@@ -0,0 +1,1788 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+# Copyright 1992-2013 Free Software Foundation, Inc.
+
+timestamp='2013-04-24'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program. This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+
+
+# Please send patches with a ChangeLog entry to config-patches@gnu.org.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support. The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'` # $0 with any leading directories removed
+# Usage text, printed by --help.
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+ $0 [OPTION] ALIAS
+
+Canonicalize a configuration name.
+
+Operation modes:
+ -h, --help print this help, then exit
+ -t, --time-stamp print date of last modification, then exit
+ -v, --version print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+# Version banner, printed by --version.
+version="\
+GNU config.sub ($timestamp)
+
+Copyright 1992-2013 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+# Hint appended to error messages; the value begins with a newline.
+help="
+Try \`$me --help' for more information."
+
+# Parse command line.  Every arm exits or breaks, so only $1 is examined here.
+while test $# -gt 0 ; do
+ case $1 in
+ --time-stamp | --time* | -t )
+ echo "$timestamp" ; exit ;;
+ --version | -v )
+ echo "$version" ; exit ;;
+ --help | --h* | -h )
+ echo "$usage"; exit ;;
+ -- ) # Stop option processing
+ shift; break ;;
+ - ) # Use stdin as input.
+ break ;;
+ -* ) # Unknown option: an error, so report it on stderr, not stdout.
+ echo "$me: invalid option $1$help" >&2
+ exit 1 ;;
+
+ *local*)
+ # Pass local machine types through verbatim (quoted: no globbing).
+ echo "$1"
+ exit ;;
+
+ * )
+ break ;;
+ esac
+done
+
+if test $# -eq 0 ; then # exactly one operand must remain at this point
+ echo "$me: missing argument$help" >&2
+ exit 1
+elif test $# -ne 1 ; then
+ echo "$me: too many arguments$help" >&2
+ exit 1
+fi
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+ nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+ knetbsd*-gnu* | netbsd*-gnu* | \
+ kopensolaris*-gnu* | \
+ storm-chaos* | os2-emx* | rtmk-nova*)
+ os=-$maybe_os
+ basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+ ;;
+ android-linux)
+ os=-linux-android
+ basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+ ;;
+ *) # Otherwise only the last dash-separated field can be the OS.
+ basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
+ if [ "$basic_machine" != "$1" ]
+ then os=`echo "$1" | sed 's/.*-/-/'`
+ else os=; fi
+ ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work. We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+ -sun*os*)
+ # Prevent following clause from handling this invalid input.
+ ;;
+ -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+ -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+ -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+ -apple | -axis | -knuth | -cray | -microblaze*)
+ os=
+ basic_machine=$1
+ ;;
+ -bluegene*)
+ os=-cnk
+ ;;
+ -sim | -cisco | -oki | -wec | -winbond)
+ os=
+ basic_machine=$1
+ ;;
+ -scout) # Listed before -sco* so -scout is not rewritten as an SCO release.
+ ;;
+ -wrs)
+ os=-vxworks
+ basic_machine=$1
+ ;;
+ -chorusos*)
+ os=-chorusos
+ basic_machine=$1
+ ;;
+ -chorusrdb)
+ os=-chorusrdb
+ basic_machine=$1
+ ;;
+ -hiux*)
+ os=-hiuxwe2
+ ;;
+ -sco6)
+ os=-sco5v6
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco5)
+ os=-sco3.2v5
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco4)
+ os=-sco3.2v4
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco3.2.[4-9]*)
+ os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco3.2v[4-9]*)
+ # Don't forget version if it is 3.2v4 or newer.
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco5v6*)
+ # Keep the sco5v6 version as given; only the machine is rewritten.
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco*)
+ os=-sco3.2v2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -udk*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -isc)
+ os=-isc2.2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -clix*)
+ basic_machine=clipper-intergraph
+ ;;
+ -isc*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -lynx*178)
+ os=-lynxos178
+ ;;
+ -lynx*5)
+ os=-lynxos5
+ ;;
+ -lynx*)
+ os=-lynxos
+ ;;
+ -ptx*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+ ;;
+ -windowsnt*)
+ os=`echo $os | sed -e 's/windowsnt/winnt/'`
+ ;;
+ -psos*)
+ os=-psos
+ ;;
+ -mint | -mint[0-9]*)
+ basic_machine=m68k-atari
+ os=-mint
+ ;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+ # Recognize the basic CPU types without company name.
+ # Some are omitted here because they have special meanings below.
+ 1750a | 580 \
+ | a29k \
+ | aarch64 | aarch64_be \
+ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+ | am33_2.0 \
+ | arc | arceb \
+ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
+ | avr | avr32 \
+ | be32 | be64 \
+ | bfin \
+ | c4x | clipper \
+ | d10v | d30v | dlx | dsp16xx \
+ | epiphany \
+ | fido | fr30 | frv \
+ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+ | hexagon \
+ | i370 | i860 | i960 | ia64 \
+ | ip2k | iq2000 \
+ | le32 | le64 \
+ | lm32 \
+ | m32c | m32r | m32rle | m68000 | m68k | m88k \
+ | maxq | mb | microblaze | microblazeel | mcore | mep | metag \
+ | mips | mipsbe | mipseb | mipsel | mipsle \
+ | mips16 \
+ | mips64 | mips64el \
+ | mips64octeon | mips64octeonel \
+ | mips64orion | mips64orionel \
+ | mips64r5900 | mips64r5900el \
+ | mips64vr | mips64vrel \
+ | mips64vr4100 | mips64vr4100el \
+ | mips64vr4300 | mips64vr4300el \
+ | mips64vr5000 | mips64vr5000el \
+ | mips64vr5900 | mips64vr5900el \
+ | mipsisa32 | mipsisa32el \
+ | mipsisa32r2 | mipsisa32r2el \
+ | mipsisa64 | mipsisa64el \
+ | mipsisa64r2 | mipsisa64r2el \
+ | mipsisa64sb1 | mipsisa64sb1el \
+ | mipsisa64sr71k | mipsisa64sr71kel \
+ | mipsr5900 | mipsr5900el \
+ | mipstx39 | mipstx39el \
+ | mn10200 | mn10300 \
+ | moxie \
+ | mt \
+ | msp430 \
+ | nds32 | nds32le | nds32be \
+ | nios | nios2 | nios2eb | nios2el \
+ | ns16k | ns32k \
+ | open8 \
+ | or1k | or32 \
+ | pdp10 | pdp11 | pj | pjl \
+ | powerpc | powerpc64 | powerpc64le | powerpcle \
+ | pyramid \
+ | rl78 | rx \
+ | score \
+ | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+ | sh64 | sh64le \
+ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+ | spu \
+ | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
+ | ubicom32 \
+ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
+ | we32k \
+ | x86 | xc16x | xstormy16 | xtensa \
+ | z8k | z80)
+ basic_machine=$basic_machine-unknown
+ ;;
+ c54x)
+ basic_machine=tic54x-unknown
+ ;;
+ c55x)
+ basic_machine=tic55x-unknown
+ ;;
+ c6x)
+ basic_machine=tic6x-unknown
+ ;;
+ m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
+ basic_machine=$basic_machine-unknown
+ os=-none
+ ;;
+ m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
+ ;;
+ ms1)
+ basic_machine=mt-unknown
+ ;;
+
+ strongarm | thumb | xscale)
+ basic_machine=arm-unknown
+ ;;
+ xgate)
+ basic_machine=$basic_machine-unknown
+ os=-none
+ ;;
+ xscaleeb)
+ basic_machine=armeb-unknown
+ ;;
+
+ xscaleel)
+ basic_machine=armel-unknown
+ ;;
+
+ # We use `pc' rather than `unknown'
+ # because (1) that's what they normally are, and
+ # (2) the word "unknown" tends to confuse beginning users.
+ i*86 | x86_64)
+ basic_machine=$basic_machine-pc
+ ;;
+ # Object if more than one company name word.
+ *-*-*)
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+ exit 1
+ ;;
+ # Recognize the basic CPU types with company name.
+ 580-* \
+ | a29k-* \
+ | aarch64-* | aarch64_be-* \
+ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
+ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
+ | avr-* | avr32-* \
+ | be32-* | be64-* \
+ | bfin-* | bs2000-* \
+ | c[123]* | c30-* | [cjt]90-* | c4x-* \
+ | clipper-* | craynv-* | cydra-* \
+ | d10v-* | d30v-* | dlx-* \
+ | elxsi-* \
+ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
+ | h8300-* | h8500-* \
+ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+ | hexagon-* \
+ | i*86-* | i860-* | i960-* | ia64-* \
+ | ip2k-* | iq2000-* \
+ | le32-* | le64-* \
+ | lm32-* \
+ | m32c-* | m32r-* | m32rle-* \
+ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
+ | microblaze-* | microblazeel-* \
+ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+ | mips16-* \
+ | mips64-* | mips64el-* \
+ | mips64octeon-* | mips64octeonel-* \
+ | mips64orion-* | mips64orionel-* \
+ | mips64r5900-* | mips64r5900el-* \
+ | mips64vr-* | mips64vrel-* \
+ | mips64vr4100-* | mips64vr4100el-* \
+ | mips64vr4300-* | mips64vr4300el-* \
+ | mips64vr5000-* | mips64vr5000el-* \
+ | mips64vr5900-* | mips64vr5900el-* \
+ | mipsisa32-* | mipsisa32el-* \
+ | mipsisa32r2-* | mipsisa32r2el-* \
+ | mipsisa64-* | mipsisa64el-* \
+ | mipsisa64r2-* | mipsisa64r2el-* \
+ | mipsisa64sb1-* | mipsisa64sb1el-* \
+ | mipsisa64sr71k-* | mipsisa64sr71kel-* \
+ | mipsr5900-* | mipsr5900el-* \
+ | mipstx39-* | mipstx39el-* \
+ | mmix-* \
+ | mt-* \
+ | msp430-* \
+ | nds32-* | nds32le-* | nds32be-* \
+ | nios-* | nios2-* | nios2eb-* | nios2el-* \
+ | none-* | np1-* | ns16k-* | ns32k-* \
+ | open8-* \
+ | orion-* \
+ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
+ | pyramid-* \
+ | rl78-* | romp-* | rs6000-* | rx-* \
+ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
+ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+ | sparclite-* \
+ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
+ | tahoe-* \
+ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+ | tile*-* \
+ | tron-* \
+ | ubicom32-* \
+ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+ | vax-* \
+ | we32k-* \
+ | x86-* | x86_64-* | xc16x-* | xps100-* \
+ | xstormy16-* | xtensa*-* \
+ | ymp-* \
+ | z8k-* | z80-*)
+ ;;
+ # Recognize the basic CPU types without company name, with glob match.
+ xtensa*)
+ basic_machine=$basic_machine-unknown
+ ;;
+ # Recognize the various machine names and aliases which stand
+ # for a CPU type and a company and sometimes even an OS.
+ 386bsd)
+ basic_machine=i386-unknown
+ os=-bsd
+ ;;
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+ basic_machine=m68000-att
+ ;;
+ 3b*)
+ basic_machine=we32k-att
+ ;;
+ a29khif)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ abacus)
+ basic_machine=abacus-unknown
+ ;;
+ adobe68k)
+ basic_machine=m68010-adobe
+ os=-scout
+ ;;
+ alliant | fx80)
+ basic_machine=fx80-alliant
+ ;;
+ altos | altos3068)
+ basic_machine=m68k-altos
+ ;;
+ am29k)
+ basic_machine=a29k-none
+ os=-bsd
+ ;;
+ amd64)
+ basic_machine=x86_64-pc
+ ;;
+ amd64-*)
+ basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ amdahl)
+ basic_machine=580-amdahl
+ os=-sysv
+ ;;
+ amiga | amiga-*)
+ basic_machine=m68k-unknown
+ ;;
+ amigaos | amigados)
+ basic_machine=m68k-unknown
+ os=-amigaos
+ ;;
+ amigaunix | amix)
+ basic_machine=m68k-unknown
+ os=-sysv4
+ ;;
+ apollo68)
+ basic_machine=m68k-apollo
+ os=-sysv
+ ;;
+ apollo68bsd)
+ basic_machine=m68k-apollo
+ os=-bsd
+ ;;
+ aros)
+ basic_machine=i386-pc
+ os=-aros
+ ;;
+ aux)
+ basic_machine=m68k-apple
+ os=-aux
+ ;;
+ balance)
+ basic_machine=ns32k-sequent
+ os=-dynix
+ ;;
+ blackfin)
+ basic_machine=bfin-unknown
+ os=-linux
+ ;;
+ blackfin-*)
+ basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
+ os=-linux
+ ;;
+ bluegene*)
+ basic_machine=powerpc-ibm
+ os=-cnk
+ ;;
+ c54x-*)
+ basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ c55x-*)
+ basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ c6x-*)
+ basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ c90)
+ basic_machine=c90-cray
+ os=-unicos
+ ;;
+ cegcc)
+ basic_machine=arm-unknown
+ os=-cegcc
+ ;;
+ convex-c1)
+ basic_machine=c1-convex
+ os=-bsd
+ ;;
+ convex-c2)
+ basic_machine=c2-convex
+ os=-bsd
+ ;;
+ convex-c32)
+ basic_machine=c32-convex
+ os=-bsd
+ ;;
+ convex-c34)
+ basic_machine=c34-convex
+ os=-bsd
+ ;;
+ convex-c38)
+ basic_machine=c38-convex
+ os=-bsd
+ ;;
+ cray | j90)
+ basic_machine=j90-cray
+ os=-unicos
+ ;;
+ craynv)
+ basic_machine=craynv-cray
+ os=-unicosmp
+ ;;
+ cr16 | cr16-*)
+ basic_machine=cr16-unknown
+ os=-elf
+ ;;
+ crds | unos)
+ basic_machine=m68k-crds
+ ;;
+ crisv32 | crisv32-* | etraxfs*)
+ basic_machine=crisv32-axis
+ ;;
+ cris | cris-* | etrax*)
+ basic_machine=cris-axis
+ ;;
+ crx)
+ basic_machine=crx-unknown
+ os=-elf
+ ;;
+ da30 | da30-*)
+ basic_machine=m68k-da30
+ ;;
+ decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+ basic_machine=mips-dec
+ ;;
+ decsystem10* | dec10*)
+ basic_machine=pdp10-dec
+ os=-tops10
+ ;;
+ decsystem20* | dec20*)
+ basic_machine=pdp10-dec
+ os=-tops20
+ ;;
+ delta | 3300 | motorola-3300 | motorola-delta \
+ | 3300-motorola | delta-motorola)
+ basic_machine=m68k-motorola
+ ;;
+ delta88)
+ basic_machine=m88k-motorola
+ os=-sysv3
+ ;;
+ dicos)
+ basic_machine=i686-pc
+ os=-dicos
+ ;;
+ djgpp)
+ basic_machine=i586-pc
+ os=-msdosdjgpp
+ ;;
+ dpx20 | dpx20-*)
+ basic_machine=rs6000-bull
+ os=-bosx
+ ;;
+ dpx2* | dpx2*-bull)
+ basic_machine=m68k-bull
+ os=-sysv3
+ ;;
+ ebmon29k)
+ basic_machine=a29k-amd
+ os=-ebmon
+ ;;
+ elxsi)
+ basic_machine=elxsi-elxsi
+ os=-bsd
+ ;;
+ encore | umax | mmax)
+ basic_machine=ns32k-encore
+ ;;
+ es1800 | OSE68k | ose68k | ose | OSE)
+ basic_machine=m68k-ericsson
+ os=-ose
+ ;;
+ fx2800)
+ basic_machine=i860-alliant
+ ;;
+ genix)
+ basic_machine=ns32k-ns
+ ;;
+ gmicro)
+ basic_machine=tron-gmicro
+ os=-sysv
+ ;;
+ go32)
+ basic_machine=i386-pc
+ os=-go32
+ ;;
+ h3050r* | hiux*)
+ basic_machine=hppa1.1-hitachi
+ os=-hiuxwe2
+ ;;
+ h8300hms)
+ basic_machine=h8300-hitachi
+ os=-hms
+ ;;
+ h8300xray)
+ basic_machine=h8300-hitachi
+ os=-xray
+ ;;
+ h8500hms)
+ basic_machine=h8500-hitachi
+ os=-hms
+ ;;
+ harris)
+ basic_machine=m88k-harris
+ os=-sysv3
+ ;;
+ hp300-*)
+ basic_machine=m68k-hp
+ ;;
+ hp300bsd)
+ basic_machine=m68k-hp
+ os=-bsd
+ ;;
+ hp300hpux)
+ basic_machine=m68k-hp
+ os=-hpux
+ ;;
+ hp3k9[0-9][0-9] | hp9[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hp9k2[0-9][0-9] | hp9k31[0-9])
+ basic_machine=m68000-hp
+ ;;
+ hp9k3[2-9][0-9])
+ basic_machine=m68k-hp
+ ;;
+ hp9k6[0-9][0-9] | hp6[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hp9k7[0-79][0-9] | hp7[0-79][0-9])
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k78[0-9] | hp78[0-9])
+ # FIXME: really hppa2.0-hp
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+ # FIXME: really hppa2.0-hp
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k8[0-9][13679] | hp8[0-9][13679])
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k8[0-9][0-9] | hp8[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hppa-next)
+ os=-nextstep3
+ ;;
+ hppaosf)
+ basic_machine=hppa1.1-hp
+ os=-osf
+ ;;
+ hppro)
+ basic_machine=hppa1.1-hp
+ os=-proelf
+ ;;
+ i370-ibm* | ibm*)
+ basic_machine=i370-ibm
+ ;;
+ i*86v32)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv32
+ ;;
+ i*86v4*)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv4
+ ;;
+ i*86v)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv
+ ;;
+ i*86sol2)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-solaris2
+ ;;
+ i386mach)
+ basic_machine=i386-mach
+ os=-mach
+ ;;
+ i386-vsta | vsta)
+ basic_machine=i386-unknown
+ os=-vsta
+ ;;
+ iris | iris4d)
+ basic_machine=mips-sgi
+ case $os in
+ -irix*)
+ ;;
+ *)
+ os=-irix4
+ ;;
+ esac
+ ;;
+ isi68 | isi)
+ basic_machine=m68k-isi
+ os=-sysv
+ ;;
+ m68knommu)
+ basic_machine=m68k-unknown
+ os=-linux
+ ;;
+ m68knommu-*)
+ basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
+ os=-linux
+ ;;
+ m88k-omron*)
+ basic_machine=m88k-omron
+ ;;
+ magnum | m3230)
+ basic_machine=mips-mips
+ os=-sysv
+ ;;
+ merlin)
+ basic_machine=ns32k-utek
+ os=-sysv
+ ;;
+ microblaze*)
+ basic_machine=microblaze-xilinx
+ ;;
+ mingw64)
+ basic_machine=x86_64-pc
+ os=-mingw64
+ ;;
+ mingw32)
+ basic_machine=i386-pc
+ os=-mingw32
+ ;;
+ mingw32ce)
+ basic_machine=arm-unknown
+ os=-mingw32ce
+ ;;
+ miniframe)
+ basic_machine=m68000-convergent
+ ;;
+ *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+ basic_machine=m68k-atari
+ os=-mint
+ ;;
+ mips3*-*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
+ ;;
+ mips3*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+ ;;
+ monitor)
+ basic_machine=m68k-rom68k
+ os=-coff
+ ;;
+ morphos)
+ basic_machine=powerpc-unknown
+ os=-morphos
+ ;;
+ msdos)
+ basic_machine=i386-pc
+ os=-msdos
+ ;;
+ ms1-*)
+ basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
+ ;;
+ msys)
+ basic_machine=i386-pc
+ os=-msys
+ ;;
+ mvs)
+ basic_machine=i370-ibm
+ os=-mvs
+ ;;
+ nacl)
+ basic_machine=le32-unknown
+ os=-nacl
+ ;;
+ ncr3000)
+ basic_machine=i486-ncr
+ os=-sysv4
+ ;;
+ netbsd386)
+ basic_machine=i386-unknown
+ os=-netbsd
+ ;;
+ netwinder)
+ basic_machine=armv4l-rebel
+ os=-linux
+ ;;
+ news | news700 | news800 | news900)
+ basic_machine=m68k-sony
+ os=-newsos
+ ;;
+ news1000)
+ basic_machine=m68030-sony
+ os=-newsos
+ ;;
+ news-3600 | risc-news)
+ basic_machine=mips-sony
+ os=-newsos
+ ;;
+ necv70)
+ basic_machine=v70-nec
+ os=-sysv
+ ;;
+ next | m*-next )
+ basic_machine=m68k-next
+ case $os in
+ -nextstep* )
+ ;;
+ -ns2*)
+ os=-nextstep2
+ ;;
+ *)
+ os=-nextstep3
+ ;;
+ esac
+ ;;
+ nh3000)
+ basic_machine=m68k-harris
+ os=-cxux
+ ;;
+ nh[45]000)
+ basic_machine=m88k-harris
+ os=-cxux
+ ;;
+ nindy960)
+ basic_machine=i960-intel
+ os=-nindy
+ ;;
+ mon960)
+ basic_machine=i960-intel
+ os=-mon960
+ ;;
+ nonstopux)
+ basic_machine=mips-compaq
+ os=-nonstopux
+ ;;
+ np1)
+ basic_machine=np1-gould
+ ;;
+ neo-tandem)
+ basic_machine=neo-tandem
+ ;;
+ nse-tandem)
+ basic_machine=nse-tandem
+ ;;
+ nsr-tandem)
+ basic_machine=nsr-tandem
+ ;;
+ op50n-* | op60c-*)
+ basic_machine=hppa1.1-oki
+ os=-proelf
+ ;;
+ openrisc | openrisc-*)
+ basic_machine=or32-unknown
+ ;;
+ os400)
+ basic_machine=powerpc-ibm
+ os=-os400
+ ;;
+ OSE68000 | ose68000)
+ basic_machine=m68000-ericsson
+ os=-ose
+ ;;
+ os68k)
+ basic_machine=m68k-none
+ os=-os68k
+ ;;
+ pa-hitachi)
+ basic_machine=hppa1.1-hitachi
+ os=-hiuxwe2
+ ;;
+ paragon)
+ basic_machine=i860-intel
+ os=-osf
+ ;;
+ parisc)
+ basic_machine=hppa-unknown
+ os=-linux
+ ;;
+ parisc-*)
+ basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
+ os=-linux
+ ;;
+ pbd)
+ basic_machine=sparc-tti
+ ;;
+ pbb)
+ basic_machine=m68k-tti
+ ;;
+ pc532 | pc532-*)
+ basic_machine=ns32k-pc532
+ ;;
+ pc98)
+ basic_machine=i386-pc
+ ;;
+ pc98-*)
+ basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentium | p5 | k5 | k6 | nexgen | viac3)
+ basic_machine=i586-pc
+ ;;
+ pentiumpro | p6 | 6x86 | athlon | athlon_*)
+ basic_machine=i686-pc
+ ;;
+ pentiumii | pentium2 | pentiumiii | pentium3)
+ basic_machine=i686-pc
+ ;;
+ pentium4)
+ basic_machine=i786-pc
+ ;;
+ pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+ basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentiumpro-* | p6-* | 6x86-* | athlon-*)
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentium4-*)
+ basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pn)
+ basic_machine=pn-gould
+ ;;
+ power) basic_machine=power-ibm
+ ;;
+ ppc | ppcbe) basic_machine=powerpc-unknown
+ ;;
+ ppc-* | ppcbe-*)
+ basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ppcle | powerpclittle | ppc-le | powerpc-little)
+ basic_machine=powerpcle-unknown
+ ;;
+ ppcle-* | powerpclittle-*)
+ basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ppc64) basic_machine=powerpc64-unknown
+ ;;
+ ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ppc64le | powerpc64little | ppc64-le | powerpc64-little)
+ basic_machine=powerpc64le-unknown
+ ;;
+ ppc64le-* | powerpc64little-*)
+ basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ps2)
+ basic_machine=i386-ibm
+ ;;
+ pw32)
+ basic_machine=i586-unknown
+ os=-pw32
+ ;;
+ rdos | rdos64)
+ basic_machine=x86_64-pc
+ os=-rdos
+ ;;
+ rdos32)
+ basic_machine=i386-pc
+ os=-rdos
+ ;;
+ rom68k)
+ basic_machine=m68k-rom68k
+ os=-coff
+ ;;
+ rm[46]00)
+ basic_machine=mips-siemens
+ ;;
+ rtpc | rtpc-*)
+ basic_machine=romp-ibm
+ ;;
+ s390 | s390-*)
+ basic_machine=s390-ibm
+ ;;
+ s390x | s390x-*)
+ basic_machine=s390x-ibm
+ ;;
+ sa29200)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ sb1)
+ basic_machine=mipsisa64sb1-unknown
+ ;;
+ sb1el)
+ basic_machine=mipsisa64sb1el-unknown
+ ;;
+ sde)
+ basic_machine=mipsisa32-sde
+ os=-elf
+ ;;
+ sei)
+ basic_machine=mips-sei
+ os=-seiux
+ ;;
+ sequent)
+ basic_machine=i386-sequent
+ ;;
+ sh)
+ basic_machine=sh-hitachi
+ os=-hms
+ ;;
+ sh5el)
+ basic_machine=sh5le-unknown
+ ;;
+ sh64)
+ basic_machine=sh64-unknown
+ ;;
+ sparclite-wrs | simso-wrs)
+ basic_machine=sparclite-wrs
+ os=-vxworks
+ ;;
+ sps7)
+ basic_machine=m68k-bull
+ os=-sysv2
+ ;;
+ spur)
+ basic_machine=spur-unknown
+ ;;
+ st2000)
+ basic_machine=m68k-tandem
+ ;;
+ stratus)
+ basic_machine=i860-stratus
+ os=-sysv4
+ ;;
+ strongarm-* | thumb-*)
+ basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ sun2)
+ basic_machine=m68000-sun
+ ;;
+ sun2os3)
+ basic_machine=m68000-sun
+ os=-sunos3
+ ;;
+ sun2os4)
+ basic_machine=m68000-sun
+ os=-sunos4
+ ;;
+ sun3os3)
+ basic_machine=m68k-sun
+ os=-sunos3
+ ;;
+ sun3os4)
+ basic_machine=m68k-sun
+ os=-sunos4
+ ;;
+ sun4os3)
+ basic_machine=sparc-sun
+ os=-sunos3
+ ;;
+ sun4os4)
+ basic_machine=sparc-sun
+ os=-sunos4
+ ;;
+ sun4sol2)
+ basic_machine=sparc-sun
+ os=-solaris2
+ ;;
+ sun3 | sun3-*)
+ basic_machine=m68k-sun
+ ;;
+ sun4)
+ basic_machine=sparc-sun
+ ;;
+ sun386 | sun386i | roadrunner)
+ basic_machine=i386-sun
+ ;;
+ sv1)
+ basic_machine=sv1-cray
+ os=-unicos
+ ;;
+ symmetry)
+ basic_machine=i386-sequent
+ os=-dynix
+ ;;
+ t3e)
+ basic_machine=alphaev5-cray
+ os=-unicos
+ ;;
+ t90)
+ basic_machine=t90-cray
+ os=-unicos
+ ;;
+ tile*)
+ basic_machine=$basic_machine-unknown
+ os=-linux-gnu
+ ;;
+ tx39)
+ basic_machine=mipstx39-unknown
+ ;;
+ tx39el)
+ basic_machine=mipstx39el-unknown
+ ;;
+ toad1)
+ basic_machine=pdp10-xkl
+ os=-tops20
+ ;;
+ tower | tower-32)
+ basic_machine=m68k-ncr
+ ;;
+ tpf)
+ basic_machine=s390x-ibm
+ os=-tpf
+ ;;
+ udi29k)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ ultra3)
+ basic_machine=a29k-nyu
+ os=-sym1
+ ;;
+ v810 | necv810)
+ basic_machine=v810-nec
+ os=-none
+ ;;
+ vaxv)
+ basic_machine=vax-dec
+ os=-sysv
+ ;;
+ vms)
+ basic_machine=vax-dec
+ os=-vms
+ ;;
+ vpp*|vx|vx-*)
+ basic_machine=f301-fujitsu
+ ;;
+ vxworks960)
+ basic_machine=i960-wrs
+ os=-vxworks
+ ;;
+ vxworks68)
+ basic_machine=m68k-wrs
+ os=-vxworks
+ ;;
+ vxworks29k)
+ basic_machine=a29k-wrs
+ os=-vxworks
+ ;;
+ w65*)
+ basic_machine=w65-wdc
+ os=-none
+ ;;
+ w89k-*)
+ basic_machine=hppa1.1-winbond
+ os=-proelf
+ ;;
+ xbox)
+ basic_machine=i686-pc
+ os=-mingw32
+ ;;
+ xps | xps100)
+ basic_machine=xps100-honeywell
+ ;;
+ xscale-* | xscalee[bl]-*)
+ basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
+ ;;
+ ymp)
+ basic_machine=ymp-cray
+ os=-unicos
+ ;;
+ z8k-*-coff)
+ basic_machine=z8k-unknown
+ os=-sim
+ ;;
+ z80-*-coff)
+ basic_machine=z80-unknown
+ os=-sim
+ ;;
+ none)
+ basic_machine=none-none
+ os=-none
+ ;;
+
+# Here we handle the default manufacturer of certain CPU types. It is in
+# some cases the only manufacturer, in others, it is the most popular.
+ w89k)
+ basic_machine=hppa1.1-winbond
+ ;;
+ op50n)
+ basic_machine=hppa1.1-oki
+ ;;
+ op60c)
+ basic_machine=hppa1.1-oki
+ ;;
+ romp)
+ basic_machine=romp-ibm
+ ;;
+ mmix)
+ basic_machine=mmix-knuth
+ ;;
+ rs6000)
+ basic_machine=rs6000-ibm
+ ;;
+ vax)
+ basic_machine=vax-dec
+ ;;
+ pdp10)
+ # there are many clones, so DEC is not a safe bet
+ basic_machine=pdp10-unknown
+ ;;
+ pdp11)
+ basic_machine=pdp11-dec
+ ;;
+ we32k)
+ basic_machine=we32k-att
+ ;;
+ sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
+ basic_machine=sh-unknown
+ ;;
+ sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
+ basic_machine=sparc-sun
+ ;;
+ cydra)
+ basic_machine=cydra-cydrome
+ ;;
+ orion)
+ basic_machine=orion-highlevel
+ ;;
+ orion105)
+ basic_machine=clipper-highlevel
+ ;;
+ mac | mpw | mac-mpw)
+ basic_machine=m68k-apple
+ ;;
+ pmac | pmac-mpw)
+ basic_machine=powerpc-apple
+ ;;
+ *-unknown)
+ # Make sure to match an already-canonicalized machine name.
+ ;;
+ *)
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+
+# Canonicalize manufacturer aliases: everything from "digital" onward
+# becomes "dec", and everything from "commodore" onward becomes "cbm".
+# Any other machine name matches no arm and is left unchanged.
+case $basic_machine in
+ *-digital*)
+ basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
+ ;;
+ *-commodore*)
+ basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
+ ;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]
+then
+case $os in
+ # First match some system type aliases
+ # that might get confused with valid system types.
+ # -solaris* is a basic system type, with this one exception.
+ -auroraux)
+ os=-auroraux
+ ;;
+ -solaris1 | -solaris1.*)
+ os=`echo $os | sed -e 's|solaris1|sunos4|'`
+ ;;
+ -solaris)
+ os=-solaris2
+ ;;
+ -svr4*)
+ os=-sysv4
+ ;;
+ -unixware*)
+ os=-sysv4.2uw
+ ;;
+ -gnu/linux*)
+ os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+ ;;
+ # First accept the basic system types.
+ # The portable systems come first.
+ # Each alternative MUST END IN A *, to match a version number.
+ # -sysv* is not here because it comes later, after sysvr4.
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
+ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
+ | -sym* | -kopensolaris* | -plan9* \
+ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+ | -aos* | -aros* \
+ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
+ | -bitrig* | -openbsd* | -solidbsd* \
+ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+ | -chorusos* | -chorusrdb* | -cegcc* \
+ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+ | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+ | -linux-newlib* | -linux-musl* | -linux-uclibc* \
+ | -uxpv* | -beos* | -mpeix* | -udk* \
+ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
+ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
+ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
+ # Remember, each alternative MUST END IN *, to match a version number.
+ ;;
+ -qnx*)
+ case $basic_machine in
+ x86-* | i*86-*)
+ ;;
+ *)
+ os=-nto$os
+ ;;
+ esac
+ ;;
+ -nto-qnx*)
+ ;;
+ -nto*)
+ os=`echo $os | sed -e 's|nto|nto-qnx|'`
+ ;;
+ -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+ | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
+ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+ ;;
+ -mac*)
+ os=`echo $os | sed -e 's|mac|macos|'`
+ ;;
+ -linux-dietlibc)
+ os=-linux-dietlibc
+ ;;
+ -linux*)
+ os=`echo $os | sed -e 's|linux|linux-gnu|'`
+ ;;
+ -sunos5*)
+ os=`echo $os | sed -e 's|sunos5|solaris2|'`
+ ;;
+ -sunos6*)
+ os=`echo $os | sed -e 's|sunos6|solaris3|'`
+ ;;
+ -opened*)
+ os=-openedition
+ ;;
+ -os400*)
+ os=-os400
+ ;;
+ -wince*)
+ os=-wince
+ ;;
+ -osfrose*)
+ os=-osfrose
+ ;;
+ -osf*)
+ os=-osf
+ ;;
+ -utek*)
+ os=-bsd
+ ;;
+ -dynix*)
+ os=-bsd
+ ;;
+ -acis*)
+ os=-aos
+ ;;
+ -atheos*)
+ os=-atheos
+ ;;
+ -syllable*)
+ os=-syllable
+ ;;
+ -386bsd)
+ os=-bsd
+ ;;
+ -ctix* | -uts*)
+ os=-sysv
+ ;;
+ -nova*)
+ os=-rtmk-nova
+ ;;
+ -ns2 )
+ os=-nextstep2
+ ;;
+ -nsk*)
+ os=-nsk
+ ;;
+ # Preserve the version number of sinix5.
+ -sinix5.*)
+ os=`echo $os | sed -e 's|sinix|sysv|'`
+ ;;
+ -sinix*)
+ os=-sysv4
+ ;;
+ -tpf*)
+ os=-tpf
+ ;;
+ -triton*)
+ os=-sysv3
+ ;;
+ -oss*)
+ os=-sysv3
+ ;;
+ -svr4)
+ os=-sysv4
+ ;;
+ -svr3)
+ os=-sysv3
+ ;;
+ -sysvr4)
+ os=-sysv4
+ ;;
+ # This must come after -sysvr4.
+ -sysv*)
+ ;;
+ -ose*)
+ os=-ose
+ ;;
+ -es1800*)
+ os=-ose
+ ;;
+ -xenix)
+ os=-xenix
+ ;;
+ -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+ os=-mint
+ ;;
+ -aros*)
+ os=-aros
+ ;;
+ -zvmoe)
+ os=-zvmoe
+ ;;
+ -dicos*)
+ os=-dicos
+ ;;
+ -nacl*)
+ ;;
+ -none)
+ ;;
+ *)
+ # Get rid of the `-' at the beginning of $os.
+ os=`echo $os | sed 's/[^-]*-//'`
+ echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or, put another way, the most popular OS provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system. Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+ score-*)
+ os=-elf
+ ;;
+ spu-*)
+ os=-elf
+ ;;
+ *-acorn)
+ os=-riscix1.2
+ ;;
+ arm*-rebel)
+ os=-linux
+ ;;
+ arm*-semi)
+ os=-aout
+ ;;
+ c4x-* | tic4x-*)
+ os=-coff
+ ;;
+ hexagon-*)
+ os=-elf
+ ;;
+ tic54x-*)
+ os=-coff
+ ;;
+ tic55x-*)
+ os=-coff
+ ;;
+ tic6x-*)
+ os=-coff
+ ;;
+ # This must come before the *-dec entry.
+ pdp10-*)
+ os=-tops20
+ ;;
+ pdp11-*)
+ os=-none
+ ;;
+ *-dec | vax-*)
+ os=-ultrix4.2
+ ;;
+ m68*-apollo)
+ os=-domain
+ ;;
+ i386-sun)
+ os=-sunos4.0.2
+ ;;
+ m68000-sun)
+ os=-sunos3
+ ;;
+ m68*-cisco)
+ os=-aout
+ ;;
+ mep-*)
+ os=-elf
+ ;;
+ mips*-cisco)
+ os=-elf
+ ;;
+ mips*-*)
+ os=-elf
+ ;;
+ or1k-*)
+ os=-elf
+ ;;
+ or32-*)
+ os=-coff
+ ;;
+ *-tti) # must be before sparc entry or we get the wrong os.
+ os=-sysv3
+ ;;
+ sparc-* | *-sun)
+ os=-sunos4.1.1
+ ;;
+ *-be)
+ os=-beos
+ ;;
+ *-haiku)
+ os=-haiku
+ ;;
+ *-ibm)
+ os=-aix
+ ;;
+ *-knuth)
+ os=-mmixware
+ ;;
+ *-wec)
+ os=-proelf
+ ;;
+ *-winbond)
+ os=-proelf
+ ;;
+ *-oki)
+ os=-proelf
+ ;;
+ *-hp)
+ os=-hpux
+ ;;
+ *-hitachi)
+ os=-hiux
+ ;;
+ i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+ os=-sysv
+ ;;
+ *-cbm)
+ os=-amigaos
+ ;;
+ *-dg)
+ os=-dgux
+ ;;
+ *-dolphin)
+ os=-sysv3
+ ;;
+ m68k-ccur)
+ os=-rtu
+ ;;
+ m88k-omron*)
+ os=-luna
+ ;;
+ *-next )
+ os=-nextstep
+ ;;
+ *-sequent)
+ os=-ptx
+ ;;
+ *-crds)
+ os=-unos
+ ;;
+ *-ns)
+ os=-genix
+ ;;
+ i370-*)
+ os=-mvs
+ ;;
+ # NOTE(review): this arm appears unreachable. The earlier `*-next )' arm in
+ # this same case statement (os=-nextstep) has the same pattern and wins.
+ *-next)
+ os=-nextstep3
+ ;;
+ *-gould)
+ os=-sysv
+ ;;
+ *-highlevel)
+ os=-bsd
+ ;;
+ *-encore)
+ os=-bsd
+ ;;
+ *-sgi)
+ os=-irix
+ ;;
+ *-siemens)
+ os=-sysv4
+ ;;
+ *-masscomp)
+ os=-rtu
+ ;;
+ f30[01]-fujitsu | f700-fujitsu)
+ os=-uxpv
+ ;;
+ *-rom68k)
+ os=-coff
+ ;;
+ *-*bug)
+ os=-coff
+ ;;
+ *-apple)
+ os=-macos
+ ;;
+ *-atari*)
+ os=-mint
+ ;;
+ *)
+ os=-none
+ ;;
+esac
+fi
+
+# Here we handle the case where we know the os, and the CPU type, but not the
+# manufacturer. We pick the logical manufacturer.
+# $vendor starts as the placeholder "unknown" and is only changed when $os
+# matches one of the arms below.
+vendor=unknown
+case $basic_machine in
+ # Only machine strings that still end in "-unknown" are rewritten.
+ *-unknown)
+ case $os in
+ -riscix*)
+ vendor=acorn
+ ;;
+ -sunos*)
+ vendor=sun
+ ;;
+ -cnk*|-aix*)
+ vendor=ibm
+ ;;
+ -beos*)
+ vendor=be
+ ;;
+ -hpux*)
+ vendor=hp
+ ;;
+ -mpeix*)
+ vendor=hp
+ ;;
+ -hiux*)
+ vendor=hitachi
+ ;;
+ -unos*)
+ vendor=crds
+ ;;
+ -dgux*)
+ vendor=dg
+ ;;
+ -luna*)
+ vendor=omron
+ ;;
+ -genix*)
+ vendor=ns
+ ;;
+ -mvs* | -opened*)
+ vendor=ibm
+ ;;
+ -os400*)
+ vendor=ibm
+ ;;
+ -ptx*)
+ vendor=sequent
+ ;;
+ -tpf*)
+ vendor=ibm
+ ;;
+ -vxsim* | -vxworks* | -windiss*)
+ vendor=wrs
+ ;;
+ -aux*)
+ vendor=apple
+ ;;
+ -hms*)
+ vendor=hitachi
+ ;;
+ -mpw* | -macos*)
+ vendor=apple
+ ;;
+ -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+ vendor=atari
+ ;;
+ -vos*)
+ vendor=stratus
+ ;;
+ esac
+ # Replace the first "unknown" in $basic_machine with the chosen vendor.
+ # If no arm matched, $vendor is still "unknown" and nothing changes.
+ basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
+ ;;
+esac
+
+echo $basic_machine$os
+exit
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/config/install-sh b/config/install-sh
new file mode 100755
index 00000000..377bb868
--- /dev/null
+++ b/config/install-sh
@@ -0,0 +1,527 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2011-11-20.07; # UTC
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# 'make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+
+nl='
+'
+IFS=" "" $nl"
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit=${DOITPROG-}
+if test -z "$doit"; then
+ doit_exec=exec
+else
+ doit_exec=$doit
+fi
+
+# Put in absolute file names if you don't have them in your path;
+# or use environment vars.
+
+chgrpprog=${CHGRPPROG-chgrp}
+chmodprog=${CHMODPROG-chmod}
+chownprog=${CHOWNPROG-chown}
+cmpprog=${CMPPROG-cmp}
+cpprog=${CPPROG-cp}
+mkdirprog=${MKDIRPROG-mkdir}
+mvprog=${MVPROG-mv}
+rmprog=${RMPROG-rm}
+stripprog=${STRIPPROG-strip}
+
+posix_glob='?'
+initialize_posix_glob='
+ test "$posix_glob" != "?" || {
+ if (set -f) 2>/dev/null; then
+ posix_glob=
+ else
+ posix_glob=:
+ fi
+ }
+'
+
+posix_mkdir=
+
+# Desired mode of installed file.
+mode=0755
+
+chgrpcmd=
+chmodcmd=$chmodprog
+chowncmd=
+mvcmd=$mvprog
+rmcmd="$rmprog -f"
+stripcmd=
+
+src=
+dst=
+dir_arg=
+dst_arg=
+
+copy_on_change=false
+no_target_directory=
+
+usage="\
+Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+ or: $0 [OPTION]... SRCFILES... DIRECTORY
+ or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+ or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+ --help display this help and exit.
+ --version display version info and exit.
+
+ -c (ignored)
+ -C install only if different (preserve the last data modification time)
+ -d create directories instead of installing files.
+ -g GROUP $chgrpprog installed files to GROUP.
+ -m MODE $chmodprog installed files to MODE.
+ -o USER $chownprog installed files to USER.
+ -s $stripprog installed files.
+ -t DIRECTORY install into DIRECTORY.
+ -T report an error if DSTFILE is a directory.
+
+Environment variables override the default commands:
+ CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
+ RMPROG STRIPPROG
+"
+
+while test $# -ne 0; do
+ case $1 in
+ -c) ;;
+
+ -C) copy_on_change=true;;
+
+ -d) dir_arg=true;;
+
+ -g) chgrpcmd="$chgrpprog $2"
+ shift;;
+
+ --help) echo "$usage"; exit $?;;
+
+ -m) mode=$2
+ case $mode in
+ *' '* | *' '* | *'
+'* | *'*'* | *'?'* | *'['*)
+ echo "$0: invalid mode: $mode" >&2
+ exit 1;;
+ esac
+ shift;;
+
+ -o) chowncmd="$chownprog $2"
+ shift;;
+
+ -s) stripcmd=$stripprog;;
+
+ -t) dst_arg=$2
+ # Protect names problematic for 'test' and other utilities.
+ case $dst_arg in
+ -* | [=\(\)!]) dst_arg=./$dst_arg;;
+ esac
+ shift;;
+
+ -T) no_target_directory=true;;
+
+ --version) echo "$0 $scriptversion"; exit $?;;
+
+ --) shift
+ break;;
+
+ -*) echo "$0: invalid option: $1" >&2
+ exit 1;;
+
+ *) break;;
+ esac
+ shift
+done
+
+# Split the remaining arguments into sources ($@) and destination ($dst_arg).
+# Runs only when neither -d nor -t was given and at least one argument remains.
+if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
+ # When -d is used, all remaining arguments are directories to create.
+ # When -t is used, the destination is already specified.
+ # Otherwise, the last argument is the destination. Remove it from $@.
+ #
+ # The loop visits each original argument once. On every pass the value
+ # held in $dst_arg from the previous pass is appended to the end of $@ and
+ # the current front argument is shifted off and becomes the new $dst_arg.
+ # After the last pass $dst_arg holds the final argument and $@ holds all
+ # the others in their original order.
+ for arg
+ do
+ if test -n "$dst_arg"; then
+ # $@ is not empty: it contains at least $arg.
+ # "fnord" is a throwaway first word that is shifted off again right away;
+ # presumably it keeps 'set' from treating a leading "-..." argument as
+ # an option.
+ set fnord "$@" "$dst_arg"
+ shift # fnord
+ fi
+ shift # arg
+ dst_arg=$arg
+ # Protect names problematic for 'test' and other utilities.
+ # Note that the "./"-prefixed form is what gets appended back to $@ on
+ # the next pass.
+ case $dst_arg in
+ -* | [=\(\)!]) dst_arg=./$dst_arg;;
+ esac
+ done
+fi
+
+if test $# -eq 0; then
+ if test -z "$dir_arg"; then
+ echo "$0: no input file specified." >&2
+ exit 1
+ fi
+ # It's OK to call 'install-sh -d' without argument.
+ # This can happen when creating conditional directories.
+ exit 0
+fi
+
+# File-install mode only (no -d): set up signal handling and compute
+# $cp_umask, the umask under which the temporary copy is created.
+if test -z "$dir_arg"; then
+ # $do_exit is expanded when each trap is defined (double quotes), while the
+ # $ret inside it is expanded only when the trap fires.
+ do_exit='(exit $ret); exit $ret'
+ # Exit with 128 + signal number for signals 1, 2, 13 and 15.
+ trap "ret=129; $do_exit" 1
+ trap "ret=130; $do_exit" 2
+ trap "ret=141; $do_exit" 13
+ trap "ret=143; $do_exit" 15
+
+ # Set umask so as not to create temps with too-generous modes.
+ # However, 'strip' requires both read and write access to temps.
+ case $mode in
+ # Optimize common cases.
+ # 777 - 644 = 133 and 777 - 755 = 22; neither masks the owner's rw bits.
+ *644) cp_umask=133;;
+ *755) cp_umask=22;;
+
+ # Any other mode ending in an octal digit: subtract its last three digits
+ # from 777. With -s the result is then reduced modulo 200 so that the
+ # owner digit is at most 1, i.e. owner read and write stay unmasked.
+ *[0-7])
+ if test -z "$stripcmd"; then
+ u_plus_rw=
+ else
+ u_plus_rw='% 200'
+ fi
+ cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
+ # Anything else is passed to umask as given (presumably a symbolic mode),
+ # with ",u+rw" appended when -s was requested.
+ *)
+ if test -z "$stripcmd"; then
+ u_plus_rw=
+ else
+ u_plus_rw=,u+rw
+ fi
+ cp_umask=$mode$u_plus_rw;;
+ esac
+fi
+
+for src
+do
+ # Protect names problematic for 'test' and other utilities.
+ case $src in
+ -* | [=\(\)!]) src=./$src;;
+ esac
+
+ if test -n "$dir_arg"; then
+ dst=$src
+ dstdir=$dst
+ test -d "$dstdir"
+ dstdir_status=$?
+ else
+
+ # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+ # might cause directories to be created, which would be especially bad
+ # if $src (and thus $dsttmp) contains '*'.
+ if test ! -f "$src" && test ! -d "$src"; then
+ echo "$0: $src does not exist." >&2
+ exit 1
+ fi
+
+ if test -z "$dst_arg"; then
+ echo "$0: no destination specified." >&2
+ exit 1
+ fi
+ dst=$dst_arg
+
+ # If destination is a directory, append the input filename; won't work
+ # if double slashes aren't ignored.
+ if test -d "$dst"; then
+ if test -n "$no_target_directory"; then
+ echo "$0: $dst_arg: Is a directory" >&2
+ exit 1
+ fi
+ dstdir=$dst
+ dst=$dstdir/`basename "$src"`
+ dstdir_status=0
+ else
+ # Prefer dirname, but fall back on a substitute if dirname fails.
+ dstdir=`
+ (dirname "$dst") 2>/dev/null ||
+ expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$dst" : 'X\(//\)[^/]' \| \
+ X"$dst" : 'X\(//\)$' \| \
+ X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
+ echo X"$dst" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'
+ `
+
+ test -d "$dstdir"
+ dstdir_status=$?
+ fi
+ fi
+
+ obsolete_mkdir_used=false
+
+ if test $dstdir_status != 0; then
+ case $posix_mkdir in
+ '')
+ # Create intermediate dirs using mode 755 as modified by the umask.
+ # This is like FreeBSD 'install' as of 1997-10-28.
+ umask=`umask`
+ case $stripcmd.$umask in
+ # Optimize common cases.
+ *[2367][2367]) mkdir_umask=$umask;;
+ .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
+
+ *[0-7])
+ mkdir_umask=`expr $umask + 22 \
+ - $umask % 100 % 40 + $umask % 20 \
+ - $umask % 10 % 4 + $umask % 2
+ `;;
+ *) mkdir_umask=$umask,go-w;;
+ esac
+
+ # With -d, create the new directory with the user-specified mode.
+ # Otherwise, rely on $mkdir_umask.
+ if test -n "$dir_arg"; then
+ mkdir_mode=-m$mode
+ else
+ mkdir_mode=
+ fi
+
+ posix_mkdir=false
+ case $umask in
+ *[123567][0-7][0-7])
+ # POSIX mkdir -p sets u+wx bits regardless of umask, which
+ # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
+ ;;
+ *)
+ tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
+ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
+
+ if (umask $mkdir_umask &&
+ exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
+ then
+ if test -z "$dir_arg" || {
+ # Check for POSIX incompatibilities with -m.
+ # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
+ # other-writable bit of parent directory when it shouldn't.
+ # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
+ ls_ld_tmpdir=`ls -ld "$tmpdir"`
+ case $ls_ld_tmpdir in
+ d????-?r-*) different_mode=700;;
+ d????-?--*) different_mode=755;;
+ *) false;;
+ esac &&
+ $mkdirprog -m$different_mode -p -- "$tmpdir" && {
+ ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
+ test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
+ }
+ }
+ then posix_mkdir=:
+ fi
+ rmdir "$tmpdir/d" "$tmpdir"
+ else
+ # Remove any dirs left behind by ancient mkdir implementations.
+ rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
+ fi
+ trap '' 0;;
+ esac;;
+ esac
+
+ if
+ $posix_mkdir && (
+ umask $mkdir_umask &&
+ $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
+ )
+ then :
+ else
+
+ # The umask is ridiculous, or mkdir does not conform to POSIX,
+ # or it failed possibly due to a race condition. Create the
+ # directory the slow way, step by step, checking for races as we go.
+
+ case $dstdir in
+ /*) prefix='/';;
+ [-=\(\)!]*) prefix='./';;
+ *) prefix='';;
+ esac
+
+ eval "$initialize_posix_glob"
+
+ oIFS=$IFS
+ IFS=/
+ $posix_glob set -f
+ set fnord $dstdir
+ shift
+ $posix_glob set +f
+ IFS=$oIFS
+
+ prefixes=
+
+ # Walk $dstdir one path component at a time (the positional parameters were
+ # set from $dstdir split on "/" just above). $prefix is the path built so
+ # far; $prefixes collects the quoted components that do not exist yet, for
+ # a single mkdir call after the loop.
+ for d
+ do
+ # Skip empty components produced by leading or doubled slashes.
+ test X"$d" = X && continue
+
+ prefix=$prefix$d
+ if test -d "$prefix"; then
+ # Everything up to here exists, so nothing collected so far is needed.
+ prefixes=
+ else
+ if $posix_mkdir; then
+ # Retry 'mkdir -p' on the whole destination with the restricted umask.
+ # This must run the 'umask' command; writing "umask=$mkdir_umask" only
+ # assigns a shell variable and leaves the real umask unchanged.
+ (umask $mkdir_umask &&
+ $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
+ # Don't fail if two instances are running concurrently.
+ test -d "$prefix" || exit 1
+ else
+ # Quote the component for the later eval: each embedded single quote
+ # is replaced by '\'' before the whole word is wrapped in quotes.
+ case $prefix in
+ *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) qprefix=$prefix;;
+ esac
+ prefixes="$prefixes '$qprefix'"
+ fi
+ fi
+ prefix=$prefix/
+ done
+
+ if test -n "$prefixes"; then
+ # Don't fail if two instances are running concurrently.
+ (umask $mkdir_umask &&
+ eval "\$doit_exec \$mkdirprog $prefixes") ||
+ test -d "$dstdir" || exit 1
+ obsolete_mkdir_used=true
+ fi
+ fi
+ fi
+
+ if test -n "$dir_arg"; then
+ { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
+ { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
+ { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
+ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
+ else
+
+ # Make a couple of temp file names in the proper directory.
+ dsttmp=$dstdir/_inst.$$_
+ rmtmp=$dstdir/_rm.$$_
+
+ # Trap to clean up those temp files at exit.
+ trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
+
+ # Copy the file name to the temp name.
+ (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
+
+ # and set any options; do chmod last to preserve setuid bits.
+ #
+ # If any of these fail, we abort the whole thing. If we want to
+ # ignore errors from any of these, just make sure not to ignore
+ # errors from the above "$doit $cpprog $src $dsttmp" command.
+ #
+ { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
+ { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
+ { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
+ { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
+
+ # If -C, don't bother to copy if it wouldn't change the file.
+ if $copy_on_change &&
+ old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
+ new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
+
+ eval "$initialize_posix_glob" &&
+ $posix_glob set -f &&
+ set X $old && old=:$2:$4:$5:$6 &&
+ set X $new && new=:$2:$4:$5:$6 &&
+ $posix_glob set +f &&
+
+ test "$old" = "$new" &&
+ $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
+ then
+ rm -f "$dsttmp"
+ else
+ # Rename the file to the real destination.
+ $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
+
+ # The rename failed, perhaps because mv can't rename something else
+ # to itself, or perhaps because mv is so ancient that it does not
+ # support -f.
+ {
+ # Now remove or move aside any old file at destination location.
+ # We try this two ways since rm can't unlink itself on some
+ # systems and the destination file might be busy for other
+ # reasons. In this case, the final cleanup might fail but the new
+ # file should still install successfully.
+ {
+ test ! -f "$dst" ||
+ $doit $rmcmd -f "$dst" 2>/dev/null ||
+ { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
+ { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
+ } ||
+ { echo "$0: cannot unlink or rename $dst" >&2
+ (exit 1); exit 1
+ }
+ } &&
+
+ # Now rename the file to the real destination.
+ $doit $mvcmd "$dsttmp" "$dst"
+ }
+ fi || exit 1
+
+ trap '' 0
+ fi
+done
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/configure b/configure
new file mode 100755
index 00000000..552e186d
--- /dev/null
+++ b/configure
@@ -0,0 +1,7246 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.69 for btrfs-progs v4.4.1.
+#
+# Report bugs to <linux-btrfs@vger.kernel.org>.
+#
+#
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='print -r --'
+ as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='printf %s\n'
+ as_echo_n='printf %s'
+else
+ if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+ as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+ as_echo_n='/usr/ucb/echo -n'
+ else
+ as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+ as_echo_n_body='eval
+ arg=$1;
+ case $arg in #(
+ *"$as_nl"*)
+ expr "X$arg" : "X\\(.*\\)$as_nl";
+ arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+ esac;
+ expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+ '
+ export as_echo_n_body
+ as_echo_n='sh -c $as_echo_n_body as_echo'
+ fi
+ export as_echo_body
+ as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ PATH_SEPARATOR=:
+ (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+ (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+ PATH_SEPARATOR=';'
+ }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" "" $as_nl"
+
+# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+ $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+ exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there. '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+# Use a proper internal environment variable to ensure we don't fall
+ # into an infinite loop, continuously re-executing ourselves.
+ # _as_can_reexec is set to "no" and exported before the exec below, so the
+ # re-executed script sees it and skips this block.
+ if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+ _as_can_reexec=no; export _as_can_reexec;
+ # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+ *v*x* | *x*v* ) as_opts=-vx ;;
+ *v* ) as_opts=-v ;;
+ *x* ) as_opts=-x ;;
+ * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+# Use plain `exit' here: as_fn_exit is only defined further down in this
+# script, so calling it at this point would fail with "command not found"
+# and execution would continue past the failed re-exec.
+exit 255
+ fi
+ # We don't want this to propagate to other subprocesses.
+ { _as_can_reexec=; unset _as_can_reexec;}
+if test "x$CONFIG_SHELL" = x; then
+ as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '\${1+\"\$@\"}'='\"\$@\"'
+ setopt NO_GLOB_SUBST
+else
+ case \`(set -o) 2>/dev/null\` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+"
+ as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+
+else
+ exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
+ as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
+ as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
+ eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+ test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
+ if (eval "$as_required") 2>/dev/null; then :
+ as_have_required=yes
+else
+ as_have_required=no
+fi
+ if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
+
+else
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ as_found=:
+ case $as_dir in #(
+ /*)
+ for as_base in sh bash ksh sh5; do
+ # Try only shells that exist, to save several forks.
+ as_shell=$as_dir/$as_base
+ if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+ { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
+ CONFIG_SHELL=$as_shell as_have_required=yes
+ if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
+ break 2
+fi
+fi
+ done;;
+ esac
+ as_found=false
+done
+$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+ { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
+ CONFIG_SHELL=$SHELL as_have_required=yes
+fi; }
+IFS=$as_save_IFS
+
+
+ if test "x$CONFIG_SHELL" != x; then :
+ export CONFIG_SHELL
+ # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+ *v*x* | *x*v* ) as_opts=-vx ;;
+ *v* ) as_opts=-v ;;
+ *x* ) as_opts=-x ;;
+ * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+fi
+
+ if test x$as_have_required = xno; then :
+ $as_echo "$0: This script requires a shell more modern than all"
+ $as_echo "$0: the shells that I found on your system."
+ if test x${ZSH_VERSION+set} = xset ; then
+ $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+ $as_echo "$0: be upgraded to zsh 4.3.4 or later."
+ else
+ $as_echo "$0: Please tell bug-autoconf@gnu.org and
+$0: linux-btrfs@vger.kernel.org about your system,
+$0: including any error possibly output before this
+$0: message. Then install a modern shell, or manually run
+$0: the script under such a shell if you do have one."
+ fi
+ exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+# VAR is first assigned the empty string and then unset, so `unset' is never
+# applied to a variable that does not exist.
+as_fn_unset ()
+{
+ { eval $1=; unset $1;}
+}
+# as_unset holds the function name so it can be invoked as $as_unset VAR.
+as_unset=as_fn_unset
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+# Returning from a function sets $? for the caller, so no subshell such as
+# `(exit STATUS)' is needed.
+as_fn_set_status ()
+{
+ return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+ # Turn off errexit first; presumably so that the nonzero status set on the
+ # next line cannot end the shell before the explicit exit is reached.
+ set +e
+ # Make $? equal to STATUS before exiting as well as passing it to exit.
+ as_fn_set_status $1
+ exit $1
+} # as_fn_exit
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+# Takes no arguments: the directory is read from the global $as_dir, which
+# this function also overwrites. It relies on $as_mkdir_p, $as_dirname,
+# $as_expr and $as_echo having been set by the surrounding script.
+as_fn_mkdir_p ()
+{
+
+ # Prefix "./" to names starting with "-" so they are not taken as options.
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ # Three attempts, in order: the directory already exists; the command in
+ # $as_mkdir_p works; otherwise fall back to the manual walk below.
+ test -d "$as_dir" || eval $as_mkdir_p || {
+ as_dirs=
+ # Walk upwards from $as_dir until an existing directory is found. Each
+ # missing directory is quoted and prepended to $as_dirs, so the list ends
+ # up ordered parent-first for the single mkdir call after the loop.
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ # Compute the parent directory: try $as_dirname, then an $as_expr pattern
+ # match, then a sed script, each used only if the previous one fails.
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ # NOTE(review): when the fallback loop above has run, $as_dir now names the
+ # nearest existing ancestor, not the directory originally requested. The
+ # `test -d' below and the error message therefore refer to that ancestor,
+ # so a failed mkdir looks like it is not reported here. Confirm intent.
+ } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+# Returns success only when FILE passes both `test -f' and `test -x'.
+as_fn_executable_p ()
+{
+ # -f comes first so that anything that is not a regular file (for example
+ # a directory) is rejected before the -x check.
+ test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+# The probe runs inside a subshell and through eval, so a shell that does not
+# understand `+=' only fails the probe instead of aborting the script.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+ # Defined through eval so that the `+=' syntax is only parsed by shells
+ # that passed the probe above.
+ eval 'as_fn_append ()
+ {
+ eval $1+=\$2
+ }'
+else
+ # Fallback: the eval expands to VAR=$VAR$2, i.e. plain re-assignment.
+ as_fn_append ()
+ {
+ eval $1=\$$1\$2
+ }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+# The probe is evaluated in a subshell through eval, so a shell without
+# $(( )) support only fails the probe.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+ # Defined through eval so that $(( )) is only parsed by shells that
+ # passed the probe above.
+ eval 'as_fn_arith ()
+ {
+ as_val=$(( $* ))
+ }'
+else
+ # Fallback using expr. expr exits with status 1 when its result is zero or
+ # null, so `|| test $? -eq 1' treats that case as success while any other
+ # nonzero status is still reported as failure.
+ as_fn_arith ()
+ {
+ as_val=`expr "$@" || test $? -eq 1`
+ }
+fi # as_fn_arith
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+ # A STATUS of 0 is replaced by 1 so the script never exits successfully
+ # from here.
+ as_status=$1; test $as_status -eq 0 && as_status=1
+ # Log to the extra descriptor only when a fourth argument was given.
+ if test "$4"; then
+ # $as_lineno keeps its current value if already set, otherwise it takes
+ # LINENO ($3). The second assignment on this line updates as_lineno_stack.
+ as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+ fi
+ $as_echo "$as_me: error: $2" >&2
+ as_fn_exit $as_status
+} # as_fn_error
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+ as_lineno_1=$LINENO as_lineno_1a=$LINENO
+ as_lineno_2=$LINENO as_lineno_2a=$LINENO
+ eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+ test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+ # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-)
+ sed -n '
+ p
+ /[$]LINENO/=
+ ' <$as_myself |
+ sed '
+ s/[$]LINENO.*/&-/
+ t lineno
+ b
+ :lineno
+ N
+ :loop
+ s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+ t loop
+ s/-\n.*//
+ ' >$as_me.lineno &&
+ chmod +x "$as_me.lineno" ||
+ { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+ # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+ # already done that, so ensure we don't try to do so again and fall
+ # in an infinite loop. This has already happened in practice.
+ _as_can_reexec=no; export _as_can_reexec
+ # Don't try to exec as it changes $0, causing all sorts of problems
+ # (the dirname of $0 is not the place where we might find the
+ # original, and so on). Autoconf is especially sensitive to this.
+ . "./$as_me.lineno"
+ # Exit status is that of the last command.
+ exit
+}
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+ case `echo 'xy\c'` in
+ *c*) ECHO_T=' ';; # ECHO_T is single tab character.
+ xy) ECHO_C='\c';;
+ *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
+ ECHO_T=' ';;
+ esac;;
+*)
+ ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+ rm -f conf$$.dir/conf$$.file
+else
+ rm -f conf$$.dir
+ mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+ if ln -s conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s='ln -s'
+ # ... but there are two gotchas:
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+ # In both cases, we have to default to `cp -pR'.
+ ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+ as_ln_s='cp -pR'
+ elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+ else
+ as_ln_s='cp -pR'
+ fi
+else
+ as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p='mkdir -p "$as_dir"'
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+test -n "$DJDIR" || exec 7<&0 </dev/null
+exec 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='btrfs-progs'
+PACKAGE_TARNAME='btrfs-progs'
+PACKAGE_VERSION='v4.4.1'
+PACKAGE_STRING='btrfs-progs v4.4.1'
+PACKAGE_BUGREPORT='linux-btrfs@vger.kernel.org'
+PACKAGE_URL='http://btrfs.wiki.kernel.org'
+
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_unique_file="btrfs.c"
+ac_default_prefix=/usr/local
+ac_subst_vars='LTLIBOBJS
+LIBOBJS
+LIBBTRFS_PATCHLEVEL
+LIBBTRFS_MINOR
+LIBBTRFS_MAJOR
+LZO2_CFLAGS
+LZO2_LIBS_STATIC
+LZO2_LIBS
+ZLIB_LIBS_STATIC
+ZLIB_LIBS
+ZLIB_CFLAGS
+UUID_LIBS_STATIC
+UUID_LIBS
+UUID_CFLAGS
+BLKID_LIBS_STATIC
+BLKID_LIBS
+BLKID_CFLAGS
+COM_ERR_LIBS
+COM_ERR_CFLAGS
+EXT2FS_LIBS
+EXT2FS_CFLAGS
+DISABLE_BTRFSCONVERT
+SED
+MV
+GZIP
+XMLTO
+ASCIIDOC
+DISABLE_DOCUMENTATION
+PKG_CONFIG_LIBDIR
+PKG_CONFIG_PATH
+PKG_CONFIG
+RMDIR
+RM
+AR
+LN_S
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+host_os
+host_vendor
+host_cpu
+host
+build_os
+build_vendor
+build_cpu
+build
+EGREP
+GREP
+CPP
+OBJEXT
+EXEEXT
+ac_ct_CC
+CPPFLAGS
+LDFLAGS
+CC
+CFLAGS
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+enable_largefile
+enable_backtrace
+enable_documentation
+enable_convert
+'
+ ac_precious_vars='build_alias
+host_alias
+target_alias
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP
+PKG_CONFIG
+PKG_CONFIG_PATH
+PKG_CONFIG_LIBDIR
+EXT2FS_CFLAGS
+EXT2FS_LIBS
+COM_ERR_CFLAGS
+COM_ERR_LIBS
+BLKID_CFLAGS
+BLKID_LIBS
+UUID_CFLAGS
+UUID_LIBS
+ZLIB_CFLAGS
+ZLIB_LIBS'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval $ac_prev=\$ac_option
+ ac_prev=
+ continue
+ fi
+
+ case $ac_option in
+ *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+ *=) ac_optarg= ;;
+ *) ac_optarg=yes ;;
+ esac
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case $ac_dashdash$ac_option in
+ --)
+ ac_dashdash=yes ;;
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir=$ac_optarg ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build_alias ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build_alias=$ac_optarg ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file=$ac_optarg ;;
+
+ --config-cache | -C)
+ cache_file=config.cache ;;
+
+ -datadir | --datadir | --datadi | --datad)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=*)
+ datadir=$ac_optarg ;;
+
+ -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+ | --dataroo | --dataro | --datar)
+ ac_prev=datarootdir ;;
+ -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+ datarootdir=$ac_optarg ;;
+
+ -disable-* | --disable-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid feature name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"enable_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval enable_$ac_useropt=no ;;
+
+ -docdir | --docdir | --docdi | --doc | --do)
+ ac_prev=docdir ;;
+ -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+ docdir=$ac_optarg ;;
+
+ -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+ ac_prev=dvidir ;;
+ -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+ dvidir=$ac_optarg ;;
+
+ -enable-* | --enable-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid feature name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"enable_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval enable_$ac_useropt=\$ac_optarg ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix=$ac_optarg ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=long ;;
+ -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+ ac_init_help=recursive ;;
+ -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+ ac_init_help=short ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host_alias ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host_alias=$ac_optarg ;;
+
+ -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+ ac_prev=htmldir ;;
+ -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+ | --ht=*)
+ htmldir=$ac_optarg ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir=$ac_optarg ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir=$ac_optarg ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir=$ac_optarg ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir=$ac_optarg ;;
+
+ -localedir | --localedir | --localedi | --localed | --locale)
+ ac_prev=localedir ;;
+ -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+ localedir=$ac_optarg ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst | --locals)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+ localstatedir=$ac_optarg ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir=$ac_optarg ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir=$ac_optarg ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=$ac_optarg ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix=$ac_optarg ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix=$ac_optarg ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name=$ac_optarg ;;
+
+ -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+ ac_prev=pdfdir ;;
+ -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+ pdfdir=$ac_optarg ;;
+
+ -psdir | --psdir | --psdi | --psd | --ps)
+ ac_prev=psdir ;;
+ -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+ psdir=$ac_optarg ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir=$ac_optarg ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir=$ac_optarg ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site=$ac_optarg ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir=$ac_optarg ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir=$ac_optarg ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target_alias ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target_alias=$ac_optarg ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid package name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"with_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval with_$ac_useropt=\$ac_optarg ;;
+
+ -without-* | --without-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid package name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"with_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval with_$ac_useropt=no ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes=$ac_optarg ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries=$ac_optarg ;;
+
+ -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+ ;;
+
+ *=*)
+ ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+ # Reject names that are not valid shell variable names.
+ case $ac_envvar in #(
+ '' | [0-9]* | *[!_$as_cr_alnum]* )
+ as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+ esac
+ eval $ac_envvar=\$ac_optarg
+ export $ac_envvar ;;
+
+ *)
+ # FIXME: should be removed in autoconf 3.0.
+ $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+ expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+ : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+ as_fn_error $? "missing argument to $ac_option"
+fi
+
+if test -n "$ac_unrecognized_opts"; then
+ case $enable_option_checking in
+ no) ;;
+ fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+ *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+ esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
+ datadir sysconfdir sharedstatedir localstatedir includedir \
+ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+ libdir localedir mandir
+do
+ eval ac_val=\$$ac_var
+ # Remove trailing slashes.
+ case $ac_val in
+ */ )
+ ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+ eval $ac_var=\$ac_val;;
+ esac
+ # Be sure to have absolute directory names.
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* ) continue;;
+ NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+ esac
+ as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+ if test "x$build_alias" = x; then
+ cross_compiling=maybe
+ elif test "x$build_alias" != "x$host_alias"; then
+ cross_compiling=yes
+ fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+ as_fn_error $? "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+ as_fn_error $? "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then the parent directory.
+ ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_myself" : 'X\(//\)[^/]' \| \
+ X"$as_myself" : 'X\(//\)$' \| \
+ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ srcdir=$ac_confdir
+ if test ! -r "$srcdir/$ac_unique_file"; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+ test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+ as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+ cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+ pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+ srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+ eval ac_env_${ac_var}_set=\${${ac_var}+set}
+ eval ac_env_${ac_var}_value=\$${ac_var}
+ eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+ eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<_ACEOF
+\`configure' configures btrfs-progs v4.4.1 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking ...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --datarootdir=DIR read-only arch.-independent data root [PREFIX/share]
+ --datadir=DIR read-only architecture-independent data [DATAROOTDIR]
+ --infodir=DIR info documentation [DATAROOTDIR/info]
+ --localedir=DIR locale-dependent data [DATAROOTDIR/locale]
+ --mandir=DIR man documentation [DATAROOTDIR/man]
+ --docdir=DIR documentation root [DATAROOTDIR/doc/btrfs-progs]
+ --htmldir=DIR html documentation [DOCDIR]
+ --dvidir=DIR dvi documentation [DOCDIR]
+ --pdfdir=DIR pdf documentation [DOCDIR]
+ --psdir=DIR ps documentation [DOCDIR]
+_ACEOF
+
+ cat <<\_ACEOF
+
+System types:
+ --build=BUILD configure for building on BUILD [guessed]
+ --host=HOST cross-compile to build programs to run on HOST [BUILD]
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+ case $ac_init_help in
+ short | recursive ) echo "Configuration of btrfs-progs v4.4.1:";;
+ esac
+ cat <<\_ACEOF
+
+Optional Features:
+ --disable-option-checking ignore unrecognized --enable/--with options
+ --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
+ --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
+ --disable-largefile omit support for large files
+ --disable-backtrace disable btrfs backtrace
+ --disable-documentation do not build documentation
+ --disable-convert do not build btrfs-convert
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ LIBS libraries to pass to the linker, e.g. -l<library>
+ CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+ you have headers in a nonstandard directory <include dir>
+ CPP C preprocessor
+ PKG_CONFIG path to pkg-config utility
+ PKG_CONFIG_PATH
+ directories to add to pkg-config's search path
+ PKG_CONFIG_LIBDIR
+ path overriding pkg-config's built-in search path
+ EXT2FS_CFLAGS
+ C compiler flags for EXT2FS, overriding pkg-config
+ EXT2FS_LIBS linker flags for EXT2FS, overriding pkg-config
+ COM_ERR_CFLAGS
+ C compiler flags for COM_ERR, overriding pkg-config
+ COM_ERR_LIBS
+ linker flags for COM_ERR, overriding pkg-config
+ BLKID_CFLAGS
+ C compiler flags for BLKID, overriding pkg-config
+ BLKID_LIBS linker flags for BLKID, overriding pkg-config
+ UUID_CFLAGS C compiler flags for UUID, overriding pkg-config
+ UUID_LIBS linker flags for UUID, overriding pkg-config
+ ZLIB_CFLAGS C compiler flags for ZLIB, overriding pkg-config
+ ZLIB_LIBS linker flags for ZLIB, overriding pkg-config
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <linux-btrfs@vger.kernel.org>.
+btrfs-progs home page: <http://btrfs.wiki.kernel.org>.
+_ACEOF
+ac_status=$?
+fi
+
+if test "$ac_init_help" = "recursive"; then
+ # If there are subdirs, report their specific --help.
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+ test -d "$ac_dir" ||
+ { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+ continue
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+ cd "$ac_dir" || { ac_status=$?; continue; }
+ # Check for guested configure.
+ if test -f "$ac_srcdir/configure.gnu"; then
+ echo &&
+ $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+ elif test -f "$ac_srcdir/configure"; then
+ echo &&
+ $SHELL "$ac_srcdir/configure" --help=recursive
+ else
+ $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+ fi || ac_status=$?
+ cd "$ac_pwd" || { ac_status=$?; break; }
+ done
+fi
+
+test -n "$ac_init_help" && exit $ac_status
+if $ac_init_version; then
+ cat <<\_ACEOF
+btrfs-progs configure v4.4.1
+generated by GNU Autoconf 2.69
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+ exit
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext
+ if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_cpp ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } > conftest.i && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+#
+# LINENO   ($1) line number quoted in log messages (unless as_lineno is set).
+# HEADER   ($2) header name, used as "#include <HEADER>".
+# VAR      ($3) name of the cache variable; if it is already set the tests
+#               are skipped and its value is only reported as "(cached)".
+# INCLUDES ($4) text placed before the #include in the compile test only.
+#
+# Two tests are run: a compile test (ac_fn_c_try_compile) and a
+# preprocess-only test (ac_fn_c_try_cpp). When they disagree a warning is
+# printed on standard error and logged on descriptor 5; VAR always receives
+# the compiler's verdict ("yes" or "no"). Progress messages go to
+# descriptor 6. Unlike the ac_fn_c_try_* helpers this function does not call
+# as_fn_set_status; the result is available in VAR only.
+ac_fn_c_check_header_mongrel ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ # "${VAR+:} false" succeeds exactly when VAR is set.
+ if eval \${$3+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_header_compiler=yes
+else
+ ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ ac_header_preproc=yes
+else
+ ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So? What about this header?
+# Only the two disagreeing outcomes produce warnings:
+#   compiler yes / preprocessor no (and $ac_c_preproc_warn_flag empty), and
+#   compiler no / preprocessor yes, which also asks the user to report the
+#   problem to the package's bug address.
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+ yes:no: )
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+ ;;
+ no:yes:* )
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+( $as_echo "## ------------------------------------------ ##
+## Report this to linux-btrfs@vger.kernel.org ##
+## ------------------------------------------ ##"
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+# Store the compiler's verdict in VAR unless VAR is already set.
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+ # Pop as_lineno_stack; when it is empty again, unset as_lineno.
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext and then run the resulting
+# ./conftest$ac_exeext, and return whether both steps succeeded. Assumes
+# that executables *can* be run.
+#
+# LINENO ($1) is the line number quoted in log messages unless as_lineno is
+# already set. Both commands, their error output and their exit statuses are
+# logged on descriptor 5.
+# ac_retval is 0 when the link command and the program both exit with
+# status 0; otherwise it is $ac_status, the exit status of whichever step
+# failed (the program is not run when linking fails). ac_retval is finally
+# handed to as_fn_set_status.
+ac_fn_c_try_run ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ # Step 1: link.
+ if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+ # Step 2: run the freshly linked program.
+ { { case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ ac_retval=0
+else
+ # This message is printed for a link failure as well; $ac_status is then
+ # the exit status of the link command.
+ $as_echo "$as_me: program exited with status $ac_status" >&5
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=$ac_status
+fi
+ # Remove by-products that some compilers leave next to the executable.
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+ # Pop as_lineno_stack; when it is empty again, unset as_lineno.
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+#
+# LINENO   ($1) line number quoted in log messages (unless as_lineno is set).
+# HEADER   ($2) header name, used as "#include <HEADER>".
+# VAR      ($3) name of the cache variable; it is set to "yes" or "no". If
+#               it is already set, the compile test is skipped and the value
+#               is reported as "(cached)".
+# INCLUDES ($4) text placed before the #include in the test program.
+#
+# Progress messages go to descriptor 6 and log entries to descriptor 5.
+# The function does not call as_fn_set_status; the result is available in
+# VAR only.
+ac_fn_c_check_header_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+# "${VAR+:} false" succeeds exactly when VAR is set.
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ eval "$3=yes"
+else
+ eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ # Pop as_lineno_stack; when it is empty again, unset as_lineno.
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_compile
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+#
+# LINENO ($1) is the line number quoted in log messages unless as_lineno is
+# already set. The link command's standard error is captured in
+# conftest.err; the command line, the filtered error output and the exit
+# status are logged on descriptor 5.
+# ac_retval is 0 only when all of the following hold:
+#   - the link command exits with status 0;
+#   - $ac_c_werror_flag is empty or conftest.err ends up empty;
+#   - conftest$ac_exeext exists and is not empty;
+#   - $cross_compiling is "yes" or conftest$ac_exeext is executable.
+# Otherwise the test program is dumped to descriptor 5 and ac_retval is 1.
+# ac_retval is finally handed to as_fn_set_status.
+ac_fn_c_try_link ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ # Start from a clean slate so stale outputs cannot satisfy the tests below.
+ rm -f conftest.$ac_objext conftest$ac_exeext
+ if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>conftest.err
+ ac_status=$?
+ # Drop lines that start with optional blanks and a "+" (presumably shell
+ # trace output) before logging and keeping the error text.
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ test -x conftest$ac_exeext
+ }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+ # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+ # interfere with the next link command; also delete a directory that is
+ # left behind by Apple's compiler. We do this before executing the actions.
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+ # Pop as_lineno_stack; when it is empty again, unset as_lineno.
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_link
+
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+#
+# LINENO ($1) line number quoted in log messages (unless as_lineno is set).
+# FUNC   ($2) name of the function to look for.
+# VAR    ($3) name of the cache variable; it is set to "yes" or "no". If it
+#             is already set, the link test is skipped and the value is
+#             reported as "(cached)".
+#
+# The test program declares FUNC itself as "char FUNC ();", calls it from
+# main and is passed to ac_fn_c_try_link, so the verdict depends on whether
+# the symbol can be linked. Progress messages go to descriptor 6 and log
+# entries to descriptor 5. The function does not call as_fn_set_status; the
+# result is available in VAR only.
+ac_fn_c_check_func ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+# "${VAR+:} false" succeeds exactly when VAR is set.
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $2 (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ eval "$3=yes"
+else
+ eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ # Pop as_lineno_stack; when it is empty again, unset as_lineno.
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_func
+# Start a fresh config.log (the file is truncated here) with a banner that
+# names the package, the Autoconf version and the invoking command line.
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by btrfs-progs $as_me v4.4.1, which was
+generated by GNU Autoconf 2.69. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+# From here on descriptor 5 appends to config.log.
+exec 5>>config.log
+# Log a description of the build platform: host name, uname fields and the
+# output of several platform-identification programs (most entries fall back
+# to "unknown" when the program cannot be run), followed by one "PATH: dir"
+# line per entry of $PATH.
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+# Split $PATH on $PATH_SEPARATOR; an empty entry is reported as ".".
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ $as_echo "PATH: $as_dir"
+ done
+IFS=$as_save_IFS
+
+} >&5
+
+# Section heading for the test results that follow in the log.
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+#
+# Pass 1 collects every kept argument in ac_configure_args0. Pass 2 rebuilds
+# the list in ac_configure_args and drops an option of a recognised form
+# when the very same argument occurs again later on the command line, so
+# only its last occurrence is recorded. Every recorded argument is wrapped
+# in single quotes, with embedded single quotes escaped by the sed command.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+ # "for ac_arg" without a list iterates over the script's own arguments.
+ for ac_arg
+ do
+ case $ac_arg in
+ -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ continue ;;
+ *\'*)
+ ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ case $ac_pass in
+ 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+ 2)
+ as_fn_append ac_configure_args1 " '$ac_arg'"
+ # When the previous argument was an unrecognised "-" option, this
+ # argument is kept unconditionally as its presumed value.
+ if test $ac_must_keep_next = true; then
+ ac_must_keep_next=false # Got value, back to normal.
+ else
+ case $ac_arg in
+ *=* | --config-cache | -C | -disable-* | --disable-* \
+ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+ | -with-* | --with-* | -without-* | --without-* | --x)
+ case "$ac_configure_args0 " in
+ "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+ esac
+ ;;
+ -* ) ac_must_keep_next=true ;;
+ esac
+ fi
+ as_fn_append ac_configure_args " '$ac_arg'"
+ ;;
+ esac
+ done
+done
+# The two scratch lists are no longer needed.
+{ ac_configure_args0=; unset ac_configure_args0;}
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+#
+# The exit trap (condition 0) below saves the exit status, appends the cache
+# variables, the output variables ($ac_subst_vars), the file substitutions
+# ($ac_subst_files, if any), the contents of confdefs.h (if not empty) and
+# the signal/exit status to descriptor 5, then removes core files,
+# conftest*, confdefs*, conf$$* and $ac_clean_files and exits with the
+# saved status.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+
+ $as_echo "## ---------------- ##
+## Cache variables. ##
+## ---------------- ##"
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+(
+ for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+ eval ac_val=\$$ac_var
+ case $ac_val in #(
+ *${as_nl}*)
+ case $ac_var in #(
+ *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+ esac
+ case $ac_var in #(
+ _ | IFS | as_nl) ;; #(
+ BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+ *) { eval $ac_var=; unset $ac_var;} ;;
+ esac ;;
+ esac
+ done
+ (set) 2>&1 |
+ case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+ *${as_nl}ac_space=\ *)
+ sed -n \
+ "s/'\''/'\''\\\\'\'''\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+ ;; #(
+ *)
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+ ;;
+ esac |
+ sort
+)
+ echo
+
+ $as_echo "## ----------------- ##
+## Output variables. ##
+## ----------------- ##"
+ echo
+ for ac_var in $ac_subst_vars
+ do
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+ esac
+ $as_echo "$ac_var='\''$ac_val'\''"
+ done | sort
+ echo
+
+ if test -n "$ac_subst_files"; then
+ $as_echo "## ------------------- ##
+## File substitutions. ##
+## ------------------- ##"
+ echo
+ for ac_var in $ac_subst_files
+ do
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+ esac
+ $as_echo "$ac_var='\''$ac_val'\''"
+ done | sort
+ echo
+ fi
+
+ if test -s confdefs.h; then
+ $as_echo "## ----------- ##
+## confdefs.h. ##
+## ----------- ##"
+ echo
+ cat confdefs.h
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ $as_echo "$as_me: caught signal $ac_signal"
+ $as_echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core *.core core.conftest.* &&
+ rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+' 0
+# For signals 1, 2, 13 and 15, record the signal number in ac_signal and
+# leave through as_fn_exit 1; the exit trap above then reports the signal.
+for ac_signal in 1 2 13 15; do
+ trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+done
+# ac_signal=0 means "no signal caught" to the exit trap.
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+# Remove leftovers of an earlier run, then start a new confdefs.h.
+rm -f -r conftest* confdefs.h
+
+$as_echo "/* confdefs.h */" > confdefs.h
+
+# Predefined preprocessor variables.
+# Each here-document appends one #define whose value is the current value of
+# the shell variable of the same name.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_URL "$PACKAGE_URL"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+#
+# Candidates: $CONFIG_SITE when it is non-empty; otherwise
+# share/config.site and etc/config.site below $prefix, or below
+# $ac_default_prefix when $prefix is NONE.
+ac_site_file1=NONE
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+ # We do not want a PATH search for config.site.
+ # A name that starts with "-" or contains no "/" is prefixed with "./".
+ case $CONFIG_SITE in #((
+ -*) ac_site_file1=./$CONFIG_SITE;;
+ */*) ac_site_file1=$CONFIG_SITE;;
+ *) ac_site_file1=./$CONFIG_SITE;;
+ esac
+elif test "x$prefix" != xNONE; then
+ ac_site_file1=$prefix/share/config.site
+ ac_site_file2=$prefix/etc/config.site
+else
+ ac_site_file1=$ac_default_prefix/share/config.site
+ ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+# Every candidate that is readable and is not /dev/null is announced, copied
+# into the log on descriptor 5 and then sourced; a failure while sourcing
+# aborts the run through as_fn_error.
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+ test "x$ac_site_file" = xNONE && continue
+ if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+ sed 's/^/| /' "$ac_site_file" >&5
+ . "$ac_site_file" \
+ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+done
+
+# Load the cache file when it is readable, or create it empty otherwise.
+# A readable $cache_file that is /dev/null or not a regular file is neither
+# loaded nor created.
+if test -r "$cache_file"; then
+ # Some versions of bash will fail to source /dev/null (special files
+ # actually), so we avoid doing that. DJGPP emulates it as a regular file.
+ if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+ # A name that is not absolute (POSIX or DOS drive style) is prefixed with
+ # "./" before it is sourced.
+ case $cache_file in
+ [\\/]* | ?:[\\/]* ) . "$cache_file";;
+ *) . "./$cache_file";;
+ esac
+ fi
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+ >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+#
+# For every name in $ac_precious_vars the cached state
+# (ac_cv_env_<name>_set / _value) is compared with the current one
+# (ac_env_<name>_set / _value). Any real mismatch sets ac_cache_corrupted to
+# ":" so that the run is aborted after the loop.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+ eval ac_old_set=\$ac_cv_env_${ac_var}_set
+ eval ac_new_set=\$ac_env_${ac_var}_set
+ eval ac_old_val=\$ac_cv_env_${ac_var}_value
+ eval ac_new_val=\$ac_env_${ac_var}_value
+ # "set,"  : was set in the previous run but is not set now   -> error
+ # ",set"  : was not set in the previous run but is set now   -> error
+ # ","     : not set in either run                            -> nothing to do
+ # other   : compare the values; a difference in whitespace only is
+ #           tolerated and the old value is reused
+ case $ac_old_set,$ac_new_set in
+ set,)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,set)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,);;
+ *)
+ if test "x$ac_old_val" != "x$ac_new_val"; then
+ # differences in whitespace do not lead to failure.
+ ac_old_val_w=`echo x $ac_old_val`
+ ac_new_val_w=`echo x $ac_new_val`
+ if test "$ac_old_val_w" != "$ac_new_val_w"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+ ac_cache_corrupted=:
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+ eval $ac_var=\$ac_old_val
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5
+$as_echo "$as_me: former value: \`$ac_old_val'" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5
+$as_echo "$as_me: current value: \`$ac_new_val'" >&2;}
+ fi;;
+ esac
+ # Pass precious variables to config.status.
+ # The NAME=value pair is appended to ac_configure_args (single quotes in
+ # the value escaped) unless the identical argument is already recorded.
+ if test "$ac_new_set" = set; then
+ case $ac_new_val in
+ *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+ *) ac_arg=$ac_var=$ac_new_val ;;
+ esac
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+ esac
+ fi
+done
+# ac_cache_corrupted holds either "false" or ":" and is run as a command.
+if $ac_cache_corrupted; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+ as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+# Select C for the tests that follow: source file extension plus the
+# preprocess, compile and link command templates. The templates are
+# single-quoted, so $CC, $CFLAGS etc. are expanded only when a template is
+# eval'ed.
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+# When PACKAGE_URL is empty, append a default definition to confdefs.h.
+if test "x$PACKAGE_URL" = "x"; then
+
+$as_echo "#define PACKAGE_URL \"http://btrfs.wiki.kernel.org\"" >>confdefs.h
+
+fi
+
+# libbtrfs version components (NOTE(review): presumably substituted into
+# generated build files later in this script - confirm).
+LIBBTRFS_MAJOR=0
+LIBBTRFS_MINOR=1
+LIBBTRFS_PATCHLEVEL=1
+
+# Default compiler flags, used only when CFLAGS is unset or empty.
+CFLAGS=${CFLAGS:-"-g -O1 -Wall -D_FORTIFY_SOURCE=2"}
+
+
+
+
+# Locate the auxiliary build directory: the first of "config" and
+# "$srcdir/config" that contains install-sh, install.sh or shtool.
+# ac_install_sh is set to the matching install command; the run is aborted
+# when neither directory qualifies.
+ac_aux_dir=
+for ac_dir in config "$srcdir"/config; do
+ if test -f "$ac_dir/install-sh"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f "$ac_dir/install.sh"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ elif test -f "$ac_dir/shtool"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/shtool install -c"
+ break
+ fi
+done
+if test -z "$ac_aux_dir"; then
+ as_fn_error $? "cannot find install-sh, install.sh, or shtool in config \"$srcdir\"/config" "$LINENO" 5
+fi
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var.
+
+
+# Select C for the tests that follow (same settings as at the start of the
+# main body).
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+# First candidate for CC: "${ac_tool_prefix}gcc", tried only when
+# ac_tool_prefix is non-empty. A cached ac_cv_prog_CC or a CC supplied by
+# the user wins over the $PATH search. CC is set from ac_cv_prog_CC and may
+# remain empty.
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+# Walk $PATH, treating an empty entry as ".", and stop at the first
+# directory that holds an executable with one of the known extensions.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# Second candidate: plain "gcc", searched for when the prefixed lookup left
+# ac_cv_prog_CC empty. The result is kept in ac_ct_CC / ac_cv_prog_ac_ct_CC;
+# when cross compiling, a warning about the missing host-triplet prefix is
+# issued once (tracked in ac_tool_warned). Otherwise CC takes the value of
+# ac_cv_prog_CC.
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ # Warn only when cross compiling and no such warning was given before.
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+# Third candidate: "${ac_tool_prefix}cc", tried only when CC is still empty
+# and ac_tool_prefix is non-empty. A cached ac_cv_prog_CC wins over the
+# $PATH search.
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ fi
+fi
+# Fourth candidate: plain "cc", tried only when CC is still empty.
+# /usr/ucb/cc is never accepted: when it is met during the $PATH walk it is
+# skipped and remembered in ac_prog_rejected, and if another cc is found
+# afterwards that one is recorded with its full path, so that the bare name
+# cannot resolve to the rejected program.
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ shift
+ ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# Last candidate: "cl.exe", tried only when CC is still empty - first with
+# ac_tool_prefix (when that is non-empty), then without it. The unprefixed
+# result goes through ac_ct_CC and, when cross compiling, triggers the
+# one-time warning about tools not prefixed with the host triplet.
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl.exe
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$CC" && break
+ done
+fi
+# Unprefixed lookup, used when the prefixed one found nothing.
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl.exe
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CC" && break
+done
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ # Warn only when cross compiling and no such warning was given before.
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+fi
+
+fi
+
+
+# Give up when none of the searches above produced a compiler: report the
+# working directory and abort through as_fn_error.
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+# The compiler command is the first word of $ac_compile after word
+# splitting. It is invoked once with each of --version, -v, -V and
+# -qversion; the command line, its standard output, at most the first ten
+# lines of its standard error and its exit status are written to
+# descriptor 5. The exit statuses are logged only; a failing option does not
+# stop the script.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+ { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+ ac_status=$?
+ # Truncate long error output: after line 10 a marker line is appended and
+ # sed quits.
+ if test -s conftest.err; then
+ sed '10a\
+... rest of stderr output deleted ...
+ 10q' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ fi
+ rm -f conftest.er1 conftest.err
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregarding a.out.
+# This helps us diagnose broken compilers and gives us a first guess
+# at exeext.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+ * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+ esac
+done
+rm -f $ac_rmfiles
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link_default") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile. We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+ ;;
+ [ab].out )
+ # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* )
+ if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+ then :; else
+ ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ fi
+ # We set ac_cv_exeext here because the later test for it is not
+ # safe: cross compilers may not add the suffix if given an `-o'
+ # argument, so we may need to know it at that point already.
+ # Even if this section looks crufty: it has the advantage of
+ # actually working.
+ break;;
+ * )
+ break;;
+ esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+ ac_file=''
+fi
+if test -z "$ac_file"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ break;;
+ * ) break;;
+ esac
+done
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdio.h>
+int
+main ()
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out"
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then
+ { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+ if { ac_try='./conftest$ac_cv_exeext'
+ { { case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if ${ac_cv_objext+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ for ac_file in conftest.o conftest.obj conftest.*; do
+ test -f "$ac_file" || continue;
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if ${ac_cv_c_compiler_gnu+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_compiler_gnu=yes
+else
+ ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+ GCC=yes
+else
+ GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if ${ac_cv_prog_cc_g+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_save_c_werror_flag=$ac_c_werror_flag
+ ac_c_werror_flag=yes
+ ac_cv_prog_cc_g=no
+ CFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_g=yes
+else
+ CFLAGS=""
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+ ac_c_werror_flag=$ac_save_c_werror_flag
+ CFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdarg.h>
+#include <stdio.h>
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
+ function prototypes and stuff, but not '\xHH' hex character constants.
+ These don't provoke an error unfortunately, instead are silently treated
+ as 'x'. The following induces an error, until -std is added to get
+ proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
+ array size at least. It's necessary to write '\x00'==0 to get something
+ that's true only with -std. */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+ inside strings and character constants. */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+ test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+ x)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+ xno)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+ *)
+ CC="$CC $ac_cv_prog_cc_c89"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+ if ${ac_cv_prog_CPP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ # Double quotes because CPP needs to be expanded
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ # Broken: success on invalid input.
+continue
+else
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+ break
+fi
+
+ done
+ ac_cv_prog_CPP=$CPP
+
+fi
+ CPP=$ac_cv_prog_CPP
+else
+ ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ # Broken: success on invalid input.
+continue
+else
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -z "$GREP"; then
+ ac_path_GREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_GREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_GREP"; then
+ as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ if test -z "$EGREP"; then
+ ac_path_EGREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+ # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_EGREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_EGREP"; then
+ as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if ${ac_cv_header_stdc+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_header_stdc=yes
+else
+ ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then :
+ :
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ return 2;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do :
+ as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+ ac_fn_c_check_header_mongrel "$LINENO" "minix/config.h" "ac_cv_header_minix_config_h" "$ac_includes_default"
+if test "x$ac_cv_header_minix_config_h" = xyes; then :
+ MINIX=yes
+else
+ MINIX=
+fi
+
+
+ if test "$MINIX" = yes; then
+
+$as_echo "#define _POSIX_SOURCE 1" >>confdefs.h
+
+
+$as_echo "#define _POSIX_1_SOURCE 2" >>confdefs.h
+
+
+$as_echo "#define _MINIX 1" >>confdefs.h
+
+ fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether it is safe to define __EXTENSIONS__" >&5
+$as_echo_n "checking whether it is safe to define __EXTENSIONS__... " >&6; }
+if ${ac_cv_safe_to_define___extensions__+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+# define __EXTENSIONS__ 1
+ $ac_includes_default
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_safe_to_define___extensions__=yes
+else
+ ac_cv_safe_to_define___extensions__=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_safe_to_define___extensions__" >&5
+$as_echo "$ac_cv_safe_to_define___extensions__" >&6; }
+ test $ac_cv_safe_to_define___extensions__ = yes &&
+ $as_echo "#define __EXTENSIONS__ 1" >>confdefs.h
+
+ $as_echo "#define _ALL_SOURCE 1" >>confdefs.h
+
+ $as_echo "#define _GNU_SOURCE 1" >>confdefs.h
+
+ $as_echo "#define _POSIX_PTHREAD_SEMANTICS 1" >>confdefs.h
+
+ $as_echo "#define _TANDEM_SOURCE 1" >>confdefs.h
+
+
+
+
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ fi
+fi
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ shift
+ ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl.exe
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$CC" && break
+ done
+fi
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl.exe
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CC" && break
+done
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+fi
+
+fi
+
+
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+ { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ sed '10a\
+... rest of stderr output deleted ...
+ 10q' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ fi
+ rm -f conftest.er1 conftest.err
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if ${ac_cv_c_compiler_gnu+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_compiler_gnu=yes
+else
+ ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+ GCC=yes
+else
+ GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if ${ac_cv_prog_cc_g+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_save_c_werror_flag=$ac_c_werror_flag
+ ac_c_werror_flag=yes
+ ac_cv_prog_cc_g=no
+ CFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_g=yes
+else
+ CFLAGS=""
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+ ac_c_werror_flag=$ac_save_c_werror_flag
+ CFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdarg.h>
+#include <stdio.h>
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
+ function prototypes and stuff, but not '\xHH' hex character constants.
+ These don't provoke an error unfortunately, instead are silently treated
+ as 'x'. The following induces an error, until -std is added to get
+ proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
+ array size at least. It's necessary to write '\x00'==0 to get something
+ that's true only with -std. */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+ inside strings and character constants. */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+ test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+ x)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+ xno)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+ *)
+ CC="$CC $ac_cv_prog_cc_c89"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+# Make sure we can run config.sub.
+$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
+ as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
+$as_echo_n "checking build system type... " >&6; }
+if ${ac_cv_build+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_build_alias=$build_alias
+test "x$ac_build_alias" = x &&
+ ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
+test "x$ac_build_alias" = x &&
+ as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
+ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
+ as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
+$as_echo "$ac_cv_build" >&6; }
+case $ac_cv_build in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
+esac
+build=$ac_cv_build
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_build
+shift
+build_cpu=$1
+build_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+build_os=$*
+IFS=$ac_save_IFS
+case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
+$as_echo_n "checking host system type... " >&6; }
+if ${ac_cv_host+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test "x$host_alias" = x; then
+ ac_cv_host=$ac_cv_build
+else
+ ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
+ as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5
+$as_echo "$ac_cv_host" >&6; }
+case $ac_cv_host in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
+esac
+host=$ac_cv_host
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_host
+shift
+host_cpu=$1
+host_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+host_os=$*
+IFS=$ac_save_IFS
+case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5
+$as_echo_n "checking for an ANSI C-conforming const... " >&6; }
+if ${ac_cv_c_const+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+#ifndef __cplusplus
+ /* Ultrix mips cc rejects this sort of thing. */
+ typedef int charset[2];
+ const charset cs = { 0, 0 };
+ /* SunOS 4.1.1 cc rejects this. */
+ char const *const *pcpcc;
+ char **ppc;
+ /* NEC SVR4.0.2 mips cc rejects this. */
+ struct point {int x, y;};
+ static struct point const zero = {0,0};
+ /* AIX XL C 1.02.0.0 rejects this.
+ It does not let you subtract one const X* pointer from another in
+ an arm of an if-expression whose if-part is not a constant
+ expression */
+ const char *g = "string";
+ pcpcc = &g + (g ? g-g : 0);
+ /* HPUX 7.0 cc rejects these. */
+ ++pcpcc;
+ ppc = (char**) pcpcc;
+ pcpcc = (char const *const *) ppc;
+ { /* SCO 3.2v4 cc rejects this sort of thing. */
+ char tx;
+ char *t = &tx;
+ char const *s = 0 ? (char *) 0 : (char const *) 0;
+
+ *t++ = 0;
+ if (s) return 0;
+ }
+ { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */
+ int x[] = {25, 17};
+ const int *foo = &x[0];
+ ++foo;
+ }
+ { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */
+ typedef const int *iptr;
+ iptr p = 0;
+ ++p;
+ }
+ { /* AIX XL C 1.02.0.0 rejects this sort of thing, saying
+ "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
+ struct s { int j; const int *ap[3]; } bx;
+ struct s *b = &bx; b->j = 5;
+ }
+ { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
+ const int foo = 10;
+ if (!foo) return 0;
+ }
+ return !cs[0] && !zero.x;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_c_const=yes
+else
+ ac_cv_c_const=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5
+$as_echo "$ac_cv_c_const" >&6; }
+if test $ac_cv_c_const = no; then
+
+$as_echo "#define const /**/" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working volatile" >&5
+$as_echo_n "checking for working volatile... " >&6; }
+if ${ac_cv_c_volatile+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+volatile int x;
+int * volatile y = (int *) 0;
+return !x && !y;
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_c_volatile=yes
+else
+ ac_cv_c_volatile=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_volatile" >&5
+$as_echo "$ac_cv_c_volatile" >&6; }
+if test $ac_cv_c_volatile = no; then
+
+$as_echo "#define volatile /**/" >>confdefs.h
+
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
+$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
+if ${ac_cv_c_bigendian+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_c_bigendian=unknown
+ # See if we're dealing with a universal compiler.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifndef __APPLE_CC__
+ not a universal capable compiler
+ #endif
+ typedef int dummy;
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ # Check for potential -arch flags. It is not universal unless
+ # there are at least two -arch flags with different values.
+ ac_arch=
+ ac_prev=
+ for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
+ if test -n "$ac_prev"; then
+ case $ac_word in
+ i?86 | x86_64 | ppc | ppc64)
+ if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
+ ac_arch=$ac_word
+ else
+ ac_cv_c_bigendian=universal
+ break
+ fi
+ ;;
+ esac
+ ac_prev=
+ elif test "x$ac_word" = "x-arch"; then
+ ac_prev=arch
+ fi
+ done
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ if test $ac_cv_c_bigendian = unknown; then
+ # See if sys/param.h defines the BYTE_ORDER macro.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <sys/types.h>
+ #include <sys/param.h>
+
+int
+main ()
+{
+#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
+ && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
+ && LITTLE_ENDIAN)
+ bogus endian macros
+ #endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ # It does; now see whether it defined to BIG_ENDIAN or not.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <sys/types.h>
+ #include <sys/param.h>
+
+int
+main ()
+{
+#if BYTE_ORDER != BIG_ENDIAN
+ not big endian
+ #endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_c_bigendian=yes
+else
+ ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ fi
+ if test $ac_cv_c_bigendian = unknown; then
+ # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <limits.h>
+
+int
+main ()
+{
+#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
+ bogus endian macros
+ #endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ # It does; now see whether it defined to _BIG_ENDIAN or not.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <limits.h>
+
+int
+main ()
+{
+#ifndef _BIG_ENDIAN
+ not big endian
+ #endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_c_bigendian=yes
+else
+ ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ fi
+ if test $ac_cv_c_bigendian = unknown; then
+ # Compile a test program.
+ if test "$cross_compiling" = yes; then :
+ # Try to guess by grepping values from an object file.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+short int ascii_mm[] =
+ { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
+ short int ascii_ii[] =
+ { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
+ int use_ascii (int i) {
+ return ascii_mm[i] + ascii_ii[i];
+ }
+ short int ebcdic_ii[] =
+ { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
+ short int ebcdic_mm[] =
+ { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
+ int use_ebcdic (int i) {
+ return ebcdic_mm[i] + ebcdic_ii[i];
+ }
+ extern int foo;
+
+int
+main ()
+{
+return use_ascii (foo) == use_ebcdic (foo);
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
+ ac_cv_c_bigendian=yes
+ fi
+ if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
+ if test "$ac_cv_c_bigendian" = unknown; then
+ ac_cv_c_bigendian=no
+ else
+ # finding both strings is unlikely to happen, but who knows?
+ ac_cv_c_bigendian=unknown
+ fi
+ fi
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+
+ /* Are we little or big endian? From Harbison&Steele. */
+ union
+ {
+ long int l;
+ char c[sizeof (long int)];
+ } u;
+ u.l = 1;
+ return u.c[sizeof (long int) - 1] == 1;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+ ac_cv_c_bigendian=no
+else
+ ac_cv_c_bigendian=yes
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
+$as_echo "$ac_cv_c_bigendian" >&6; }
+ case $ac_cv_c_bigendian in #(
+ yes)
+ $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h
+;; #(
+ no)
+ ;; #(
+ universal)
+
+$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
+
+ ;; #(
+ *)
+ as_fn_error $? "unknown endianness
+ presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
+ esac
+
+
+# Check whether --enable-largefile was given.
+if test "${enable_largefile+set}" = set; then :
+ enableval=$enable_largefile;
+fi
+
+if test "$enable_largefile" != no; then
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for special C compiler options needed for large files" >&5
+$as_echo_n "checking for special C compiler options needed for large files... " >&6; }
+if ${ac_cv_sys_largefile_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_sys_largefile_CC=no
+ if test "$GCC" != yes; then
+ ac_save_CC=$CC
+ while :; do
+ # IRIX 6.2 and later do not support large files by default,
+ # so use the C compiler's -n32 option if that helps.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <sys/types.h>
+ /* Check that off_t can represent 2**63 - 1 correctly.
+ We can't simply define LARGE_OFF_T to be 9223372036854775807,
+ since some C++ compilers masquerading as C compilers
+ incorrectly reject 9223372036854775807. */
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+ int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
+ && LARGE_OFF_T % 2147483647 == 1)
+ ? 1 : -1];
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ if ac_fn_c_try_compile "$LINENO"; then :
+ break
+fi
+rm -f core conftest.err conftest.$ac_objext
+ CC="$CC -n32"
+ if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_sys_largefile_CC=' -n32'; break
+fi
+rm -f core conftest.err conftest.$ac_objext
+ break
+ done
+ CC=$ac_save_CC
+ rm -f conftest.$ac_ext
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_CC" >&5
+$as_echo "$ac_cv_sys_largefile_CC" >&6; }
+ if test "$ac_cv_sys_largefile_CC" != no; then
+ CC=$CC$ac_cv_sys_largefile_CC
+ fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _FILE_OFFSET_BITS value needed for large files" >&5
+$as_echo_n "checking for _FILE_OFFSET_BITS value needed for large files... " >&6; }
+if ${ac_cv_sys_file_offset_bits+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ while :; do
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <sys/types.h>
+ /* Check that off_t can represent 2**63 - 1 correctly.
+ We can't simply define LARGE_OFF_T to be 9223372036854775807,
+ since some C++ compilers masquerading as C compilers
+ incorrectly reject 9223372036854775807. */
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+ int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
+ && LARGE_OFF_T % 2147483647 == 1)
+ ? 1 : -1];
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_sys_file_offset_bits=no; break
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#define _FILE_OFFSET_BITS 64
+#include <sys/types.h>
+ /* Check that off_t can represent 2**63 - 1 correctly.
+ We can't simply define LARGE_OFF_T to be 9223372036854775807,
+ since some C++ compilers masquerading as C compilers
+ incorrectly reject 9223372036854775807. */
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+ int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
+ && LARGE_OFF_T % 2147483647 == 1)
+ ? 1 : -1];
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_sys_file_offset_bits=64; break
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_cv_sys_file_offset_bits=unknown
+ break
+done
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_file_offset_bits" >&5
+$as_echo "$ac_cv_sys_file_offset_bits" >&6; }
+case $ac_cv_sys_file_offset_bits in #(
+ no | unknown) ;;
+ *)
+cat >>confdefs.h <<_ACEOF
+#define _FILE_OFFSET_BITS $ac_cv_sys_file_offset_bits
+_ACEOF
+;;
+esac
+rm -rf conftest*
+ if test $ac_cv_sys_file_offset_bits = unknown; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _LARGE_FILES value needed for large files" >&5
+$as_echo_n "checking for _LARGE_FILES value needed for large files... " >&6; }
+if ${ac_cv_sys_large_files+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ while :; do
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <sys/types.h>
+ /* Check that off_t can represent 2**63 - 1 correctly.
+ We can't simply define LARGE_OFF_T to be 9223372036854775807,
+ since some C++ compilers masquerading as C compilers
+ incorrectly reject 9223372036854775807. */
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+ int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
+ && LARGE_OFF_T % 2147483647 == 1)
+ ? 1 : -1];
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_sys_large_files=no; break
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#define _LARGE_FILES 1
+#include <sys/types.h>
+ /* Check that off_t can represent 2**63 - 1 correctly.
+ We can't simply define LARGE_OFF_T to be 9223372036854775807,
+ since some C++ compilers masquerading as C compilers
+ incorrectly reject 9223372036854775807. */
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+ int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
+ && LARGE_OFF_T % 2147483647 == 1)
+ ? 1 : -1];
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_sys_large_files=1; break
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_cv_sys_large_files=unknown
+ break
+done
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_large_files" >&5
+$as_echo "$ac_cv_sys_large_files" >&6; }
+case $ac_cv_sys_large_files in #(
+ no | unknown) ;;
+ *)
+cat >>confdefs.h <<_ACEOF
+#define _LARGE_FILES $ac_cv_sys_large_files
+_ACEOF
+;;
+esac
+rm -rf conftest*
+ fi
+
+
+fi
+
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has a completely different semantic
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
+$as_echo_n "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then
+if ${ac_cv_path_install+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in #((
+ ./ | .// | /[cC]/* | \
+ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+ ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+ /usr/ucb/* ) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ # Don't use installbsd from OSF since it installs stuff as root
+ # by default.
+ for ac_prog in ginstall scoinst install; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+ if test $ac_prog = install &&
+ grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ :
+ elif test $ac_prog = install &&
+ grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+ # program-specific install script used by HP pwplus--don't use.
+ :
+ else
+ rm -rf conftest.one conftest.two conftest.dir
+ echo one > conftest.one
+ echo two > conftest.two
+ mkdir conftest.dir
+ if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" &&
+ test -s conftest.one && test -s conftest.two &&
+ test -s conftest.dir/conftest.one &&
+ test -s conftest.dir/conftest.two
+ then
+ ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+ break 3
+ fi
+ fi
+ fi
+ done
+ done
+ ;;
+esac
+
+ done
+IFS=$as_save_IFS
+
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL=$ac_cv_path_install
+ else
+ # As a last resort, use the slow shell script. Don't cache a
+ # value for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the value is a relative name.
+ INSTALL=$ac_install_sh
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+$as_echo "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5
+$as_echo_n "checking whether ln -s works... " >&6; }
+LN_S=$as_ln_s
+if test "$LN_S" = "ln -s"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5
+$as_echo "no, using $LN_S" >&6; }
+fi
+
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AR+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$AR"; then
+ ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_AR="${ac_tool_prefix}ar"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
+$as_echo "$AR" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AR"; then
+ ac_ct_AR=$AR
+ # Extract the first word of "ar", so it can be a program name with args.
+set dummy ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_AR"; then
+ ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_AR="ar"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
+$as_echo "$ac_ct_AR" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_ct_AR" = x; then
+ AR=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ AR=$ac_ct_AR
+ fi
+else
+ AR="$ac_cv_prog_AR"
+fi
+
+# Extract the first word of "rm", so it can be a program name with args.
+set dummy rm; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_RM+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $RM in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_RM="$RM" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_RM="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_RM" && ac_cv_path_RM="rm"
+ ;;
+esac
+fi
+RM=$ac_cv_path_RM
+if test -n "$RM"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RM" >&5
+$as_echo "$RM" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+# Extract the first word of "rmdir", so it can be a program name with args.
+set dummy rmdir; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_RMDIR+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $RMDIR in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_RMDIR="$RMDIR" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_RMDIR="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_RMDIR" && ac_cv_path_RMDIR="rmdir"
+ ;;
+esac
+fi
+RMDIR=$ac_cv_path_RMDIR
+if test -n "$RMDIR"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RMDIR" >&5
+$as_echo "$RMDIR" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+
+for ac_func in openat
+do :
+ ac_fn_c_check_func "$LINENO" "openat" "ac_cv_func_openat"
+if test "x$ac_cv_func_openat" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_OPENAT 1
+_ACEOF
+
+else
+ as_fn_error $? "cannot find openat() function" "$LINENO" 5
+fi
+done
+
+
+
+
+
+
+
+
+
+
+# Find pkg-config unless PKG_CONFIG was preset (ac_cv_env_PKG_CONFIG_set).
+# When $ac_tool_prefix is non-empty the prefixed name is searched first; the
+# unprefixed name is the fallback, with a warning when cross compiling and
+# $ac_tool_warned is still empty.  Afterwards PKG_CONFIG is cleared if the
+# tool reports a version older than $_pkg_min_version.
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args.
+set dummy ${ac_tool_prefix}pkg-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_PKG_CONFIG+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $PKG_CONFIG in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+PKG_CONFIG=$ac_cv_path_PKG_CONFIG
+if test -n "$PKG_CONFIG"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5
+$as_echo "$PKG_CONFIG" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# No prefixed tool was found (or no prefix is in use): search for the plain
+# "pkg-config" name, tracking the result in ac_pt_PKG_CONFIG.
+if test -z "$ac_cv_path_PKG_CONFIG"; then
+ ac_pt_PKG_CONFIG=$PKG_CONFIG
+ # Extract the first word of "pkg-config", so it can be a program name with args.
+set dummy pkg-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ac_pt_PKG_CONFIG+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $ac_pt_PKG_CONFIG in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG
+if test -n "$ac_pt_PKG_CONFIG"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5
+$as_echo "$ac_pt_PKG_CONFIG" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_pt_PKG_CONFIG" = x; then
+ PKG_CONFIG=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ PKG_CONFIG=$ac_pt_PKG_CONFIG
+ fi
+else
+ PKG_CONFIG="$ac_cv_path_PKG_CONFIG"
+fi
+
+fi
+# Reject a pkg-config that is too old by emptying PKG_CONFIG; later module
+# checks in this script test PKG_CONFIG for emptiness.
+if test -n "$PKG_CONFIG"; then
+ _pkg_min_version=0.9.0
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5
+$as_echo_n "checking pkg-config is at least version $_pkg_min_version... " >&6; }
+ if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ PKG_CONFIG=""
+ fi
+fi
+
+
+
+
+
+# --enable-backtrace defaults to yes.  "no" defines BTRFS_DISABLE_BACKTRACE;
+# "yes" probes for execinfo.h and requires both backtrace() and
+# backtrace_symbols_fd(), aborting configure if either function is missing.
+# NOTE(review): any other value (e.g. --enable-backtrace=foo) takes neither
+# branch, so nothing is defined or checked.
+# Check whether --enable-backtrace was given.
+if test "${enable_backtrace+set}" = set; then :
+ enableval=$enable_backtrace;
+else
+ enable_backtrace=yes
+
+fi
+
+
+if test "x$enable_backtrace" = xno; then :
+
+
+$as_echo "#define BTRFS_DISABLE_BACKTRACE 1" >>confdefs.h
+
+
+fi
+
+if test "x$enable_backtrace" = xyes; then
+ for ac_header in execinfo.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "execinfo.h" "ac_cv_header_execinfo_h" "$ac_includes_default"
+if test "x$ac_cv_header_execinfo_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_EXECINFO_H 1
+_ACEOF
+
+fi
+
+done
+
+ for ac_func in backtrace backtrace_symbols_fd
+do :
+ as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+else
+ as_fn_error $? "standard library does not have backtrace support" "$LINENO" 5
+fi
+done
+
+fi
+
+
+# --enable-documentation defaults to yes.  DISABLE_DOCUMENTATION is its
+# inverse: 0 when the value is exactly "yes", 1 for anything else.
+# Check whether --enable-documentation was given.
+if test "${enable_documentation+set}" = set; then :
+ enableval=$enable_documentation;
+else
+ enable_documentation=yes
+
+fi
+
+if test "x$enable_documentation" = xyes; then :
+ DISABLE_DOCUMENTATION=0
+else
+ DISABLE_DOCUMENTATION=1
+fi
+
+
+# Tools looked up only when documentation is enabled: asciidoc, xmlto, gzip,
+# mv and sed.  The first four lookups share one shape: a preset variable that
+# looks like a path is kept, otherwise $PATH is scanned, and the bare program
+# name is the fallback when nothing is found (so these never end up empty).
+if test "x$enable_documentation" = xyes; then
+ # Extract the first word of "asciidoc", so it can be a program name with args.
+set dummy asciidoc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ASCIIDOC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $ASCIIDOC in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_ASCIIDOC="$ASCIIDOC" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_ASCIIDOC="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_ASCIIDOC" && ac_cv_path_ASCIIDOC="asciidoc"
+ ;;
+esac
+fi
+ASCIIDOC=$ac_cv_path_ASCIIDOC
+if test -n "$ASCIIDOC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ASCIIDOC" >&5
+$as_echo "$ASCIIDOC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ # Extract the first word of "xmlto", so it can be a program name with args.
+set dummy xmlto; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_XMLTO+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $XMLTO in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_XMLTO="$XMLTO" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_XMLTO="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_XMLTO" && ac_cv_path_XMLTO="xmlto"
+ ;;
+esac
+fi
+XMLTO=$ac_cv_path_XMLTO
+if test -n "$XMLTO"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $XMLTO" >&5
+$as_echo "$XMLTO" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+# NOTE(review): the gzip program is documented to read default options from
+# an environment variable that is also called GZIP -- confirm that storing
+# the program path under this name cannot leak into gzip invocations.
+ # Extract the first word of "gzip", so it can be a program name with args.
+set dummy gzip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_GZIP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $GZIP in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_GZIP="$GZIP" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_GZIP="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_GZIP" && ac_cv_path_GZIP="gzip"
+ ;;
+esac
+fi
+GZIP=$ac_cv_path_GZIP
+if test -n "$GZIP"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GZIP" >&5
+$as_echo "$GZIP" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ # Extract the first word of "mv", so it can be a program name with args.
+set dummy mv; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_MV+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $MV in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_MV="$MV" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_MV="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_MV" && ac_cv_path_MV="mv"
+ ;;
+esac
+fi
+MV=$ac_cv_path_MV
+if test -n "$MV"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MV" >&5
+$as_echo "$MV" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+# Pick a sed.  A cached result or a preset $SED is used as-is.  Otherwise
+# "sed" and "gsed" are tried in every $PATH directory: the first candidate
+# whose --version output mentions GNU wins immediately; failing that, the
+# candidate that survives the most rounds of the doubling-input test below is
+# kept.  configure aborts when no candidate is acceptable.
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
+$as_echo_n "checking for a sed that does not truncate output... " >&6; }
+if ${ac_cv_path_SED+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
+ for ac_i in 1 2 3 4 5 6 7; do
+ ac_script="$ac_script$as_nl$ac_script"
+ done
+ echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed
+ { ac_script=; unset ac_script;}
+ if test -z "$SED"; then
+ ac_path_SED_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in sed gsed; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
+ as_fn_executable_p "$ac_path_SED" || continue
+# Check for GNU ac_path_SED and select it if it is found.
+ # Check for GNU $ac_path_SED
+case `"$ac_path_SED" --version 2>&1` in
+*GNU*)
+ ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo '' >> "conftest.nl"
+ "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_SED_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_SED="$ac_path_SED"
+ ac_path_SED_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_SED_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_SED"; then
+ as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5
+ fi
+else
+ ac_cv_path_SED=$SED
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5
+$as_echo "$ac_cv_path_SED" >&6; }
+ SED="$ac_cv_path_SED"
+ rm -f conftest.sed
+
+fi
+
+# --enable-convert defaults to yes.  DISABLE_BTRFSCONVERT is its inverse:
+# 0 when the value is exactly "yes", 1 for anything else.
+# Check whether --enable-convert was given.
+if test "${enable_convert+set}" = set; then :
+ enableval=$enable_convert;
+else
+ enable_convert=yes
+
+fi
+
+
+if test "x$enable_convert" = xyes; then :
+ DISABLE_BTRFSCONVERT=0
+else
+ DISABLE_BTRFSCONVERT=1
+fi
+
+
+# Dependencies checked only when convert is enabled: ext2fs and com_err.
+# For each module a preset *_CFLAGS / *_LIBS value is used verbatim;
+# otherwise $PKG_CONFIG is queried.  pkg_failed ends as "yes" (module lookup
+# failed: abort with pkg-config's own error text), "untried" (PKG_CONFIG is
+# empty: abort with a hint about PKG_CONFIG) or "no" (flags were obtained).
+if test "x$enable_convert" = xyes; then
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for EXT2FS" >&5
+$as_echo_n "checking for EXT2FS... " >&6; }
+
+if test -n "$EXT2FS_CFLAGS"; then
+ pkg_cv_EXT2FS_CFLAGS="$EXT2FS_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ext2fs\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "ext2fs") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_EXT2FS_CFLAGS=`$PKG_CONFIG --cflags "ext2fs" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$EXT2FS_LIBS"; then
+ pkg_cv_EXT2FS_LIBS="$EXT2FS_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ext2fs\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "ext2fs") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_EXT2FS_LIBS=`$PKG_CONFIG --libs "ext2fs" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ EXT2FS_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ext2fs" 2>&1`
+ else
+ EXT2FS_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ext2fs" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$EXT2FS_PKG_ERRORS" >&5
+
+ as_fn_error $? "Package requirements (ext2fs) were not met:
+
+$EXT2FS_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables EXT2FS_CFLAGS
+and EXT2FS_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables EXT2FS_CFLAGS
+and EXT2FS_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ EXT2FS_CFLAGS=$pkg_cv_EXT2FS_CFLAGS
+ EXT2FS_LIBS=$pkg_cv_EXT2FS_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+
+# Same procedure for com_err; pkg_failed is reset so the ext2fs outcome does
+# not carry over.
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for COM_ERR" >&5
+$as_echo_n "checking for COM_ERR... " >&6; }
+
+if test -n "$COM_ERR_CFLAGS"; then
+ pkg_cv_COM_ERR_CFLAGS="$COM_ERR_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"com_err\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "com_err") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_COM_ERR_CFLAGS=`$PKG_CONFIG --cflags "com_err" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$COM_ERR_LIBS"; then
+ pkg_cv_COM_ERR_LIBS="$COM_ERR_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"com_err\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "com_err") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_COM_ERR_LIBS=`$PKG_CONFIG --libs "com_err" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ COM_ERR_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "com_err" 2>&1`
+ else
+ COM_ERR_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "com_err" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$COM_ERR_PKG_ERRORS" >&5
+
+ as_fn_error $? "Package requirements (com_err) were not met:
+
+$COM_ERR_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables COM_ERR_CFLAGS
+and COM_ERR_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables COM_ERR_CFLAGS
+and COM_ERR_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ COM_ERR_CFLAGS=$pkg_cv_COM_ERR_CFLAGS
+ COM_ERR_LIBS=$pkg_cv_COM_ERR_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+fi
+
+
+
+
+# blkid is required unconditionally.  Preset BLKID_CFLAGS / BLKID_LIBS are
+# used verbatim; otherwise $PKG_CONFIG is asked for the "blkid" module.
+# configure aborts when the module lookup fails (pkg_failed=yes) or when
+# PKG_CONFIG is empty (pkg_failed=untried).
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BLKID" >&5
+$as_echo_n "checking for BLKID... " >&6; }
+
+if test -n "$BLKID_CFLAGS"; then
+ pkg_cv_BLKID_CFLAGS="$BLKID_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blkid\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "blkid") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_BLKID_CFLAGS=`$PKG_CONFIG --cflags "blkid" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$BLKID_LIBS"; then
+ pkg_cv_BLKID_LIBS="$BLKID_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blkid\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "blkid") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_BLKID_LIBS=`$PKG_CONFIG --libs "blkid" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ BLKID_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "blkid" 2>&1`
+ else
+ BLKID_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "blkid" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$BLKID_PKG_ERRORS" >&5
+
+ as_fn_error $? "Package requirements (blkid) were not met:
+
+$BLKID_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables BLKID_CFLAGS
+and BLKID_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables BLKID_CFLAGS
+and BLKID_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ BLKID_CFLAGS=$pkg_cv_BLKID_CFLAGS
+ BLKID_LIBS=$pkg_cv_BLKID_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+
+ if { { $as_echo "$as_me:${as_lineno-$LINENO}: pkg-config --exists --print-errors \"blkid\""; } >&5
+ (pkg-config --exists --print-errors "blkid") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ BLKID_LIBS_STATIC=`pkg-config --libs --static "blkid"`
+
+ else
+ as_fn_error $? "pkg-config description of blkid, needed for static build, is not available" "$LINENO" 5
+ fi
+
+
+
+# uuid is required unconditionally.  Preset UUID_CFLAGS / UUID_LIBS are used
+# verbatim; otherwise $PKG_CONFIG is asked for the "uuid" module.  configure
+# aborts when the module lookup fails (pkg_failed=yes) or when PKG_CONFIG is
+# empty (pkg_failed=untried).
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for UUID" >&5
+$as_echo_n "checking for UUID... " >&6; }
+
+if test -n "$UUID_CFLAGS"; then
+ pkg_cv_UUID_CFLAGS="$UUID_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"uuid\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "uuid") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_UUID_CFLAGS=`$PKG_CONFIG --cflags "uuid" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$UUID_LIBS"; then
+ pkg_cv_UUID_LIBS="$UUID_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"uuid\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "uuid") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_UUID_LIBS=`$PKG_CONFIG --libs "uuid" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ UUID_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "uuid" 2>&1`
+ else
+ UUID_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "uuid" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$UUID_PKG_ERRORS" >&5
+
+ as_fn_error $? "Package requirements (uuid) were not met:
+
+$UUID_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables UUID_CFLAGS
+and UUID_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables UUID_CFLAGS
+and UUID_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ UUID_CFLAGS=$pkg_cv_UUID_CFLAGS
+ UUID_LIBS=$pkg_cv_UUID_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+
+ if { { $as_echo "$as_me:${as_lineno-$LINENO}: pkg-config --exists --print-errors \"uuid\""; } >&5
+ (pkg-config --exists --print-errors "uuid") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ UUID_LIBS_STATIC=`pkg-config --libs --static "uuid"`
+
+ else
+ as_fn_error $? "pkg-config description of uuid, needed for static build, is not available" "$LINENO" 5
+ fi
+
+
+
+# zlib is required unconditionally.  Preset ZLIB_CFLAGS / ZLIB_LIBS are used
+# verbatim; otherwise $PKG_CONFIG is asked for the "zlib" module.  configure
+# aborts when the module lookup fails (pkg_failed=yes) or when PKG_CONFIG is
+# empty (pkg_failed=untried).
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZLIB" >&5
+$as_echo_n "checking for ZLIB... " >&6; }
+
+if test -n "$ZLIB_CFLAGS"; then
+ pkg_cv_ZLIB_CFLAGS="$ZLIB_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"zlib\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "zlib") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_ZLIB_CFLAGS=`$PKG_CONFIG --cflags "zlib" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$ZLIB_LIBS"; then
+ pkg_cv_ZLIB_LIBS="$ZLIB_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"zlib\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "zlib") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_ZLIB_LIBS=`$PKG_CONFIG --libs "zlib" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ ZLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "zlib" 2>&1`
+ else
+ ZLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "zlib" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$ZLIB_PKG_ERRORS" >&5
+
+ as_fn_error $? "Package requirements (zlib) were not met:
+
+$ZLIB_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables ZLIB_CFLAGS
+and ZLIB_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables ZLIB_CFLAGS
+and ZLIB_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ ZLIB_CFLAGS=$pkg_cv_ZLIB_CFLAGS
+ ZLIB_LIBS=$pkg_cv_ZLIB_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+
+ if { { $as_echo "$as_me:${as_lineno-$LINENO}: pkg-config --exists --print-errors \"zlib\""; } >&5
+ (pkg-config --exists --print-errors "zlib") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ ZLIB_LIBS_STATIC=`pkg-config --libs --static "zlib"`
+
+ else
+ as_fn_error $? "pkg-config description of zlib, needed for static build, is not available" "$LINENO" 5
+ fi
+
+
+# lzo2 is mandatory and is detected without pkg-config: a stub program that
+# calls lzo_version() is linked with -llzo2 prepended to LIBS (LIBS is
+# restored afterwards).  On success the LZO2_* variables are set to fixed
+# values; on failure configure aborts.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for lzo_version in -llzo2" >&5
+$as_echo_n "checking for lzo_version in -llzo2... " >&6; }
+if ${ac_cv_lib_lzo2_lzo_version+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-llzo2 $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char lzo_version ();
+int
+main ()
+{
+return lzo_version ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_lzo2_lzo_version=yes
+else
+ ac_cv_lib_lzo2_lzo_version=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lzo2_lzo_version" >&5
+$as_echo "$ac_cv_lib_lzo2_lzo_version" >&6; }
+if test "x$ac_cv_lib_lzo2_lzo_version" = xyes; then :
+
+ LZO2_LIBS="-llzo2"
+ LZO2_CFLAGS=""
+ LZO2_LIBS_STATIC="-llzo2"
+else
+
+ as_fn_error $? "cannot find lzo2 library" "$LINENO" 5
+
+fi
+
+
+
+
+
+
+
+
+
+
+ac_config_headers="$ac_config_headers config.h"
+
+
+ac_config_files="$ac_config_files Makefile Documentation/Makefile version.h"
+
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems. If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+ for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+ eval ac_val=\$$ac_var
+ case $ac_val in #(
+ *${as_nl}*)
+ case $ac_var in #(
+ *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+ esac
+ case $ac_var in #(
+ _ | IFS | as_nl) ;; #(
+ BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+ *) { eval $ac_var=; unset $ac_var;} ;;
+ esac ;;
+ esac
+ done
+
+ (set) 2>&1 |
+ case $as_nl`(ac_space=' '; set) 2>&1` in #(
+ *${as_nl}ac_space=\ *)
+ # `set' does not quote correctly, so add quotes: double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \.
+ sed -n \
+ "s/'/'\\\\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+ ;; #(
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+ ;;
+ esac |
+ sort
+) |
+ sed '
+ /^ac_cv_env_/b end
+ t clear
+ :clear
+ s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+ t end
+ s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+ :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+ if test -w "$cache_file"; then
+ if test "x$cache_file" != "x/dev/null"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+ if test ! -f "$cache_file" || test -h "$cache_file"; then
+ cat confcache >"$cache_file"
+ else
+ case $cache_file in #(
+ */* | ?:*)
+ mv -f confcache "$cache_file"$$ &&
+ mv -f "$cache_file"$$ "$cache_file" ;; #(
+ *)
+ mv -f confcache "$cache_file" ;;
+ esac
+ fi
+ fi
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+ fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+U=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+ # 1. Remove the extension, and $U if already installed.
+ ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+ ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+ # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR
+ # will be set to the directory where LIBOBJS objects are built.
+ as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+ as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+
+: "${CONFIG_STATUS=./config.status}"
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='print -r --'
+ as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='printf %s\n'
+ as_echo_n='printf %s'
+else
+ if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+ as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+ as_echo_n='/usr/ucb/echo -n'
+ else
+ as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+ as_echo_n_body='eval
+ arg=$1;
+ case $arg in #(
+ *"$as_nl"*)
+ expr "X$arg" : "X\\(.*\\)$as_nl";
+ arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+ esac;
+ expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+ '
+ export as_echo_n_body
+ as_echo_n='sh -c $as_echo_n_body as_echo'
+ fi
+ export as_echo_body
+ as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ PATH_SEPARATOR=:
+ (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+ (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+ PATH_SEPARATOR=';'
+ }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" "" $as_nl"
+
+# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+ $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+ exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there. '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+ as_status=$1; test $as_status -eq 0 && as_status=1
+ if test "$4"; then
+ as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+ fi
+ $as_echo "$as_me: error: $2" >&2
+ as_fn_exit $as_status
+} # as_fn_error
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+ return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+ set +e
+ as_fn_set_status $1
+ exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+ { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+ eval 'as_fn_append ()
+ {
+ eval $1+=\$2
+ }'
+else
+ as_fn_append ()
+ {
+ eval $1=\$$1\$2
+ }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+ eval 'as_fn_arith ()
+ {
+ as_val=$(( $* ))
+ }'
+else
+ as_fn_arith ()
+ {
+ as_val=`expr "$@" || test $? -eq 1`
+ }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+ case `echo 'xy\c'` in
+ *c*) ECHO_T=' ';; # ECHO_T is single tab character.
+ xy) ECHO_C='\c';;
+ *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
+ ECHO_T=' ';;
+ esac;;
+*)
+ ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+ rm -f conf$$.dir/conf$$.file
+else
+ rm -f conf$$.dir
+ mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+ if ln -s conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s='ln -s'
+ # ... but there are two gotchas:
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+ # In both cases, we have to default to `cp -pR'.
+ ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+ as_ln_s='cp -pR'
+ elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+ else
+ as_ln_s='cp -pR'
+ fi
+else
+ as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || eval $as_mkdir_p || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p='mkdir -p "$as_dir"'
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+ test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by btrfs-progs $as_me v4.4.1, which was
+generated by GNU Autoconf 2.69. Invocation command line was
+
+ CONFIG_FILES = $CONFIG_FILES
+ CONFIG_HEADERS = $CONFIG_HEADERS
+ CONFIG_LINKS = $CONFIG_LINKS
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
+ $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+case $ac_config_headers in *"
+"*) set x $ac_config_headers; shift; ac_config_headers=$*;;
+esac
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+config_headers="$ac_config_headers"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration. Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+ -h, --help print this help, then exit
+ -V, --version print version number and configuration settings, then exit
+ --config print configuration, then exit
+ -q, --quiet, --silent
+ do not print progress messages
+ -d, --debug don't remove temporary files
+ --recheck update $as_me by reconfiguring in the same conditions
+ --file=FILE[:TEMPLATE]
+ instantiate the configuration file FILE
+ --header=FILE[:TEMPLATE]
+ instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Report bugs to <linux-btrfs@vger.kernel.org>.
+btrfs-progs home page: <http://btrfs.wiki.kernel.org>."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
+ac_cs_version="\\
+btrfs-progs config.status v4.4.1
+configured by $0, generated by GNU Autoconf 2.69,
+ with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+INSTALL='$INSTALL'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+ case $1 in
+ --*=?*)
+ ac_option=`expr "X$1" : 'X\([^=]*\)='`
+ ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+ ac_shift=:
+ ;;
+ --*=)
+ ac_option=`expr "X$1" : 'X\([^=]*\)='`
+ ac_optarg=
+ ac_shift=:
+ ;;
+ *)
+ ac_option=$1
+ ac_optarg=$2
+ ac_shift=shift
+ ;;
+ esac
+
+ case $ac_option in
+ # Handling of the options.
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ ac_cs_recheck=: ;;
+ --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+ $as_echo "$ac_cs_version"; exit ;;
+ --config | --confi | --conf | --con | --co | --c )
+ $as_echo "$ac_cs_config"; exit ;;
+ --debug | --debu | --deb | --de | --d | -d )
+ debug=: ;;
+ --file | --fil | --fi | --f )
+ $ac_shift
+ case $ac_optarg in
+ *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ '') as_fn_error $? "missing file argument" ;;
+ esac
+ as_fn_append CONFIG_FILES " '$ac_optarg'"
+ ac_need_defaults=false;;
+ --header | --heade | --head | --hea )
+ $ac_shift
+ case $ac_optarg in
+ *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ as_fn_append CONFIG_HEADERS " '$ac_optarg'"
+ ac_need_defaults=false;;
+ --he | --h)
+ # Conflict between --help and --header
+ as_fn_error $? "ambiguous option: \`$1'
+Try \`$0 --help' for more information.";;
+ --help | --hel | -h )
+ $as_echo "$ac_cs_usage"; exit ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil | --si | --s)
+ ac_cs_silent=: ;;
+
+ # This is an error.
+ -*) as_fn_error $? "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+ *) as_fn_append ac_config_targets " $1"
+ ac_need_defaults=false ;;
+
+ esac
+ shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+ exec 6>/dev/null
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+ set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+ shift
+ \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+ CONFIG_SHELL='$SHELL'
+ export CONFIG_SHELL
+ exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+ $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+ case $ac_config_target in
+ "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
+ "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+ "Documentation/Makefile") CONFIG_FILES="$CONFIG_FILES Documentation/Makefile" ;;
+ "version.h") CONFIG_FILES="$CONFIG_FILES version.h" ;;
+
+ *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+ esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+ test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+fi
+
+# Have a temporary directory for convenience. Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+ tmp= ac_tmp=
+ trap 'exit_status=$?
+ : "${ac_tmp:=$tmp}"
+ { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+ trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+ tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+ test -d "$tmp"
+} ||
+{
+ tmp=./conf$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+ eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+ ac_cs_awk_cr='\\r'
+else
+ ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+ echo "cat >conf$$subs.awk <<_ACEOF" &&
+ echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+ echo "_ACEOF"
+} >conf$$subs.sh ||
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+ . ./conf$$subs.sh ||
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+
+ ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+ if test $ac_delim_n = $ac_delim_num; then
+ break
+ elif $ac_last_try; then
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ else
+ ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+ fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+ N
+ s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
+ for (key in S) S_is_set[key] = 1
+ FS = ""
+
+}
+{
+ line = $ 0
+ nfields = split(line, field, "@")
+ substed = 0
+ len = length(field[1])
+ for (i = 2; i < nfields; i++) {
+ key = field[i]
+ keylen = length(key)
+ if (S_is_set[key]) {
+ value = S[key]
+ line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+ len += length(value) + length(field[++i])
+ substed = 1
+ } else
+ len += 1 + keylen
+ }
+
+ print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+ sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+ cat
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{
+h
+s///
+s/^/:/
+s/[ ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
+s/:*$//
+x
+s/\(=[ ]*\).*/\1/
+G
+s/\n//
+s/^[^=]*=[ ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+# Set up the scripts for CONFIG_HEADERS section.
+# No need to generate them if there are no CONFIG_HEADERS.
+# This happens for instance with `./config.status Makefile'.
+if test -n "$CONFIG_HEADERS"; then
+cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
+BEGIN {
+_ACEOF
+
+# Transform confdefs.h into an awk script `defines.awk', embedded as
+# here-document in config.status, that substitutes the proper values into
+# config.h.in to produce config.h.
+
+# Create a delimiter string that does not exist in confdefs.h, to ease
+# handling of long lines.
+ac_delim='%!_!# '
+for ac_last_try in false false :; do
+ ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
+ if test -z "$ac_tt"; then
+ break
+ elif $ac_last_try; then
+ as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
+ else
+ ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+ fi
+done
+
+# For the awk script, D is an array of macro values keyed by name,
+# likewise P contains macro parameters if any. Preserve backslash
+# newline sequences.
+
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+sed -n '
+s/.\{148\}/&'"$ac_delim"'/g
+t rset
+:rset
+s/^[ ]*#[ ]*define[ ][ ]*/ /
+t def
+d
+:def
+s/\\$//
+t bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3"/p
+s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p
+d
+:bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3\\\\\\n"\\/p
+t cont
+s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
+t cont
+d
+:cont
+n
+s/.\{148\}/&'"$ac_delim"'/g
+t clear
+:clear
+s/\\$//
+t bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/"/p
+d
+:bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
+b cont
+' <confdefs.h | sed '
+s/'"$ac_delim"'/"\\\
+"/g' >>$CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ for (key in D) D_is_set[key] = 1
+ FS = ""
+}
+/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
+ line = \$ 0
+ split(line, arg, " ")
+ if (arg[1] == "#") {
+ defundef = arg[2]
+ mac1 = arg[3]
+ } else {
+ defundef = substr(arg[1], 2)
+ mac1 = arg[2]
+ }
+ split(mac1, mac2, "(") #)
+ macro = mac2[1]
+ prefix = substr(line, 1, index(line, defundef) - 1)
+ if (D_is_set[macro]) {
+ # Preserve the white space surrounding the "#".
+ print prefix "define", macro P[macro] D[macro]
+ next
+ } else {
+ # Replace #undef with comments. This is necessary, for example,
+ # in the case of _POSIX_SOURCE, which is predefined and required
+ # on some systems where configure will not decide to define it.
+ if (defundef == "undef") {
+ print "/*", prefix defundef, macro, "*/"
+ next
+ }
+ }
+}
+{ print }
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
+fi # test -n "$CONFIG_HEADERS"
+
+
+eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS "
+shift
+for ac_tag
+do
+ case $ac_tag in
+ :[FHLC]) ac_mode=$ac_tag; continue;;
+ esac
+ case $ac_mode$ac_tag in
+ :[FHL]*:*);;
+ :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+ :[FH]-) ac_tag=-:-;;
+ :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+ esac
+ ac_save_IFS=$IFS
+ IFS=:
+ set x $ac_tag
+ IFS=$ac_save_IFS
+ shift
+ ac_file=$1
+ shift
+
+ case $ac_mode in
+ :L) ac_source=$1;;
+ :[FH])
+ ac_file_inputs=
+ for ac_f
+ do
+ case $ac_f in
+ -) ac_f="$ac_tmp/stdin";;
+ *) # Look for the file first in the build tree, then in the source tree
+ # (if the path is not absolute). The absolute path cannot be DOS-style,
+ # because $ac_f cannot contain `:'.
+ test -f "$ac_f" ||
+ case $ac_f in
+ [\\/$]*) false;;
+ *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+ esac ||
+ as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+ esac
+ case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+ as_fn_append ac_file_inputs " '$ac_f'"
+ done
+
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ configure_input='Generated from '`
+ $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+ `' by configure.'
+ if test x"$ac_file" != x-; then
+ configure_input="$ac_file. $configure_input"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+ fi
+ # Neutralize special characters interpreted by sed in replacement strings.
+ case $configure_input in #(
+ *\&* | *\|* | *\\* )
+ ac_sed_conf_input=`$as_echo "$configure_input" |
+ sed 's/[\\\\&|]/\\\\&/g'`;; #(
+ *) ac_sed_conf_input=$configure_input;;
+ esac
+
+ case $ac_tag in
+ *:-:* | *:-) cat >"$ac_tmp/stdin" \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+ esac
+ ;;
+ esac
+
+ ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ as_dir="$ac_dir"; as_fn_mkdir_p
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+ case $ac_mode in
+ :F)
+ #
+ # CONFIG_FILE
+ #
+
+ case $INSTALL in
+ [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+ *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+ esac
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+ p
+ q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ ac_datarootdir_hack='
+ s&@datadir@&$datadir&g
+ s&@docdir@&$docdir&g
+ s&@infodir@&$infodir&g
+ s&@localedir@&$localedir&g
+ s&@mandir@&$mandir&g
+ s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+ { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+ { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \
+ "$ac_tmp/out"`; test -z "$ac_out"; } &&
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined" >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined" >&2;}
+
+ rm -f "$ac_tmp/stdin"
+ case $ac_file in
+ -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+ *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
+ esac \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ ;;
+ :H)
+ #
+ # CONFIG_HEADER
+ #
+ if test x"$ac_file" != x-; then
+ {
+ $as_echo "/* $configure_input */" \
+ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
+ } >"$ac_tmp/config.h" \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
+$as_echo "$as_me: $ac_file is unchanged" >&6;}
+ else
+ rm -f "$ac_file"
+ mv "$ac_tmp/config.h" "$ac_file" \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ fi
+ else
+ $as_echo "/* $configure_input */" \
+ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
+ || as_fn_error $? "could not create -" "$LINENO" 5
+ fi
+ ;;
+
+
+ esac
+
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+ as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded. So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status. When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+ ac_cs_success=:
+ ac_config_status_args=
+ test "$silent" = yes &&
+ ac_config_status_args="$ac_config_status_args --quiet"
+ exec 5>/dev/null
+ $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+ exec 5>>config.log
+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+ # would make configure fail if this is the last instruction.
+ $ac_cs_success || as_fn_exit 1
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result:
+ ${PACKAGE_NAME} ${PACKAGE_VERSION}
+
+ prefix: ${prefix}
+ exec prefix: ${exec_prefix}
+
+ bindir: ${bindir}
+ libdir: ${libdir}
+ includedir: ${includedir}
+
+ compiler: ${CC}
+ cflags: ${CFLAGS}
+ ldflags: ${LDFLAGS}
+
+ documentation: ${enable_documentation}
+ backtrace support: ${enable_backtrace}
+ btrfs-convert: ${enable_convert}
+
+ Type 'make' to compile.
+" >&5
+$as_echo "
+ ${PACKAGE_NAME} ${PACKAGE_VERSION}
+
+ prefix: ${prefix}
+ exec prefix: ${exec_prefix}
+
+ bindir: ${bindir}
+ libdir: ${libdir}
+ includedir: ${includedir}
+
+ compiler: ${CC}
+ cflags: ${CFLAGS}
+ ldflags: ${LDFLAGS}
+
+ documentation: ${enable_documentation}
+ backtrace support: ${enable_backtrace}
+ btrfs-convert: ${enable_convert}
+
+ Type 'make' to compile.
+" >&6; }
+
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 00000000..fc343ea1
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,174 @@
+dnl Package identity. The version string is produced at autoconf time by
+dnl running ./version.sh --configure through m4_esyscmd.
+AC_INIT([btrfs-progs],
+ m4_esyscmd([./version.sh --configure]),
+ [linux-btrfs@vger.kernel.org],,
+ [http://btrfs.wiki.kernel.org])
+
+dnl Define PACKAGE_URL ourselves when the shell variable is still empty
+dnl after AC_INIT. Presumably this covers autoconf versions that ignore
+dnl the URL argument - TODO confirm.
+if test "x$PACKAGE_URL" = "x"; then
+ AC_DEFINE([PACKAGE_URL], ["http://btrfs.wiki.kernel.org"], [URL])
+fi
+
+dnl library version
+LIBBTRFS_MAJOR=0
+LIBBTRFS_MINOR=1
+LIBBTRFS_PATCHLEVEL=1
+
+dnl Default compiler flags, used only when CFLAGS is unset or empty.
+CFLAGS=${CFLAGS:-"-g -O1 -Wall -D_FORTIFY_SOURCE=2"}
+AC_SUBST([CFLAGS])
+
+dnl Minimum autoconf version required to process this file.
+AC_PREREQ([2.60])
+
+AC_CONFIG_AUX_DIR([config])
+dnl AC_USE_SYSTEM_EXTENSIONS must be called before any macros that run
+dnl the compiler (like AC_PROG_LIBTOOL) to avoid autoconf errors.
+AC_USE_SYSTEM_EXTENSIONS
+
+AC_CONFIG_SRCDIR([btrfs.c])
+AC_PREFIX_DEFAULT([/usr/local])
+
+dnl Compiler, host triplet and basic language characteristics.
+AC_PROG_CC
+AC_CANONICAL_HOST
+AC_C_CONST
+AC_C_VOLATILE
+AC_C_BIGENDIAN
+
+AC_SYS_LARGEFILE
+
+dnl Helper programs. rm and rmdir fall back to the bare command name when
+dnl no absolute path is found.
+AC_PROG_INSTALL
+AC_PROG_LN_S
+AC_CHECK_TOOL([AR], [ar])
+AC_PATH_PROG([RM], [rm], [rm])
+AC_PATH_PROG([RMDIR], [rmdir], [rmdir])
+
+dnl openat is mandatory: configure aborts when it is missing.
+AC_CHECK_FUNCS([openat], [],
+ [AC_MSG_ERROR([cannot find openat() function])])
+
+dnl Fail at autoconf time with a hint when the pkg-config macros are not
+dnl available, then locate the pkg-config program itself.
+m4_ifndef([PKG_PROG_PKG_CONFIG],
+ [m4_fatal([Could not locate the pkg-config autoconf
+ macros. These are usually located in /usr/share/aclocal/pkg.m4.
+ If your macros are in a different location, try setting the
+ environment variable AL_OPTS="-I/other/macro/dir" before running
+ ./autogen.sh or autoreconf again.])])
+PKG_PROG_PKG_CONFIG
+
+
+dnl
+dnl PKG_STATIC(VARIABLE, MODULE)
+dnl
+dnl Sets and substitutes VARIABLE to the output of
+dnl   $PKG_CONFIG --libs --static MODULE
+dnl and aborts configure when MODULE is not known to pkg-config.
+dnl
+dnl The program found by PKG_PROG_PKG_CONFIG is used instead of a bare
+dnl pkg-config so that a user-supplied or cross-prefixed PKG_CONFIG is
+dnl honoured, consistent with PKG_CHECK_MODULES.
+dnl
+AC_DEFUN([PKG_STATIC], [
+	if AC_RUN_LOG([${PKG_CONFIG} --exists --print-errors "$2"]); then
+		$1=`${PKG_CONFIG} --libs --static "$2"`
+		AC_SUBST([$1])
+	else
+		AC_MSG_ERROR([pkg-config description of $2, needed for static build, is not available])
+	fi
+])
+
+
+dnl --disable-backtrace: enable_backtrace defaults to yes.
+dnl When it is "no", BTRFS_DISABLE_BACKTRACE is defined.
+dnl When it is "yes", the C library must provide backtrace and
+dnl backtrace_symbols_fd or configure aborts.
+AC_ARG_ENABLE([backtrace],
+ AS_HELP_STRING([--disable-backtrace], [disable btrfs backtrace]),
+ [], [enable_backtrace=yes]
+)
+
+AS_IF([test "x$enable_backtrace" = xno], [
+ AC_DEFINE([BTRFS_DISABLE_BACKTRACE], [1], [disable backtrace stuff in kerncompat.h ])
+])
+
+if test "x$enable_backtrace" = xyes; then
+ AC_CHECK_HEADERS([execinfo.h])
+ AC_CHECK_FUNCS([backtrace backtrace_symbols_fd], [],
+ AC_MSG_ERROR([standard library does not have backtrace support]))
+fi
+
+
+dnl --disable-documentation: enable_documentation defaults to yes.
+dnl DISABLE_DOCUMENTATION is substituted as 0 when the value is exactly
+dnl "yes" and as 1 for anything else.
+AC_ARG_ENABLE([documentation],
+	AS_HELP_STRING([--disable-documentation], [do not build documentation]),
+	[], [enable_documentation=yes]
+)
+AS_IF([test "x$enable_documentation" = xyes], [DISABLE_DOCUMENTATION=0], [DISABLE_DOCUMENTATION=1])
+AC_SUBST([DISABLE_DOCUMENTATION])
+
+dnl detect tools to build documentation; each one falls back to the bare
+dnl command name when no absolute path is found
+if test "x$enable_documentation" = xyes; then
+	AC_PATH_PROG([ASCIIDOC], [asciidoc], [asciidoc])
+	AC_PATH_PROG([XMLTO], [xmlto], [xmlto])
+	AC_PATH_PROG([GZIP], [gzip], [gzip])
+	AC_PATH_PROG([MV], [mv], [mv])
+	AC_PROG_SED
+fi
+
+dnl --disable-convert: enable_convert defaults to yes.
+dnl DISABLE_BTRFSCONVERT is substituted as 0 when the value is exactly
+dnl "yes" and as 1 for anything else.
+AC_ARG_ENABLE([convert],
+ AS_HELP_STRING([--disable-convert], [do not build btrfs-convert]),
+ [], [enable_convert=yes]
+)
+
+AS_IF([test "x$enable_convert" = xyes], [DISABLE_BTRFSCONVERT=0], [DISABLE_BTRFSCONVERT=1])
+AC_SUBST([DISABLE_BTRFSCONVERT])
+
+dnl ext2fs and com_err are only looked up when btrfs-convert is built.
+if test "x$enable_convert" = xyes; then
+ PKG_CHECK_MODULES(EXT2FS, [ext2fs])
+ PKG_CHECK_MODULES(COM_ERR, [com_err])
+fi
+
+
+dnl Define <NAME>_LIBS= and <NAME>_CFLAGS= by pkg-config
+dnl
+dnl The default PKG_CHECK_MODULES() action-if-not-found is to end the
+dnl execution with an error. The static libs are optional.
+dnl NOTE(review): PKG_STATIC as defined in this file calls AC_MSG_ERROR
+dnl when the module is missing, so the static libs look mandatory in
+dnl practice - confirm which behaviour is intended.
+
+PKG_CHECK_MODULES(BLKID, [blkid])
+PKG_STATIC(BLKID_LIBS_STATIC, [blkid])
+
+PKG_CHECK_MODULES(UUID, [uuid])
+PKG_STATIC(UUID_LIBS_STATIC, [uuid])
+
+PKG_CHECK_MODULES(ZLIB, [zlib])
+PKG_STATIC(ZLIB_LIBS_STATIC, [zlib])
+
+dnl The lzo library does not provide a pkg-config file, so use the
+dnl classic link test; the same -llzo2 is used for shared and static.
+AC_CHECK_LIB([lzo2], [lzo_version], [
+ LZO2_LIBS="-llzo2"
+ LZO2_CFLAGS=""
+ LZO2_LIBS_STATIC="-llzo2"],[
+ AC_MSG_ERROR([cannot find lzo2 library])
+])
+AC_SUBST([LZO2_LIBS])
+AC_SUBST([LZO2_LIBS_STATIC])
+AC_SUBST([LZO2_CFLAGS])
+
+
+dnl library stuff
+AC_SUBST([LIBBTRFS_MAJOR])
+AC_SUBST([LIBBTRFS_MINOR])
+AC_SUBST([LIBBTRFS_PATCHLEVEL])
+
+dnl Results of AC_DEFINE and the function/header checks go to config.h.
+AC_CONFIG_HEADERS([config.h])
+
+dnl Files instantiated by AC_OUTPUT.
+AC_CONFIG_FILES([
+Makefile
+Documentation/Makefile
+version.h
+])
+
+AC_OUTPUT
+
+dnl Human-readable summary of the chosen configuration.
+AC_MSG_RESULT([
+ ${PACKAGE_NAME} ${PACKAGE_VERSION}
+
+ prefix: ${prefix}
+ exec prefix: ${exec_prefix}
+
+ bindir: ${bindir}
+ libdir: ${libdir}
+ includedir: ${includedir}
+
+ compiler: ${CC}
+ cflags: ${CFLAGS}
+ ldflags: ${LDFLAGS}
+
+ documentation: ${enable_documentation}
+ backtrace support: ${enable_backtrace}
+ btrfs-convert: ${enable_convert}
+
+ Type 'make' to compile.
+])
+
diff --git a/crc32c.c b/crc32c.c
new file mode 100644
index 00000000..dfa4e6c1
--- /dev/null
+++ b/crc32c.c
@@ -0,0 +1,222 @@
+/*
+ * Copied from the kernel source code, lib/libcrc32c.c.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include "kerncompat.h"
+#include "crc32c.h"
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+/*
+ * Table-driven implementation, defined further down in this file. It is
+ * the initial target of crc_function.
+ */
+u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length);
+/* Implementation that crc32c_le() dispatches to. */
+static u32 (*crc_function)(u32 crc, unsigned char const *data, size_t length) = __crc32c_le;
+
+#ifdef __x86_64__
+
+/*
+ * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
+ *
+ * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ */
+/*
+ * REX_PRE is spliced into the ".byte" encoding used by crc32c_intel():
+ * when long is 8 bytes it contributes a 0x48 prefix byte (presumably
+ * REX.W, selecting the 64-bit operand form - confirm against the Intel
+ * manual), otherwise it expands to nothing.
+ * SCALE_F is the divisor crc32c_intel() uses to split the length into
+ * whole words and leftover bytes.
+ */
+#if __SIZEOF_LONG__ == 8
+#define REX_PRE "0x48, "
+#define SCALE_F 8
+#else
+#define REX_PRE
+#define SCALE_F 4
+#endif
+
+/* Set to 1 once crc32c_intel_probe() has run. */
+static int crc32c_probed = 0;
+/* Non-zero when the probe found bit 20 set in ECX of CPUID leaf 1. */
+static int crc32c_intel_available = 0;
+
+/*
+ * Fold length bytes starting at data into crc, one byte per iteration,
+ * using a hand-encoded instruction, and return the updated crc.
+ *
+ * The asm constraints place crc in the register selected by "S" (as both
+ * input and output) and the current data byte in the one selected by "c".
+ * The .byte sequence is presumably the byte-operand form of CRC32 -
+ * confirm against the Intel manual referenced above.
+ */
+static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
+ unsigned long length)
+{
+ while (length--) {
+ __asm__ __volatile__(
+ ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+ :"=S"(crc)
+ :"0"(crc), "c"(*data)
+ );
+ data++;
+ }
+
+ return crc;
+}
+
+/*
+ * Hardware-assisted CRC32C over a buffer.
+ *
+ * Consumes the buffer in SCALE_F-byte words with a hand-encoded
+ * instruction, then hands the remaining (length % SCALE_F) bytes to
+ * crc32c_intel_le_hw_byte(). Returns the updated crc.
+ */
+static uint32_t crc32c_intel(u32 crc, unsigned char const *data, unsigned long length)
+{
+	/*
+	 * Keep the word count as wide as length: an unsigned int would
+	 * silently truncate the count for very large buffers and leave
+	 * part of the data out of the checksum.
+	 */
+	unsigned long iquotient = length / SCALE_F;
+	unsigned long iremainder = length % SCALE_F;
+	const unsigned long *ptmp = (const unsigned long *)data;
+
+	while (iquotient--) {
+		__asm__ __volatile__(
+			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+			:"=S"(crc)
+			:"0"(crc), "c"(*ptmp)
+		);
+		ptmp++;
+	}
+
+	/* Fewer than SCALE_F bytes are left; finish them bytewise. */
+	if (iremainder)
+		crc = crc32c_intel_le_hw_byte(crc, (const unsigned char *)ptmp,
+					      iremainder);
+
+	return crc;
+}
+
+/*
+ * Execute CPUID. The value of *eax on entry is loaded into EAX before the
+ * instruction; afterwards EAX, EBX, ECX and EDX are stored through the
+ * four pointers.
+ *
+ * NOTE(review): the outputs use generic "=r" constraints while all four
+ * CPUID registers are listed as clobbers; the "=a"/"=b"/"=c"/"=d"
+ * constraints would express the same thing more directly - confirm
+ * before changing.
+ */
+static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
+ unsigned int *edx)
+{
+ int id = *eax;
+
+ asm("movl %4, %%eax;"
+ "cpuid;"
+ "movl %%eax, %0;"
+ "movl %%ebx, %1;"
+ "movl %%ecx, %2;"
+ "movl %%edx, %3;"
+ : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx)
+ : "r" (id)
+ : "eax", "ebx", "ecx", "edx");
+}
+
+/*
+ * Query CPUID leaf 1 once and remember whether bit 20 of ECX is set.
+ * Later calls return immediately.
+ */
+static void crc32c_intel_probe(void)
+{
+	unsigned int eax = 1;
+	unsigned int ebx, ecx, edx;
+
+	if (crc32c_probed)
+		return;
+
+	do_cpuid(&eax, &ebx, &ecx, &edx);
+	crc32c_intel_available = (ecx >> 20) & 1;
+	crc32c_probed = 1;
+}
+
+/*
+ * Probe the CPU and, when the hardware path is available, make
+ * crc_function point at crc32c_intel(). Otherwise crc_function is left
+ * unchanged.
+ */
+void crc32c_optimization_init(void)
+{
+	crc32c_intel_probe();
+	if (!crc32c_intel_available)
+		return;
+	crc_function = crc32c_intel;
+}
+#else
+
+/*
+ * Build without __x86_64__: there is nothing to probe, so crc_function
+ * keeps its initial value.
+ */
+void crc32c_optimization_init(void)
+{
+}
+
+#endif /* __x86_64__ */
+
+/*
+ * This is the CRC-32C table
+ * Generated with:
+ * width = 32 bits
+ * poly = 0x1EDC6F41
+ * reflect input bytes = true
+ * reflect output bytes = true
+ */
+
+static const u32 crc32c_table[256] = {
+ 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+ 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+ 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+ 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
+ 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+ 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
+ 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
+ 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
+ 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+ 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+ 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
+ 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
+ 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
+ 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
+ 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+ 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
+ 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
+ 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+ 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
+ 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+ 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
+ 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
+ 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
+ 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
+ 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+ 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
+ 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+ 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
+ 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
+ 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+ 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
+ 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
+ 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
+ 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
+ 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+ 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+ 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
+ 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
+ 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
+ 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+ 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
+ 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
+ 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
+ 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
+ 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+ 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
+ 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
+ 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
+ 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
+ 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+ 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
+ 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
+ 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
+ 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+ 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+ 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
+ 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
+ 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
+ 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
+ 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+ 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
+ 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
+ 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+ 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
+};
+
+/*
+ * Table-driven CRC32C: folds the buffer into crc one byte at a time
+ * using crc32c_table and returns the updated (reflected) value.
+ */
+
+u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length)
+{
+	size_t i;
+
+	for (i = 0; i < length; i++) {
+		u32 idx = (crc ^ data[i]) & 0xFFL;
+
+		crc = crc32c_table[idx] ^ (crc >> 8);
+	}
+	return crc;
+}
+
+/*
+ * Public entry point: forwards to whichever implementation crc_function
+ * currently points at and returns its result.
+ */
+u32 crc32c_le(u32 crc, unsigned char const *data, size_t length)
+{
+	return (*crc_function)(crc, data, length);
+}
diff --git a/crc32c.h b/crc32c.h
new file mode 100644
index 00000000..c552ef6e
--- /dev/null
+++ b/crc32c.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2007 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __CRC32C__
+#define __CRC32C__
+
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#else
+#include <btrfs/kerncompat.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+/* Fold length bytes at data into the running CRC32C seed; returns the result. */
+u32 crc32c_le(u32 seed, unsigned char const *data, size_t length);
+/* Lets crc32c_le() switch to an accelerated implementation where one exists. */
+void crc32c_optimization_init(void);
+
+/*
+ * Convenience wrapper accepting any pointer type for data. Every argument
+ * is parenthesized so that an expression such as "p + off" is cast as a
+ * whole instead of the cast binding to "p" alone.
+ */
+#define crc32c(seed, data, length) crc32c_le((seed), (unsigned char const *)(data), (length))
+#define btrfs_crc32c crc32c
+#endif
diff --git a/ctree.c b/ctree.c
new file mode 100644
index 00000000..04cc476e
--- /dev/null
+++ b/ctree.c
@@ -0,0 +1,2940 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "repair.h"
+
+static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, int level);
+static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *ins_key,
+ struct btrfs_path *path, int data_size, int extend);
+static int push_node_left(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *dst,
+ struct extent_buffer *src, int empty);
+static int balance_node_right(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *dst_buf,
+ struct extent_buffer *src_buf);
+
+/* Reset a caller-provided path structure to all zeroes. */
+inline void btrfs_init_path(struct btrfs_path *p)
+{
+	memset(p, 0, sizeof(struct btrfs_path));
+}
+
+/*
+ * Allocate a path with kzalloc() and return it unchanged; the caller is
+ * responsible for checking the result.
+ */
+struct btrfs_path *btrfs_alloc_path(void)
+{
+	return kzalloc(sizeof(struct btrfs_path), GFP_NOFS);
+}
+
+/* Release everything the path holds, then free it. NULL is accepted. */
+void btrfs_free_path(struct btrfs_path *p)
+{
+	if (p) {
+		btrfs_release_path(p);
+		kfree(p);
+	}
+}
+
+/*
+ * Drop the extent buffer held at every populated level of the path and
+ * zero the whole structure so it can be reused.
+ */
+void btrfs_release_path(struct btrfs_path *p)
+{
+	int level;
+
+	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+		if (p->nodes[level])
+			free_extent_buffer(p->nodes[level]);
+	}
+	memset(p, 0, sizeof(*p));
+}
+
+/*
+ * Put root on fs_info->dirty_cowonly_roots, but only if it has
+ * track_dirty set and its dirty_list entry is currently empty.
+ */
+void add_root_to_dirty_list(struct btrfs_root *root)
+{
+	if (!root->track_dirty)
+		return;
+	if (!list_empty(&root->dirty_list))
+		return;
+	list_add(&root->dirty_list, &root->fs_info->dirty_cowonly_roots);
+}
+
+/*
+ * Copy the tree block buf into a newly allocated block owned by
+ * new_root_objectid and hand the copy back through *cow_ret.
+ *
+ * A temporary duplicate of root, with root_key.objectid replaced by
+ * new_root_objectid, is used for the allocation and the reference update
+ * and is freed before returning.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary root cannot be
+ * allocated, the PTR_ERR() of a failed btrfs_alloc_free_block(), or the
+ * non-zero result of btrfs_inc_ref().
+ */
+int btrfs_copy_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer **cow_ret, u64 new_root_objectid)
+{
+ struct extent_buffer *cow;
+ int ret = 0;
+ int level;
+ struct btrfs_root *new_root;
+ struct btrfs_disk_key disk_key;
+
+ new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
+ if (!new_root)
+ return -ENOMEM;
+
+ memcpy(new_root, root, sizeof(*new_root));
+ new_root->root_key.objectid = new_root_objectid;
+
+ WARN_ON(root->ref_cows && trans->transid !=
+ root->fs_info->running_transaction->transid);
+ WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+
+ /* The first key of the block is passed to the allocator. */
+ level = btrfs_header_level(buf);
+ if (level == 0)
+ btrfs_item_key(buf, &disk_key, 0);
+ else
+ btrfs_node_key(buf, &disk_key, 0);
+ cow = btrfs_alloc_free_block(trans, new_root, buf->len,
+ new_root_objectid, &disk_key,
+ level, buf->start, 0);
+ if (IS_ERR(cow)) {
+ kfree(new_root);
+ return PTR_ERR(cow);
+ }
+
+ /* Clone the contents, then rewrite the header for the new location. */
+ copy_extent_buffer(cow, buf, 0, 0, cow->len);
+ btrfs_set_header_bytenr(cow, cow->start);
+ btrfs_set_header_generation(cow, trans->transid);
+ btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+ btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+ BTRFS_HEADER_FLAG_RELOC);
+ /* Relocation copies get the RELOC flag; others get the new owner. */
+ if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+ btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+ else
+ btrfs_set_header_owner(cow, new_root_objectid);
+
+ write_extent_buffer(cow, root->fs_info->fsid,
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
+
+ WARN_ON(btrfs_header_generation(buf) > trans->transid);
+ ret = btrfs_inc_ref(trans, new_root, cow, 0);
+ kfree(new_root);
+
+ /*
+ * NOTE(review): cow is not released on this error path - confirm
+ * whether the caller or the transaction cleans it up.
+ */
+ if (ret)
+ return ret;
+
+ btrfs_mark_buffer_dirty(cow);
+ *cow_ret = cow;
+ return 0;
+}
+
+/*
+ * check if the tree block can be shared by multiple trees
+ *
+ * Returns 1 when buf may be shared, 0 when it is known not to be.
+ */
+static int btrfs_block_can_be_shared(struct btrfs_root *root,
+				     struct extent_buffer *buf)
+{
+	int is_root_block;
+
+	/* Blocks of trees that are not reference counted are never shared. */
+	if (!root->ref_cows)
+		return 0;
+
+	/*
+	 * Tree roots are never shared. Any other block may be shared when
+	 * it is not newer than the last snapshot, or when it was allocated
+	 * by tree relocation.
+	 */
+	is_root_block = (buf == root->node || buf == root->commit_root);
+	if (!is_root_block) {
+		if (btrfs_header_generation(buf) <=
+		    btrfs_root_last_snapshot(&root->root_item))
+			return 1;
+		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
+			return 1;
+	}
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+		return 1;
+#endif
+	return 0;
+}
+
+/*
+ * Adjust extent references when tree block buf is being replaced by its
+ * copy cow in root.
+ *
+ * Always returns 0; a failing helper trips a BUG_ON() instead of being
+ * reported to the caller.
+ */
+static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer *cow)
+{
+ u64 refs;
+ u64 owner;
+ u64 flags;
+ u64 new_flags = 0;
+ int ret;
+
+ /*
+ * Backrefs update rules:
+ *
+ * Always use full backrefs for extent pointers in tree block
+ * allocated by tree relocation.
+ *
+ * If a shared tree block is no longer referenced by its owner
+ * tree (btrfs_header_owner(buf) == root->root_key.objectid),
+ * use full backrefs for extent pointers in tree block.
+ *
+ * If a tree block is being relocated
+ * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
+ * use full backrefs for extent pointers in tree block.
+ * The reason for this is some operations (such as drop tree)
+ * are only allowed for blocks that use full backrefs.
+ */
+
+ /*
+ * Possibly shared blocks get their real reference count and flags
+ * looked up; otherwise a single reference is assumed and the flags
+ * are derived from the root and the block's backref revision.
+ */
+ if (btrfs_block_can_be_shared(root, buf)) {
+ ret = btrfs_lookup_extent_info(trans, root, buf->start,
+ btrfs_header_level(buf), 1,
+ &refs, &flags);
+ BUG_ON(ret);
+ BUG_ON(refs == 0);
+ } else {
+ refs = 1;
+ if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+ btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+ flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ else
+ flags = 0;
+ }
+
+ owner = btrfs_header_owner(buf);
+ BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
+ owner == BTRFS_TREE_RELOC_OBJECTID);
+
+ if (refs > 1) {
+ /* buf stays alive for its other referencers. */
+ if ((owner == root->root_key.objectid ||
+ root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
+ !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
+ ret = btrfs_inc_ref(trans, root, buf, 1);
+ BUG_ON(ret);
+
+ if (root->root_key.objectid ==
+ BTRFS_TREE_RELOC_OBJECTID) {
+ ret = btrfs_dec_ref(trans, root, buf, 0);
+ BUG_ON(ret);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
+ BUG_ON(ret);
+ }
+ new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ } else {
+
+ if (root->root_key.objectid ==
+ BTRFS_TREE_RELOC_OBJECTID)
+ ret = btrfs_inc_ref(trans, root, cow, 1);
+ else
+ ret = btrfs_inc_ref(trans, root, cow, 0);
+ BUG_ON(ret);
+ }
+ if (new_flags != 0) {
+ ret = btrfs_set_block_flags(trans, root, buf->start,
+ btrfs_header_level(buf),
+ new_flags);
+ BUG_ON(ret);
+ }
+ } else {
+ /* Single reference: move the references from buf over to cow. */
+ if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+ if (root->root_key.objectid ==
+ BTRFS_TREE_RELOC_OBJECTID)
+ ret = btrfs_inc_ref(trans, root, cow, 1);
+ else
+ ret = btrfs_inc_ref(trans, root, cow, 0);
+ BUG_ON(ret);
+ ret = btrfs_dec_ref(trans, root, buf, 1);
+ BUG_ON(ret);
+ }
+ clean_tree_block(trans, root, buf);
+ }
+ return 0;
+}
+
+/*
+ * Copy-on-write the tree block buf.
+ *
+ * Allocates a new block near search_start, copies buf into it, rewrites
+ * the header for the new location, updates the references and then
+ * re-points either root->node (when buf is the root) or slot parent_slot
+ * of parent at the copy. The copy is marked dirty and returned through
+ * *cow_ret.
+ *
+ * Returns 0 on success or the PTR_ERR() of a failed allocation.
+ */
+int __btrfs_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer *parent, int parent_slot,
+ struct extent_buffer **cow_ret,
+ u64 search_start, u64 empty_size)
+{
+ struct extent_buffer *cow;
+ struct btrfs_disk_key disk_key;
+ int level;
+
+ WARN_ON(root->ref_cows && trans->transid !=
+ root->fs_info->running_transaction->transid);
+ WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+
+ level = btrfs_header_level(buf);
+
+ /* The first key of the block is passed to the allocator. */
+ if (level == 0)
+ btrfs_item_key(buf, &disk_key, 0);
+ else
+ btrfs_node_key(buf, &disk_key, 0);
+
+ cow = btrfs_alloc_free_block(trans, root, buf->len,
+ root->root_key.objectid, &disk_key,
+ level, search_start, empty_size);
+ if (IS_ERR(cow))
+ return PTR_ERR(cow);
+
+ copy_extent_buffer(cow, buf, 0, 0, cow->len);
+ btrfs_set_header_bytenr(cow, cow->start);
+ btrfs_set_header_generation(cow, trans->transid);
+ btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+ btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+ BTRFS_HEADER_FLAG_RELOC);
+ /* Relocation copies get the RELOC flag; others get this root as owner. */
+ if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+ btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+ else
+ btrfs_set_header_owner(cow, root->root_key.objectid);
+
+ write_extent_buffer(cow, root->fs_info->fsid,
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
+
+ WARN_ON(!(buf->flags & EXTENT_BAD_TRANSID) &&
+ btrfs_header_generation(buf) > trans->transid);
+
+ /* The result is not checked; update_ref_for_cow() always returns 0. */
+ update_ref_for_cow(trans, root, buf, cow);
+
+ if (buf == root->node) {
+ /* Replacing the root: take an extra reference for root->node. */
+ root->node = cow;
+ extent_buffer_get(cow);
+
+ btrfs_free_extent(trans, root, buf->start, buf->len,
+ 0, root->root_key.objectid, level, 0);
+ free_extent_buffer(buf);
+ add_root_to_dirty_list(root);
+ } else {
+ /* Re-point the parent's slot at the copy. */
+ btrfs_set_node_blockptr(parent, parent_slot,
+ cow->start);
+ WARN_ON(trans->transid == 0);
+ btrfs_set_node_ptr_generation(parent, parent_slot,
+ trans->transid);
+ btrfs_mark_buffer_dirty(parent);
+ WARN_ON(btrfs_header_generation(parent) != trans->transid);
+
+ btrfs_free_extent(trans, root, buf->start, buf->len,
+ 0, root->root_key.objectid, level, 1);
+ }
+ /* A buffer queued on a recow list holds one more reference; drop it. */
+ if (!list_empty(&buf->recow)) {
+ list_del_init(&buf->recow);
+ free_extent_buffer(buf);
+ }
+ free_extent_buffer(buf);
+ btrfs_mark_buffer_dirty(cow);
+ *cow_ret = cow;
+ return 0;
+}
+
+/*
+ * Decide whether buf has to be copied before it may be modified.
+ *
+ * Returns 0 only when the block was created in this transaction, has not
+ * been written yet, and is not a relocation block seen from a root other
+ * than the relocation tree; returns 1 otherwise.
+ */
+static inline int should_cow_block(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct extent_buffer *buf)
+{
+	if (btrfs_header_generation(buf) != trans->transid)
+		return 1;
+	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN))
+		return 1;
+	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
+	    btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
+		return 1;
+	return 0;
+}
+
+/*
+ * Copy-on-write buf if needed.
+ *
+ * When should_cow_block() says no copy is required, buf itself is
+ * returned through *cow_ret. Otherwise the work is delegated to
+ * __btrfs_cow_block() with a search hint of buf->start rounded down to a
+ * 1 GiB boundary, and its result is returned.
+ */
+int btrfs_cow_block(struct btrfs_trans_handle *trans,
+		    struct btrfs_root *root, struct extent_buffer *buf,
+		    struct extent_buffer *parent, int parent_slot,
+		    struct extent_buffer **cow_ret)
+{
+	const u64 hint_align = (u64)(1024 * 1024 * 1024);
+	u64 search_start;
+
+	/* Complain, but carry on, if the handle is not the current generation. */
+	if (trans->transid != root->fs_info->generation) {
+		printk(KERN_CRIT "trans %llu running %llu\n",
+		       (unsigned long long)trans->transid,
+		       (unsigned long long)root->fs_info->generation);
+		WARN_ON(1);
+	}
+
+	if (should_cow_block(trans, root, buf)) {
+		search_start = buf->start & ~(hint_align - 1);
+		return __btrfs_cow_block(trans, root, buf, parent,
+					 parent_slot, cow_ret, search_start, 0);
+	}
+
+	*cow_ret = buf;
+	return 0;
+}
+
+/*
+ * Three-way comparison of two CPU-order keys, ordered by objectid, then
+ * type, then offset. Returns 1, -1 or 0.
+ */
+int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+{
+	if (k1->objectid != k2->objectid)
+		return k1->objectid > k2->objectid ? 1 : -1;
+	if (k1->type != k2->type)
+		return k1->type > k2->type ? 1 : -1;
+	if (k1->offset != k2->offset)
+		return k1->offset > k2->offset ? 1 : -1;
+	return 0;
+}
+
+/*
+ * Compare an on-disk key with a CPU-order key in a memcmp fashion: the
+ * disk key is converted first and the result of btrfs_comp_cpu_keys()
+ * is returned.
+ */
+static int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+{
+	struct btrfs_key cpu_key;
+
+	btrfs_disk_key_to_cpu(&cpu_key, disk);
+	return btrfs_comp_cpu_keys(&cpu_key, k2);
+}
+
+/*
+ * The leaf data grows from end to front in the block. Return the offset
+ * where the last item's data starts, which is where the used data area
+ * stops; for an empty leaf that is BTRFS_LEAF_DATA_SIZE(root).
+ */
+static inline unsigned int leaf_data_end(struct btrfs_root *root,
+					 struct extent_buffer *leaf)
+{
+	u32 nritems = btrfs_header_nritems(leaf);
+
+	if (nritems)
+		return btrfs_item_offset_nr(leaf, nritems - 1);
+	return BTRFS_LEAF_DATA_SIZE(root);
+}
+
+/*
+ * Sanity-check an internal node.
+ *
+ * Verifies that the item count is in range, that the first key matches
+ * parent_key (when one is given and its type is non-zero), and that all
+ * keys are in strictly increasing order.
+ *
+ * Returns BTRFS_TREE_BLOCK_CLEAN on success or the status code of the
+ * first failed check. When a failing block is owned by the extent tree,
+ * a corrupt extent record is added for it.
+ */
+enum btrfs_tree_block_status
+btrfs_check_node(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
+		 struct extent_buffer *buf)
+{
+	int i;
+	struct btrfs_key cpukey;
+	struct btrfs_disk_key key;
+	u32 nritems = btrfs_header_nritems(buf);
+	enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
+
+	if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
+		goto fail;
+
+	ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
+	if (parent_key && parent_key->type) {
+		btrfs_node_key(buf, &key, 0);
+		if (memcmp(parent_key, &key, sizeof(key)))
+			goto fail;
+	}
+	ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
+	/*
+	 * Compare every adjacent pair (i, i + 1). The bound is nritems - 1
+	 * so that the final pair (nritems - 2, nritems - 1) is checked as
+	 * well, matching btrfs_check_leaf().
+	 */
+	for (i = 0; nritems > 1 && i < nritems - 1; i++) {
+		btrfs_node_key(buf, &key, i);
+		btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
+		if (btrfs_comp_keys(&key, &cpukey) >= 0)
+			goto fail;
+	}
+	return BTRFS_TREE_BLOCK_CLEAN;
+fail:
+	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+		/* Record the block under the parent's key when there is one. */
+		if (parent_key)
+			btrfs_disk_key_to_cpu(&cpukey, parent_key);
+		else
+			btrfs_node_key_to_cpu(buf, &cpukey, 0);
+		btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+						buf->start, buf->len,
+						btrfs_header_level(buf));
+	}
+	return ret;
+}
+
+/*
+ * Sanity-check a leaf.
+ *
+ * Checks, in order: the item count fits in the block, the header level is
+ * 0, the free space is not negative, the first key matches parent_key
+ * (when one is given and its type is non-zero), keys are strictly
+ * increasing, item data is laid out back to back ending at
+ * BTRFS_LEAF_DATA_SIZE(root), and no item ends outside the leaf. Each
+ * failure is reported on stderr.
+ *
+ * Returns BTRFS_TREE_BLOCK_CLEAN on success or the status code of the
+ * first failed check. When a failing block is owned by the extent tree,
+ * a corrupt extent record is added for it.
+ */
+enum btrfs_tree_block_status
+btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
+ struct extent_buffer *buf)
+{
+ int i;
+ struct btrfs_key cpukey;
+ struct btrfs_disk_key key;
+ u32 nritems = btrfs_header_nritems(buf);
+ enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
+
+ /* The value printed here is the block's start offset, not nritems. */
+ if (nritems * sizeof(struct btrfs_item) > buf->len) {
+ fprintf(stderr, "invalid number of items %llu\n",
+ (unsigned long long)buf->start);
+ goto fail;
+ }
+
+ if (btrfs_header_level(buf) != 0) {
+ ret = BTRFS_TREE_BLOCK_INVALID_LEVEL;
+ fprintf(stderr, "leaf is not a leaf %llu\n",
+ (unsigned long long)btrfs_header_bytenr(buf));
+ goto fail;
+ }
+ if (btrfs_leaf_free_space(root, buf) < 0) {
+ ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE;
+ fprintf(stderr, "leaf free space incorrect %llu %d\n",
+ (unsigned long long)btrfs_header_bytenr(buf),
+ btrfs_leaf_free_space(root, buf));
+ goto fail;
+ }
+
+ /* An empty leaf has nothing further to verify. */
+ if (nritems == 0)
+ return BTRFS_TREE_BLOCK_CLEAN;
+
+ btrfs_item_key(buf, &key, 0);
+ if (parent_key && parent_key->type &&
+ memcmp(parent_key, &key, sizeof(key))) {
+ ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
+ fprintf(stderr, "leaf parent key incorrect %llu\n",
+ (unsigned long long)btrfs_header_bytenr(buf));
+ goto fail;
+ }
+ /* Walk adjacent pairs (i, i + 1): key order and data contiguity. */
+ for (i = 0; nritems > 1 && i < nritems - 1; i++) {
+ btrfs_item_key(buf, &key, i);
+ btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
+ if (btrfs_comp_keys(&key, &cpukey) >= 0) {
+ ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
+ fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
+ goto fail;
+ }
+ if (btrfs_item_offset_nr(buf, i) !=
+ btrfs_item_end_nr(buf, i + 1)) {
+ ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+ fprintf(stderr, "incorrect offsets %u %u\n",
+ btrfs_item_offset_nr(buf, i),
+ btrfs_item_end_nr(buf, i + 1));
+ goto fail;
+ }
+ /*
+ * The first item's data must end exactly at the end of the
+ * data area. This sits inside the pair loop, so it is not
+ * evaluated for a leaf with a single item.
+ */
+ if (i == 0 && btrfs_item_end_nr(buf, i) !=
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+ fprintf(stderr, "bad item end %u wanted %u\n",
+ btrfs_item_end_nr(buf, i),
+ (unsigned)BTRFS_LEAF_DATA_SIZE(root));
+ goto fail;
+ }
+ }
+
+ /* No item may end beyond the leaf data area. */
+ for (i = 0; i < nritems; i++) {
+ if (btrfs_item_end_nr(buf, i) > BTRFS_LEAF_DATA_SIZE(root)) {
+ btrfs_item_key(buf, &key, 0);
+ btrfs_print_key(&key);
+ fflush(stdout);
+ ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+ fprintf(stderr, "slot end outside of leaf %llu > %llu\n",
+ (unsigned long long)btrfs_item_end_nr(buf, i),
+ (unsigned long long)BTRFS_LEAF_DATA_SIZE(root));
+ goto fail;
+ }
+ }
+
+ return BTRFS_TREE_BLOCK_CLEAN;
+fail:
+ if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+ /* Record the block under the parent's key when there is one. */
+ if (parent_key)
+ btrfs_disk_key_to_cpu(&cpukey, parent_key);
+ else
+ btrfs_item_key_to_cpu(buf, &cpukey, 0);
+
+ btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+ buf->start, buf->len, 0);
+ }
+ return ret;
+}
+
+/*
+ * Validate the block at the given level of path, using the key stored in
+ * the parent's slot (if the path has a parent at level + 1) as the
+ * expected first key.
+ *
+ * Returns 0 when the check is skipped or the block is clean, -EIO
+ * otherwise.
+ */
+static int noinline check_block(struct btrfs_root *root,
+				struct btrfs_path *path, int level)
+{
+	struct btrfs_disk_key parent_key;
+	struct btrfs_disk_key *expected = NULL;
+	struct extent_buffer *parent;
+	enum btrfs_tree_block_status status;
+
+	if (path->skip_check_block)
+		return 0;
+
+	parent = path->nodes[level + 1];
+	if (parent) {
+		btrfs_node_key(parent, &parent_key, path->slots[level + 1]);
+		expected = &parent_key;
+	}
+
+	if (level != 0)
+		status = btrfs_check_node(root, expected, path->nodes[level]);
+	else
+		status = btrfs_check_leaf(root, expected, path->nodes[0]);
+
+	return status == BTRFS_TREE_BLOCK_CLEAN ? 0 : -EIO;
+}
+
+/*
+ * Binary search for key among the keys stored in eb. The first entry is
+ * at byte offset p, consecutive entries are item_size bytes apart, and
+ * there are max entries.
+ *
+ * On an exact match *slot is the matching index and 0 is returned.
+ * Otherwise *slot is the index where key would have to be inserted
+ * (which is max when key is larger than every stored key) and 1 is
+ * returned.
+ */
+static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
+			      int item_size, struct btrfs_key *key,
+			      int max, int *slot)
+{
+	int lo = 0;
+	int hi = max;
+
+	while (lo < hi) {
+		int probe = (lo + hi) / 2;
+		unsigned long offset = p + probe * item_size;
+		struct btrfs_disk_key *disk_key =
+			(struct btrfs_disk_key *)(eb->data + offset);
+		int cmp = btrfs_comp_keys(disk_key, key);
+
+		if (cmp == 0) {
+			*slot = probe;
+			return 0;
+		}
+		if (cmp < 0)
+			lo = probe + 1;
+		else
+			hi = probe;
+	}
+	*slot = lo;
+	return 1;
+}
+
+/*
+ * Front end for generic_bin_search() that picks the entry layout for a
+ * leaf (level 0: struct btrfs_item array) or a node (struct
+ * btrfs_key_ptr array). Return value and *slot are those of
+ * generic_bin_search().
+ */
+static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+		      int level, int *slot)
+{
+	unsigned long first;
+	int stride;
+
+	if (level == 0) {
+		first = offsetof(struct btrfs_leaf, items);
+		stride = sizeof(struct btrfs_item);
+	} else {
+		first = offsetof(struct btrfs_node, ptrs);
+		stride = sizeof(struct btrfs_key_ptr);
+	}
+	return generic_bin_search(eb, first, stride, key,
+				  btrfs_header_nritems(eb), slot);
+}
+
+/*
+ * Read the child block referenced by the given slot of parent.
+ *
+ * Returns NULL when slot is negative or not below the parent's item
+ * count, or when parent is a leaf; otherwise returns whatever
+ * read_tree_block() returns for the child.
+ */
+struct extent_buffer *read_node_slot(struct btrfs_root *root,
+				     struct extent_buffer *parent, int slot)
+{
+	int level = btrfs_header_level(parent);
+
+	if (slot < 0 || slot >= btrfs_header_nritems(parent) || level == 0)
+		return NULL;
+
+	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
+			       btrfs_level_size(root, level - 1),
+			       btrfs_node_ptr_generation(parent, slot));
+}
+
+/*
+ * Rebalance the node at path->nodes[level] while walking down for a delete.
+ *
+ *  - Leaves (level 0) are never touched: returns 0 immediately.
+ *  - A node without a parent in the path is the root.  If it holds exactly
+ *    one pointer, the child is COWed and promoted to root, the old root
+ *    block is freed and path->nodes[level] is left NULL.
+ *  - Otherwise nothing happens unless the node holds at most a quarter of
+ *    BTRFS_NODEPTRS_PER_BLOCK(root) pointers.  Then both siblings are read
+ *    and COWed, pointers are pushed mid -> left and right -> mid, emptied
+ *    blocks are removed from the parent and freed, the parent keys are
+ *    refreshed and the path is re-pointed at the block that now holds the
+ *    original pointer.
+ *
+ * A sibling that cannot be read is treated as absent.  Other callers in
+ * this file test the result of read_node_slot() with IS_ERR(), so the value
+ * may be NULL or an error pointer; neither may be dereferenced nor handed
+ * to free_extent_buffer().
+ *
+ * Returns 0 on success, otherwise an error code reported by a helper.
+ */
+static int balance_level(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 struct btrfs_path *path, int level)
+{
+	struct extent_buffer *right = NULL;
+	struct extent_buffer *mid;
+	struct extent_buffer *left = NULL;
+	struct extent_buffer *parent = NULL;
+	int ret = 0;
+	int wret;
+	int pslot;
+	int orig_slot = path->slots[level];
+	u64 orig_ptr;
+
+	if (level == 0)
+		return 0;
+
+	mid = path->nodes[level];
+	WARN_ON(btrfs_header_generation(mid) != trans->transid);
+
+	/* remembered so the path fix-up at the end can be verified */
+	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+	if (level < BTRFS_MAX_LEVEL - 1) {
+		parent = path->nodes[level + 1];
+		pslot = path->slots[level + 1];
+	}
+
+	/*
+	 * deal with the case where there is only one pointer in the root
+	 * by promoting the node below to a root
+	 */
+	if (!parent) {
+		struct extent_buffer *child;
+
+		if (btrfs_header_nritems(mid) != 1)
+			return 0;
+
+		/* promote the child to a root */
+		child = read_node_slot(root, mid, 0);
+		BUG_ON(!extent_buffer_uptodate(child));
+		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
+		BUG_ON(ret);
+
+		root->node = child;
+		add_root_to_dirty_list(root);
+		path->nodes[level] = NULL;
+		clean_tree_block(trans, root, mid);
+		wait_on_tree_block_writeback(root, mid);
+		/* once for the path */
+		free_extent_buffer(mid);
+
+		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
+					0, root->root_key.objectid,
+					level, 1);
+		/* once for the root ptr */
+		free_extent_buffer(mid);
+		return ret;
+	}
+	if (btrfs_header_nritems(mid) >
+	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
+		return 0;
+
+	left = read_node_slot(root, parent, pslot - 1);
+	if (extent_buffer_uptodate(left)) {
+		wret = btrfs_cow_block(trans, root, left,
+				       parent, pslot - 1, &left);
+		if (wret) {
+			ret = wret;
+			goto enospc;
+		}
+	} else {
+		/* unreadable sibling: make sure nothing below touches it */
+		if (left && !IS_ERR(left))
+			free_extent_buffer(left);
+		left = NULL;
+	}
+	right = read_node_slot(root, parent, pslot + 1);
+	if (extent_buffer_uptodate(right)) {
+		wret = btrfs_cow_block(trans, root, right,
+				       parent, pslot + 1, &right);
+		if (wret) {
+			ret = wret;
+			goto enospc;
+		}
+	} else {
+		/* unreadable sibling: make sure nothing below touches it */
+		if (right && !IS_ERR(right))
+			free_extent_buffer(right);
+		right = NULL;
+	}
+
+	/* first, try to make some room in the middle buffer */
+	if (left) {
+		orig_slot += btrfs_header_nritems(left);
+		wret = push_node_left(trans, root, left, mid, 1);
+		if (wret < 0)
+			ret = wret;
+	}
+
+	/*
+	 * then try to empty the right most buffer into the middle
+	 */
+	if (right) {
+		wret = push_node_left(trans, root, mid, right, 1);
+		if (wret < 0 && wret != -ENOSPC)
+			ret = wret;
+		if (btrfs_header_nritems(right) == 0) {
+			/* save these before the buffer reference is dropped */
+			u64 bytenr = right->start;
+			u32 blocksize = right->len;
+
+			clean_tree_block(trans, root, right);
+			wait_on_tree_block_writeback(root, right);
+			free_extent_buffer(right);
+			right = NULL;
+			wret = btrfs_del_ptr(trans, root, path,
+					     level + 1, pslot + 1);
+			if (wret)
+				ret = wret;
+			wret = btrfs_free_extent(trans, root, bytenr,
+						 blocksize, 0,
+						 root->root_key.objectid,
+						 level, 0);
+			if (wret)
+				ret = wret;
+		} else {
+			struct btrfs_disk_key right_key;
+			btrfs_node_key(right, &right_key, 0);
+			btrfs_set_node_key(parent, &right_key, pslot + 1);
+			btrfs_mark_buffer_dirty(parent);
+		}
+	}
+	if (btrfs_header_nritems(mid) == 1) {
+		/*
+		 * we're not allowed to leave a node with one item in the
+		 * tree during a delete.  A deletion from lower in the tree
+		 * could try to delete the only pointer in this node.
+		 * So, pull some keys from the left.
+		 * There has to be a left pointer at this point because
+		 * otherwise we would have pulled some pointers from the
+		 * right
+		 */
+		BUG_ON(!left);
+		wret = balance_node_right(trans, root, mid, left);
+		if (wret < 0) {
+			ret = wret;
+			goto enospc;
+		}
+		if (wret == 1) {
+			wret = push_node_left(trans, root, left, mid, 1);
+			if (wret < 0)
+				ret = wret;
+		}
+		BUG_ON(wret == 1);
+	}
+	if (btrfs_header_nritems(mid) == 0) {
+		/* we've managed to empty the middle node, drop it */
+		u64 bytenr = mid->start;
+		u32 blocksize = mid->len;
+		clean_tree_block(trans, root, mid);
+		wait_on_tree_block_writeback(root, mid);
+		free_extent_buffer(mid);
+		mid = NULL;
+		wret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
+		if (wret)
+			ret = wret;
+		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
+					 0, root->root_key.objectid,
+					 level, 0);
+		if (wret)
+			ret = wret;
+	} else {
+		/* update the parent key to reflect our changes */
+		struct btrfs_disk_key mid_key;
+		btrfs_node_key(mid, &mid_key, 0);
+		btrfs_set_node_key(parent, &mid_key, pslot);
+		btrfs_mark_buffer_dirty(parent);
+	}
+
+	/* update the path */
+	if (left) {
+		if (btrfs_header_nritems(left) > orig_slot) {
+			/* the original pointer moved into the left block */
+			extent_buffer_get(left);
+			path->nodes[level] = left;
+			path->slots[level + 1] -= 1;
+			path->slots[level] = orig_slot;
+			if (mid)
+				free_extent_buffer(mid);
+		} else {
+			orig_slot -= btrfs_header_nritems(left);
+			path->slots[level] = orig_slot;
+		}
+	}
+	/* double check we haven't messed things up */
+	check_block(root, path, level);
+	if (orig_ptr !=
+	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
+		BUG();
+enospc:
+	if (right)
+		free_extent_buffer(right);
+	if (left)
+		free_extent_buffer(left);
+	return ret;
+}
+
+/*
+ * Try to free up room in the node at path->nodes[level] before an insert by
+ * shifting pointers into a sibling instead of splitting. The left sibling
+ * is tried first (push_node_left), then the right one (balance_node_right).
+ * A sibling that already holds BTRFS_NODEPTRS_PER_BLOCK(root) - 1 or more
+ * pointers, or whose COW fails, is skipped. After a successful shift the
+ * parent key is updated and the path is re-pointed at whichever block now
+ * contains the original slot.
+ *
+ * returns zero if the push worked, non-zero otherwise
+ */
+static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int level)
+{
+ struct extent_buffer *right = NULL;
+ struct extent_buffer *mid;
+ struct extent_buffer *left = NULL;
+ struct extent_buffer *parent = NULL;
+ int ret = 0;
+ int wret;
+ int pslot;
+ int orig_slot = path->slots[level];
+
+ if (level == 0) /* leaves are not handled here */
+ return 1;
+
+ mid = path->nodes[level];
+ WARN_ON(btrfs_header_generation(mid) != trans->transid);
+
+ if (level < BTRFS_MAX_LEVEL - 1) {
+ parent = path->nodes[level + 1];
+ pslot = path->slots[level + 1];
+ }
+
+ if (!parent) /* no parent in the path means no siblings to push into */
+ return 1;
+
+ left = read_node_slot(root, parent, pslot - 1);
+
+ /* first, try to make some room in the middle buffer */
+ if (extent_buffer_uptodate(left)) {
+ u32 left_nr;
+ left_nr = btrfs_header_nritems(left);
+ if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
+ wret = 1; /* left sibling is (nearly) full */
+ } else {
+ ret = btrfs_cow_block(trans, root, left, parent,
+ pslot - 1, &left);
+ if (ret)
+ wret = 1; /* a failed COW just means "could not push" */
+ else {
+ wret = push_node_left(trans, root,
+ left, mid, 0);
+ }
+ }
+ if (wret < 0)
+ ret = wret;
+ if (wret == 0) {
+ struct btrfs_disk_key disk_key;
+ orig_slot += left_nr; /* slot index as if left and mid were one array */
+ btrfs_node_key(mid, &disk_key, 0);
+ btrfs_set_node_key(parent, &disk_key, pslot);
+ btrfs_mark_buffer_dirty(parent);
+ if (btrfs_header_nritems(left) > orig_slot) {
+ path->nodes[level] = left; /* the path takes over our reference on left */
+ path->slots[level + 1] -= 1;
+ path->slots[level] = orig_slot;
+ free_extent_buffer(mid);
+ } else {
+ orig_slot -=
+ btrfs_header_nritems(left);
+ path->slots[level] = orig_slot;
+ free_extent_buffer(left);
+ }
+ return 0;
+ }
+ free_extent_buffer(left);
+ }
+ right= read_node_slot(root, parent, pslot + 1);
+
+ /*
+ * then try to empty the right most buffer into the middle
+ */
+ if (extent_buffer_uptodate(right)) {
+ u32 right_nr;
+ right_nr = btrfs_header_nritems(right);
+ if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
+ wret = 1; /* right sibling is (nearly) full */
+ } else {
+ ret = btrfs_cow_block(trans, root, right,
+ parent, pslot + 1,
+ &right);
+ if (ret)
+ wret = 1;
+ else {
+ wret = balance_node_right(trans, root,
+ right, mid);
+ }
+ }
+ if (wret < 0)
+ ret = wret;
+ if (wret == 0) {
+ struct btrfs_disk_key disk_key;
+
+ btrfs_node_key(right, &disk_key, 0);
+ btrfs_set_node_key(parent, &disk_key, pslot + 1);
+ btrfs_mark_buffer_dirty(parent);
+
+ if (btrfs_header_nritems(mid) <= orig_slot) {
+ path->nodes[level] = right; /* the original slot moved into right */
+ path->slots[level + 1] += 1;
+ path->slots[level] = orig_slot -
+ btrfs_header_nritems(mid);
+ free_extent_buffer(mid);
+ } else {
+ free_extent_buffer(right);
+ }
+ return 0;
+ }
+ free_extent_buffer(right);
+ }
+ return 1;
+}
+
+/*
+ * Request readahead for leaves referenced by the level 1 node in 'path'.
+ *
+ * Nothing is done unless 'level' is 1, the path has a node at that level
+ * and the block at 'slot' is not already found by btrfs_find_tree_block().
+ * Starting at 'slot' the node is walked backwards (path->reada < 0) or
+ * forwards (path->reada > 0).  Readahead is requested for every pointer
+ * that lies inside, or within 32768 bytes of, the byte range spanned by the
+ * pointers looked at so far.  When walking backwards with a non-zero
+ * 'objectid' the walk ends at the first key of a different objectid.
+ *
+ * The walk stops once more than 256KiB was requested or more than 32
+ * pointers were scanned if path->reada < 2, and in any case once more than
+ * 1MiB was requested or more than 128 pointers were scanned.
+ */
+void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
+		      int level, int slot, u64 objectid)
+{
+	const u64 window = 32768;
+	struct extent_buffer *node;
+	struct extent_buffer *cached;
+	struct btrfs_disk_key disk_key;
+	int direction = path->reada;
+	u64 target;
+	u64 range_lo;
+	u64 range_hi;
+	u64 bytes_requested = 0;
+	u32 scanned = 0;
+	u32 blocksize;
+	u32 nritems;
+	u32 cur;
+
+	if (level != 1 || !path->nodes[level])
+		return;
+
+	node = path->nodes[level];
+	target = btrfs_node_blockptr(node, slot);
+	blocksize = btrfs_level_size(root, level - 1);
+
+	/* the block we are about to descend into is already around */
+	cached = btrfs_find_tree_block(root, target, blocksize);
+	if (cached) {
+		free_extent_buffer(cached);
+		return;
+	}
+
+	range_lo = target;
+	range_hi = target;
+	nritems = btrfs_header_nritems(node);
+	cur = slot;
+
+	for (;;) {
+		int nearby;
+
+		if (direction < 0) {
+			if (cur == 0)
+				break;
+			cur--;
+		} else if (direction > 0) {
+			cur++;
+			if (cur >= nritems)
+				break;
+		}
+
+		if (path->reada < 0 && objectid) {
+			btrfs_node_key(node, &disk_key, cur);
+			if (btrfs_disk_key_objectid(&disk_key) != objectid)
+				break;
+		}
+
+		target = btrfs_node_blockptr(node, cur);
+
+		/* range_lo <= range_hi always holds, so the cases are exclusive */
+		if (target < range_lo)
+			nearby = (range_lo - target <= window);
+		else if (target > range_hi)
+			nearby = (target - range_hi <= window);
+		else
+			nearby = 1;
+
+		if (nearby) {
+			readahead_tree_block(root, target, blocksize,
+				     btrfs_node_ptr_generation(node, cur));
+			bytes_requested += blocksize;
+		}
+
+		scanned++;
+		if (path->reada < 2 &&
+		    (bytes_requested > (256 * 1024) || scanned > 32))
+			break;
+		if (bytes_requested > (1024 * 1024) || scanned > 128)
+			break;
+
+		if (target < range_lo)
+			range_lo = target;
+		if (target > range_hi)
+			range_hi = target;
+	}
+}
+
+/*
+ * Search 'fs_root' for the key (iobjectid, key_type, ioff).
+ *
+ * When 'found_path' is NULL a temporary path is allocated and released
+ * again before returning; otherwise the caller's path is used and left
+ * pointing at the result.
+ *
+ * When 'found_key' is NULL the result of btrfs_search_slot() is returned
+ * directly.  Otherwise the key at the resulting slot (moving on to the next
+ * leaf if the slot is past the end of the current one) is copied into
+ * *found_key, and 1 is returned if its type or objectid differ from the
+ * ones searched for.
+ *
+ * Returns -ENOMEM if the temporary path cannot be allocated, or a negative
+ * value passed on from btrfs_search_slot().
+ */
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+		    u64 iobjectid, u64 ioff, u8 key_type,
+		    struct btrfs_key *found_key)
+{
+	struct btrfs_path *path = found_path;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = iobjectid;
+	key.type = key_type;
+	key.offset = ioff;
+
+	if (!path) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+	}
+
+	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+	if (ret < 0 || !found_key)
+		goto out;
+
+	leaf = path->nodes[0];
+	if (ret && path->slots[0] >= btrfs_header_nritems(leaf)) {
+		/* the slot is past the last item, look in the next leaf */
+		ret = btrfs_next_leaf(fs_root, path);
+		if (ret)
+			goto out;
+		leaf = path->nodes[0];
+	}
+
+	btrfs_item_key_to_cpu(leaf, found_key, path->slots[0]);
+	if (!(found_key->type == key.type &&
+	      found_key->objectid == key.objectid))
+		ret = 1;
+
+out:
+	if (path != found_path)
+		btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * look for key in the tree. path is filled in with nodes along the way
+ * if key is found, we return zero and you can find the item in the leaf
+ * level of the path (level 0)
+ *
+ * If the key isn't found, the path points to the slot where it should
+ * be inserted, and 1 is returned. If there are other errors during the
+ * search a negative error number is returned.
+ *
+ * if ins_len > 0, nodes and leaves will be split as we walk down the
+ * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
+ * possible)
+ *
+ * if cow is non-zero every block on the way down is passed through
+ * btrfs_cow_block() before it is used; a non-zero ins_len without cow
+ * trips a BUG_ON. The descent stops early at p->lowest_level when that is
+ * non-zero.
+ */
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_path *p, int
+ ins_len, int cow)
+{
+ struct extent_buffer *b;
+ int slot;
+ int ret;
+ int level;
+ int should_reada = p->reada;
+ u8 lowest_level = 0;
+
+ lowest_level = p->lowest_level;
+ WARN_ON(lowest_level && ins_len > 0);
+ WARN_ON(p->nodes[0] != NULL);
+ /*
+ WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
+ */
+again:
+ b = root->node;
+ extent_buffer_get(b); /* reference that ends up owned by the path */
+ while (b) {
+ level = btrfs_header_level(b);
+ if (cow) {
+ int wret;
+ wret = btrfs_cow_block(trans, root, b,
+ p->nodes[level + 1],
+ p->slots[level + 1],
+ &b);
+ if (wret) {
+ free_extent_buffer(b);
+ return wret;
+ }
+ }
+ BUG_ON(!cow && ins_len);
+ if (level != btrfs_header_level(b))
+ WARN_ON(1);
+ level = btrfs_header_level(b);
+ p->nodes[level] = b;
+ ret = check_block(root, p, level);
+ if (ret)
+ return -1; /* NOTE(review): plain -1 rather than an errno value */
+ ret = bin_search(b, key, level, &slot);
+ if (level != 0) {
+ if (ret && slot > 0) /* no exact match: use the pointer just before the insert position */
+ slot -= 1;
+ p->slots[level] = slot;
+ if ((p->search_for_split || ins_len > 0) &&
+ btrfs_header_nritems(b) >=
+ BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
+ int sret = split_node(trans, root, p, level);
+ BUG_ON(sret > 0);
+ if (sret)
+ return sret;
+ b = p->nodes[level]; /* split_node may have re-pointed the path */
+ slot = p->slots[level];
+ } else if (ins_len < 0) {
+ int sret = balance_level(trans, root, p,
+ level);
+ if (sret)
+ return sret;
+ b = p->nodes[level];
+ if (!b) { /* balance_level cleared this level (root was replaced): start over */
+ btrfs_release_path(p);
+ goto again;
+ }
+ slot = p->slots[level];
+ BUG_ON(btrfs_header_nritems(b) == 1);
+ }
+ /* this is only true while dropping a snapshot */
+ if (level == lowest_level)
+ break;
+
+ if (should_reada)
+ reada_for_search(root, p, level, slot,
+ key->objectid);
+
+ b = read_node_slot(root, b, slot);
+ if (!extent_buffer_uptodate(b))
+ return -EIO;
+ } else {
+ p->slots[level] = slot;
+ if (ins_len > 0 &&
+ ins_len > btrfs_leaf_free_space(root, b)) {
+ int sret = split_leaf(trans, root, key,
+ p, ins_len, ret == 0);
+ BUG_ON(sret > 0);
+ if (sret)
+ return sret;
+ }
+ return ret; /* 0 when the key was found, 1 otherwise (from bin_search) */
+ }
+ }
+ return 1; /* reached after stopping at lowest_level */
+}
+
+/*
+ * Walk up the path from 'level' and store 'key' at the path's slot in each
+ * node, marking every touched node dirty.
+ *
+ * This is used after the first key of a block changed (for example after
+ * pointers were shifted left).  The walk ends after the first node whose
+ * slot is not 0, at the first level without a node, or at BTRFS_MAX_LEVEL.
+ */
+void btrfs_fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
+			  struct btrfs_disk_key *key, int level)
+{
+	int lvl = level;
+
+	while (lvl < BTRFS_MAX_LEVEL && path->nodes[lvl]) {
+		struct extent_buffer *node = path->nodes[lvl];
+		int node_slot = path->slots[lvl];
+
+		btrfs_set_node_key(node, key, node_slot);
+		btrfs_mark_buffer_dirty(node);
+
+		/* only slot 0 changes the key seen by the level above */
+		if (node_slot != 0)
+			break;
+		lvl++;
+	}
+}
+
+/*
+ * Replace the key of the item at path->slots[0] in the leaf path->nodes[0].
+ *
+ * The new key is only checked against the two neighbouring items of the
+ * same leaf: it must sort strictly after the previous item and strictly
+ * before the next one, otherwise -1 is returned and nothing is changed.
+ * Keeping the order valid beyond that is up to the caller.
+ *
+ * Returns 0 on success.  When the first item of the leaf is changed the
+ * keys in the parent nodes are updated via btrfs_fixup_low_keys().
+ */
+int btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+			    struct btrfs_key *new_key)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	int item_slot = path->slots[0];
+	struct btrfs_disk_key disk_key;
+
+	if (item_slot > 0) {
+		btrfs_item_key(leaf, &disk_key, item_slot - 1);
+		if (!(btrfs_comp_keys(&disk_key, new_key) < 0))
+			return -1;
+	}
+	if (item_slot < btrfs_header_nritems(leaf) - 1) {
+		btrfs_item_key(leaf, &disk_key, item_slot + 1);
+		if (!(btrfs_comp_keys(&disk_key, new_key) > 0))
+			return -1;
+	}
+
+	btrfs_cpu_key_to_disk(&disk_key, new_key);
+	btrfs_set_item_key(leaf, &disk_key, item_slot);
+	btrfs_mark_buffer_dirty(leaf);
+
+	if (item_slot == 0)
+		btrfs_fixup_low_keys(root, path, &disk_key, 1);
+
+	return 0;
+}
+
+/*
+ * Overwrite the key of the item at path->slots[0] in the leaf
+ * path->nodes[0] without any ordering checks.  This is meant to be called
+ * by fsck only.
+ *
+ * When the item is the first one in its leaf the keys in the parent nodes
+ * are updated via btrfs_fixup_low_keys().
+ */
+void btrfs_set_item_key_unsafe(struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct btrfs_key *new_key)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	int item_slot = path->slots[0];
+	struct btrfs_disk_key on_disk;
+
+	btrfs_cpu_key_to_disk(&on_disk, new_key);
+	btrfs_set_item_key(leaf, &on_disk, item_slot);
+	btrfs_mark_buffer_dirty(leaf);
+
+	if (item_slot != 0)
+		return;
+
+	btrfs_fixup_low_keys(root, path, &on_disk, 1);
+}
+
+/*
+ * Move key pointers from the front of 'src' to the end of 'dst', its left
+ * neighbour in the tree.
+ *
+ * With 'empty' set, as many pointers as fit are moved, possibly all of
+ * them; if not all fit and fewer than 8 would stay behind, 8 fewer are
+ * moved (or nothing at all when that leaves no more than 0 to move).
+ * Without 'empty', 'src' always keeps at least 8 pointers and nothing is
+ * moved if it has 8 or fewer.
+ *
+ * returns 0 if some ptrs were pushed left and 1 if nothing was moved
+ * (no room in 'dst' or too few pointers in 'src').
+ */
+static int push_node_left(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct extent_buffer *dst,
+			  struct extent_buffer *src, int empty)
+{
+	int src_count = btrfs_header_nritems(src);
+	int dst_count = btrfs_header_nritems(dst);
+	int nr_move = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_count;
+
+	WARN_ON(btrfs_header_generation(src) != trans->transid);
+	WARN_ON(btrfs_header_generation(dst) != trans->transid);
+
+	if ((!empty && src_count <= 8) || nr_move <= 0)
+		return 1;
+
+	if (!empty) {
+		nr_move = min(src_count - 8, nr_move);
+	} else if (src_count <= nr_move) {
+		/* everything fits, src will end up empty */
+		nr_move = src_count;
+	} else if (src_count - nr_move < 8) {
+		/*
+		 * leave at least 8 pointers in the node if
+		 * we aren't going to empty it
+		 */
+		if (nr_move <= 8)
+			return 1;
+		nr_move -= 8;
+	}
+
+	/* append the first nr_move pointers of src to dst */
+	copy_extent_buffer(dst, src,
+			   btrfs_node_key_ptr_offset(dst_count),
+			   btrfs_node_key_ptr_offset(0),
+			   nr_move * sizeof(struct btrfs_key_ptr));
+
+	/* close the gap at the front of src */
+	if (nr_move < src_count) {
+		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
+				      btrfs_node_key_ptr_offset(nr_move),
+				      (src_count - nr_move) *
+				      sizeof(struct btrfs_key_ptr));
+	}
+
+	btrfs_set_header_nritems(src, src_count - nr_move);
+	btrfs_set_header_nritems(dst, dst_count + nr_move);
+	btrfs_mark_buffer_dirty(src);
+	btrfs_mark_buffer_dirty(dst);
+
+	return 0;
+}
+
+/*
+ * Move key pointers from the end of 'src' to the front of 'dst', its right
+ * neighbour in the tree.
+ *
+ * At most src_nritems / 2 + 1 pointers are moved, further limited by the
+ * free room in 'dst'; 'src' is never emptied.
+ *
+ * returns 0 if some ptrs were pushed and 1 if nothing was moved: no room
+ * in 'dst', fewer than 4 pointers in 'src', or the move would empty 'src'.
+ */
+static int balance_node_right(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct extent_buffer *dst,
+			      struct extent_buffer *src)
+{
+	int src_count;
+	int dst_count;
+	int nr_move;
+	int limit;
+
+	WARN_ON(btrfs_header_generation(src) != trans->transid);
+	WARN_ON(btrfs_header_generation(dst) != trans->transid);
+
+	src_count = btrfs_header_nritems(src);
+	dst_count = btrfs_header_nritems(dst);
+	nr_move = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_count;
+
+	if (nr_move <= 0 || src_count < 4)
+		return 1;
+
+	limit = src_count / 2 + 1;
+	/* don't try to empty the node */
+	if (limit >= src_count)
+		return 1;
+
+	if (nr_move > limit)
+		nr_move = limit;
+
+	/* open a gap of nr_move pointers at the front of dst */
+	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(nr_move),
+			      btrfs_node_key_ptr_offset(0),
+			      (dst_count) *
+			      sizeof(struct btrfs_key_ptr));
+
+	/* fill the gap with the last nr_move pointers of src */
+	copy_extent_buffer(dst, src,
+			   btrfs_node_key_ptr_offset(0),
+			   btrfs_node_key_ptr_offset(src_count - nr_move),
+			   nr_move * sizeof(struct btrfs_key_ptr));
+
+	btrfs_set_header_nritems(src, src_count - nr_move);
+	btrfs_set_header_nritems(dst, dst_count + nr_move);
+
+	btrfs_mark_buffer_dirty(src);
+	btrfs_mark_buffer_dirty(dst);
+
+	return 0;
+}
+
+/*
+ * helper function to insert a new root level in the tree.
+ * A new node is allocated, and a single item is inserted to
+ * point to the existing root
+ *
+ * 'level' is the level of the new root; path->nodes[level] must be empty
+ * and path->nodes[level - 1] must be the current root->node (both are
+ * enforced with BUG_ON). On success root->node is the new block and the
+ * path holds its own reference to it at slot 0.
+ *
+ * returns zero on success or < 0 on failure.
+ */
+static int noinline insert_new_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int level)
+{
+ u64 lower_gen;
+ struct extent_buffer *lower;
+ struct extent_buffer *c;
+ struct extent_buffer *old;
+ struct btrfs_disk_key lower_key;
+
+ BUG_ON(path->nodes[level]);
+ BUG_ON(path->nodes[level-1] != root->node);
+
+ lower = path->nodes[level-1];
+ if (level == 1) /* the old root is a leaf: its first key is an item key */
+ btrfs_item_key(lower, &lower_key, 0);
+ else
+ btrfs_node_key(lower, &lower_key, 0);
+
+ c = btrfs_alloc_free_block(trans, root, root->nodesize,
+ root->root_key.objectid, &lower_key,
+ level, root->node->start, 0);
+
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+
+ memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); /* start from a zeroed header */
+ btrfs_set_header_nritems(c, 1);
+ btrfs_set_header_level(c, level);
+ btrfs_set_header_bytenr(c, c->start);
+ btrfs_set_header_generation(c, trans->transid);
+ btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(c, root->root_key.objectid);
+
+ write_extent_buffer(c, root->fs_info->fsid,
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
+
+ write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
+ btrfs_header_chunk_tree_uuid(c),
+ BTRFS_UUID_SIZE);
+
+ btrfs_set_node_key(c, &lower_key, 0); /* the single pointer: old root, keyed by its first key */
+ btrfs_set_node_blockptr(c, 0, lower->start);
+ lower_gen = btrfs_header_generation(lower);
+ WARN_ON(lower_gen != trans->transid);
+
+ btrfs_set_node_ptr_generation(c, 0, lower_gen);
+
+ btrfs_mark_buffer_dirty(c);
+
+ old = root->node;
+ root->node = c;
+
+ /* the super has an extra ref to root->node */
+ free_extent_buffer(old);
+
+ add_root_to_dirty_list(root);
+ extent_buffer_get(c); /* extra reference for the path entry below */
+ path->nodes[level] = c;
+ path->slots[level] = 0;
+ return 0;
+}
+
+/*
+ * Insert one key pointer into the node at path->nodes[level].
+ *
+ * 'slot' is the index the new pointer gets, 'key' its key and 'bytenr' the
+ * block it points to; the pointer generation is set to trans->transid.
+ * Existing pointers from 'slot' onwards are shifted up by one.  The node
+ * must exist, must not be full and 'slot' must not be past the end; each
+ * of these is enforced with BUG.
+ *
+ * returns zero (there is no failure path that returns)
+ */
+static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_path *path, struct btrfs_disk_key
+		      *key, u64 bytenr, int slot, int level)
+{
+	struct extent_buffer *node;
+	int count;
+
+	BUG_ON(!path->nodes[level]);
+	node = path->nodes[level];
+	count = btrfs_header_nritems(node);
+
+	if (slot > count)
+		BUG();
+	if (count == BTRFS_NODEPTRS_PER_BLOCK(root))
+		BUG();
+
+	/* make room unless the pointer is appended at the end */
+	if (slot != count) {
+		memmove_extent_buffer(node,
+			      btrfs_node_key_ptr_offset(slot + 1),
+			      btrfs_node_key_ptr_offset(slot),
+			      (count - slot) * sizeof(struct btrfs_key_ptr));
+	}
+
+	btrfs_set_node_key(node, key, slot);
+	btrfs_set_node_blockptr(node, slot, bytenr);
+	WARN_ON(trans->transid == 0);
+	btrfs_set_node_ptr_generation(node, slot, trans->transid);
+	btrfs_set_header_nritems(node, count + 1);
+	btrfs_mark_buffer_dirty(node);
+
+	return 0;
+}
+
+/*
+ * split the node at the specified level in path in two.
+ * The path is corrected to point to the appropriate node after the split
+ *
+ * Before splitting this tries to make some room in the node by pushing
+ * left and right, if either one works, it returns right away.
+ * When the node is the tree root a new root level is inserted first.
+ *
+ * The upper part of the pointers, starting at index (nritems + 1) / 2,
+ * moves into a newly allocated block which is linked into the parent right
+ * after the original node.
+ *
+ * returns 0 on success and < 0 on failure
+ */
+static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, int level)
+{
+ struct extent_buffer *c;
+ struct extent_buffer *split;
+ struct btrfs_disk_key disk_key;
+ int mid;
+ int ret;
+ int wret;
+ u32 c_nritems;
+
+ c = path->nodes[level];
+ WARN_ON(btrfs_header_generation(c) != trans->transid);
+ if (c == root->node) {
+ /* trying to split the root, lets make a new one */
+ ret = insert_new_root(trans, root, path, level + 1);
+ if (ret)
+ return ret;
+ } else {
+ ret = push_nodes_for_insert(trans, root, path, level);
+ c = path->nodes[level]; /* the push may have re-pointed the path */
+ if (!ret && btrfs_header_nritems(c) <
+ BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
+ return 0;
+ if (ret < 0)
+ return ret;
+ }
+
+ c_nritems = btrfs_header_nritems(c);
+ mid = (c_nritems + 1) / 2;
+ btrfs_node_key(c, &disk_key, mid); /* first key of the new block */
+
+ split = btrfs_alloc_free_block(trans, root, root->nodesize,
+ root->root_key.objectid,
+ &disk_key, level, c->start, 0);
+ if (IS_ERR(split))
+ return PTR_ERR(split);
+
+ memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
+ btrfs_set_header_level(split, btrfs_header_level(c));
+ btrfs_set_header_bytenr(split, split->start);
+ btrfs_set_header_generation(split, trans->transid);
+ btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(split, root->root_key.objectid);
+ write_extent_buffer(split, root->fs_info->fsid,
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
+ write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
+ btrfs_header_chunk_tree_uuid(split),
+ BTRFS_UUID_SIZE);
+
+
+ copy_extent_buffer(split, c,
+ btrfs_node_key_ptr_offset(0),
+ btrfs_node_key_ptr_offset(mid),
+ (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
+ btrfs_set_header_nritems(split, c_nritems - mid);
+ btrfs_set_header_nritems(c, mid);
+ ret = 0;
+
+ btrfs_mark_buffer_dirty(c);
+ btrfs_mark_buffer_dirty(split);
+
+ wret = insert_ptr(trans, root, path, &disk_key, split->start,
+ path->slots[level + 1] + 1,
+ level + 1);
+ if (wret)
+ ret = wret;
+
+ if (path->slots[level] >= mid) { /* the slot we were heading for moved into the new block */
+ path->slots[level] -= mid;
+ free_extent_buffer(c);
+ path->nodes[level] = split;
+ path->slots[level + 1] += 1;
+ } else {
+ free_extent_buffer(split);
+ }
+ return ret;
+}
+
+/*
+ * Number of bytes taken up in leaf 'l' by 'nr' items starting at item
+ * 'start' (the range is clamped to the number of items in the leaf).
+ * The total covers the item data as well as the struct btrfs_item headers.
+ * Returns 0 when 'nr' is 0.
+ */
+static int leaf_space_used(struct extent_buffer *l, int start, int nr)
+{
+	int item_count = btrfs_header_nritems(l);
+	int last = min(item_count, start + nr) - 1;
+	int bytes;
+
+	if (nr == 0)
+		return 0;
+
+	/* data spans from the offset of the last item to the end of the first */
+	bytes = btrfs_item_end_nr(l, start);
+	bytes = bytes - btrfs_item_offset_nr(l, last);
+	/* plus one item header per item */
+	bytes += sizeof(struct btrfs_item) * nr;
+	WARN_ON(bytes < 0);
+
+	return bytes;
+}
+
+/*
+ * Free bytes left in 'leaf' for new items and their data: the leaf
+ * capacity minus what leaf_space_used() reports for all items.
+ *
+ * The capacity is BTRFS_LEAF_DATA_SIZE(root), or leaf->len when 'root' is
+ * NULL.  A negative result is reported with printk and still returned.
+ */
+int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
+{
+	u32 capacity;
+	int item_count;
+	int free_bytes;
+
+	if (root)
+		capacity = BTRFS_LEAF_DATA_SIZE(root);
+	else
+		capacity = leaf->len;
+
+	item_count = btrfs_header_nritems(leaf);
+	free_bytes = capacity - leaf_space_used(leaf, 0, item_count);
+	if (free_bytes >= 0)
+		return free_bytes;
+
+	printk("leaf free space ret %d, leaf data size %u, used %d nritems %d\n",
+	       free_bytes, capacity, leaf_space_used(leaf, 0, item_count),
+	       item_count);
+	return free_bytes;
+}
+
+/*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * Items are taken from the end of the path leaf (path->nodes[0]) and
+ * placed at the front of its right sibling, which is COWed first. With
+ * 'empty' set every item may be moved; otherwise item 0 always stays in
+ * the left leaf. If the path slot ends up in the right leaf, the path is
+ * re-pointed there.
+ *
+ * returns 1 if the push failed because the other node didn't have enough
+ * room, 0 if everything worked out and < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, int data_size,
+ int empty)
+{
+ struct extent_buffer *left = path->nodes[0];
+ struct extent_buffer *right;
+ struct extent_buffer *upper;
+ struct btrfs_disk_key disk_key;
+ int slot;
+ u32 i;
+ int free_space;
+ int push_space = 0;
+ int push_items = 0;
+ struct btrfs_item *item;
+ u32 left_nritems;
+ u32 nr;
+ u32 right_nritems;
+ u32 data_end;
+ u32 this_item_size;
+ int ret;
+
+ slot = path->slots[1];
+ if (!path->nodes[1]) {
+ return 1;
+ }
+ upper = path->nodes[1];
+ if (slot >= btrfs_header_nritems(upper) - 1) /* already the last child, no right sibling */
+ return 1;
+
+ right = read_node_slot(root, upper, slot + 1);
+ if (!extent_buffer_uptodate(right)) {
+ if (IS_ERR(right))
+ return PTR_ERR(right);
+ return -EIO;
+ }
+ free_space = btrfs_leaf_free_space(root, right);
+ if (free_space < data_size) {
+ free_extent_buffer(right);
+ return 1;
+ }
+
+ /* cow and double check */
+ ret = btrfs_cow_block(trans, root, right, upper,
+ slot + 1, &right);
+ if (ret) {
+ free_extent_buffer(right);
+ return 1; /* a failed COW is reported as "could not push" */
+ }
+ free_space = btrfs_leaf_free_space(root, right);
+ if (free_space < data_size) {
+ free_extent_buffer(right);
+ return 1;
+ }
+
+ left_nritems = btrfs_header_nritems(left);
+ if (left_nritems == 0) {
+ free_extent_buffer(right);
+ return 1;
+ }
+
+ if (empty)
+ nr = 0;
+ else
+ nr = 1; /* lowest item index that may be moved */
+
+ i = left_nritems - 1;
+ while (i >= nr) { /* count, from the last item down, how many items fit */
+ item = btrfs_item_nr(i);
+
+ if (path->slots[0] == i) /* reserve room for the pending insert at the path slot */
+ push_space += data_size + sizeof(*item);
+
+ this_item_size = btrfs_item_size(left, item);
+ if (this_item_size + sizeof(*item) + push_space > free_space)
+ break;
+ push_items++;
+ push_space += this_item_size + sizeof(*item);
+ if (i == 0) /* i is unsigned: stop before it would wrap */
+ break;
+ i--;
+ }
+
+ if (push_items == 0) {
+ free_extent_buffer(right);
+ return 1;
+ }
+
+ if (!empty && push_items == left_nritems)
+ WARN_ON(1);
+
+ /* push left to right */
+ right_nritems = btrfs_header_nritems(right);
+
+ push_space = btrfs_item_end_nr(left, left_nritems - push_items); /* from here on: bytes of item data to move */
+ push_space -= leaf_data_end(root, left);
+
+ /* make room in the right data area */
+ data_end = leaf_data_end(root, right);
+ memmove_extent_buffer(right,
+ btrfs_leaf_data(right) + data_end - push_space,
+ btrfs_leaf_data(right) + data_end,
+ BTRFS_LEAF_DATA_SIZE(root) - data_end);
+
+ /* copy from the left data area */
+ copy_extent_buffer(right, left, btrfs_leaf_data(right) +
+ BTRFS_LEAF_DATA_SIZE(root) - push_space,
+ btrfs_leaf_data(left) + leaf_data_end(root, left),
+ push_space);
+
+ memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
+ btrfs_item_nr_offset(0),
+ right_nritems * sizeof(struct btrfs_item));
+
+ /* copy the items from left to right */
+ copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
+ btrfs_item_nr_offset(left_nritems - push_items),
+ push_items * sizeof(struct btrfs_item));
+
+ /* update the item pointers */
+ right_nritems += push_items;
+ btrfs_set_header_nritems(right, right_nritems);
+ push_space = BTRFS_LEAF_DATA_SIZE(root); /* reused as a running data offset, counting down from the end */
+ for (i = 0; i < right_nritems; i++) {
+ item = btrfs_item_nr(i);
+ push_space -= btrfs_item_size(right, item);
+ btrfs_set_item_offset(right, item, push_space);
+ }
+
+ left_nritems -= push_items;
+ btrfs_set_header_nritems(left, left_nritems);
+
+ if (left_nritems)
+ btrfs_mark_buffer_dirty(left);
+ btrfs_mark_buffer_dirty(right);
+
+ btrfs_item_key(right, &disk_key, 0);
+ btrfs_set_node_key(upper, &disk_key, slot + 1);
+ btrfs_mark_buffer_dirty(upper);
+
+ /* then fixup the leaf pointer in the path */
+ if (path->slots[0] >= left_nritems) {
+ path->slots[0] -= left_nritems;
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = right;
+ path->slots[1] += 1;
+ } else {
+ free_extent_buffer(right);
+ }
+ return 0;
+}
+/*
+ * push some data in the path leaf to the left, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * Items are taken from the front of the path leaf (path->nodes[0]) and
+ * appended to its left sibling, which is COWed first.  With 'empty' set
+ * every item may be moved; otherwise the last item always stays in the
+ * right leaf.  If the path slot ends up in the left leaf, the path is
+ * re-pointed there.
+ *
+ * returns 1 when nothing could be pushed: there is no left sibling, it
+ * cannot be read or COWed, or it lacks the room.
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+			  *root, struct btrfs_path *path, int data_size,
+			  int empty)
+{
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *right = path->nodes[0];
+	struct extent_buffer *left;
+	int slot;
+	int i;
+	int free_space;
+	int push_space = 0;
+	int push_items = 0;
+	struct btrfs_item *item;
+	u32 old_left_nritems;
+	u32 right_nritems;
+	u32 nr;
+	int ret = 0;
+	u32 this_item_size;
+	u32 old_left_item_size;
+
+	slot = path->slots[1];
+	if (slot == 0)
+		return 1;
+	if (!path->nodes[1])
+		return 1;
+
+	right_nritems = btrfs_header_nritems(right);
+	if (right_nritems == 0) {
+		return 1;
+	}
+
+	left = read_node_slot(root, path->nodes[1], slot - 1);
+	if (!extent_buffer_uptodate(left)) {
+		/*
+		 * the left leaf could not be read; that isn't fatal here,
+		 * the caller simply has to do without the push.  The result
+		 * may be NULL or an error pointer, so only drop a reference
+		 * we really hold.
+		 */
+		if (left && !IS_ERR(left))
+			free_extent_buffer(left);
+		return 1;
+	}
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		free_extent_buffer(left);
+		return 1;
+	}
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, left,
+			      path->nodes[1], slot - 1, &left);
+	if (ret) {
+		/* we hit -ENOSPC, but it isn't fatal here */
+		free_extent_buffer(left);
+		return 1;
+	}
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		free_extent_buffer(left);
+		return 1;
+	}
+
+	/* number of leading items that are candidates for the move */
+	if (empty)
+		nr = right_nritems;
+	else
+		nr = right_nritems - 1;
+
+	for (i = 0; i < nr; i++) {
+		item = btrfs_item_nr(i);
+
+		/* reserve room for the pending insert at the path slot */
+		if (path->slots[0] == i)
+			push_space += data_size + sizeof(*item);
+
+		this_item_size = btrfs_item_size(right, item);
+		if (this_item_size + sizeof(*item) + push_space > free_space)
+			break;
+
+		push_items++;
+		push_space += this_item_size + sizeof(*item);
+	}
+
+	if (push_items == 0) {
+		free_extent_buffer(left);
+		return 1;
+	}
+	if (!empty && push_items == btrfs_header_nritems(right))
+		WARN_ON(1);
+
+	/* push data from right to left */
+	copy_extent_buffer(left, right,
+			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
+			   btrfs_item_nr_offset(0),
+			   push_items * sizeof(struct btrfs_item));
+
+	/* from here on push_space is the number of data bytes to move */
+	push_space = BTRFS_LEAF_DATA_SIZE(root) -
+		     btrfs_item_offset_nr(right, push_items -1);
+
+	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
+			   leaf_data_end(root, left) - push_space,
+			   btrfs_leaf_data(right) +
+			   btrfs_item_offset_nr(right, push_items - 1),
+			   push_space);
+	old_left_nritems = btrfs_header_nritems(left);
+	BUG_ON(old_left_nritems == 0);
+
+	/* rebase the offsets of the moved items onto the left leaf's data */
+	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
+	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
+		u32 ioff;
+
+		item = btrfs_item_nr(i);
+		ioff = btrfs_item_offset(left, item);
+		btrfs_set_item_offset(left, item,
+		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+	}
+	btrfs_set_header_nritems(left, old_left_nritems + push_items);
+
+	/* fixup right node */
+	if (push_items > right_nritems) {
+		printk("push items %d nr %u\n", push_items, right_nritems);
+		WARN_ON(1);
+	}
+
+	if (push_items < right_nritems) {
+		push_space = btrfs_item_offset_nr(right, push_items - 1) -
+			     leaf_data_end(root, right);
+		memmove_extent_buffer(right, btrfs_leaf_data(right) +
+				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
+				      btrfs_leaf_data(right) +
+				      leaf_data_end(root, right), push_space);
+
+		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
+			      btrfs_item_nr_offset(push_items),
+			     (btrfs_header_nritems(right) - push_items) *
+			     sizeof(struct btrfs_item));
+	}
+	right_nritems -= push_items;
+	btrfs_set_header_nritems(right, right_nritems);
+	/* recompute the data offsets of what is left, counting down from the end */
+	push_space = BTRFS_LEAF_DATA_SIZE(root);
+	for (i = 0; i < right_nritems; i++) {
+		item = btrfs_item_nr(i);
+		push_space = push_space - btrfs_item_size(right, item);
+		btrfs_set_item_offset(right, item, push_space);
+	}
+
+	btrfs_mark_buffer_dirty(left);
+	if (right_nritems)
+		btrfs_mark_buffer_dirty(right);
+
+	btrfs_item_key(right, &disk_key, 0);
+	btrfs_fixup_low_keys(root, path, &disk_key, 1);
+
+	/* then fixup the leaf pointer in the path */
+	if (path->slots[0] < push_items) {
+		path->slots[0] += old_left_nritems;
+		free_extent_buffer(path->nodes[0]);
+		path->nodes[0] = left;
+		path->slots[1] -= 1;
+	} else {
+		free_extent_buffer(left);
+		path->slots[0] -= push_items;
+	}
+	BUG_ON(path->slots[0] < 0);
+	return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *l,
+ struct extent_buffer *right,
+ int slot, int mid, int nritems)
+{
+ int data_copy_size;
+ int rt_data_off;
+ int i;
+ int ret = 0;
+ int wret;
+ struct btrfs_disk_key disk_key;
+
+ nritems = nritems - mid;
+ btrfs_set_header_nritems(right, nritems);
+ data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+ copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+ btrfs_item_nr_offset(mid),
+ nritems * sizeof(struct btrfs_item));
+
+ copy_extent_buffer(right, l,
+ btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+ data_copy_size, btrfs_leaf_data(l) +
+ leaf_data_end(root, l), data_copy_size);
+
+ rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+ btrfs_item_end_nr(l, mid);
+
+ for (i = 0; i < nritems; i++) {
+ struct btrfs_item *item = btrfs_item_nr(i);
+ u32 ioff = btrfs_item_offset(right, item);
+ btrfs_set_item_offset(right, item, ioff + rt_data_off);
+ }
+
+ btrfs_set_header_nritems(l, mid);
+ ret = 0;
+ btrfs_item_key(right, &disk_key, 0);
+ wret = insert_ptr(trans, root, path, &disk_key, right->start,
+ path->slots[1] + 1, 1);
+ if (wret)
+ ret = wret;
+
+ btrfs_mark_buffer_dirty(right);
+ btrfs_mark_buffer_dirty(l);
+ BUG_ON(path->slots[0] != slot);
+
+ if (mid <= slot) {
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = right;
+ path->slots[0] -= mid;
+ path->slots[1] += 1;
+ } else {
+ free_extent_buffer(right);
+ }
+
+ BUG_ON(path->slots[0] < 0);
+
+ return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int split_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_key *ins_key,
+ struct btrfs_path *path, int data_size,
+ int extend)
+{
+ struct btrfs_disk_key disk_key;
+ struct extent_buffer *l;
+ u32 nritems;
+ int mid;
+ int slot;
+ struct extent_buffer *right;
+ int ret = 0;
+ int wret;
+ int split;
+ int num_doubles = 0;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (extend && data_size + btrfs_item_size_nr(l, slot) +
+ sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
+ return -EOVERFLOW;
+
+ /* first try to make some room by pushing left and right */
+ if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
+ wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (wret < 0)
+ return wret;
+ if (wret) {
+ wret = push_leaf_left(trans, root, path, data_size, 0);
+ if (wret < 0)
+ return wret;
+ }
+ l = path->nodes[0];
+
+ /* did the pushes work? */
+ if (btrfs_leaf_free_space(root, l) >= data_size)
+ return 0;
+ }
+
+ if (!path->nodes[1]) {
+ ret = insert_new_root(trans, root, path, 1);
+ if (ret)
+ return ret;
+ }
+again:
+ split = 1;
+ l = path->nodes[0];
+ slot = path->slots[0];
+ nritems = btrfs_header_nritems(l);
+ mid = (nritems + 1) / 2;
+
+ if (mid <= slot) {
+ if (nritems == 1 ||
+ leaf_space_used(l, mid, nritems - mid) + data_size >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ if (slot >= nritems) {
+ split = 0;
+ } else {
+ mid = slot;
+ if (mid != nritems &&
+ leaf_space_used(l, mid, nritems - mid) +
+ data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ split = 2;
+ }
+ }
+ }
+ } else {
+ if (leaf_space_used(l, 0, mid) + data_size >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ if (!extend && data_size && slot == 0) {
+ split = 0;
+ } else if ((extend || !data_size) && slot == 0) {
+ mid = 1;
+ } else {
+ mid = slot;
+ if (mid != nritems &&
+ leaf_space_used(l, mid, nritems - mid) +
+ data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ split = 2 ;
+ }
+ }
+ }
+ }
+
+ if (split == 0)
+ btrfs_cpu_key_to_disk(&disk_key, ins_key);
+ else
+ btrfs_item_key(l, &disk_key, mid);
+
+ right = btrfs_alloc_free_block(trans, root, root->leafsize,
+ root->root_key.objectid,
+ &disk_key, 0, l->start, 0);
+ if (IS_ERR(right)) {
+ BUG_ON(1);
+ return PTR_ERR(right);
+ }
+
+ memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
+ btrfs_set_header_bytenr(right, right->start);
+ btrfs_set_header_generation(right, trans->transid);
+ btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(right, root->root_key.objectid);
+ btrfs_set_header_level(right, 0);
+ write_extent_buffer(right, root->fs_info->fsid,
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
+
+ write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
+ btrfs_header_chunk_tree_uuid(right),
+ BTRFS_UUID_SIZE);
+
+ if (split == 0) {
+ if (mid <= slot) {
+ btrfs_set_header_nritems(right, 0);
+ wret = insert_ptr(trans, root, path,
+ &disk_key, right->start,
+ path->slots[1] + 1, 1);
+ if (wret)
+ ret = wret;
+
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = right;
+ path->slots[0] = 0;
+ path->slots[1] += 1;
+ } else {
+ btrfs_set_header_nritems(right, 0);
+ wret = insert_ptr(trans, root, path,
+ &disk_key,
+ right->start,
+ path->slots[1], 1);
+ if (wret)
+ ret = wret;
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = right;
+ path->slots[0] = 0;
+ if (path->slots[1] == 0) {
+ btrfs_fixup_low_keys(root, path,
+ &disk_key, 1);
+ }
+ }
+ btrfs_mark_buffer_dirty(right);
+ return ret;
+ }
+
+ ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
+ BUG_ON(ret);
+
+ if (split == 2) {
+ BUG_ON(num_doubles != 0);
+ num_doubles++;
+ goto again;
+ }
+
+ return ret;
+}
+
+/*
+ * This function splits a single item into two items,
+ * giving 'new_key' to the new item and splitting the
+ * old one at split_offset (from the start of the item).
+ *
+ * The path may be released by this operation. After
+ * the split, the path is pointing to the old item. The
+ * new item is going to be in the same node as the old one.
+ *
+ * Note, the item being split must be smaller enough to live alone on
+ * a tree block with room for one extra struct btrfs_item
+ *
+ * This allows us to split the item in place, keeping a lock on the
+ * leaf the entire time.
+ */
+int btrfs_split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ unsigned long split_offset)
+{
+ u32 item_size;
+ struct extent_buffer *leaf;
+ struct btrfs_key orig_key;
+ struct btrfs_item *item;
+ struct btrfs_item *new_item;
+ int ret = 0;
+ int slot;
+ u32 nritems;
+ u32 orig_offset;
+ struct btrfs_disk_key disk_key;
+ char *buf;
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
+ if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
+ goto split;
+
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ btrfs_release_path(path);
+
+ path->search_for_split = 1;
+
+ ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
+ path->search_for_split = 0;
+
+ /* if our item isn't there or got smaller, return now */
+ if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
+ path->slots[0])) {
+ return -EAGAIN;
+ }
+
+ ret = split_leaf(trans, root, &orig_key, path, 0, 0);
+ BUG_ON(ret);
+
+ BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+ leaf = path->nodes[0];
+
+split:
+ item = btrfs_item_nr(path->slots[0]);
+ orig_offset = btrfs_item_offset(leaf, item);
+ item_size = btrfs_item_size(leaf, item);
+
+
+ buf = kmalloc(item_size, GFP_NOFS);
+ BUG_ON(!buf);
+ read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
+ path->slots[0]), item_size);
+ slot = path->slots[0] + 1;
+ leaf = path->nodes[0];
+
+ nritems = btrfs_header_nritems(leaf);
+
+ if (slot != nritems) {
+ /* shift the items */
+ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
+ btrfs_item_nr_offset(slot),
+ (nritems - slot) * sizeof(struct btrfs_item));
+
+ }
+
+ btrfs_cpu_key_to_disk(&disk_key, new_key);
+ btrfs_set_item_key(leaf, &disk_key, slot);
+
+ new_item = btrfs_item_nr(slot);
+
+ btrfs_set_item_offset(leaf, new_item, orig_offset);
+ btrfs_set_item_size(leaf, new_item, item_size - split_offset);
+
+ btrfs_set_item_offset(leaf, item,
+ orig_offset + item_size - split_offset);
+ btrfs_set_item_size(leaf, item, split_offset);
+
+ btrfs_set_header_nritems(leaf, nritems + 1);
+
+ /* write the data for the start of the original item */
+ write_extent_buffer(leaf, buf,
+ btrfs_item_ptr_offset(leaf, path->slots[0]),
+ split_offset);
+
+ /* write the data for the new item */
+ write_extent_buffer(leaf, buf + split_offset,
+ btrfs_item_ptr_offset(leaf, slot),
+ item_size - split_offset);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = 0;
+ if (btrfs_leaf_free_space(root, leaf) < 0) {
+ btrfs_print_leaf(root, leaf);
+ BUG();
+ }
+ kfree(buf);
+ return ret;
+}
+
+ /*
+ * Shrink the item at path->slots[0] of the path's leaf to new_size bytes.
+ *
+ * from_end != 0: the first new_size bytes of the item are kept.
+ * from_end == 0: the last new_size bytes are kept and the key offset of the
+ * item is increased by the number of bytes removed; for an inline
+ * BTRFS_EXTENT_DATA_KEY item the leading part of the file extent item (up
+ * to 'disk_bytenr') is moved in front of the remaining data first.
+ *
+ * The data of this item and of every later item in the leaf is shifted
+ * towards the end of the leaf by the number of bytes removed.
+ *
+ * returns 0; an out of range slot or a leaf that ends up with negative free
+ * space triggers BUG.
+ */
+ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+ 			struct btrfs_root *root,
+ 			struct btrfs_path *path,
+ 			u32 new_size, int from_end)
+ {
+ 	int ret = 0;
+ 	int slot;
+ 	struct extent_buffer *leaf;
+ 	struct btrfs_item *item;
+ 	u32 nritems;
+ 	unsigned int data_end;
+ 	unsigned int old_data_start;
+ 	unsigned int old_size;
+ 	unsigned int size_diff;
+ 	int i;
+
+ 	leaf = path->nodes[0];
+ 	slot = path->slots[0];
+ 	nritems = btrfs_header_nritems(leaf);
+
+ 	/* validate the slot before any item is read through it */
+ 	BUG_ON(slot < 0);
+ 	BUG_ON(slot >= nritems);
+
+ 	old_size = btrfs_item_size_nr(leaf, slot);
+ 	if (old_size == new_size)
+ 		return 0;
+
+ 	data_end = leaf_data_end(root, leaf);
+
+ 	old_data_start = btrfs_item_offset_nr(leaf, slot);
+
+ 	size_diff = old_size - new_size;
+
+ 	/*
+ 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
+ 	 */
+ 	/* first correct the data pointers */
+ 	for (i = slot; i < nritems; i++) {
+ 		u32 ioff;
+ 		item = btrfs_item_nr(i);
+ 		ioff = btrfs_item_offset(leaf, item);
+ 		btrfs_set_item_offset(leaf, item, ioff + size_diff);
+ 	}
+
+ 	/* shift the data */
+ 	if (from_end) {
+ 		/* move everything up to and including the kept head of the item */
+ 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ 			      data_end + size_diff, btrfs_leaf_data(leaf) +
+ 			      data_end, old_data_start + new_size - data_end);
+ 	} else {
+ 		struct btrfs_disk_key disk_key;
+ 		u64 offset;
+
+ 		btrfs_item_key(leaf, &disk_key, slot);
+
+ 		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
+ 			unsigned long ptr;
+ 			struct btrfs_file_extent_item *fi;
+
+ 			/*
+ 			 * the item offset was already advanced above, so step
+ 			 * back by size_diff to reach the old start of the item
+ 			 */
+ 			fi = btrfs_item_ptr(leaf, slot,
+ 					    struct btrfs_file_extent_item);
+ 			fi = (struct btrfs_file_extent_item *)(
+ 			     (unsigned long)fi - size_diff);
+
+ 			if (btrfs_file_extent_type(leaf, fi) ==
+ 			    BTRFS_FILE_EXTENT_INLINE) {
+ 				ptr = btrfs_item_ptr_offset(leaf, slot);
+ 				memmove_extent_buffer(leaf, ptr,
+ 					(unsigned long)fi,
+ 					offsetof(struct btrfs_file_extent_item,
+ 						 disk_bytenr));
+ 			}
+ 		}
+
+ 		/* only the data of the later items moves; the kept tail stays */
+ 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ 			      data_end + size_diff, btrfs_leaf_data(leaf) +
+ 			      data_end, old_data_start - data_end);
+
+ 		offset = btrfs_disk_key_offset(&disk_key);
+ 		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
+ 		btrfs_set_item_key(leaf, &disk_key, slot);
+ 		if (slot == 0)
+ 			btrfs_fixup_low_keys(root, path, &disk_key, 1);
+ 	}
+
+ 	item = btrfs_item_nr(slot);
+ 	btrfs_set_item_size(leaf, item, new_size);
+ 	btrfs_mark_buffer_dirty(leaf);
+
+ 	ret = 0;
+ 	if (btrfs_leaf_free_space(root, leaf) < 0) {
+ 		btrfs_print_leaf(root, leaf);
+ 		BUG();
+ 	}
+ 	return ret;
+ }
+
+ /*
+ * Grow the item at path->slots[0] of the path's leaf by data_size bytes.
+ *
+ * The data of this item and of every later item in the leaf is shifted
+ * data_size bytes towards the start of the data area, and the item size is
+ * increased accordingly. The leaf must already have data_size bytes free.
+ *
+ * returns 0; a leaf without enough free space or an out of range slot
+ * triggers BUG.
+ */
+ int btrfs_extend_item(struct btrfs_trans_handle *trans,
+ 		      struct btrfs_root *root, struct btrfs_path *path,
+ 		      u32 data_size)
+ {
+ 	int ret = 0;
+ 	int slot;
+ 	int free_space;
+ 	struct extent_buffer *leaf;
+ 	struct btrfs_item *item;
+ 	u32 nritems;
+ 	unsigned int data_end;
+ 	unsigned int old_data;
+ 	unsigned int old_size;
+ 	int i;
+
+ 	leaf = path->nodes[0];
+ 	slot = path->slots[0];
+
+ 	nritems = btrfs_header_nritems(leaf);
+ 	data_end = leaf_data_end(root, leaf);
+
+ 	/*
+ 	 * The free space is a signed value. Test for a negative result on its
+ 	 * own: compared directly against the unsigned data_size it would be
+ 	 * converted to a huge number and slip through.
+ 	 */
+ 	free_space = btrfs_leaf_free_space(root, leaf);
+ 	if (free_space < 0 || (u32)free_space < data_size) {
+ 		btrfs_print_leaf(root, leaf);
+ 		BUG();
+ 	}
+
+ 	/* validate the slot before any item is read through it */
+ 	BUG_ON(slot < 0);
+ 	if (slot >= nritems) {
+ 		btrfs_print_leaf(root, leaf);
+ 		printk("slot %d too large, nritems %d\n", slot, nritems);
+ 		BUG_ON(1);
+ 	}
+ 	old_data = btrfs_item_end_nr(leaf, slot);
+
+ 	/*
+ 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
+ 	 */
+ 	/* first correct the data pointers */
+ 	for (i = slot; i < nritems; i++) {
+ 		u32 ioff;
+ 		item = btrfs_item_nr(i);
+ 		ioff = btrfs_item_offset(leaf, item);
+ 		btrfs_set_item_offset(leaf, item, ioff - data_size);
+ 	}
+
+ 	/* shift the data */
+ 	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ 		      data_end - data_size, btrfs_leaf_data(leaf) +
+ 		      data_end, old_data - data_end);
+
+ 	old_size = btrfs_item_size_nr(leaf, slot);
+ 	item = btrfs_item_nr(slot);
+ 	btrfs_set_item_size(leaf, item, old_size + data_size);
+ 	btrfs_mark_buffer_dirty(leaf);
+
+ 	ret = 0;
+ 	if (btrfs_leaf_free_space(root, leaf) < 0) {
+ 		btrfs_print_leaf(root, leaf);
+ 		BUG();
+ 	}
+ 	return ret;
+ }
+
+/*
+ * Given a key and some data, insert an item into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size,
+ int nr)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+ int ret = 0;
+ int slot;
+ int i;
+ u32 nritems;
+ u32 total_size = 0;
+ u32 total_data = 0;
+ unsigned int data_end;
+ struct btrfs_disk_key disk_key;
+
+ for (i = 0; i < nr; i++) {
+ total_data += data_size[i];
+ }
+
+ /* create a root if there isn't one */
+ if (!root->node)
+ BUG();
+
+ total_size = total_data + nr * sizeof(struct btrfs_item);
+ ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
+ if (ret == 0) {
+ return -EEXIST;
+ }
+ if (ret < 0)
+ goto out;
+
+ leaf = path->nodes[0];
+
+ nritems = btrfs_header_nritems(leaf);
+ data_end = leaf_data_end(root, leaf);
+
+ if (btrfs_leaf_free_space(root, leaf) < total_size) {
+ btrfs_print_leaf(root, leaf);
+ printk("not enough freespace need %u have %d\n",
+ total_size, btrfs_leaf_free_space(root, leaf));
+ BUG();
+ }
+
+ slot = path->slots[0];
+ BUG_ON(slot < 0);
+
+ if (slot != nritems) {
+ unsigned int old_data = btrfs_item_end_nr(leaf, slot);
+
+ if (old_data < data_end) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d old_data %d data_end %d\n",
+ slot, old_data, data_end);
+ BUG_ON(1);
+ }
+ /*
+ * item0..itemN ... dataN.offset..dataN.size .. data0.size
+ */
+ /* first correct the data pointers */
+ for (i = slot; i < nritems; i++) {
+ u32 ioff;
+
+ item = btrfs_item_nr(i);
+ ioff = btrfs_item_offset(leaf, item);
+ btrfs_set_item_offset(leaf, item, ioff - total_data);
+ }
+
+ /* shift the items */
+ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
+ btrfs_item_nr_offset(slot),
+ (nritems - slot) * sizeof(struct btrfs_item));
+
+ /* shift the data */
+ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ data_end - total_data, btrfs_leaf_data(leaf) +
+ data_end, old_data - data_end);
+ data_end = old_data;
+ }
+
+ /* setup the item for the new data */
+ for (i = 0; i < nr; i++) {
+ btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
+ btrfs_set_item_key(leaf, &disk_key, slot + i);
+ item = btrfs_item_nr(slot + i);
+ btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+ data_end -= data_size[i];
+ btrfs_set_item_size(leaf, item, data_size[i]);
+ }
+ btrfs_set_header_nritems(leaf, nritems + nr);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = 0;
+ if (slot == 0) {
+ btrfs_cpu_key_to_disk(&disk_key, cpu_key);
+ btrfs_fixup_low_keys(root, path, &disk_key, 1);
+ }
+
+ if (btrfs_leaf_free_space(root, leaf) < 0) {
+ btrfs_print_leaf(root, leaf);
+ BUG();
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * Given a key and some data, insert an item into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *cpu_key, void *data, u32
+ data_size)
+{
+ int ret = 0;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ unsigned long ptr;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+ if (!ret) {
+ leaf = path->nodes[0];
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ write_extent_buffer(leaf, data, ptr, data_size);
+ btrfs_mark_buffer_dirty(leaf);
+ }
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * delete the pointer from a given node.
+ *
+ * If the delete empties a node, the node is removed from the tree,
+ * continuing all the way the root if required. The root is converted into
+ * a leaf if all the nodes are emptied.
+ */
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot)
+{
+ struct extent_buffer *parent = path->nodes[level];
+ u32 nritems;
+ int ret = 0;
+
+ nritems = btrfs_header_nritems(parent);
+ if (slot != nritems -1) {
+ memmove_extent_buffer(parent,
+ btrfs_node_key_ptr_offset(slot),
+ btrfs_node_key_ptr_offset(slot + 1),
+ sizeof(struct btrfs_key_ptr) *
+ (nritems - slot - 1));
+ }
+ nritems--;
+ btrfs_set_header_nritems(parent, nritems);
+ if (nritems == 0 && parent == root->node) {
+ BUG_ON(btrfs_header_level(root->node) != 1);
+ /* just turn the root into a leaf and break */
+ btrfs_set_header_level(root->node, 0);
+ } else if (slot == 0) {
+ struct btrfs_disk_key disk_key;
+
+ btrfs_node_key(parent, &disk_key, 0);
+ btrfs_fixup_low_keys(root, path, &disk_key, level + 1);
+ }
+ btrfs_mark_buffer_dirty(parent);
+ return ret;
+}
+
+/*
+ * a helper function to delete the leaf pointed to by path->slots[1] and
+ * path->nodes[1].
+ *
+ * This deletes the pointer in path->nodes[1] and frees the leaf
+ * block extent. zero is returned if it all worked out, < 0 otherwise.
+ *
+ * The path must have already been setup for deleting the leaf, including
+ * all the proper balancing. path->nodes[1] must be locked.
+ */
+static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *leaf)
+{
+ int ret;
+
+ WARN_ON(btrfs_header_generation(leaf) != trans->transid);
+ ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
+ if (ret)
+ return ret;
+
+ ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
+ 0, root->root_key.objectid, 0, 0);
+ return ret;
+}
+
+/*
+ * delete the item at the leaf level in path. If that empties
+ * the leaf, remove it from the tree
+ */
+int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int slot, int nr)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+ int last_off;
+ int dsize = 0;
+ int ret = 0;
+ int wret;
+ int i;
+ u32 nritems;
+
+ leaf = path->nodes[0];
+ last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
+
+ for (i = 0; i < nr; i++)
+ dsize += btrfs_item_size_nr(leaf, slot + i);
+
+ nritems = btrfs_header_nritems(leaf);
+
+ if (slot + nr != nritems) {
+ int data_end = leaf_data_end(root, leaf);
+
+ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ data_end + dsize,
+ btrfs_leaf_data(leaf) + data_end,
+ last_off - data_end);
+
+ for (i = slot + nr; i < nritems; i++) {
+ u32 ioff;
+
+ item = btrfs_item_nr(i);
+ ioff = btrfs_item_offset(leaf, item);
+ btrfs_set_item_offset(leaf, item, ioff + dsize);
+ }
+
+ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
+ btrfs_item_nr_offset(slot + nr),
+ sizeof(struct btrfs_item) *
+ (nritems - slot - nr));
+ }
+ btrfs_set_header_nritems(leaf, nritems - nr);
+ nritems -= nr;
+
+ /* delete the leaf if we've emptied it */
+ if (nritems == 0) {
+ if (leaf == root->node) {
+ btrfs_set_header_level(leaf, 0);
+ } else {
+ clean_tree_block(trans, root, leaf);
+ wait_on_tree_block_writeback(root, leaf);
+
+ wret = btrfs_del_leaf(trans, root, path, leaf);
+ BUG_ON(ret);
+ if (wret)
+ ret = wret;
+ }
+ } else {
+ int used = leaf_space_used(leaf, 0, nritems);
+ if (slot == 0) {
+ struct btrfs_disk_key disk_key;
+
+ btrfs_item_key(leaf, &disk_key, 0);
+ btrfs_fixup_low_keys(root, path, &disk_key, 1);
+ }
+
+ /* delete the leaf if it is mostly empty */
+ if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
+ /* push_leaf_left fixes the path.
+ * make sure the path still points to our leaf
+ * for possible call to del_ptr below
+ */
+ slot = path->slots[1];
+ extent_buffer_get(leaf);
+
+ wret = push_leaf_left(trans, root, path, 1, 1);
+ if (wret < 0 && wret != -ENOSPC)
+ ret = wret;
+
+ if (path->nodes[0] == leaf &&
+ btrfs_header_nritems(leaf)) {
+ wret = push_leaf_right(trans, root, path, 1, 1);
+ if (wret < 0 && wret != -ENOSPC)
+ ret = wret;
+ }
+
+ if (btrfs_header_nritems(leaf) == 0) {
+ clean_tree_block(trans, root, leaf);
+ wait_on_tree_block_writeback(root, leaf);
+
+ path->slots[1] = slot;
+ ret = btrfs_del_leaf(trans, root, path, leaf);
+ BUG_ON(ret);
+ free_extent_buffer(leaf);
+
+ } else {
+ btrfs_mark_buffer_dirty(leaf);
+ free_extent_buffer(leaf);
+ }
+ } else {
+ btrfs_mark_buffer_dirty(leaf);
+ }
+ }
+ return ret;
+}
+
+/*
+ * walk up the tree as far as required to find the previous leaf.
+ * returns 0 if it found something or 1 if there are no lesser leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ int slot;
+ int level = 1;
+ struct extent_buffer *c;
+ struct extent_buffer *next = NULL;
+
+ while(level < BTRFS_MAX_LEVEL) {
+ if (!path->nodes[level])
+ return 1;
+
+ slot = path->slots[level];
+ c = path->nodes[level];
+ if (slot == 0) {
+ level++;
+ if (level == BTRFS_MAX_LEVEL)
+ return 1;
+ continue;
+ }
+ slot--;
+
+ next = read_node_slot(root, c, slot);
+ if (!extent_buffer_uptodate(next)) {
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+ return -EIO;
+ }
+ break;
+ }
+ path->slots[level] = slot;
+ while(1) {
+ level--;
+ c = path->nodes[level];
+ free_extent_buffer(c);
+ slot = btrfs_header_nritems(next);
+ if (slot != 0)
+ slot--;
+ path->nodes[level] = next;
+ path->slots[level] = slot;
+ if (!level)
+ break;
+ next = read_node_slot(root, next, slot);
+ if (!extent_buffer_uptodate(next)) {
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
+ * walk up the tree as far as required to find the next leaf.
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ int slot;
+ int level = 1;
+ struct extent_buffer *c;
+ struct extent_buffer *next = NULL;
+
+ while(level < BTRFS_MAX_LEVEL) {
+ if (!path->nodes[level])
+ return 1;
+
+ slot = path->slots[level] + 1;
+ c = path->nodes[level];
+ if (slot >= btrfs_header_nritems(c)) {
+ level++;
+ if (level == BTRFS_MAX_LEVEL)
+ return 1;
+ continue;
+ }
+
+ if (path->reada)
+ reada_for_search(root, path, level, slot, 0);
+
+ next = read_node_slot(root, c, slot);
+ if (!extent_buffer_uptodate(next))
+ return -EIO;
+ break;
+ }
+ path->slots[level] = slot;
+ while(1) {
+ level--;
+ c = path->nodes[level];
+ free_extent_buffer(c);
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ if (!level)
+ break;
+ if (path->reada)
+ reada_for_search(root, path, level, 0, 0);
+ next = read_node_slot(root, next, 0);
+ if (!extent_buffer_uptodate(next))
+ return -EIO;
+ }
+ return 0;
+}
+
+ /*
+ * Step backwards from the current path position to the previous item whose
+ * key type is 'type', without going below 'min_objectid'.
+ *
+ * The walk stops without a match when an item with a smaller objectid is
+ * reached, when an item of min_objectid with a key type below 'type' is
+ * reached, or when an empty leaf is met.
+ *
+ * returns 0 if something is found (the path points at it), 1 if nothing
+ * was found and < 0 on error.
+ */
+ int btrfs_previous_item(struct btrfs_root *root,
+ 			struct btrfs_path *path, u64 min_objectid,
+ 			int type)
+ {
+ 	struct btrfs_key found_key;
+ 	struct extent_buffer *leaf;
+ 	u32 nritems;
+ 	int ret;
+
+ 	while(1) {
+ 		if (path->slots[0] == 0) {
+ 			ret = btrfs_prev_leaf(root, path);
+ 			if (ret != 0)
+ 				return ret;
+ 		} else {
+ 			path->slots[0]--;
+ 		}
+ 		leaf = path->nodes[0];
+ 		nritems = btrfs_header_nritems(leaf);
+ 		/* an empty leaf has no key to look at */
+ 		if (nritems == 0)
+ 			return 1;
+ 		if (path->slots[0] == nritems)
+ 			path->slots[0]--;
+
+ 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ 		/* honour the lower bound instead of scanning the whole tree */
+ 		if (found_key.objectid < min_objectid)
+ 			break;
+ 		if (found_key.type == type)
+ 			return 0;
+ 		/* everything further back sorts below (min_objectid, type) */
+ 		if (found_key.objectid == min_objectid &&
+ 		    found_key.type < type)
+ 			break;
+ 	}
+ 	return 1;
+ }
+
+/*
+ * search in extent tree to find a previous Metadata/Data extent item with
+ * min objecitd.
+ *
+ * returns 0 if something is found, 1 if nothing was found and < 0 on error
+ */
+int btrfs_previous_extent_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid)
+{
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+ u32 nritems;
+ int ret;
+
+ while (1) {
+ if (path->slots[0] == 0) {
+ ret = btrfs_prev_leaf(root, path);
+ if (ret != 0)
+ return ret;
+ } else {
+ path->slots[0]--;
+ }
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ if (nritems == 0)
+ return 1;
+ if (path->slots[0] == nritems)
+ path->slots[0]--;
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid < min_objectid)
+ break;
+ if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
+ found_key.type == BTRFS_METADATA_ITEM_KEY)
+ return 0;
+ if (found_key.objectid == min_objectid &&
+ found_key.type < BTRFS_EXTENT_ITEM_KEY)
+ break;
+ }
+ return 1;
+}
diff --git a/ctree.h b/ctree.h
new file mode 100644
index 00000000..5ab0f4a4
--- /dev/null
+++ b/ctree.h
@@ -0,0 +1,2570 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_CTREE_H__
+#define __BTRFS_CTREE_H__
+
+#if BTRFS_FLAT_INCLUDES
+#include "list.h"
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "extent-cache.h"
+#include "extent_io.h"
+#include "ioctl.h"
+#else
+#include <btrfs/list.h>
+#include <btrfs/kerncompat.h>
+#include <btrfs/radix-tree.h>
+#include <btrfs/extent-cache.h>
+#include <btrfs/extent_io.h>
+#include <btrfs/ioctl.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+struct btrfs_root;
+struct btrfs_trans_handle;
+struct btrfs_free_space_ctl;
+#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
+
+#define BTRFS_MAX_MIRRORS 3
+
+#define BTRFS_MAX_LEVEL 8
+
+#define BTRFS_COMPAT_EXTENT_TREE_V0
+
+/* holds pointers to all of the tree roots */
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/* stores information about which extents are in use, and reference counts */
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/*
+ * chunk tree stores translations from logical -> physical block numbering
+ * the super block points to the chunk tree
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
+
+/*
+ * stores information about which areas of a given device are in use.
+ * one per device. The tree of tree roots points to the device tree
+ */
+#define BTRFS_DEV_TREE_OBJECTID 4ULL
+
+/* one per subvolume, storing files and directories */
+#define BTRFS_FS_TREE_OBJECTID 5ULL
+
+/* directory objectid inside the root tree */
+#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+/* holds checksums of all the data extents */
+#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+
+/* for storing items that use the BTRFS_UUID_KEY* */
+#define BTRFS_UUID_TREE_OBJECTID 9ULL
+
+/* tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
+/* orphan objectid for tracking unlinked/truncated files */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* does write ahead logging to speed up fsyncs */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* space balancing */
+#define BTRFS_TREE_RELOC_OBJECTID -8ULL
+#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+
+/*
+ * extent checksums all have this objectid
+ * this allows them to share the logging tree
+ * for fsyncs
+ */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
+/*
+ * The inode number assigned to the special inode for storing
+ * free ino cache
+ */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
+/* dummy objectid represents multiple objectids */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/*
+ * All files have objectids in this range.
+ */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+
+/*
+ * the device items go into the chunk tree. The key is in the form
+ * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+/*
+ * the max metadata block size. This limit is somewhat artificial,
+ * but the memmove costs go through the roof for larger blocks.
+ */
+#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
+
+/*
+ * we can actually store much bigger names, but lets not confuse the rest
+ * of linux
+ */
+#define BTRFS_NAME_LEN 255
+
+/*
+ * Theoretical limit is larger, but we keep this down to a sane
+ * value. That should limit greatly the possibility of collisions on
+ * inode ref items.
+ */
+#define BTRFS_LINK_MAX 65535U
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+
+/* csum types */
+#define BTRFS_CSUM_TYPE_CRC32 0
+
+static int btrfs_csum_sizes[] = { 4 };
+
+/* four bytes for CRC32 */
+#define BTRFS_CRC32_SIZE 4
+#define BTRFS_EMPTY_DIR_SIZE 0
+
+#define BTRFS_FT_UNKNOWN 0
+#define BTRFS_FT_REG_FILE 1
+#define BTRFS_FT_DIR 2
+#define BTRFS_FT_CHRDEV 3
+#define BTRFS_FT_BLKDEV 4
+#define BTRFS_FT_FIFO 5
+#define BTRFS_FT_SOCK 6
+#define BTRFS_FT_SYMLINK 7
+#define BTRFS_FT_XATTR 8
+#define BTRFS_FT_MAX 9
+
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
+/*
+ * the key defines the order in the tree, and so it also defines (optimal)
+ * block layout. objectid corresponds to the inode number. The flags
+ * tells us things about the object, and is a kind of stream selector.
+ * so for a given inode, keys with flags of 1 might refer to the inode
+ * data, flags of 2 may point to file data in the btree and flags == 3
+ * may point to extents.
+ *
+ * offset is the starting byte offset for this key in the stream.
+ *
+ * btrfs_disk_key is in disk byte order. struct btrfs_key is always
+ * in cpu native order. Otherwise they are identical and their sizes
+ * should be the same (ie both packed)
+ */
+struct btrfs_disk_key {
+ __le64 objectid;
+ u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+ u64 objectid;
+ u8 type;
+ u64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_mapping_tree {
+ struct cache_tree cache_tree;
+};
+
+#define BTRFS_UUID_SIZE 16
+/*
+ * Packed per-device record. struct btrfs_super_block embeds one of these as
+ * its dev_item member. Multi-byte fields are declared with the __le*
+ * types.
+ */
+struct btrfs_dev_item {
+ /* the internal btrfs device id */
+ __le64 devid;
+
+ /* size of the device */
+ __le64 total_bytes;
+
+ /* bytes used */
+ __le64 bytes_used;
+
+ /* optimal io alignment for this device */
+ __le32 io_align;
+
+ /* optimal io width for this device */
+ __le32 io_width;
+
+ /* minimal io size for this device */
+ __le32 sector_size;
+
+ /* type and info about this device */
+ __le64 type;
+
+ /* expected generation for this device */
+ __le64 generation;
+
+ /*
+ * starting byte of this partition on the device,
+ * to allow for stripe alignment in the future
+ */
+ __le64 start_offset;
+
+ /* grouping information for allocation decisions */
+ __le32 dev_group;
+
+ /* seek speed 0-100 where 100 is fastest */
+ u8 seek_speed;
+
+ /* bandwidth 0-100 where 100 is fastest */
+ u8 bandwidth;
+
+ /* btrfs generated uuid for this device */
+ u8 uuid[BTRFS_UUID_SIZE];
+
+ /* uuid of FS who owns this device */
+ u8 fsid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+ __le64 devid;
+ __le64 offset;
+ u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+ /* size of this chunk in bytes */
+ __le64 length;
+
+ /* objectid of the root referencing this chunk */
+ __le64 owner;
+
+ __le64 stripe_len;
+ __le64 type;
+
+ /* optimal io alignment for this chunk */
+ __le32 io_align;
+
+ /* optimal io width for this chunk */
+ __le32 io_width;
+
+ /* minimal io size for this chunk */
+ __le32 sector_size;
+
+ /* 2^16 stripes is quite a lot, a second limit is the size of a single
+ * item in the btree
+ */
+ __le16 num_stripes;
+
+ /* sub stripes only matter for raid10 */
+ __le16 sub_stripes;
+ struct btrfs_stripe stripe;
+ /* additional stripes go here */
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_EXTENT 1
+#define BTRFS_FREE_SPACE_BITMAP 2
+
+struct btrfs_free_space_entry {
+ __le64 offset;
+ __le64 bytes;
+ u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+ struct btrfs_disk_key location;
+ __le64 generation;
+ __le64 num_entries;
+ __le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
+/*
+ * Size in bytes of a chunk item holding @num_stripes stripes.
+ * struct btrfs_chunk already contains one struct btrfs_stripe, so only the
+ * stripes beyond the first add to the size. A count of zero trips BUG_ON().
+ */
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+	unsigned long extra_stripes;
+
+	BUG_ON(num_stripes == 0);
+	extra_stripes = sizeof(struct btrfs_stripe) * (num_stripes - 1);
+	return sizeof(struct btrfs_chunk) + extra_stripes;
+}
+
+#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
+#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
+#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
+#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
+#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
+
+#define BTRFS_BACKREF_REV_MAX 256
+#define BTRFS_BACKREF_REV_SHIFT 56
+#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
+ BTRFS_BACKREF_REV_SHIFT)
+
+#define BTRFS_OLD_BACKREF_REV 0
+#define BTRFS_MIXED_BACKREF_REV 1
+
+/*
+ * every tree block (leaf or node) starts with this header.
+ */
+struct btrfs_header {
+ /* these first four must match the super block */
+ u8 csum[BTRFS_CSUM_SIZE];
+ u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+ __le64 bytenr; /* which block this node is supposed to live in */
+ __le64 flags;
+
+ /* allowed to be different from the super from here on down */
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+ __le64 generation;
+ __le64 owner;
+ __le32 nritems;
+ u8 level;
+} __attribute__ ((__packed__));
+
+/*
+ * Capacity helpers. @r is a pointer to a structure with nodesize/leafsize
+ * members (struct btrfs_root has both); @bs is a block size in bytes.
+ */
+
+/* number of struct btrfs_key_ptr entries that fit after the block header */
+#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
+					sizeof(struct btrfs_header)) / \
+					sizeof(struct btrfs_key_ptr))
+/* bytes left in a block of @bs bytes once the header is subtracted */
+#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
+/*
+ * @r is parenthesized before the member access so that an argument such as
+ * "roots + 1" expands correctly, as BTRFS_NODEPTRS_PER_BLOCK already does.
+ */
+#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE((r)->leafsize))
+/* leaf data area minus one item header and one file extent item */
+#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
+					sizeof(struct btrfs_item) - \
+					sizeof(struct btrfs_file_extent_item))
+/* leaf data area minus one item header and one dir item */
+#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
+				 sizeof(struct btrfs_item) - \
+				 sizeof(struct btrfs_dir_item))
+
+
+/*
+ * this is a very generous portion of the super block, giving us
+ * room to translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+#define BTRFS_LABEL_SIZE 256
+
+/*
+ * just in case we somehow lose the roots and are not able to mount,
+ * we store an array of the roots from previous transactions
+ * in the super.
+ */
+#define BTRFS_NUM_BACKUP_ROOTS 4
+/*
+ * One backup slot; struct btrfs_super_block carries
+ * BTRFS_NUM_BACKUP_ROOTS of these in its super_roots array.
+ *
+ * Packed layout: 19 __le64 fields (152 bytes) followed by 6 level bytes
+ * and 10 padding bytes, 168 bytes in total.
+ */
+struct btrfs_root_backup {
+ __le64 tree_root;
+ __le64 tree_root_gen;
+
+ __le64 chunk_root;
+ __le64 chunk_root_gen;
+
+ __le64 extent_root;
+ __le64 extent_root_gen;
+
+ __le64 fs_root;
+ __le64 fs_root_gen;
+
+ __le64 dev_root;
+ __le64 dev_root_gen;
+
+ __le64 csum_root;
+ __le64 csum_root_gen;
+
+ __le64 total_bytes;
+ __le64 bytes_used;
+ __le64 num_devices;
+ /*
+ * future; NOTE(review): the member name is misspelled ("unsed" for
+ * "unused"), renaming it would change the struct's interface
+ */
+ __le64 unsed_64[4];
+
+ u8 tree_root_level;
+ u8 chunk_root_level;
+ u8 extent_root_level;
+ u8 fs_root_level;
+ u8 dev_root_level;
+ u8 csum_root_level;
+ /* future and to align */
+ u8 unused_8[10];
+} __attribute__ ((__packed__));
+
+/*
+ * the super block basically lists the main trees of the FS
+ * it currently lacks any block count etc etc
+ */
+struct btrfs_super_block {
+ u8 csum[BTRFS_CSUM_SIZE];
+ /*
+ * csum and the next three fields (fsid, bytenr, flags) must match the
+ * start of struct btrfs_header, which declares the same four members
+ * in the same order
+ */
+ u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+ __le64 bytenr; /* this block number */
+ __le64 flags;
+
+ /* allowed to be different from the btrfs_header from here on down */
+ __le64 magic;
+ __le64 generation;
+ __le64 root;
+ __le64 chunk_root;
+ __le64 log_root;
+
+ /* this will help find the new super based on the log root */
+ __le64 log_root_transid;
+ __le64 total_bytes;
+ __le64 bytes_used;
+ __le64 root_dir_objectid;
+ __le64 num_devices;
+ __le32 sectorsize;
+ __le32 nodesize;
+ __le32 leafsize;
+ __le32 stripesize;
+ /* presumably the number of bytes in use in sys_chunk_array - confirm */
+ __le32 sys_chunk_array_size;
+ __le64 chunk_root_generation;
+ __le64 compat_flags;
+ __le64 compat_ro_flags;
+ __le64 incompat_flags;
+ __le16 csum_type;
+ u8 root_level;
+ u8 chunk_root_level;
+ u8 log_root_level;
+ struct btrfs_dev_item dev_item;
+
+ char label[BTRFS_LABEL_SIZE];
+
+ __le64 cache_generation;
+ __le64 uuid_tree_generation;
+
+ /* future expansion */
+ __le64 reserved[30];
+ u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+ /* roots from previous transactions, see struct btrfs_root_backup */
+ struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
+} __attribute__ ((__packed__));
+
+/*
+ * Feature flags that we support. If any incompat flags are set other than
+ * the ones listed in BTRFS_FEATURE_INCOMPAT_SUPP below then we will fail to
+ * mount.
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
+
+#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
+
+/*
+ * some patches floated around with a second compression method
+ * lets save that incompat here for when they do get in
+ * Note we don't actually support it, we're just reserving the
+ * number
+ */
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4)
+
+/*
+ * older kernels tried to do bigger metadata blocks, but the
+ * code was pretty buggy. Lets not let them try anymore.
+ */
+#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
+#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
+#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
+#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
+#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
+
+#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
+
+#define BTRFS_FEATURE_COMPAT_RO_SUPP \
+ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+
+#define BTRFS_FEATURE_INCOMPAT_SUPP \
+ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
+ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
+ BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
+ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
+ BTRFS_FEATURE_INCOMPAT_RAID56 | \
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
+ BTRFS_FEATURE_INCOMPAT_NO_HOLES)
+
+/*
+ * A leaf is full of items. offset and size tell us where to find
+ * the item in the leaf (relative to the start of the data area)
+ */
+struct btrfs_item {
+ struct btrfs_disk_key key;
+ __le32 offset;
+ __le32 size;
+} __attribute__ ((__packed__));
+
+/*
+ * leaves have an item area and a data area:
+ * [item0, item1....itemN] [free space] [dataN...data1, data0]
+ *
+ * The data is separate from the items to get the keys closer together
+ * during searches.
+ */
+struct btrfs_leaf {
+ struct btrfs_header header;
+ struct btrfs_item items[];
+} __attribute__ ((__packed__));
+
+/*
+ * all non-leaf blocks are nodes, they hold only keys and pointers to
+ * other blocks
+ */
+struct btrfs_key_ptr {
+ struct btrfs_disk_key key;
+ __le64 blockptr;
+ __le64 generation;
+} __attribute__ ((__packed__));
+
+struct btrfs_node {
+ struct btrfs_header header;
+ struct btrfs_key_ptr ptrs[];
+} __attribute__ ((__packed__));
+
+/*
+ * btrfs_paths remember the path taken from the root down to the leaf.
+ * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
+ * to any other levels that are present.
+ *
+ * The slots array records the index of the item or block pointer
+ * used while walking the tree.
+ */
+
+struct btrfs_path {
+ struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
+ int slots[BTRFS_MAX_LEVEL];
+ /* if there is real range locking, this locks field will change */
+ int locks[BTRFS_MAX_LEVEL];
+ int reada;
+ /* keep some upper locks as we walk down */
+ int lowest_level;
+
+ /*
+ * set by btrfs_split_item, tells search_slot to keep all locks
+ * and to force calls to keep space in the nodes
+ */
+ unsigned int search_for_split:1;
+ unsigned int skip_check_block:1;
+};
+
+/*
+ * items in the extent btree are used to record the objectid of the
+ * owner of the block and the number of references
+ */
+
+struct btrfs_extent_item {
+ __le64 refs;
+ __le64 generation;
+ __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_item_v0 {
+ __le32 refs;
+} __attribute__ ((__packed__));
+
+#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
+ sizeof(struct btrfs_item))
+
+#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
+#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
+
+/* following flags only apply to tree blocks */
+
+/* use full backrefs for extent pointers in the block*/
+#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
+
+struct btrfs_tree_block_info {
+ struct btrfs_disk_key key;
+ u8 level;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_data_ref {
+ __le64 root;
+ __le64 objectid;
+ __le64 offset;
+ __le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_shared_data_ref {
+ __le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_inline_ref {
+ u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_ref_v0 {
+ __le64 root;
+ __le64 generation;
+ __le64 objectid;
+ __le32 count;
+} __attribute__ ((__packed__));
+
+/* dev extents record free space on individual devices. The owner
+ * field points back to the chunk allocation mapping tree that allocated
+ * the extent. The chunk tree uuid field is a way to double check the owner
+ */
+struct btrfs_dev_extent {
+ __le64 chunk_tree;
+ __le64 chunk_objectid;
+ __le64 chunk_offset;
+ __le64 length;
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_ref {
+ __le64 index;
+ __le16 name_len;
+ /* name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_extref {
+ __le64 parent_objectid;
+ __le64 index;
+ __le16 name_len;
+ __u8 name[0]; /* name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+ __le64 sec;
+ __le32 nsec;
+} __attribute__ ((__packed__));
+
+/*
+ * Compression method identifiers. BTRFS_COMPRESS_TYPES deliberately shares
+ * the value 2 with BTRFS_COMPRESS_LZO (the highest method listed), and
+ * BTRFS_COMPRESS_LAST is one past it.
+ */
+typedef enum {
+ BTRFS_COMPRESS_NONE = 0,
+ BTRFS_COMPRESS_ZLIB = 1,
+ BTRFS_COMPRESS_LZO = 2,
+ BTRFS_COMPRESS_TYPES = 2,
+ BTRFS_COMPRESS_LAST = 3,
+} btrfs_compression_type;
+
+/* we don't understand any encryption methods right now */
+typedef enum {
+ BTRFS_ENCRYPTION_NONE = 0,
+ BTRFS_ENCRYPTION_LAST = 1,
+} btrfs_encryption_type;
+
+enum btrfs_tree_block_status {
+ BTRFS_TREE_BLOCK_CLEAN,
+ BTRFS_TREE_BLOCK_INVALID_NRITEMS,
+ BTRFS_TREE_BLOCK_INVALID_PARENT_KEY,
+ BTRFS_TREE_BLOCK_BAD_KEY_ORDER,
+ BTRFS_TREE_BLOCK_INVALID_LEVEL,
+ BTRFS_TREE_BLOCK_INVALID_FREE_SPACE,
+ BTRFS_TREE_BLOCK_INVALID_OFFSETS,
+};
+
+struct btrfs_inode_item {
+ /* nfs style generation number */
+ __le64 generation;
+ /* transid that last touched this inode */
+ __le64 transid;
+ __le64 size;
+ __le64 nbytes;
+ __le64 block_group;
+ __le32 nlink;
+ __le32 uid;
+ __le32 gid;
+ __le32 mode;
+ __le64 rdev;
+ __le64 flags;
+
+ /* modification sequence number for NFS */
+ __le64 sequence;
+
+ /*
+ * a little future expansion, for more than this we can
+ * just grow the inode item and version it
+ */
+ __le64 reserved[4];
+ struct btrfs_timespec atime;
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec mtime;
+ struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_log_item {
+ __le64 end;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_item {
+ struct btrfs_disk_key location;
+ __le64 transid;
+ __le16 data_len;
+ __le16 name_len;
+ u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_root_item_v0 {
+ struct btrfs_inode_item inode;
+ __le64 generation;
+ __le64 root_dirid;
+ __le64 bytenr;
+ __le64 byte_limit;
+ __le64 bytes_used;
+ __le64 last_snapshot;
+ __le64 flags;
+ __le32 refs;
+ struct btrfs_disk_key drop_progress;
+ u8 drop_level;
+ u8 level;
+} __attribute__ ((__packed__));
+
+struct btrfs_root_item {
+ struct btrfs_inode_item inode;
+ __le64 generation;
+ __le64 root_dirid;
+ __le64 bytenr;
+ __le64 byte_limit;
+ __le64 bytes_used;
+ __le64 last_snapshot;
+ __le64 flags;
+ __le32 refs;
+ struct btrfs_disk_key drop_progress;
+ u8 drop_level;
+ u8 level;
+
+ /*
+ * The following fields appear after subvol_uuids+subvol_times
+ * were introduced.
+ */
+
+ /*
+ * This generation number is used to test if the new fields are valid
+ * and up to date while reading the root item. Everytime the root item
+ * is written out, the "generation" field is copied into this field. If
+ * anyone ever mounted the fs with an older kernel, we will have
+ * mismatching generation values here and thus must invalidate the
+ * new fields. See btrfs_update_root and btrfs_find_last_root for
+ * details.
+ * the offset of generation_v2 is also used as the start for the memset
+ * when invalidating the fields.
+ */
+ __le64 generation_v2;
+ u8 uuid[BTRFS_UUID_SIZE];
+ u8 parent_uuid[BTRFS_UUID_SIZE];
+ u8 received_uuid[BTRFS_UUID_SIZE];
+ __le64 ctransid; /* updated when an inode changes */
+ __le64 otransid; /* trans when created */
+ __le64 stransid; /* trans when sent. non-zero for received subvol */
+ __le64 rtransid; /* trans when received. non-zero for received subvol */
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec otime;
+ struct btrfs_timespec stime;
+ struct btrfs_timespec rtime;
+ __le64 reserved[8]; /* for future */
+} __attribute__ ((__packed__));
+
+/*
+ * this is used for both forward and backward root refs
+ */
+struct btrfs_root_ref {
+ __le64 dirid;
+ __le64 sequence;
+ __le16 name_len;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FILE_EXTENT_INLINE 0
+#define BTRFS_FILE_EXTENT_REG 1
+#define BTRFS_FILE_EXTENT_PREALLOC 2
+
+/*
+ * Packed item describing one file extent. The compression, encryption and
+ * other_encoding members together make up the 32 encoding bits mentioned
+ * below.
+ */
+struct btrfs_file_extent_item {
+ /*
+ * transaction id that created this extent
+ */
+ __le64 generation;
+ /*
+ * max number of bytes to hold this extent in ram
+ * when we split a compressed extent we can't know how big
+ * each of the resulting pieces will be. So, this is
+ * an upper limit on the size of the extent in ram instead of
+ * an exact limit.
+ */
+ __le64 ram_bytes;
+
+ /*
+ * 32 bits for the various ways we might encode the data,
+ * including compression and encryption. If any of these
+ * are set to something a given disk format doesn't understand
+ * it is treated like an incompat flag for reading and writing,
+ * but not for stat.
+ */
+ u8 compression;
+ u8 encryption;
+ __le16 other_encoding; /* spare for later use */
+
+ /*
+ * are we inline data or a real extent?
+ * presumably one of the BTRFS_FILE_EXTENT_* values - confirm
+ */
+ u8 type;
+
+ /*
+ * disk space consumed by the extent, checksum blocks are included
+ * in these numbers
+ */
+ __le64 disk_bytenr;
+ __le64 disk_num_bytes;
+ /*
+ * the logical offset in file blocks (no csums)
+ * this extent record is for. This allows a file extent to point
+ * into the middle of an existing extent on disk, sharing it
+ * between two snapshots (useful if some bytes in the middle of the
+ * extent have changed)
+ */
+ __le64 offset;
+ /*
+ * the logical number of file blocks (no csums included)
+ */
+ __le64 num_bytes;
+
+} __attribute__ ((__packed__));
+
+struct btrfs_csum_item {
+ u8 csum;
+} __attribute__ ((__packed__));
+
+/*
+ * We don't want to overwrite 1M at the beginning of device, even though
+ * there is our 1st superblock at 64k. Some possible reasons:
+ * - the first 64k blank is useful for some boot loader/manager
+ * - the first 1M could be scratched by buggy partitioner or somesuch
+ */
+#define BTRFS_BLOCK_RESERVED_1M_FOR_SUPER ((u64)1024 * 1024)
+
+/* tag for the radix tree of block groups in ram */
+#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
+#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
+#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
+#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
+#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
+#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
+#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
+#define BTRFS_NR_RAID_TYPES 7
+
+#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
+ BTRFS_BLOCK_GROUP_SYSTEM | \
+ BTRFS_BLOCK_GROUP_METADATA)
+
+#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
+ BTRFS_BLOCK_GROUP_RAID1 | \
+ BTRFS_BLOCK_GROUP_RAID5 | \
+ BTRFS_BLOCK_GROUP_RAID6 | \
+ BTRFS_BLOCK_GROUP_DUP | \
+ BTRFS_BLOCK_GROUP_RAID10)
+
+/* used in struct btrfs_balance_args fields */
+#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
+
+/*
+ * GLOBAL_RSV does not exist as a on-disk block group type and is used
+ * internally for exporting info about global block reserve from space infos
+ */
+#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49)
+
+#define BTRFS_QGROUP_LEVEL_SHIFT 48
+
+/* Level part of a qgroup id: the bits at and above BTRFS_QGROUP_LEVEL_SHIFT. */
+static inline u64 btrfs_qgroup_level(u64 qgroupid)
+{
+	u64 level = qgroupid;
+
+	level >>= BTRFS_QGROUP_LEVEL_SHIFT;
+	return level;
+}
+
+/* Id part of a qgroup id: the bits below BTRFS_QGROUP_LEVEL_SHIFT. */
+static inline u64 btrfs_qgroup_subvid(u64 qgroupid)
+{
+	const u64 low_bits = (1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1;
+
+	return qgroupid & low_bits;
+}
+
+#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1)
+#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
+
+struct btrfs_qgroup_status_item {
+ __le64 version;
+ __le64 generation;
+ __le64 flags;
+ __le64 scan; /* progress during scanning */
+} __attribute__ ((__packed__));
+
+struct btrfs_block_group_item {
+ __le64 used;
+ __le64 chunk_objectid;
+ __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_info {
+ __le32 extent_count;
+ __le32 flags;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+
+struct btrfs_qgroup_info_item {
+ __le64 generation;
+ __le64 referenced;
+ __le64 referenced_compressed;
+ __le64 exclusive;
+ __le64 exclusive_compressed;
+} __attribute__ ((__packed__));
+
+/* flags definition for qgroup limits */
+#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
+
+struct btrfs_qgroup_limit_item {
+ __le64 flags;
+ __le64 max_referenced;
+ __le64 max_exclusive;
+ __le64 rsv_referenced;
+ __le64 rsv_exclusive;
+} __attribute__ ((__packed__));
+
+struct btrfs_space_info {
+ u64 flags;
+ u64 total_bytes;
+ u64 bytes_used;
+ u64 bytes_pinned;
+ int full;
+ struct list_head list;
+};
+
+struct btrfs_block_group_cache {
+ struct cache_extent cache;
+ struct btrfs_key key;
+ struct btrfs_block_group_item item;
+ struct btrfs_space_info *space_info;
+ struct btrfs_free_space_ctl *free_space_ctl;
+ u64 bytes_super;
+ u64 pinned;
+ u64 flags;
+ int cached;
+ int ro;
+};
+
+struct btrfs_extent_ops {
+ int (*alloc_extent)(struct btrfs_root *root, u64 num_bytes,
+ u64 hint_byte, struct btrfs_key *ins, int metadata);
+ int (*free_extent)(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes);
+};
+
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info {
+ u8 fsid[BTRFS_FSID_SIZE];
+ u8 *new_fsid;
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+ u8 *new_chunk_tree_uuid;
+ struct btrfs_root *fs_root;
+ struct btrfs_root *extent_root;
+ struct btrfs_root *tree_root;
+ struct btrfs_root *chunk_root;
+ struct btrfs_root *dev_root;
+ struct btrfs_root *csum_root;
+ struct btrfs_root *quota_root;
+ struct btrfs_root *free_space_root;
+
+ struct rb_root fs_root_tree;
+
+ /* the log root tree is a directory of all the other log roots */
+ struct btrfs_root *log_root_tree;
+
+ struct extent_io_tree extent_cache;
+ struct extent_io_tree free_space_cache;
+ struct extent_io_tree block_group_cache;
+ struct extent_io_tree pinned_extents;
+ struct extent_io_tree pending_del;
+ struct extent_io_tree extent_ins;
+ struct extent_io_tree *excluded_extents;
+
+ /* logical->physical extent mapping */
+ struct btrfs_mapping_tree mapping_tree;
+
+ u64 generation;
+ u64 last_trans_committed;
+
+ u64 avail_data_alloc_bits;
+ u64 avail_metadata_alloc_bits;
+ u64 avail_system_alloc_bits;
+ u64 data_alloc_profile;
+ u64 metadata_alloc_profile;
+ u64 system_alloc_profile;
+ u64 alloc_start;
+
+ struct btrfs_trans_handle *running_transaction;
+ struct btrfs_super_block *super_copy;
+ struct mutex fs_mutex;
+
+ u64 super_bytenr;
+ u64 total_pinned;
+
+ struct btrfs_extent_ops *extent_ops;
+ struct list_head dirty_cowonly_roots;
+ struct list_head recow_ebs;
+
+ struct btrfs_fs_devices *fs_devices;
+ struct list_head space_info;
+ int system_allocs;
+
+ unsigned int readonly:1;
+ unsigned int on_restoring:1;
+ unsigned int is_chunk_recover:1;
+ unsigned int quota_enabled:1;
+ unsigned int suppress_check_block_errors:1;
+ unsigned int ignore_fsid_mismatch:1;
+ unsigned int ignore_chunk_tree_error:1;
+
+ int (*free_extent_hook)(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 owner, u64 offset,
+ int refs_to_drop);
+ struct cache_tree *fsck_extent_cache;
+ struct cache_tree *corrupt_blocks;
+
+};
+
+/*
+ * in ram representation of the tree. extent_root (a member of
+ * struct btrfs_fs_info, reachable through fs_info here) is used for all
+ * allocations and is also the root of the extent tree.
+ */
+struct btrfs_root {
+ struct extent_buffer *node;
+ struct extent_buffer *commit_root;
+ struct btrfs_root_item root_item;
+ struct btrfs_key root_key;
+ struct btrfs_fs_info *fs_info;
+ u64 objectid;
+ u64 last_trans;
+
+ /* data allocations are done in sectorsize units */
+ u32 sectorsize;
+
+ /* node allocations are done in nodesize units */
+ u32 nodesize;
+
+ /* leaf allocations are done in leafsize units */
+ u32 leafsize;
+
+ /*
+ * NOTE(review): this member used to carry a copy of the leafsize
+ * comment; presumably it mirrors the super block's stripesize field -
+ * confirm
+ */
+ u32 stripesize;
+
+ int ref_cows;
+ int track_dirty;
+
+
+ u32 type;
+ u64 highest_inode;
+ u64 last_inode_alloc;
+
+ /*
+ * Record orphan data extent ref
+ *
+ * TODO: Don't restore things in btrfs_root.
+ * Directly record it into inode_record, which needs a lot of
+ * infrastructure change to allow cooperation between extent
+ * and fs tree scan.
+ */
+ struct list_head orphan_data_extents;
+
+ /* the dirty list is only used by non-reference counted roots */
+ struct list_head dirty_list;
+ struct rb_node rb_node;
+};
+
+/*
+ * inode items have the data typically returned from stat and store other
+ * info about object characteristics. There is one for every file and dir in
+ * the FS
+ */
+#define BTRFS_INODE_ITEM_KEY 1
+#define BTRFS_INODE_REF_KEY 12
+#define BTRFS_INODE_EXTREF_KEY 13
+#define BTRFS_XATTR_ITEM_KEY 24
+#define BTRFS_ORPHAN_ITEM_KEY 48
+
+#define BTRFS_DIR_LOG_ITEM_KEY 60
+#define BTRFS_DIR_LOG_INDEX_KEY 72
+/*
+ * dir items are the name -> inode pointers in a directory. There is one
+ * for every name in a directory.
+ */
+#define BTRFS_DIR_ITEM_KEY 84
+#define BTRFS_DIR_INDEX_KEY 96
+
+/*
+ * extent data is for file data
+ */
+#define BTRFS_EXTENT_DATA_KEY 108
+
+/*
+ * csum items have the checksums for data in the extents
+ */
+#define BTRFS_CSUM_ITEM_KEY 120
+/*
+ * extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
+ */
+#define BTRFS_EXTENT_CSUM_KEY 128
+
+/*
+ * root items point to tree roots. They are typically in the root
+ * tree used by the super block to find all the other trees
+ */
+#define BTRFS_ROOT_ITEM_KEY 132
+
+/*
+ * root backrefs tie subvols and snapshots to the directory entries that
+ * reference them
+ */
+#define BTRFS_ROOT_BACKREF_KEY 144
+
+/*
+ * root refs make a fast index for listing all of the snapshots and
+ * subvolumes referenced by a given root. They point directly to the
+ * directory item in the root that references the subvol
+ */
+#define BTRFS_ROOT_REF_KEY 156
+
+/*
+ * extent items are in the extent map tree. These record which blocks
+ * are used, and how many references there are to each block
+ */
+#define BTRFS_EXTENT_ITEM_KEY 168
+
+/*
+ * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
+ * the length, so we save the level in key->offset instead of the length.
+ */
+#define BTRFS_METADATA_ITEM_KEY 169
+
+#define BTRFS_TREE_BLOCK_REF_KEY 176
+
+#define BTRFS_EXTENT_DATA_REF_KEY 178
+
+/* old style extent backrefs */
+#define BTRFS_EXTENT_REF_V0_KEY 180
+
+#define BTRFS_SHARED_BLOCK_REF_KEY 182
+
+#define BTRFS_SHARED_DATA_REF_KEY 184
+
+
+/*
+ * block groups give us hints into the extent allocation trees. Which
+ * blocks are free etc etc
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
+
+/*
+ * Every block group is represented in the free space tree by a free space info
+ * item, which stores some accounting information. It is keyed on
+ * (block_group_start, FREE_SPACE_INFO, block_group_length).
+ */
+#define BTRFS_FREE_SPACE_INFO_KEY 198
+
+/*
+ * A free space extent tracks an extent of space that is free in a block group.
+ * It is keyed on (start, FREE_SPACE_EXTENT, length).
+ */
+#define BTRFS_FREE_SPACE_EXTENT_KEY 199
+
+/*
+ * When a block group becomes very fragmented, we convert it to use bitmaps
+ * instead of extents. A free space bitmap is keyed on
+ * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
+ * (length / sectorsize) bits.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_KEY 200
+
+#define BTRFS_DEV_EXTENT_KEY 204
+#define BTRFS_DEV_ITEM_KEY 216
+#define BTRFS_CHUNK_ITEM_KEY 228
+
+#define BTRFS_BALANCE_ITEM_KEY 248
+
+/*
+ * quota groups
+ */
+#define BTRFS_QGROUP_STATUS_KEY 240
+#define BTRFS_QGROUP_INFO_KEY 242
+#define BTRFS_QGROUP_LIMIT_KEY 244
+#define BTRFS_QGROUP_RELATION_KEY 246
+
+/*
+ * Persistently stores the io stats in the device tree.
+ * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
+ */
+#define BTRFS_DEV_STATS_KEY 249
+
+/*
+ * Persistently stores the device replace state in the device tree.
+ * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
+ */
+#define BTRFS_DEV_REPLACE_KEY 250
+
+/*
+ * Stores items that allow UUIDs to be quickly mapped to something else.
+ * These items are part of the filesystem UUID tree.
+ * The key is built like this:
+ * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
+ */
+#if BTRFS_UUID_SIZE != 16
+#error "UUID items require BTRFS_UUID_SIZE == 16!"
+#endif
+#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
+#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
+ * received subvols */
+
+/*
+ * string items are for debugging. They just store a short string of
+ * data in the FS
+ */
+#define BTRFS_STRING_ITEM_KEY 253
+/*
+ * Inode flags
+ */
+#define BTRFS_INODE_NODATASUM (1 << 0)
+#define BTRFS_INODE_NODATACOW (1 << 1)
+#define BTRFS_INODE_READONLY (1 << 2)
+
+/*
+ * Call read_extent_buffer() on @eb for a single member of @type.
+ * The destination is @result; the start position is @ptr (cast to
+ * unsigned long) plus offsetof(@type, @member); the length is the size of
+ * that member.
+ */
+#define read_eb_member(eb, ptr, type, member, result) ( \
+ read_extent_buffer(eb, (char *)(result), \
+ ((unsigned long)(ptr)) + \
+ offsetof(type, member), \
+ sizeof(((type *)0)->member)))
+
+/*
+ * Call write_extent_buffer() on @eb for a single member of @type.
+ * The source is @result; the start position is @ptr (cast to unsigned long)
+ * plus offsetof(@type, @member); the length is the size of that member.
+ */
+#define write_eb_member(eb, ptr, type, member, result) ( \
+ write_extent_buffer(eb, (char *)(result), \
+ ((unsigned long)(ptr)) + \
+ offsetof(type, member), \
+ sizeof(((type *)0)->member)))
+
+/*
+ * Generate btrfs_<name>() and btrfs_set_<name>() for a field of the
+ * struct btrfs_header found at the start of eb->data.
+ * The getter converts the little-endian field to CPU order, the setter
+ * converts @val to little-endian before storing it.
+ * Note: the @type argument is not used by the expansion; the structure is
+ * always struct btrfs_header.
+ */
+#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
+{ \
+ const struct btrfs_header *h = (struct btrfs_header *)eb->data; \
+ return le##bits##_to_cpu(h->member); \
+} \
+static inline void btrfs_set_##name(struct extent_buffer *eb, \
+ u##bits val) \
+{ \
+ struct btrfs_header *h = (struct btrfs_header *)eb->data; \
+ h->member = cpu_to_le##bits(val); \
+}
+
+/*
+ * Generate btrfs_<name>() and btrfs_set_<name>() for a field of @type that
+ * lives inside an extent buffer.
+ * The @s argument of the generated functions is not dereferenced: its value
+ * is cast to unsigned long and added to eb->data to locate the structure.
+ * The field is accessed with get_unaligned_le<bits>() /
+ * put_unaligned_le<bits>().
+ */
+#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
+ const type *s) \
+{ \
+ unsigned long offset = (unsigned long)s; \
+ const type *p = (type *) (eb->data + offset); \
+ return get_unaligned_le##bits(&p->member); \
+} \
+static inline void btrfs_set_##name(struct extent_buffer *eb, \
+ type *s, u##bits val) \
+{ \
+ unsigned long offset = (unsigned long)s; \
+ type *p = (type *) (eb->data + offset); \
+ put_unaligned_le##bits(val, &p->member); \
+}
+
+/*
+ * Generate btrfs_<name>() and btrfs_set_<name>() for a field of @type that
+ * is reached directly through the pointer @s (no extent buffer involved).
+ * The getter converts the little-endian field to CPU order, the setter
+ * converts @val to little-endian before storing it.
+ */
+#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
+static inline u##bits btrfs_##name(const type *s) \
+{ \
+ return le##bits##_to_cpu(s->member); \
+} \
+static inline void btrfs_set_##name(type *s, u##bits val) \
+{ \
+ s->member = cpu_to_le##bits(val); \
+}
+
+/* struct btrfs_dev_item */
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item,
+ start_offset, 64);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
+BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
+BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
+BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64);
+
+/* struct btrfs_dev_item, variants that access the structure directly */
+BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
+ total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
+ bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
+ io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
+ io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
+ sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
+ dev_group, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
+ seek_speed, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
+ bandwidth, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
+ generation, 64);
+
+/*
+ * Return @d advanced by the byte offset of the uuid member of
+ * struct btrfs_dev_item.
+ */
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+	char *base = (char *)d;
+
+	return base + offsetof(struct btrfs_dev_item, uuid);
+}
+
+/*
+ * Return @d advanced by the byte offset of the fsid member of
+ * struct btrfs_dev_item.
+ */
+static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
+{
+	char *base = (char *)d;
+
+	return base + offsetof(struct btrfs_dev_item, fsid);
+}
+
+/* struct btrfs_chunk and struct btrfs_stripe */
+BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+/*
+ * Return @s advanced by the byte offset of the dev_uuid member of
+ * struct btrfs_stripe.
+ */
+static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
+{
+	char *base = (char *)s;
+
+	return base + offsetof(struct btrfs_stripe, dev_uuid);
+}
+
+/*
+ * struct btrfs_chunk and struct btrfs_stripe, variants that access the
+ * structure directly
+ */
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+ stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+ io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+ io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+ sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+ num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
+ sub_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+/*
+ * Return the position of stripe number @nr of chunk @c: the value of @c
+ * plus the offset of the stripe member plus @nr times the stripe size.
+ * Only integer arithmetic is done here; nothing is dereferenced.
+ */
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+						   int nr)
+{
+	unsigned long first = (unsigned long)c +
+			      offsetof(struct btrfs_chunk, stripe);
+
+	return (struct btrfs_stripe *)(first +
+				       nr * sizeof(struct btrfs_stripe));
+}
+
+/* Return the position of the dev_uuid field of stripe @nr of chunk @c. */
+static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	return btrfs_stripe_dev_uuid(stripe);
+}
+
+/* Read the offset field of stripe @nr of chunk @c from @eb. */
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+					 struct btrfs_chunk *c, int nr)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	return btrfs_stripe_offset(eb, stripe);
+}
+
+/* Store @val in the offset field of stripe @nr of chunk @c in @eb. */
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+					      struct btrfs_chunk *c, int nr,
+					      u64 val)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	btrfs_set_stripe_offset(eb, stripe, val);
+}
+
+/* Read the devid field of stripe @nr of chunk @c from @eb. */
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+					struct btrfs_chunk *c, int nr)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	return btrfs_stripe_devid(eb, stripe);
+}
+
+/* Store @val in the devid field of stripe @nr of chunk @c in @eb. */
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+					     struct btrfs_chunk *c, int nr,
+					     u64 val)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	btrfs_set_stripe_devid(eb, stripe, val);
+}
+
+/* struct btrfs_block_group_item */
+BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
+ used, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
+ used, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+ struct btrfs_block_group_item, chunk_objectid, 64);
+
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+ struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+ struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+ struct btrfs_block_group_item, flags, 64);
+
+/* struct btrfs_free_space_info */
+BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
+ extent_count, 32);
+BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
+
+/* struct btrfs_inode_ref */
+BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
+
+/* struct btrfs_inode_extref */
+BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref,
+ parent_objectid, 64);
+BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref,
+ name_len, 16);
+BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64);
+
+/* struct btrfs_inode_item */
+BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64);
+BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_inode_generation,
+ struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence,
+ struct btrfs_inode_item, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_transid,
+ struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_size,
+ struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes,
+ struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group,
+ struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink,
+ struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_uid,
+ struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_gid,
+ struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_mode,
+ struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev,
+ struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_flags,
+ struct btrfs_inode_item, flags, 64);
+
+/*
+ * Return @inode_item advanced by the byte offset of the atime member of
+ * struct btrfs_inode_item. Nothing is dereferenced.
+ */
+static inline struct btrfs_timespec *
+btrfs_inode_atime(struct btrfs_inode_item *inode_item)
+{
+	return (struct btrfs_timespec *)((unsigned long)inode_item +
+			offsetof(struct btrfs_inode_item, atime));
+}
+
+/*
+ * Return @inode_item advanced by the byte offset of the mtime member of
+ * struct btrfs_inode_item. Nothing is dereferenced.
+ */
+static inline struct btrfs_timespec *
+btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
+{
+	return (struct btrfs_timespec *)((unsigned long)inode_item +
+			offsetof(struct btrfs_inode_item, mtime));
+}
+
+/*
+ * Return @inode_item advanced by the byte offset of the ctime member of
+ * struct btrfs_inode_item. Nothing is dereferenced.
+ */
+static inline struct btrfs_timespec *
+btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
+{
+	return (struct btrfs_timespec *)((unsigned long)inode_item +
+			offsetof(struct btrfs_inode_item, ctime));
+}
+
+/*
+ * Return @inode_item advanced by the byte offset of the otime member of
+ * struct btrfs_inode_item. Nothing is dereferenced.
+ */
+static inline struct btrfs_timespec *
+btrfs_inode_otime(struct btrfs_inode_item *inode_item)
+{
+	return (struct btrfs_timespec *)((unsigned long)inode_item +
+			offsetof(struct btrfs_inode_item, otime));
+}
+
+/* struct btrfs_timespec */
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec,
+ sec, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec,
+ nsec, 32);
+
+/* struct btrfs_dev_extent */
+BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
+ chunk_tree, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
+ chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
+ chunk_offset, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_length, struct btrfs_dev_extent,
+ length, 64);
+
+/*
+ * Return @dev advanced by the byte offset of the chunk_tree_uuid member of
+ * struct btrfs_dev_extent. Nothing is dereferenced.
+ */
+static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+{
+	unsigned long pos = (unsigned long)dev;
+
+	pos += offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
+	return (u8 *)pos;
+}
+
+
+/* struct btrfs_extent_item */
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_flags, struct btrfs_extent_item, flags, 64);
+
+BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
+
+BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
+
+/*
+ * Read the key member of the struct btrfs_tree_block_info at position
+ * @item in @eb into @key.
+ */
+static inline void btrfs_tree_block_key(struct extent_buffer *eb,
+					struct btrfs_tree_block_info *item,
+					struct btrfs_disk_key *key)
+{
+	unsigned long start = (unsigned long)item +
+			      offsetof(struct btrfs_tree_block_info, key);
+
+	read_extent_buffer(eb, (char *)key, start, sizeof(item->key));
+}
+
+/*
+ * Write @key into the key member of the struct btrfs_tree_block_info at
+ * position @item in @eb.
+ */
+static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
+					    struct btrfs_tree_block_info *item,
+					    struct btrfs_disk_key *key)
+{
+	unsigned long start = (unsigned long)item +
+			      offsetof(struct btrfs_tree_block_info, key);
+
+	write_extent_buffer(eb, (char *)key, start, sizeof(item->key));
+}
+
+BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
+ root, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
+ objectid, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
+ offset, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
+ count, 32);
+
+BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
+ count, 32);
+
+BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
+ type, 8);
+BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
+ offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_type,
+ struct btrfs_extent_inline_ref, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_offset,
+ struct btrfs_extent_inline_ref, offset, 64);
+
+/*
+ * Return the size in bytes of an inline extent ref of the given key @type.
+ * Tree block and shared block refs are just the inline ref structure;
+ * shared data refs add a struct btrfs_shared_data_ref after it; extent data
+ * refs are a struct btrfs_extent_data_ref placed at the offset member of
+ * the inline ref. Any other type hits BUG().
+ */
+static inline u32 btrfs_extent_inline_ref_size(int type)
+{
+	switch (type) {
+	case BTRFS_TREE_BLOCK_REF_KEY:
+	case BTRFS_SHARED_BLOCK_REF_KEY:
+		return sizeof(struct btrfs_extent_inline_ref);
+	case BTRFS_SHARED_DATA_REF_KEY:
+		return sizeof(struct btrfs_shared_data_ref) +
+		       sizeof(struct btrfs_extent_inline_ref);
+	case BTRFS_EXTENT_DATA_REF_KEY:
+		return sizeof(struct btrfs_extent_data_ref) +
+		       offsetof(struct btrfs_extent_inline_ref, offset);
+	default:
+		BUG();
+		return 0;
+	}
+}
+
+/* struct btrfs_extent_ref_v0 */
+BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
+ generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
+
+/* struct btrfs_node */
+BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
+BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+
+/* Read the blockptr field of key pointer number @nr of the node in @eb. */
+static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
+{
+	unsigned long slot = offsetof(struct btrfs_node, ptrs);
+
+	slot += sizeof(struct btrfs_key_ptr) * nr;
+	return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)slot);
+}
+
+/* Store @val in the blockptr field of key pointer number @nr in @eb. */
+static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
+					   int nr, u64 val)
+{
+	unsigned long slot = offsetof(struct btrfs_node, ptrs);
+
+	slot += sizeof(struct btrfs_key_ptr) * nr;
+	btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)slot, val);
+}
+
+/* Read the generation field of key pointer number @nr of the node in @eb. */
+static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
+{
+	unsigned long slot = offsetof(struct btrfs_node, ptrs);
+
+	slot += sizeof(struct btrfs_key_ptr) * nr;
+	return btrfs_key_generation(eb, (struct btrfs_key_ptr *)slot);
+}
+
+/* Store @val in the generation field of key pointer number @nr in @eb. */
+static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
+						 int nr, u64 val)
+{
+	unsigned long slot = offsetof(struct btrfs_node, ptrs);
+
+	slot += sizeof(struct btrfs_key_ptr) * nr;
+	btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)slot, val);
+}
+
+/*
+ * Return the byte offset, from the start of struct btrfs_node, of key
+ * pointer number @nr.
+ */
+static inline unsigned long btrfs_node_key_ptr_offset(int nr)
+{
+	unsigned long slot = offsetof(struct btrfs_node, ptrs);
+
+	slot += sizeof(struct btrfs_key_ptr) * nr;
+	return slot;
+}
+
+/* Read the key of key pointer number @nr of the node in @eb into @disk_key. */
+static inline void btrfs_node_key(struct extent_buffer *eb,
+				  struct btrfs_disk_key *disk_key, int nr)
+{
+	struct btrfs_key_ptr *slot =
+		(struct btrfs_key_ptr *)btrfs_node_key_ptr_offset(nr);
+
+	read_eb_member(eb, slot, struct btrfs_key_ptr, key, disk_key);
+}
+
+/* Write @disk_key as the key of key pointer number @nr of the node in @eb. */
+static inline void btrfs_set_node_key(struct extent_buffer *eb,
+				      struct btrfs_disk_key *disk_key, int nr)
+{
+	struct btrfs_key_ptr *slot =
+		(struct btrfs_key_ptr *)btrfs_node_key_ptr_offset(nr);
+
+	write_eb_member(eb, slot, struct btrfs_key_ptr, key, disk_key);
+}
+
+/* struct btrfs_item */
+BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+
+/*
+ * Return the byte offset, from the start of struct btrfs_leaf, of item
+ * header number @nr.
+ */
+static inline unsigned long btrfs_item_nr_offset(int nr)
+{
+	unsigned long slot = offsetof(struct btrfs_leaf, items);
+
+	slot += sizeof(struct btrfs_item) * nr;
+	return slot;
+}
+
+/*
+ * Return the offset of item header number @nr, cast to a
+ * struct btrfs_item pointer. The value is an offset, not an address.
+ */
+static inline struct btrfs_item *btrfs_item_nr(int nr)
+{
+	unsigned long slot = btrfs_item_nr_offset(nr);
+
+	return (struct btrfs_item *)slot;
+}
+
+/* Return the offset field of @item plus its size field, both read from @eb. */
+static inline u32 btrfs_item_end(struct extent_buffer *eb,
+				 struct btrfs_item *item)
+{
+	u32 start = btrfs_item_offset(eb, item);
+	u32 len = btrfs_item_size(eb, item);
+
+	return start + len;
+}
+
+/* Return offset plus size of item number @nr in @eb. */
+static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+{
+	struct btrfs_item *item = btrfs_item_nr(nr);
+
+	return btrfs_item_end(eb, item);
+}
+
+/* Return the offset field of item number @nr in @eb. */
+static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
+{
+	struct btrfs_item *item = btrfs_item_nr(nr);
+
+	return btrfs_item_offset(eb, item);
+}
+
+/* Return the size field of item number @nr in @eb. */
+static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+{
+	struct btrfs_item *item = btrfs_item_nr(nr);
+
+	return btrfs_item_size(eb, item);
+}
+
+/* Read the key of item number @nr in @eb into @disk_key. */
+static inline void btrfs_item_key(struct extent_buffer *eb,
+				  struct btrfs_disk_key *disk_key, int nr)
+{
+	read_eb_member(eb, btrfs_item_nr(nr), struct btrfs_item, key, disk_key);
+}
+
+/* Write @disk_key as the key of item number @nr in @eb. */
+static inline void btrfs_set_item_key(struct extent_buffer *eb,
+				      struct btrfs_disk_key *disk_key, int nr)
+{
+	write_eb_member(eb, btrfs_item_nr(nr), struct btrfs_item, key, disk_key);
+}
+
+BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
+
+/*
+ * struct btrfs_root_ref
+ */
+BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16);
+
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_name_len, struct btrfs_root_ref, name_len, 16);
+
+/* struct btrfs_dir_item */
+BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16);
+
+/*
+ * Read the location member of the struct btrfs_dir_item at position @item
+ * in @eb into @key.
+ */
+static inline void btrfs_dir_item_key(struct extent_buffer *eb,
+				      struct btrfs_dir_item *item,
+				      struct btrfs_disk_key *key)
+{
+	unsigned long start = (unsigned long)item +
+			      offsetof(struct btrfs_dir_item, location);
+
+	read_extent_buffer(eb, (char *)key, start, sizeof(item->location));
+}
+
+/*
+ * Write @key into the location member of the struct btrfs_dir_item at
+ * position @item in @eb.
+ */
+static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
+					  struct btrfs_dir_item *item,
+					  struct btrfs_disk_key *key)
+{
+	unsigned long start = (unsigned long)item +
+			      offsetof(struct btrfs_dir_item, location);
+
+	write_extent_buffer(eb, (char *)key, start, sizeof(item->location));
+}
+
+/* struct btrfs_free_space_header */
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+ num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+ num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+ generation, 64);
+
+/*
+ * Read the location member of the struct btrfs_free_space_header at
+ * position @h in @eb into @key.
+ */
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+					struct btrfs_free_space_header *h,
+					struct btrfs_disk_key *key)
+{
+	unsigned long start = (unsigned long)h +
+			      offsetof(struct btrfs_free_space_header, location);
+
+	read_extent_buffer(eb, (char *)key, start, sizeof(h->location));
+}
+
+/*
+ * Write @key into the location member of the struct
+ * btrfs_free_space_header at position @h in @eb.
+ */
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+					    struct btrfs_free_space_header *h,
+					    struct btrfs_disk_key *key)
+{
+	unsigned long start = (unsigned long)h +
+			      offsetof(struct btrfs_free_space_header, location);
+
+	write_extent_buffer(eb, (char *)key, start, sizeof(h->location));
+}
+
+/* struct btrfs_disk_key */
+BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
+ objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
+
+/*
+ * Fill @cpu from @disk: objectid and offset are converted from
+ * little-endian to CPU order, type is copied unchanged.
+ */
+static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
+					 struct btrfs_disk_key *disk)
+{
+	cpu->objectid = le64_to_cpu(disk->objectid);
+	cpu->type = disk->type;
+	cpu->offset = le64_to_cpu(disk->offset);
+}
+
+/*
+ * Fill @disk from @cpu: objectid and offset are converted from CPU order
+ * to little-endian, type is copied unchanged.
+ */
+static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
+					 struct btrfs_key *cpu)
+{
+	disk->objectid = cpu_to_le64(cpu->objectid);
+	disk->type = cpu->type;
+	disk->offset = cpu_to_le64(cpu->offset);
+}
+
+/* Read the key of node slot @nr in @eb and store it in CPU order in @key. */
+static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
+					 struct btrfs_key *key, int nr)
+{
+	struct btrfs_disk_key raw;
+
+	btrfs_node_key(eb, &raw, nr);
+	btrfs_disk_key_to_cpu(key, &raw);
+}
+
+/* Read the key of item @nr in @eb and store it in CPU order in @key. */
+static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
+					 struct btrfs_key *key, int nr)
+{
+	struct btrfs_disk_key raw;
+
+	btrfs_item_key(eb, &raw, nr);
+	btrfs_disk_key_to_cpu(key, &raw);
+}
+
+/*
+ * Read the location key of dir item @item in @eb and store it in CPU order
+ * in @key.
+ */
+static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
+					     struct btrfs_dir_item *item,
+					     struct btrfs_key *key)
+{
+	struct btrfs_disk_key raw;
+
+	btrfs_dir_item_key(eb, item, &raw);
+	btrfs_disk_key_to_cpu(key, &raw);
+}
+
+
+/* Return the type field of @key. */
+static inline u8 btrfs_key_type(struct btrfs_key *key)
+{
+	u8 type = key->type;
+
+	return type;
+}
+
+/* Set the type field of @key to @val. */
+static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val)
+{
+	(*key).type = val;
+}
+
+/* struct btrfs_header */
+BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
+ generation, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
+BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems,
+ 32);
+BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
+ generation, 64);
+
+/* Return 1 if every bit of @flag is set in the header flags of @eb, else 0. */
+static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	u64 flags = btrfs_header_flags(eb);
+
+	return (flags & flag) == flag;
+}
+
+/*
+ * Set the bits of @flag in the header flags of @eb.
+ * Returns 1 if all of those bits were already set before the call, else 0.
+ */
+static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	u64 old = btrfs_header_flags(eb);
+	int was_set = (old & flag) == flag;
+
+	btrfs_set_header_flags(eb, old | flag);
+	return was_set;
+}
+
+/*
+ * Clear the bits of @flag in the header flags of @eb.
+ * Returns 1 if all of those bits were set before the call, else 0.
+ */
+static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	u64 old = btrfs_header_flags(eb);
+	int was_set = (old & flag) == flag;
+
+	btrfs_set_header_flags(eb, old & ~flag);
+	return was_set;
+}
+
+/*
+ * Return the header flags of @eb shifted right by
+ * BTRFS_BACKREF_REV_SHIFT.
+ */
+static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+{
+	return btrfs_header_flags(eb) >> BTRFS_BACKREF_REV_SHIFT;
+}
+
+/*
+ * Replace the bits covered by BTRFS_BACKREF_REV_MASK in the header flags
+ * of @eb with @rev shifted left by BTRFS_BACKREF_REV_SHIFT.
+ */
+static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
+						int rev)
+{
+	u64 kept = btrfs_header_flags(eb) & ~BTRFS_BACKREF_REV_MASK;
+
+	btrfs_set_header_flags(eb, kept | ((u64)rev << BTRFS_BACKREF_REV_SHIFT));
+}
+
+/* Return the byte offset of the fsid member within struct btrfs_header. */
+static inline unsigned long btrfs_header_fsid(void)
+{
+	unsigned long pos = offsetof(struct btrfs_header, fsid);
+
+	return pos;
+}
+
+/*
+ * Return the byte offset of the chunk_tree_uuid member within
+ * struct btrfs_header. @eb is not used.
+ */
+static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+{
+	unsigned long pos = offsetof(struct btrfs_header, chunk_tree_uuid);
+
+	return pos;
+}
+
+/*
+ * Return the byte offset of the fsid member within
+ * struct btrfs_super_block, cast to a u8 pointer. @eb is not used, so the
+ * result is an offset rather than an address inside @eb.
+ */
+static inline u8 *btrfs_super_fsid(struct extent_buffer *eb)
+{
+	return (u8 *)(unsigned long)offsetof(struct btrfs_super_block, fsid);
+}
+
+/*
+ * Return the byte offset of the csum member within struct btrfs_header,
+ * cast to a u8 pointer. @eb is not used, so the result is an offset rather
+ * than an address inside @eb.
+ */
+static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
+{
+	return (u8 *)(unsigned long)offsetof(struct btrfs_header, csum);
+}
+
+/* Placeholder: @eb is ignored and NULL is always returned. */
+static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb)
+{
+ return NULL;
+}
+
+/* Placeholder: @eb is ignored and NULL is always returned. */
+static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb)
+{
+ return NULL;
+}
+
+/* Placeholder: @eb is ignored and NULL is always returned. */
+static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb)
+{
+ return NULL;
+}
+
+/* Return 1 if the header level of @eb is 0, otherwise 0. */
+static inline int btrfs_is_leaf(struct extent_buffer *eb)
+{
+	return !btrfs_header_level(eb);
+}
+
+/* struct btrfs_root_item */
+BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
+BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
+BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
+ last_snapshot, 64);
+BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item,
+ generation_v2, 64);
+BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item,
+ ctransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item,
+ otransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
+ stransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
+ rtransid, 64);
+
+/* struct btrfs_root_backup */
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
+ tree_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup,
+ tree_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup,
+ tree_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup,
+ chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup,
+ chunk_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup,
+ chunk_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup,
+ extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup,
+ extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup,
+ extent_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup,
+ fs_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup,
+ fs_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup,
+ fs_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup,
+ dev_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup,
+ dev_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup,
+ dev_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup,
+ csum_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup,
+ csum_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup,
+ csum_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup,
+ total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
+ bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
+ num_devices, 64);
+
+/* struct btrfs_super_block */
+
+BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+ struct btrfs_super_block, sys_chunk_array_size, 32);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation,
+ struct btrfs_super_block, chunk_root_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
+ root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+ chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+ chunk_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
+ log_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block,
+ log_root_transid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
+ log_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
+ total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
+ bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
+ sectorsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
+ nodesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block,
+ leafsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
+ stripesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
+ root_dir_objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
+ num_devices, 64);
+BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block,
+ compat_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block,
+ compat_ro_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
+ incompat_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
+ csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+ cache_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
+ uuid_tree_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
+
+/*
+ * Return the btrfs_csum_sizes[] entry selected by the checksum type stored
+ * in super block @s.  A type that does not index into that table trips
+ * BUG_ON() before the lookup is made.
+ */
+static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
+{
+	int csum_type;
+
+	csum_type = btrfs_super_csum_type(s);
+	BUG_ON(csum_type >= ARRAY_SIZE(btrfs_csum_sizes));
+
+	return btrfs_csum_sizes[csum_type];
+}
+
+/*
+ * Offset of the items[] member inside struct btrfs_leaf.  The extent buffer
+ * argument @l is not consulted; the result is a constant.
+ */
+static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
+{
+	const unsigned long items_offset = offsetof(struct btrfs_leaf, items);
+
+	return items_offset;
+}
+
+/*
+ * struct btrfs_file_extent_item
+ *
+ * Accessors for the 'type' member (declared width 8).  The plain variant is
+ * named file_extent_type, the second one carries a stack_ prefix.
+ * NOTE(review): the generating macros are defined outside this excerpt;
+ * presumably the stack_ variant works on a struct pointer directly while the
+ * plain one goes through an extent buffer - confirm at the macro definitions.
+ */
+BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8);
+
+/*
+ * Position of the disk_bytenr member of file extent item @e, expressed as an
+ * unsigned long: the value of @e plus the member's offset in the struct.
+ * NOTE(review): the name suggests inline extent data begins at this
+ * position - confirm against callers.
+ */
+static inline unsigned long btrfs_file_extent_inline_start(struct
+						   btrfs_file_extent_item *e)
+{
+	return (unsigned long)e +
+	       offsetof(struct btrfs_file_extent_item, disk_bytenr);
+}
+
+/*
+ * Size of a file extent item holding @datasize bytes of payload: the part of
+ * struct btrfs_file_extent_item that precedes disk_bytenr, plus @datasize.
+ */
+static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
+{
+	const u32 header_len = offsetof(struct btrfs_file_extent_item,
+					disk_bytenr);
+
+	return header_len + datasize;
+}
+
+/*
+ * Accessors for the remaining members of struct btrfs_file_extent_item.
+ * Each invocation names the accessor, the struct, the member and the member
+ * width (8, 16 or 64).  Most members get both a plain and a stack_ variant;
+ * disk_num_bytes, encryption and other_encoding only get the plain one.
+ * NOTE(review): the macros themselves are defined outside this excerpt -
+ * check there for the exact functions each invocation generates.
+ */
+BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
+ disk_bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item,
+ disk_bytenr, 64);
+BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
+ disk_num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
+ offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item,
+ offset, 64);
+BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
+ num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item,
+ num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
+ ram_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item,
+ ram_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
+ compression, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item,
+ compression, 8);
+BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
+ encryption, 8);
+BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
+ other_encoding, 16);
+
+/*
+ * struct btrfs_qgroup_status_item
+ *
+ * Accessors for the version, generation, flags and scan members, all
+ * declared with width 64.  The first group uses the plain macro, the second
+ * group repeats the same members with stack_-prefixed names.
+ */
+BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
+ version, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
+ flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
+ scan, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_version,
+ struct btrfs_qgroup_status_item, version, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_generation,
+ struct btrfs_qgroup_status_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_flags,
+ struct btrfs_qgroup_status_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_scan,
+ struct btrfs_qgroup_status_item, scan, 64);
+
+/*
+ * struct btrfs_qgroup_info_item
+ *
+ * Accessors for the generation, referenced, referenced_compressed,
+ * exclusive and exclusive_compressed members, all declared with width 64.
+ * The first group uses the plain macro, the second group repeats the same
+ * members with stack_-prefixed names.
+ */
+BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_referenced, struct btrfs_qgroup_info_item,
+ referenced, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_referenced_compressed,
+ struct btrfs_qgroup_info_item, referenced_compressed, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_exclusive, struct btrfs_qgroup_info_item,
+ exclusive, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_exclusive_compressed,
+ struct btrfs_qgroup_info_item, exclusive_compressed, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
+ struct btrfs_qgroup_info_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced,
+ struct btrfs_qgroup_info_item, referenced, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced_compressed,
+ struct btrfs_qgroup_info_item, referenced_compressed, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive,
+ struct btrfs_qgroup_info_item, exclusive, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive_compressed,
+ struct btrfs_qgroup_info_item, exclusive_compressed, 64);
+
+/*
+ * struct btrfs_qgroup_limit_item
+ *
+ * Accessors for the flags, max_referenced, max_exclusive, rsv_referenced
+ * and rsv_exclusive members, all declared with width 64.  The first group
+ * uses the plain macro, the second group repeats the same members with
+ * stack_-prefixed names.
+ */
+BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
+ flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_referenced, struct btrfs_qgroup_limit_item,
+ max_referenced, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_exclusive, struct btrfs_qgroup_limit_item,
+ max_exclusive, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_referenced, struct btrfs_qgroup_limit_item,
+ rsv_referenced, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_exclusive, struct btrfs_qgroup_limit_item,
+ rsv_exclusive, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_flags,
+ struct btrfs_qgroup_limit_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_referenced,
+ struct btrfs_qgroup_limit_item, max_referenced, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_exclusive,
+ struct btrfs_qgroup_limit_item, max_exclusive, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_referenced,
+ struct btrfs_qgroup_limit_item, rsv_referenced, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_exclusive,
+ struct btrfs_qgroup_limit_item, rsv_exclusive, 64);
+
+/*
+ * Number of bytes the inline extent occupies on disk: the size of item @e
+ * as reported by btrfs_item_size(), less the part of
+ * struct btrfs_file_extent_item that precedes disk_bytenr (the extent
+ * header).  For a file that is compressed on disk this is the compressed
+ * size.
+ */
+static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
+						    struct btrfs_item *e)
+{
+	return btrfs_item_size(eb, e) -
+	       offsetof(struct btrfs_file_extent_item, disk_bytenr);
+}
+
+/*
+ * Number of file bytes represented by the inline item in @slot.
+ *
+ * When the extent carries any compression, encryption or other encoding the
+ * answer is the uncompressed size recorded in ram_bytes; otherwise it is the
+ * space the item uses on disk.
+ */
+static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+					       int slot,
+					       struct btrfs_file_extent_item *fi)
+{
+	/* encoded in some way: the on-disk size is not the file size */
+	if (btrfs_file_extent_compression(eb, fi) != 0 ||
+	    btrfs_file_extent_encryption(eb, fi) != 0 ||
+	    btrfs_file_extent_other_encoding(eb, fi) != 0)
+		return btrfs_file_extent_ram_bytes(eb, fi);
+
+	/* plain data: the on-disk payload length is the file length */
+	return btrfs_file_extent_inline_item_len(eb, btrfs_item_nr(slot));
+}
+
+/*
+ * Block size used at tree level @level of @root: root->leafsize for level 0,
+ * root->nodesize for every other level.
+ */
+static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
+{
+	return level == 0 ? root->leafsize : root->nodesize;
+}
+
+/*
+ * Test the incompat flags of the super block copy held by @fs_info.
+ * Returns 1 if any bit of @flag is set there, 0 otherwise.
+ */
+static inline int btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
+{
+	const u64 matched = btrfs_super_incompat_flags(fs_info->super_copy) &
+			    flag;
+
+	return matched != 0;
+}
+
+/*
+ * Test the compat_ro flags of the super block copy held by @fs_info.
+ * Returns 1 if any bit of @flag is set there, 0 otherwise.
+ */
+static inline int btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
+{
+	const u64 matched = btrfs_super_compat_ro_flags(fs_info->super_copy) &
+			    flag;
+
+	return matched != 0;
+}
+
+/*
+ * Helper to cast into the data area of the leaf.
+ *
+ * The value is btrfs_leaf_data(leaf) plus the offset of the item in @slot,
+ * cast to 'type *'.  btrfs_leaf_data() yields an offset inside
+ * struct btrfs_leaf rather than a memory address, so the result is an offset
+ * dressed up as a pointer.
+ * NOTE(review): presumably meant to be handed to the extent-buffer accessors
+ * and not dereferenced directly - confirm against callers.
+ */
+#define btrfs_item_ptr(leaf, slot, type) \
+ ((type *)(btrfs_leaf_data(leaf) + \
+ btrfs_item_offset_nr(leaf, slot)))
+
+/* Same computation as btrfs_item_ptr(), returned as an unsigned long. */
+#define btrfs_item_ptr_offset(leaf, slot) \
+ ((unsigned long)(btrfs_leaf_data(leaf) + \
+ btrfs_item_offset_nr(leaf, slot)))
+
+/* extent-tree.c */
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 empty_size,
+ u64 hint_byte, u64 search_end,
+ struct btrfs_key *ins, int data);
+int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+void btrfs_pin_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes);
+void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
+ u64 bytenr, u64 num_bytes);
+int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
+ btrfs_fs_info *info,
+ u64 bytenr);
+struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
+ btrfs_fs_info *info,
+ u64 bytenr);
+struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u32 blocksize, u64 root_objectid,
+ struct btrfs_disk_key *key, int level,
+ u64 hint, u64 empty_size);
+int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins, int data);
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 offset, int metadata, u64 *refs, u64 *flags);
+int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, int level, u64 flags);
+int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct extent_buffer *buf, int record_parent);
+int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct extent_buffer *buf, int record_parent);
+int btrfs_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 owner, u64 offset);
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_io_tree *unpin);
+int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner_objectid);
+int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 orig_parent, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner_objectid);
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+int btrfs_free_block_groups(struct btrfs_fs_info *info);
+int btrfs_read_block_groups(struct btrfs_root *root);
+struct btrfs_block_group_cache *
+btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
+ u64 chunk_objectid, u64 chunk_offset, u64 size);
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytes_used,
+ u64 type, u64 chunk_objectid, u64 chunk_offset,
+ u64 size);
+int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+int btrfs_update_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr, u64 num,
+ int alloc, int mark_free);
+int btrfs_record_file_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ struct btrfs_inode_item *inode,
+ u64 file_pos, u64 disk_bytenr,
+ u64 num_bytes);
+int btrfs_free_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
+void free_excluded_extents(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache);
+int exclude_super_stripes(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache);
+u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_fs_info *info, u64 start, u64 end);
+/* ctree.c */
+int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot);
+enum btrfs_tree_block_status
+btrfs_check_node(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
+ struct extent_buffer *buf);
+enum btrfs_tree_block_status
+btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
+ struct extent_buffer *buf);
+void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
+ int level, int slot, u64 objectid);
+struct extent_buffer *read_node_slot(struct btrfs_root *root,
+ struct extent_buffer *parent, int slot);
+int btrfs_previous_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid,
+ int type);
+int btrfs_previous_extent_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid);
+int btrfs_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf,
+ struct extent_buffer *parent, int parent_slot,
+ struct extent_buffer **cow_ret);
+int __btrfs_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer *parent, int parent_slot,
+ struct extent_buffer **cow_ret,
+ u64 search_start, u64 empty_size);
+int btrfs_copy_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer **cow_ret, u64 new_root_objectid);
+int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, u32 data_size);
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u32 new_size, int from_end);
+int btrfs_split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ unsigned long split_offset);
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_path *p, int
+ ins_len, int cow);
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+ u64 iobjectid, u64 ioff, u8 key_type,
+ struct btrfs_key *found_key);
+void btrfs_release_path(struct btrfs_path *p);
+void add_root_to_dirty_list(struct btrfs_root *root);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
+void btrfs_init_path(struct btrfs_path *p);
+int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int slot, int nr);
+
+/*
+ * Delete the single item that @path points at in its level-0 slot.
+ * Thin wrapper around btrfs_del_items() with a count of one; its return
+ * value is passed through unchanged.
+ */
+static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path)
+{
+	const int leaf_slot = path->slots[0];
+
+	return btrfs_del_items(trans, root, path, leaf_slot, 1);
+}
+
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, void *data, u32 data_size);
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size, int nr);
+
+/*
+ * Single-item form of btrfs_insert_empty_items(): passes @key and the
+ * address of @data_size with a count of one and returns that call's result.
+ */
+static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  struct btrfs_key *key,
+					  u32 data_size)
+{
+	u32 *sizes = &data_size;
+
+	return btrfs_insert_empty_items(trans, root, path, key, sizes, 1);
+}
+
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+/*
+ * Advance @p to the next item.  The level-0 slot is incremented first; if
+ * it still lies inside the current leaf (below the header's item count) the
+ * function returns 0, otherwise it hands over to btrfs_next_leaf() and
+ * returns that call's result.
+ */
+static inline int btrfs_next_item(struct btrfs_root *root,
+				  struct btrfs_path *p)
+{
+	p->slots[0]++;
+	if (p->slots[0] < btrfs_header_nritems(p->nodes[0]))
+		return 0;
+
+	return btrfs_next_leaf(root, p);
+}
+
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
+void btrfs_fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_disk_key *key, int level);
+int btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *new_key);
+void btrfs_set_item_key_unsafe(struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key);
+
+/* root-item.c */
+int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *tree_root,
+ u64 root_id, u8 type, u64 ref_id,
+ u64 dirid, u64 sequence,
+ const char *name, int name_len);
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_root_item
+ *item);
+int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_root_item
+ *item);
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
+ btrfs_root_item *item, struct btrfs_key *key);
+/* dir-item.c */
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, const char *name, int name_len, u64 dir,
+ struct btrfs_key *location, u8 type, u64 index);
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, int name_len,
+ int mod);
+struct btrfs_dir_item *btrfs_lookup_dir_index(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, int name_len,
+ u64 index, int mod);
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_dir_item *di);
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const char *name,
+ u16 name_len, const void *data, u16 data_len,
+ u64 dir);
+/* inode-map.c */
+int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
+ struct btrfs_root *fs_root,
+ u64 dirid, u64 *objectid);
+
+/* inode-item.c */
+int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, u64 index);
+int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, u64 objectid, struct btrfs_inode_item
+ *inode_item);
+int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path,
+ struct btrfs_key *location, int mod);
+struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_trans_handle
+ *trans, struct btrfs_path *path, struct btrfs_root *root,
+ u64 ino, u64 parent_ino, u64 index, const char *name,
+ int namelen, int ins_len);
+int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid,
+ u64 *index);
+int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, u64 index);
+struct btrfs_inode_ref *btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ const char *name, int namelen, u64 ino, u64 parent_ino,
+ u64 index, int ins_len);
+int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const char *name, int name_len,
+ u64 ino, u64 parent_ino, u64 *index);
+
+/* file-item.c */
+int btrfs_del_csums(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr, u64 len);
+int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 objectid, u64 pos, u64 offset,
+ u64 disk_num_bytes,
+ u64 num_bytes);
+int btrfs_insert_inline_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ u64 offset, char *buffer, size_t size);
+int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 alloc_end,
+ u64 bytenr, char *data, size_t len);
+int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ u64 isize);
+
+/* uuid-tree.c */
+int btrfs_lookup_uuid_subvol_item(int fd, const u8 *uuid, u64 *subvol_id);
+int btrfs_lookup_uuid_received_subvol_item(int fd, const u8 *uuid,
+ u64 *subvol_id);
+
+/*
+ * Returns 1 when @rootid is BTRFS_FS_TREE_OBJECTID or is at least
+ * BTRFS_FIRST_FREE_OBJECTID, 0 otherwise.  The range check is done on
+ * signed values, so ids with the top bit set compare as negative and are
+ * rejected.
+ */
+static inline int is_fstree(u64 rootid)
+{
+	if (rootid == BTRFS_FS_TREE_OBJECTID)
+		return 1;
+
+	return (signed long long)rootid >=
+	       (signed long long)BTRFS_FIRST_FREE_OBJECTID;
+}
+
+/* inode.c */
+int check_dir_conflict(struct btrfs_root *root, char *name, int namelen,
+ u64 dir, u64 index);
+int btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ u64 ino, u32 mode);
+int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ u64 ino, u64 parent_ino, char *name, int namelen,
+ u8 type, u64 *index, int add_backref);
+int btrfs_unlink(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ u64 ino, u64 parent_ino, u64 index, const char *name,
+ int namelen, int add_orphan);
+int btrfs_add_orphan_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ u64 ino);
+int btrfs_mkdir(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ char *name, int namelen, u64 parent_ino, u64 *ino, int mode);
+
+/* file.c */
+int btrfs_get_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 ino, u64 offset, u64 len, int ins_len);
+int btrfs_punch_hole(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 ino, u64 offset, u64 len);
+#endif
diff --git a/debian/btrfs-progs-udeb.install b/debian/btrfs-progs-udeb.install
new file mode 100644
index 00000000..3effab9e
--- /dev/null
+++ b/debian/btrfs-progs-udeb.install
@@ -0,0 +1,2 @@
+btrfs /bin
+mkfs.btrfs /bin
diff --git a/debian/btrfs-progs.install b/debian/btrfs-progs.install
new file mode 100644
index 00000000..b7293dbb
--- /dev/null
+++ b/debian/btrfs-progs.install
@@ -0,0 +1,3 @@
+btrfs-calc-size /bin
+btrfs-select-super /bin
+ioctl.h /usr/include/btrfs
diff --git a/debian/btrfs-progs.lintian-overrides b/debian/btrfs-progs.lintian-overrides
new file mode 100644
index 00000000..22abbb99
--- /dev/null
+++ b/debian/btrfs-progs.lintian-overrides
@@ -0,0 +1,4 @@
+# To avoid micro-packaging, btrfs-progs ships its library in the same
+# package, therefore overriding the following two lintian warnings.
+btrfs-progs: non-dev-pkg-with-shlib-symlink
+btrfs-progs: package-name-doesnt-match-sonames
diff --git a/debian/btrfs-progs.triggers b/debian/btrfs-progs.triggers
new file mode 100644
index 00000000..6c9f4543
--- /dev/null
+++ b/debian/btrfs-progs.triggers
@@ -0,0 +1 @@
+activate update-initramfs
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 00000000..290e1c44
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,672 @@
+btrfs-progs (4.4.1-1.1) unstable; urgency=medium
+
+ * Non-maintainer upload.
+ * New upstream release.
+ * Rename package to btrfs-progs (Closes: #780081)
+ * Update standards version to 3.9.7 (no changes needed).
+ * debian/control: Add "Breaks" per Gianfranco Costamagna's suggestion
+ * Change lintian override to reflect package rename
+ * Switch from using postinst and postrm to using triggers
+ per Christian Seiler's recommendation.
+
+ -- Nicholas D Steeves <nsteeves@gmail.com> Fri, 22 Apr 2016 19:41:30 -0400
+
+btrfs-tools (4.4-1) unstable; urgency=medium
+
+ * New upstream release.
+
+ -- Dimitri John Ledkov <xnox@ubuntu.com> Tue, 19 Jan 2016 19:03:41 +0000
+
+btrfs-tools (4.3-1) unstable; urgency=medium
+
+ * New upstream release.
+
+ -- Dimitri John Ledkov <dimitri.j.ledkov@linux.intel.com> Sat, 07 Nov 2015 11:27:13 +0000
+
+btrfs-tools (4.2.2-1) unstable; urgency=medium
+
+ * New upstream release.
+
+ -- Dimitri John Ledkov <dimitri.j.ledkov@linux.intel.com> Sat, 24 Oct 2015 00:09:28 +0100
+
+btrfs-tools (4.1.2-1) unstable; urgency=medium
+
+ * New upstream release.
+
+ -- Dimitri John Ledkov <dimitri.j.ledkov@linux.intel.com> Sun, 30 Aug 2015 00:18:54 +0100
+
+btrfs-tools (4.0-2) unstable; urgency=medium
+
+ * Adjust initramfs hooks and udev rules for move of all binaries to
+ /bin. (Closes: #784234)
+
+ -- Dimitri John Ledkov <dimitri.j.ledkov@linux.intel.com> Mon, 04 May 2015 18:51:07 +0100
+
+btrfs-tools (4.0-1) unstable; urgency=medium
+
+ * New upstream release.
+ * Drop all patches.
+ * Experiment with dropping udev rules, in favour of systemd-udev builtin
+ integration. (Closes: #772744)
+ * Move all binaries to /bin (Closes: #770806)
+ * Switch to autoconf based build system.
+
+ -- Dimitri John Ledkov <dimitri.j.ledkov@linux.intel.com> Sun, 18 Jan 2015 00:14:20 +0000
+
+btrfs-tools (3.17-1.1) unstable; urgency=medium
+
+ * Non-maintainer upload.
+ * Add 0002-Fix-linking-with-libbtrfs.patch from upstream, to properly
+ export all the previously exported API (Closes: #768746)
+ * Add 0003-Make-headers-C++-compatible.patch from upstream, making the
+ new headers C++-compatible.
+
+ -- Nicolas Dandrimont <olasd@debian.org> Sat, 22 Nov 2014 14:52:06 +0100
+
+btrfs-tools (3.17-1) unstable; urgency=medium
+
+ * New upstream release.
+
+ -- Dimitri John Ledkov <dimitri.j.ledkov@linux.intel.com> Thu, 23 Oct 2014 22:04:07 +0100
+
+btrfs-tools (3.16.2-1) unstable; urgency=medium
+
+ * New upstream release.
+ * Drop patches (not needed, or upstreamed), except unaligned memory access.
+ * Update upstream location in uscan, to pull release by kdave.
+ * Use upstream fsck.btrfs script.
+
+ -- Dimitri John Ledkov <xnox@ubuntu.com> Sat, 11 Oct 2014 16:54:05 +0100
+
+btrfs-tools (3.16-1) unstable; urgency=medium
+
+ * New upstream release.
+ * Add asciidoc & xmlto build dependencies.
+ * Disable test-suites for now, should be run as autopkgtests.
+
+ -- Dimitri John Ledkov <xnox@ubuntu.com> Sun, 21 Sep 2014 02:05:14 +0100
+
+btrfs-tools (3.14.1-1) unstable; urgency=medium
+
+ * New upstream release.
+ * Switch to git-dpm.
+ * Rebase and cleanup patches.
+
+ -- Dimitri John Ledkov <xnox@debian.org> Sat, 19 Apr 2014 12:12:11 +0100
+
+btrfs-tools (3.12-1) unstable; urgency=low
+
+ [ Julien Muchembled ]
+ * New upstream release. (Closes: #732075, #732533)
+ * Drop patches merged upstream:
+ - 07-manpage2.patch
+ - 10-soname.patch
+ - Btrfs-progs-fix-wrong-arg-sb_bytenr-for-btrfs_scan_fs_devices.patch
+ * Refresh patches. (Closes: #719072)
+
+ [ Dimitri John Ledkov ]
+ * Add watch file.
+ * Bump standards version to 3.9.5.
+
+ -- Dimitri John Ledkov <xnox@ubuntu.com> Wed, 01 Jan 2014 01:52:43 +0000
+
+btrfs-tools (0.19+20130705-3) unstable; urgency=low
+
+ * Import patch by Shilong Wang to resolve btrfs-convert (Closes:
+ #724265) (LP: #1228302)
+
+ -- Dmitrijs Ledkovs <xnox@ubuntu.com> Fri, 18 Oct 2013 23:47:16 +0100
+
+btrfs-tools (0.19+20130705-2) unstable; urgency=low
+
+ * Copy btrfs-zero-log into initramfs, useful for recovering (Closes: #721857)
+
+ -- Dmitrijs Ledkovs <dmitrij.ledkov@ubuntu.com> Mon, 16 Sep 2013 14:54:54 +0100
+
+btrfs-tools (0.19+20130705-1) unstable; urgency=low
+
+ * New upstream snapshot. (Closes: #720932)
+ * Refresh patches.
+ * btrfsctl is gone, and calc-size is prefixed with btrfs- upstream now.
+ * Enable verbose build-log.
+
+ -- Dmitrijs Ledkovs <dmitrij.ledkov@ubuntu.com> Mon, 26 Aug 2013 19:14:51 +0100
+
+btrfs-tools (0.19+20130315-5) unstable; urgency=low
+
+ * Provide "proper" fsck.btrfs, similar in spirit to fsck.xfs, that
+ redirects users to use `btrfs check` and `btrfs restore` and exits
+ with 0. (Closes: #715242) (Closes: #716989)
+ Ideally, all btrfs mount points should be specified with fs_passno 0
+ in fstab. If fstab is migrated to that, I'd want to consider dropping
+ fsck.btrfs wrapper.
+
+ -- Dmitrijs Ledkovs <xnox@debian.org> Wed, 31 Jul 2013 14:52:17 +0100
+
+btrfs-tools (0.19+20130315-4) unstable; urgency=low
+
+ * Drop grub.d hacks, as initscripts correctly skips btrfs volumes for
+ now. See bug #701956. And force remove hack on upgrade.
+ * Conditionally include fsck.btrfs, if there is no generic fsck hook (Closes: #717867)
+ * Install libraries in a multi-arch location. (Closes: #717868)
+ * Drop dh_builddeb compression override to xz, use dpkg default instead,
+ which is xz now anyway.
+
+ -- Dmitrijs Ledkovs <xnox@debian.org> Mon, 29 Jul 2013 14:51:19 +0100
+
+btrfs-tools (0.19+20130315-3) unstable; urgency=low
+
+ * New maintainer (Closes: #717564)
+ - Thanks a lot to Luk and Daniel for previous
+ maintainership. Co-maintenance is welcome!
+ * Include fsck.btrfs in the initramfs
+ * Use correct compiler, when cross-compiling
+ * Default to act like fsck. (Closes: #712078)
+ * Apply a gross hack to set fastboot for grub, if root is on btrfs
+ because of bug 701936. One still needs to manually run update-grub.
+
+ -- Dmitrijs Ledkovs <xnox@debian.org> Tue, 23 Jul 2013 16:12:29 +0100
+
+btrfs-tools (0.19+20130315-2) unstable; urgency=low
+
+ * Fix unaligned access issue thanks to Holger Fischer.
+
+ -- Luk Claes <luk@debian.org> Mon, 20 May 2013 11:01:16 +0200
+
+btrfs-tools (0.19+20130315-1) experimental; urgency=low
+
+ * New upstream version.
+ - Drop 01-labels.patch
+ - Update patches
+ - Drop 05-gzip.patch
+ - Drop 06-armel.patch
+ - Drop 08-fsck.patch
+ - Drop 10-loop.patch
+ * Add build dependency on libblkid-dev and liblzo2-dev.
+ * Add lintian overrides as it does not make sense to split this
+ small a package.
+ * Set soname in 10-soname.patch.
+ * Uploading to experimental as the unaligned access issue still
+ needs to be fixed.
+
+ -- Luk Claes <luk@debian.org> Tue, 14 May 2013 15:23:45 +0200
+
+btrfs-tools (0.19+20130131-3+really20121004-1) unstable; urgency=low
+
+ * Taking over maintenance with maintainer's consent.
+ * Reverting to previous upstream (Closes: #701936).
+
+ -- Luk Claes <luk@debian.org> Sun, 10 Mar 2013 22:28:39 +0100
+
+btrfs-tools (0.19+20130131-3) unstable; urgency=low
+
+ * Also ignoring fsck options -y, -C, and -V in btrfs check.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 08 Mar 2013 13:31:09 +0100
+
+btrfs-tools (0.19+20130131-2) unstable; urgency=low
+
+ * Replacing fsck.btrfs with wrapper around 'btrfs check' to avoid
+ different behaviour based on the filename btrfs is copied to (Closes:
+ #701776).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Wed, 27 Feb 2013 18:36:05 +0100
+
+btrfs-tools (0.19+20130131-1) unstable; urgency=low
+
+ * Merging upstream version 0.19+20130131.
+ * Updating copyright file for new upstream version.
+ * Updating years in copyright file.
+ * Prefixing patches with four digits.
+ * Trimming diff headers in patches.
+ * Dropping dpkg-source compression levels.
+ * Moving udev rules from 60-btrfs.rules to 70-btrfs.rules (Closes:
+ #701642).
+ * Dropping labels.patch, included upstream.
+ * Rediffing manpage.patch.
+ * Rediffing linker.patch.
+ * Dropping gzip.patch, not needed anymore.
+ * Dropping armel.patch, included upstream.
+ * Rediffing manpage2.patch.
+ * Dropping fsck.patch, not needed anymore.
+ * Rediffing unaligned-memaccess.patch.
+ * Dropping loop.patch, included upstream.
+ * Renumbering patches.
+ * Adding libblkid-dev to build-depends.
+ * Including show-blocks.
+ * Including bcp.
+ * Adding suggests to python.
+ * Adding lintian overrides.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Tue, 26 Feb 2013 12:12:39 +0100
+
+btrfs-tools (0.19+20121004-2) unstable; urgency=low
+
+ * Adding dpkg-source local options.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Tue, 11 Dec 2012 21:17:43 +0100
+
+btrfs-tools (0.19+20121004-1) unstable; urgency=low
+
+ * Updating standards version to 3.9.4.
+ * Merging upstream version 0.19+20121004.
+ * Rediffing linker.patch.
+ * Rediffing manpage2.patch.
+ * Rediffing fsck.patch.
+ * Adding patch from Nirbheek Chauhan <nirbheek.chauhan@collabora.co.uk>
+ to avoid truncating filenames in loopdevice images (Closes: #691235).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 10 Dec 2012 11:09:05 +0100
+
+btrfs-tools (0.19+20120328-8) unstable; urgency=low
+
+ * Adding udev rule for btrfs on lvm, thanks to Daniel Pocock
+ <daniel@pocock.com.au> for testing (Closes: #685311).
+ * Adding postrm to rebuild initrd without btrfs on remove.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Tue, 21 Aug 2012 11:49:31 +0200
+
+btrfs-tools (0.19+20120328-7) unstable; urgency=low
+
+ * Using official package-type for udeb packages in control now.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 29 Jun 2012 17:56:44 +0200
+
+btrfs-tools (0.19+20120328-6) unstable; urgency=low
+
+ * Harmonize coding style for error redirection in initramfs-tools
+ local-premount script.
+ * Switching to xz compression.
+ * Updating GPL boilerplate in copyright file.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 29 Jun 2012 03:48:36 +0200
+
+btrfs-tools (0.19+20120328-5) unstable; urgency=low
+
+ * Adding postinst for updating initramfs (Closes: #678967).
+ * Adding updated patch from Shawn Landden <shawnlandden@gmail.com> to
+ fix unaligned memory accesses (Closes: #656955).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 25 Jun 2012 23:09:09 +0200
+
+btrfs-tools (0.19+20120328-4) unstable; urgency=low
+
+ * Adding patch from Shawn Landden <shawnlandden@gmail.com> to fix
+ unaligned memory accesses (Closes: #656955).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Thu, 21 Jun 2012 13:05:12 +0200
+
+btrfs-tools (0.19+20120328-3) unstable; urgency=low
+
+ * Adding udev rule to automatically call 'btrfs scan' upon discovering
+ btrfs devices in order to allow using btrfs raid devices
+ automatically (Closes: #585568, #634658, #653632).
+ * Adding patch to update btrfs manpage to match /sbin/btrfs (Closes:
+ #638778, #642302).
+ * Also including /sbin/btrfs in udeb.
+ * Adding patch from Sten Heinze <shze@gmx.de> to ignore all arguments
+ starting with -a in btrfsck for the time being (Closes: #567681,
+ #571300, #612809, #668832).
+ * Adding back symlink for fsck.btrfs.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Wed, 13 Jun 2012 16:00:13 +0200
+
+btrfs-tools (0.19+20120328-2) unstable; urgency=low
+
+ * Updating compression handling for udeb with newer debhelper (Closes:
+ #673283).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 18 May 2012 07:50:12 +0200
+
+btrfs-tools (0.19+20120328-1) unstable; urgency=low
+
+ * Including ioctl.h in btrfs-tools (Closes: #658471).
+ * Merging upstream version 0.19+20120328.
+ * Updating to debhelper version 9.
+ * Updating to standards version 3.9.3.
+ * Updating copyright file machine-readable format version 1.0.
+ * Shortening manual build targets now that upstreams makefile has been
+ updated.
+ * Adding patch from Donald Gordon <don@dis.org.nz> to correct getopt
+ (Closes: #665434).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 16 Apr 2012 08:59:50 +0000
+
+btrfs-tools (0.19+20111105-2) unstable; urgency=low
+
+ * Using btrfs instead of btrfsctl in initramfs (Closes: #650614).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 10 Dec 2011 10:33:06 +0100
+
+btrfs-tools (0.19+20111105-1) unstable; urgency=low
+
+ * Switching architecture fields to linux-any.
+ * Adding patch to call gzip with full path when creating manpages to
+ fix FTBFS on squeeze.
+ * Using compression level 9 also for binary packages.
+ * Merging upstream version 0.19+20111105.
+ * Updating year in copyright file.
+ * Sorting overrides in rules alphabetically.
+ * Prefixing variables in rules to avoid clashes.
+ * Rediffing labels.patch.
+ * Rediffing ftbfs.patch.
+ * Rediffing linker.patch.
+ * Removing gcc-4.6.patch, not required anymore.
+ * Renumbering patches.
+ * Avoid polluting namespace by prefixing some tools with btrfs.
+ * Updating list of additional tools to be built.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 19 Nov 2011 03:15:21 +0100
+
+btrfs-tools (0.19+20101101-2) unstable; urgency=low
+
+ * Updating maintainer and uploaders fields.
+ * Removing vcs fields.
+ * Removing references to my old email address.
+ * Updating year in copyright file.
+ * Updating to standards version 3.9.2.
+ * Compacting copyright file.
+ * Dropping btrfs.modules, went into initramfs-tools (Closes: #612642).
+ * Adding patch from Luk Claes <luk@debian.org> to fix FTBFS with --no-
+ add-needed (Closes: #554059).
+ * Adding patch from Jonathan Nieder <jrnieder@gmail.com> to fix FTBFS
+ with gcc-4.6 (Closes: #616296).
+ * Simplifying build and install overrides in rules.
+ * Applying slightly modified patch from Jonathan Nieder
+ <jrnieder@gmail.com> to fix CFLAGS handling in rules, see #616296.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sun, 28 Aug 2011 09:31:25 +0200
+
+btrfs-tools (0.19+20101101-1) experimental; urgency=low
+
+ * Merging upstream version 0.19+20101101.
+ * Switching to source format 3.0 (quilt).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 20 Nov 2010 14:27:29 +0100
+
+btrfs-tools (0.19+20100909-2) experimental; urgency=low
+
+ * Update references to btrfs-tools instead of btrfs-progs (upstream name)
+ in manpages (Closes: #599111).
+ * Removing warnings about experimental status.
+ * Updating package to debhelper 8.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 04 Oct 2010 21:28:58 +0200
+
+btrfs-tools (0.19+20100909-1) experimental; urgency=low
+
+ * Correcting credits for glibc.patch in patch file and changelog entry
+ to David Sugar <david.sugar@canonical.com>.
+ * Updating standards version to 3.9.0.
+ * Merging upstream version 0.19+20100909.
+ * Removing glibc.patch, merged upstream.
+ * Updating standards version to 3.9.1.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 25 Sep 2010 18:59:07 +0200
+
+btrfs-tools (0.19+20100601-3) unstable; urgency=low
+
+ * Updating year in copyright file.
+ * Adding overrides for clean target to make sure package builds twice
+ in a row (Closes: #585074).
+ * Sorting dh call in rules to more common order.
+ * Adding patch from David Sugar <david.sugar@canonical.com> to fix FTBFS
+ with glibc 2.12 (Closes: #586111).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sun, 20 Jun 2010 21:22:57 +0200
+
+btrfs-tools (0.19+20100601-2) unstable; urgency=low
+
+ * Updating ftbfs.patch, thanks to Alexander Kurtz
+ <kurtz.alex@googlemail.com> (Closes: #583768).
+ * Adding two more casts in ftbfs.patch and fixing wrapping in previous
+ ones.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 05 Jun 2010 08:50:05 +0200
+
+btrfs-tools (0.19+20100601-1) unstable; urgency=low
+
+ * Updating patch to fix FTBFS on alpha and ia64, thanks to Alexander
+ Kurtz <kurtz.alex@googlemail.com> (Closes: #583768).
+ * Renumbering patches.
+ * Merging upstream version 0.19+20100601 (Closes: #583793).
+ * Removing upstream patches.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Tue, 01 Jun 2010 07:27:38 +0200
+
+btrfs-tools (0.19-12) unstable; urgency=low
+
+ * Setting section for udeb to debian-installer.
+ * Disabling valgrind patch, seems to make troubles on armel (Closes:
+ #582438).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 24 May 2010 12:57:36 +0200
+
+btrfs-tools (0.19-11) unstable; urgency=low
+
+ * Removing fsck.btrfs symlink to circumvent that btrfsck doesn't
+ support -a.
+ * Adding initramfs-tools integration (Closes: #559710).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sun, 04 Apr 2010 17:42:30 +0200
+
+btrfs-tools (0.19-10) unstable; urgency=low
+
+ * Updating to standards 3.8.4.
+ * Including btrfsctl into udeb.
+ * Adding upstream patches to add support for deleting snapshots and
+ subvolumes (Closes: #567171).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 06 Feb 2010 08:49:55 +0100
+
+btrfs-tools (0.19-9) unstable; urgency=low
+
+ * Adding explicit source version 1.0 until switch to 3.0.
+ * Updating year in copyright file.
+ * Adding patch from Joey Hess <joeyh@debian.org> to add btrfs-tools-
+ udeb (Closes: #567075).
+ * Adding patch from Joey Hess <joeyh@debian.org> to improve package
+ short-descriptions.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Wed, 27 Jan 2010 05:31:57 +0100
+
+btrfs-tools (0.19-8) unstable; urgency=low
+
+ * Adding patch from Josef Bacik <jbacik@redhat.com> to fix memory
+ leaks and segfaults.
+ * Simplifying shell commands in rules.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 05 Dec 2009 17:20:06 +0100
+
+btrfs-tools (0.19-7) unstable; urgency=low
+
+ * Moving tools from /usr/bin to /sbin (Closes: #559477).
+ * Making build-depends on quilt versioned.
+ * Cosmetically improve shell commands in rules.
+ * Updating README.source.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 04 Dec 2009 20:54:42 +0100
+
+btrfs-tools (0.19-6) unstable; urgency=low
+
+ * Correcting wrong vcs-browser field.
+ * Updating package long-description (Closes: #552553).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Wed, 28 Oct 2009 20:24:25 +0100
+
+btrfs-tools (0.19-5) unstable; urgency=low
+
+ * Adding debug package.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 02 Oct 2009 19:40:09 +0200
+
+btrfs-tools (0.19-4) unstable; urgency=low
+
+ * Updating maintainer field.
+ * Updating vcs fields.
+ * Sorting build-depends and depends.
+ * Updating package to standards version 3.8.3.
+ * Bumping versioned build-depends on debhelper.
+ * Updating years in copyright file.
+ * Adding README.source.
+ * Adding maintainer homepage in copyright.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 02 Oct 2009 19:06:02 +0200
+
+btrfs-tools (0.19-3) unstable; urgency=low
+
+ * Minimizing rules file.
+ * Adding patch from Luca Bruno <lucab@debian.org> to properly cast and
+ avoiding compiler warnings. Fixes FTBFS on alpha and ia64 (Closes:
+ #539433).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sun, 02 Aug 2009 12:08:49 +0200
+
+btrfs-tools (0.19-2) unstable; urgency=low
+
+ * Building and installing alternative utilities (Closes: #536058).
+ * Adding symlink from btrfsck to fsck.btrfs.
+ * Correcting spelling typo in package long-description.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Tue, 07 Jul 2009 14:56:40 +0200
+
+btrfs-tools (0.19-1) unstable; urgency=low
+
+ * Merging upstream version 0.19.
+ * Removing ftbfs.patch, went upstream.
+ * Updating standards version to 3.8.2.
+ * Updating rules to cope with new upstream manpage makefiles.
+ * Workaround upstream makefiles wrt/ manpage compression.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 06 Jul 2009 14:22:03 +0200
+
+btrfs-tools (0.18-4) unstable; urgency=low
+
+ * Using correct rfc-2822 date formats in changelog.
+ * Including btrfs-convert, thanks to Uwe Kleine-König <u.kleine-
+ koenig@pengutronix.de> (Closes: #529168).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sun, 17 May 2009 22:18:27 +0200
+
+btrfs-tools (0.18-3) unstable; urgency=low
+
+ * Tidy rules file.
+ * Adding patch from upstream to allow /'s in labels.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 17 Apr 2009 21:02:00 +0200
+
+btrfs-tools (0.18-2) unstable; urgency=medium
+
+ * Adding patch from Luca Bruno <lucab@debian.org> to fix FTBFS on
+ alpha and ia64 (Closes: #519913).
+ * Updating rules file to current state of the art.
+ * Updating standards version to 3.8.1.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Tue, 17 Mar 2009 08:22:00 +0100
+
+btrfs-tools (0.18-1) unstable; urgency=low
+
+ * Merging upstream version 0.18.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 24 Jan 2009 15:59:00 +0100
+
+btrfs-tools (0.17-1) unstable; urgency=low
+
+ * Updating vcs fields in control file.
+ * Replacing obsolete dh_clean -k with dh_prep.
+ * Merging upstream version 0.17.
+ * Removing notes about not finalized on disk format.
+ * Prefixing debhelper files with package name.
+ * Updating year in copyright file.
+ * Adding zlib to build-depends.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sat, 24 Jan 2009 15:50:00 +0100
+
+btrfs-tools (0.16-1) unstable; urgency=low
+
+ * Removing cflags handling in rules, not required anymore.
+ * Merging upstream version 0.16.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 25 Aug 2008 01:04:00 +0200
+
+btrfs-tools (0.15-2) unstable; urgency=medium
+
+ * Building on all architectures again since FTBFS on ia64 is gone.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Tue, 01 Jul 2008 15:07:00 +0200
+
+btrfs-tools (0.15-1) unstable; urgency=low
+
+ * Merging upstream version 0.15.
+ * Updating to standards 3.8.0.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 09 Jun 2008 15:01:00 +0200
+
+btrfs-tools (0.14-1) unstable; urgency=low
+
+ * Reordering rules file.
+ * Rewriting copyright file in machine-interpretable format.
+ * Updated warnings about early state of btrfs.
+ * Adding vcs fields in control file.
+ * Updating homepage field in control file.
+ * Upgrading package to debhelper 7.
+ * Merging upstream version 0.14.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sun, 04 May 2008 10:04:00 +0200
+
+btrfs-tools (0.13-2) unstable; urgency=medium
+
+ * Excluding alpha and ia64 from architectures, doesn't build there
+ (Closes: #465128).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 07 Mar 2008 12:04:00 +0100
+
+btrfs-tools (0.13-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 25 Feb 2008 20:39:00 +0100
+
+btrfs-tools (0.12-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 08 Feb 2008 17:52:00 +0100
+
+btrfs-tools (0.11-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Mon, 28 Jan 2008 12:58:00 +0100
+
+btrfs-tools (0.10-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Thu, 17 Jan 2008 16:23:00 +0100
+
+btrfs-tools (0.9-1) unstable; urgency=low
+
+ * New upstream release.
+ * Updated standards.
+ * Removing watch file (Closes: #450102).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Thu, 20 Dec 2007 08:10:00 +0100
+
+btrfs-tools (0.8-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Fri, 14 Sep 2007 11:07:00 +0200
+
+btrfs-tools (0.6-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Wed, 08 Aug 2007 20:33:00 +0200
+
+btrfs-tools (0.5-1) unstable; urgency=low
+
+ * Initial release (Closes: #432697).
+
+ -- Daniel Baumann <daniel.baumann@progress-technologies.net> Sun, 29 Jul 2007 12:19:00 +0200
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 00000000..ec635144
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/debian/control b/debian/control
new file mode 100644
index 00000000..fc8c9489
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,65 @@
+Source: btrfs-progs
+Section: admin
+Priority: optional
+Maintainer: Dimitri John Ledkov <xnox@debian.org>
+Build-Depends: debhelper (>= 9),
+ e2fslibs-dev,
+ pkg-config,
+ libacl1-dev,
+ libblkid-dev,
+ liblzo2-dev,
+ uuid-dev,
+ zlib1g-dev,
+ asciidoc,
+ xmlto,
+Standards-Version: 3.9.7
+Homepage: http://btrfs.wiki.kernel.org/
+
+Package: btrfs-progs
+Architecture: linux-any
+Depends: ${misc:Depends}, ${shlibs:Depends}
+Replaces: btrfs-tools (<< 4.4.1-1)
+Breaks: btrfs-tools (<< 4.4.1-1)
+Description: Checksumming Copy on Write Filesystem utilities
+ Btrfs is a new copy on write filesystem for Linux aimed at implementing
+ advanced features while focusing on fault tolerance, repair and easy
+ administration.
+ .
+ This package contains utilities (mkfs, fsck) used to work with btrfs
+ and a utility (btrfs-convert) to make a btrfs filesystem from an ext3.
+
+Package: btrfs-tools
+Section: oldlibs
+Architecture: linux-any
+Depends: btrfs-progs, ${misc:Depends}, ${shlibs:Depends}
+Description: transitional dummy package
+ This is a transitional dummy package. It can safely be removed.
+
+Package: btrfs-progs-udeb
+Package-Type: udeb
+Section: debian-installer
+Architecture: linux-any
+Depends: ${misc:Depends}, ${shlibs:Depends}
+Description: Checksumming Copy on Write Filesystem utilities (udeb)
+
+Package: btrfs-progs-dbg
+Section: debug
+Priority: extra
+Architecture: linux-any
+Depends: btrfs-progs (= ${binary:Version}), ${misc:Depends}
+Replaces: btrfs-tools-dbg (<< 4.4.1-1)
+Breaks: btrfs-tools-dbg (<< 4.4.1-1)
+Description: Checksumming Copy on Write Filesystem utilities (debug)
+ Btrfs is a new copy on write filesystem for Linux aimed at implementing
+ advanced features while focusing on fault tolerance, repair and easy
+ administration.
+ .
+ This package contains the debugging symbols.
+
+Package: btrfs-tools-dbg
+Section: oldlibs
+Priority: extra
+Architecture: linux-any
+Depends: btrfs-progs-dbg, ${misc:Depends}
+Description: transitional dummy package
+ This is a transitional dummy package. It can safely be removed.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 00000000..52c8462b
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,41 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Contact: linux-btrfs@vger.kernel.org
+Source: http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs.git
+
+Files: *
+Copyright: 2007-2012 Oracle <http://www.oracle.com/>
+License: GPL-2+
+
+Files: radix-tree.c
+Copyright: 2007-2009 Oracle <http://www.oracle.com/>
+ 2001 Momchil Velikov
+ 2001 Christoph Hellwig
+ 2005 SGI, Christoph Lameter <clameter@sgi.com>
+License: GPL-2+
+
+Files: radix-tree.h
+Copyright: 2007-2009 Oracle <http://www.oracle.com/>
+ 2001 Momchil Velikov
+ 2001 Christoph Hellwig
+License: GPL-2+
+
+Files: debian/*
+Copyright: 2007-2012 Daniel Baumann <daniel.baumann@progress-technologies.net>
+License: GPL-2+
+
+License: GPL-2+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+ .
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ .
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ .
+ The complete text of the GNU General Public License
+ can be found in /usr/share/common-licenses/GPL-2 file.
diff --git a/debian/local/btrfs-lvm.udev b/debian/local/btrfs-lvm.udev
new file mode 100644
index 00000000..938253b7
--- /dev/null
+++ b/debian/local/btrfs-lvm.udev
@@ -0,0 +1,6 @@
+SUBSYSTEM!="block", GOTO="btrfs_lvm_end"
+ENV{DM_UUID}!="LVM-?*", GOTO="btrfs_lvm_end"
+RUN+="/sbin/modprobe btrfs"
+RUN+="/bin/btrfs device scan $env{DEVNAME}"
+
+LABEL="btrfs_lvm_end"
diff --git a/debian/local/btrfs.hook b/debian/local/btrfs.hook
new file mode 100644
index 00000000..d31199cb
--- /dev/null
+++ b/debian/local/btrfs.hook
@@ -0,0 +1,29 @@
+#!/bin/sh
+# initramfs-tools hook: copies the btrfs tools into the initramfs when /bin/btrfs is installed.
+set -e
+
+PREREQ=""
+
+prereqs()
+{
+ echo "${PREREQ}"
+}
+
+case "${1}" in
+ prereqs) # print the (empty) prerequisite list and stop
+ prereqs
+ exit 0
+ ;;
+esac
+
+. /usr/share/initramfs-tools/hook-functions
+
+if [ -x /bin/btrfs ]
+then
+ copy_exec /bin/btrfs /bin
+ copy_exec /bin/btrfs-zero-log
+ if [ ! -x /usr/share/initramfs-tools/hooks/fsck ] && [ ! -x /etc/initramfs-tools/hooks/fsck ] # only when no fsck hook is installed
+ then
+ copy_exec /bin/fsck.btrfs /bin
+ fi
+fi
diff --git a/debian/local/btrfs.local-premount b/debian/local/btrfs.local-premount
new file mode 100644
index 00000000..7f7bf27c
--- /dev/null
+++ b/debian/local/btrfs.local-premount
@@ -0,0 +1,23 @@
+#!/bin/sh
+# initramfs local-premount script: loads the btrfs module and runs "btrfs device scan".
+set -e
+
+PREREQ=""
+
+prereqs()
+{
+ echo "${PREREQ}"
+}
+
+case "${1}" in
+ prereqs) # print the (empty) prerequisite list and stop
+ prereqs
+ exit 0
+ ;;
+esac
+
+if [ -x /bin/btrfs ] # do nothing when the btrfs tool is not available
+then
+ modprobe btrfs
+ /bin/btrfs device scan 2> /dev/null # scan with error output discarded
+fi
diff --git a/debian/local/btrfs.udev b/debian/local/btrfs.udev
new file mode 100644
index 00000000..e0905c49
--- /dev/null
+++ b/debian/local/btrfs.udev
@@ -0,0 +1,7 @@
+SUBSYSTEM!="block", GOTO="btrfs_end"
+ACTION!="add|change", GOTO="btrfs_end"
+ENV{ID_FS_TYPE}!="btrfs", GOTO="btrfs_end"
+RUN+="/sbin/modprobe btrfs"
+RUN+="/bin/btrfs device scan $env{DEVNAME}"
+
+LABEL="btrfs_end"
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 00000000..12338971
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,32 @@
+#!/usr/bin/make -f
+
+include /usr/share/dpkg/architecture.mk
+ifneq ($(DEB_HOST_GNU_TYPE),$(DEB_BUILD_GNU_TYPE))
+ CC := $(DEB_HOST_GNU_TYPE)-gcc
+else
+ CC := gcc
+endif
+
+SHELL := sh -e
+
+CFLAGS := $(shell dpkg-buildflags --get CFLAGS)
+CFLAGS := $(patsubst -O2,-Os,$(CFLAGS))
+
+%:
+ dh ${@} --parallel
+
+override_dh_auto_configure:
+ dh_auto_configure -- --bindir=/bin
+
+override_dh_auto_install:
+ dh_auto_install --destdir=debian/btrfs-progs
+
+ # Adding initramfs-tools integration
+ install -D -m 0755 debian/local/btrfs.hook debian/btrfs-progs/usr/share/initramfs-tools/hooks/btrfs
+ install -D -m 0755 debian/local/btrfs.local-premount debian/btrfs-progs/usr/share/initramfs-tools/scripts/local-premount/btrfs
+
+# Needs autopkgtest
+override_dh_auto_test:
+
+override_dh_strip:
+ dh_strip --dbg-package=btrfs-progs-dbg
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 00000000..163aaf8d
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/debian/source/options b/debian/source/options
new file mode 100644
index 00000000..22a4de9f
--- /dev/null
+++ b/debian/source/options
@@ -0,0 +1,2 @@
+compression = xz
+compression-level = 9
diff --git a/debian/watch b/debian/watch
new file mode 100644
index 00000000..6cca8510
--- /dev/null
+++ b/debian/watch
@@ -0,0 +1,2 @@
+version=3
+https://www.kernel.org/pub/linux/kernel/people/kdave/btrfs-progs/btrfs-progs-v(.+)\.tar\.xz
diff --git a/dir-item.c b/dir-item.c
new file mode 100644
index 00000000..bc59d179
--- /dev/null
+++ b/dir-item.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/limits.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "hash.h"
+#include "transaction.h"
+
+static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len);
+/* Insert a data_size-byte item at cpu_key, or extend the item already at that key; returns the new entry's slot or ERR_PTR(). */
+static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
+ *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *cpu_key,
+ u32 data_size,
+ const char *name,
+ int name_len)
+{
+ int ret;
+ char *ptr;
+ struct btrfs_item *item;
+ struct extent_buffer *leaf;
+
+ ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+ if (ret == -EEXIST) { /* an item with this key exists: try to append to it */
+ struct btrfs_dir_item *di;
+ di = btrfs_match_dir_item_name(root, path, name, name_len);
+ if (di) /* the same name is already stored in that item */
+ return ERR_PTR(-EEXIST);
+ ret = btrfs_extend_item(trans, root, path, data_size);
+ WARN_ON(ret > 0);
+ }
+ if (ret < 0)
+ return ERR_PTR(ret);
+ WARN_ON(ret > 0);
+ leaf = path->nodes[0];
+ item = btrfs_item_nr(path->slots[0]);
+ ptr = btrfs_item_ptr(leaf, path->slots[0], char);
+ BUG_ON(data_size > btrfs_item_size(leaf, item));
+ ptr += btrfs_item_size(leaf, item) - data_size; /* point at the last data_size bytes of the item */
+ return (struct btrfs_dir_item *)ptr;
+}
+/* Store xattr `name` with value `data` for object `dir`; returns 0, or -ENOMEM when no path can be allocated. */
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const char *name,
+ u16 name_len, const void *data, u16 data_len,
+ u64 dir)
+{
+ int ret = 0;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *dir_item;
+ unsigned long name_ptr, data_ptr;
+ struct btrfs_key key, location;
+ struct btrfs_disk_key disk_key;
+ struct extent_buffer *leaf;
+ u32 data_size;
+
+ key.objectid = dir;
+ btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+ key.offset = btrfs_name_hash(name, name_len); /* key offset is the hash of the name */
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ data_size = sizeof(*dir_item) + name_len + data_len; /* header + name + value */
+ dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+ name, name_len);
+ /*
+ * FIXME: at some point we should handle xattrs that are larger than
+ * what we can fit in our leaf. The location key is zeroed because we
+ * aren't pointing at anything else; that will change if we store the
+ * xattr data in a separate inode.
+ */
+ BUG_ON(IS_ERR(dir_item)); /* any insertion failure aborts here instead of being returned */
+ memset(&location, 0, sizeof(location));
+
+ leaf = path->nodes[0];
+ btrfs_cpu_key_to_disk(&disk_key, &location);
+ btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+ btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
+ btrfs_set_dir_name_len(leaf, dir_item, name_len);
+ btrfs_set_dir_data_len(leaf, dir_item, data_len);
+ name_ptr = (unsigned long)(dir_item + 1); /* name starts right after the header */
+ data_ptr = (unsigned long)((char *)name_ptr + name_len); /* value follows the name */
+
+ write_extent_buffer(leaf, name, name_ptr, name_len);
+ write_extent_buffer(leaf, data, data_ptr, data_len);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+
+ btrfs_free_path(path);
+ return ret;
+}
+/* Add entry `name` to directory `dir`: a DIR_ITEM keyed by name hash, then (except in the tree root) a DIR_INDEX keyed by `index`. */
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, const char *name, int name_len, u64 dir,
+ struct btrfs_key *location, u8 type, u64 index)
+{
+ int ret = 0; /* result of the DIR_ITEM insertion */
+ int ret2 = 0; /* result of the DIR_INDEX insertion */
+ struct btrfs_path *path;
+ struct btrfs_dir_item *dir_item;
+ struct extent_buffer *leaf;
+ unsigned long name_ptr;
+ struct btrfs_key key;
+ struct btrfs_disk_key disk_key;
+ u32 data_size;
+
+ key.objectid = dir;
+ btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+ key.offset = btrfs_name_hash(name, name_len);
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ data_size = sizeof(*dir_item) + name_len; /* header + name, no data payload */
+ dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+ name, name_len);
+ if (IS_ERR(dir_item)) {
+ ret = PTR_ERR(dir_item);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ btrfs_cpu_key_to_disk(&disk_key, location);
+ btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+ btrfs_set_dir_type(leaf, dir_item, type);
+ btrfs_set_dir_data_len(leaf, dir_item, 0);
+ btrfs_set_dir_name_len(leaf, dir_item, name_len);
+ name_ptr = (unsigned long)(dir_item + 1); /* name is stored right after the header */
+
+ write_extent_buffer(leaf, name, name_ptr, name_len);
+ btrfs_mark_buffer_dirty(leaf);
+
+ /* FIXME, use some real flag for selecting the extra index */
+ if (root == root->fs_info->tree_root) { /* no DIR_INDEX item is written for the tree root */
+ ret = 0;
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); /* second copy, keyed by index instead of hash */
+ key.offset = index;
+ dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+ name, name_len);
+ if (IS_ERR(dir_item)) {
+ ret2 = PTR_ERR(dir_item);
+ goto out;
+ }
+ leaf = path->nodes[0];
+ btrfs_cpu_key_to_disk(&disk_key, location);
+ btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+ btrfs_set_dir_type(leaf, dir_item, type);
+ btrfs_set_dir_data_len(leaf, dir_item, 0);
+ btrfs_set_dir_name_len(leaf, dir_item, name_len);
+ name_ptr = (unsigned long)(dir_item + 1);
+ write_extent_buffer(leaf, name, name_ptr, name_len);
+ btrfs_mark_buffer_dirty(leaf);
+out:
+ btrfs_free_path(path);
+ if (ret) /* a DIR_ITEM error takes precedence over a DIR_INDEX error */
+ return ret;
+ if (ret2)
+ return ret2;
+ return 0;
+}
+/* Find the DIR_ITEM entry for `name` in directory `dir`; returns the entry, NULL if not found, or ERR_PTR() if the search fails. */
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, int name_len,
+ int mod)
+{
+ int ret;
+ struct btrfs_key key;
+ int ins_len = mod < 0 ? -1 : 0; /* a negative mod is passed on as ins_len -1 */
+ int cow = mod != 0; /* any non-zero mod sets the cow argument */
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+
+ key.objectid = dir;
+ btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+
+ key.offset = btrfs_name_hash(name, name_len);
+
+ ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0) { /* no exact match: inspect the previous slot instead */
+ if (path->slots[0] == 0)
+ return NULL;
+ path->slots[0]--;
+ }
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.objectid != dir ||
+ btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
+ found_key.offset != key.offset) /* the item at this slot is not the key we searched for */
+ return NULL;
+
+ return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+/* Look up the DIR_INDEX item at `index` in `dir` and return its entry named `name` (NULL if the name does not match). */
+struct btrfs_dir_item *btrfs_lookup_dir_index(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, int name_len,
+ u64 index, int mod)
+{
+ int ret;
+ struct btrfs_key key;
+ int ins_len = mod < 0 ? -1 : 0; /* a negative mod is passed on as ins_len -1 */
+ int cow = mod != 0; /* any non-zero mod sets the cow argument */
+
+ key.objectid = dir;
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = index;
+
+ ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0) /* no item with exactly this key */
+ return ERR_PTR(-ENOENT);
+
+ return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+/*
+ * Given a pointer to one entry inside a directory item, delete that entry.
+ * This handles items that have more than one entry in them.
+ */
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_dir_item *di)
+{
+ /* Returns btrfs_del_item()'s result when the entry is the whole item, otherwise 0. */
+ struct extent_buffer *leaf;
+ u32 sub_item_len;
+ u32 item_len;
+ int ret = 0;
+
+ leaf = path->nodes[0];
+ sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
+ btrfs_dir_data_len(leaf, di); /* header + name + data of this entry */
+ item_len = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (sub_item_len == item_len) { /* the entry is the only one: drop the whole item */
+ ret = btrfs_del_item(trans, root, path);
+ } else {
+ unsigned long ptr = (unsigned long)di;
+ unsigned long start;
+
+ start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
+ item_len - (ptr + sub_item_len - start)); /* slide the following entries over the deleted one */
+ btrfs_truncate_item(trans, root, path, item_len - sub_item_len, 1);
+ }
+ return ret;
+}
+/* Sanity-check a dir item's type and name/data lengths; returns 0 if acceptable, 1 (after printing to stderr) if not. */
+static int verify_dir_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_dir_item *dir_item)
+{
+ u16 namelen = BTRFS_NAME_LEN;
+ u8 type = btrfs_dir_type(leaf, dir_item);
+
+ if (type >= BTRFS_FT_MAX) {
+ fprintf(stderr, "invalid dir item type: %d\n",
+ (int)type);
+ return 1;
+ }
+
+ if (type == BTRFS_FT_XATTR) /* xattr entries use XATTR_NAME_MAX as their name limit */
+ namelen = XATTR_NAME_MAX;
+
+ if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+ fprintf(stderr, "invalid dir item name len: %u\n",
+ (unsigned)btrfs_dir_name_len(leaf, dir_item)); /* print the name length that failed, not the data length */
+ return 1;
+ }
+
+ /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+ if ((btrfs_dir_data_len(leaf, dir_item) +
+ btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
+ fprintf(stderr, "invalid dir item name + data len: %u + %u\n",
+ (unsigned)btrfs_dir_name_len(leaf, dir_item),
+ (unsigned)btrfs_dir_data_len(leaf, dir_item));
+ return 1;
+ }
+
+ return 0;
+}
+/* Scan the entries packed into the item at path->slots[0] for one whose name equals `name`; returns it, or NULL. */
+static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len)
+{
+ struct btrfs_dir_item *dir_item;
+ unsigned long name_ptr;
+ u32 total_len;
+ u32 cur = 0; /* bytes of the item consumed so far */
+ u32 this_len;
+ struct extent_buffer *leaf;
+
+ leaf = path->nodes[0];
+ dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+ total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (verify_dir_item(root, leaf, dir_item)) /* only the first entry is passed to verify_dir_item() */
+ return NULL;
+
+ while(cur < total_len) {
+ this_len = sizeof(*dir_item) +
+ btrfs_dir_name_len(leaf, dir_item) +
+ btrfs_dir_data_len(leaf, dir_item);
+ if (this_len > (total_len - cur)) { /* entry claims more bytes than remain in the item */
+ fprintf(stderr, "invalid dir item size\n");
+ return NULL;
+ }
+
+ name_ptr = (unsigned long)(dir_item + 1); /* name is stored right after the header */
+
+ if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
+ memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
+ return dir_item;
+
+ cur += this_len;
+ dir_item = (struct btrfs_dir_item *)((char *)dir_item +
+ this_len); /* advance to the next packed entry */
+ }
+ return NULL;
+}
diff --git a/dir-test.c b/dir-test.c
new file mode 100644
index 00000000..a54b7773
--- /dev/null
+++ b/dir-test.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "hash.h"
+#include "transaction.h"
+
+int keep_running = 1; /* cleared by sigstopper(); polled by the test loops */
+struct btrfs_super_block super; /* handed to open_ctree()/close_ctree() and commits */
+static u64 dir_oid = 0; /* set in main() from btrfs_super_root_dir(&super) */
+static u64 file_oid = 33778; /* objectid insert_dup() puts into its key */
+
+/*
+ * Choose a test number, starting from a random value.
+ *
+ * @exists != 0: return the first entry the gang lookup yields for the random
+ *               start index, or -1 if the lookup finds nothing.
+ * @exists == 0: step past the start value while it collides with one of the
+ *               (up to two) entries returned by the lookup, re-running the
+ *               lookup when both are skipped.
+ *
+ * The chosen number is stored in *num_ret; returns 0 on success.
+ */
+static int find_num(struct radix_tree_root *root, unsigned long *num_ret,
+		    int exists)
+{
+	unsigned long candidate = rand();
+	unsigned long hits[2];
+	int found;
+
+	for (;;) {
+		found = radix_tree_gang_lookup(root, (void **)hits,
+					       candidate, 2);
+		if (exists) {
+			if (found == 0)
+				return -1;
+			candidate = hits[0];
+			break;
+		}
+		if (found == 0 || candidate != hits[0])
+			break;
+		candidate++;
+		if (found <= 1 || candidate != hits[1])
+			break;
+		/* both neighbours are taken: look again further on */
+		candidate++;
+	}
+	*num_ret = candidate;
+	return 0;
+}
+
+/*
+ * Reset @inode_item to all zeroes, then give it a regular-file mode with
+ * 0700 permissions and the current generation taken from root->fs_info.
+ */
+static void initial_inode_init(struct btrfs_root *root,
+			       struct btrfs_inode_item *inode_item)
+{
+	memset(inode_item, 0, sizeof(struct btrfs_inode_item));
+	btrfs_set_inode_mode(inode_item, S_IFREG | 0700);
+	btrfs_set_inode_generation(inode_item, root->fs_info->generation);
+}
+
+/*
+ * Insert one new "str-<n>" directory entry plus its inode, and remember <n>
+ * in @radix.
+ *
+ * An -EEXIST from any step is double-checked: if the dir item found for the
+ * name does not carry the same name but hashes identically, the failure is
+ * treated as a hash collision and 0 is returned.  Every other failure is
+ * printed and returned (note that an exact-name match after -EEXIST ends up
+ * on the failure path with ret == 0).
+ */
+static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct radix_tree_root *radix)
+{
+	struct btrfs_inode_item inode_item;
+	struct btrfs_key inode_map;
+	struct btrfs_path path;
+	struct btrfs_dir_item *di;
+	unsigned long oid;
+	u64 objectid;
+	u64 myhash;
+	u64 foundhash;
+	u32 found_len;
+	char *found;
+	char buf[128];
+	size_t buf_len;
+	int ret;
+
+	find_num(radix, &oid, 0);
+	sprintf(buf, "str-%lu", oid);
+	buf_len = strlen(buf);
+
+	ret = btrfs_find_free_objectid(trans, root, dir_oid + 1, &objectid);
+	if (ret)
+		goto error;
+
+	inode_map.objectid = objectid;
+	inode_map.flags = 0;
+	btrfs_set_key_type(&inode_map, BTRFS_INODE_ITEM_KEY);
+	inode_map.offset = 0;
+
+	initial_inode_init(root, &inode_item);
+	ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
+	if (!ret)
+		ret = btrfs_insert_dir_item(trans, root, buf, buf_len, dir_oid,
+					    &inode_map, BTRFS_FT_UNKNOWN);
+	if (!ret) {
+		radix_tree_preload(GFP_KERNEL);
+		ret = radix_tree_insert(radix, oid, (void *)oid);
+		radix_tree_preload_end();
+	}
+	if (!ret)
+		return 0;
+error:
+	if (ret != -EEXIST)
+		goto fatal;
+
+	/*
+	 * if we got an EEXIST, it may be due to hash collision, double
+	 * check
+	 */
+	btrfs_init_path(&path);
+	ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf,
+				    buf_len, 0);
+	if (ret)
+		goto fatal_release;
+	if (btrfs_match_dir_item_name(root, &path, buf, buf_len))
+		goto fatal_release;
+
+	di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
+			    struct btrfs_dir_item);
+	found = (char *)(di + 1);
+	found_len = btrfs_dir_name_len(di);
+	myhash = btrfs_name_hash(buf, buf_len);
+	foundhash = btrfs_name_hash(found, found_len);
+	if (myhash == foundhash) {
+		btrfs_release_path(&path);
+		return 0;
+	}
+fatal_release:
+	btrfs_release_path(&path);
+fatal:
+	printf("failed to insert %lu ret %d\n", oid, ret);
+	return ret;
+}
+
+/*
+ * Re-insert a name that @radix says is already present and expect the
+ * insertion to be refused with -EEXIST.
+ *
+ * Returns 0 when the radix has no usable entry or the insert failed with
+ * -EEXIST as expected, 1 for any other insert result.
+ */
+static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct radix_tree_root *radix)
+{
+	struct btrfs_key key;
+	unsigned long oid;
+	char buf[128];
+	int ret;
+
+	if (find_num(radix, &oid, 1) < 0)
+		return 0;
+	sprintf(buf, "str-%lu", oid);
+
+	key.objectid = file_oid;
+	key.flags = 0;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid,
+				    &key, BTRFS_FT_UNKNOWN);
+	if (ret == -EEXIST)
+		return 0;
+
+	printf("insert on %s gave us %d\n", buf, ret);
+	return 1;
+}
+
+/*
+ * Delete the dir item @path points at, then the inode it refers to, and
+ * finally drop @radix_index from @radix.
+ *
+ * @path is released on every exit.  Returns 0 on success, the failing
+ * call's error code otherwise, or -5555 when the radix entry was missing.
+ */
+static int del_dir_item(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root,
+			struct radix_tree_root *radix,
+			unsigned long radix_index,
+			struct btrfs_path *path)
+{
+	struct btrfs_dir_item *di;
+	u64 file_objectid;
+	int ret;
+
+	/* find the inode number of the file */
+	di = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0],
+			    struct btrfs_dir_item);
+	file_objectid = btrfs_disk_key_objectid(&di->location);
+
+	/* delete the directory item */
+	ret = btrfs_del_item(trans, root, path);
+	if (ret)
+		goto out_release;
+	btrfs_release_path(path);
+
+	/* delete the inode */
+	btrfs_init_path(path);
+	ret = btrfs_lookup_inode(trans, root, path, file_objectid, -1);
+	if (!ret)
+		ret = btrfs_del_item(trans, root, path);
+	if (ret)
+		goto out_release;
+	btrfs_release_path(path);
+
+	/* let the allocator hand the freed objectid out again */
+	if (file_objectid < root->fs_info->last_inode_alloc)
+		root->fs_info->last_inode_alloc = file_objectid;
+
+	if (radix_tree_delete(radix, radix_index))
+		return 0;
+	ret = -5555;
+	goto out;
+out_release:
+	btrfs_release_path(path);
+out:
+	printf("failed to delete %lu %d\n", radix_index, ret);
+	return ret;
+}
+
+/*
+ * Remove one entry that @radix says exists: look its dir item up for
+ * deletion and hand it to del_dir_item().
+ *
+ * Returns 0 on success or when the radix holds no usable entry; otherwise
+ * the path is released, the failure is printed and the error is returned.
+ */
+static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct radix_tree_root *radix)
+{
+	struct btrfs_path path;
+	unsigned long oid;
+	char buf[128];
+	int ret;
+
+	if (find_num(radix, &oid, 1) < 0)
+		return 0;
+	sprintf(buf, "str-%lu", oid);
+
+	btrfs_init_path(&path);
+	ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf,
+				    strlen(buf), -1);
+	if (!ret)
+		ret = del_dir_item(trans, root, radix, oid, &path);
+	if (!ret)
+		return 0;
+
+	btrfs_release_path(&path);
+	printf("failed to delete %lu %d\n", oid, ret);
+	return ret;
+}
+
+/*
+ * Look up a name that @radix says exists.
+ *
+ * Returns 0 when the lookup succeeds (or the radix has no usable entry);
+ * otherwise prints the missing key and returns the lookup result.  The
+ * objectid read from the found item is not used any further.
+ */
+static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		       *root, struct radix_tree_root *radix)
+{
+	struct btrfs_dir_item *di;
+	struct btrfs_path path;
+	unsigned long oid;
+	u64 objectid;
+	char buf[128];
+	int ret;
+
+	if (find_num(radix, &oid, 1) < 0)
+		return 0;
+	sprintf(buf, "str-%lu", oid);
+
+	btrfs_init_path(&path);
+	ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf,
+				    strlen(buf), 0);
+	if (ret == 0) {
+		di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
+				    struct btrfs_dir_item);
+		objectid = btrfs_disk_key_objectid(&di->location);
+	}
+	btrfs_release_path(&path);
+
+	if (ret == 0)
+		return 0;
+	printf("unable to find key %lu\n", oid);
+	return ret;
+}
+
+/*
+ * Look up a name that is NOT recorded in @radix and expect the lookup to
+ * fail.
+ *
+ * Returns 0 when the lookup fails as expected (or find_num() yields
+ * nothing).  If the lookup unexpectedly succeeds the key is printed and
+ * -EEXIST is returned; the lookup result itself is 0 in that case and must
+ * not be handed back, or the caller would never see the failure.
+ */
+static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root
+			 *root, struct radix_tree_root *radix)
+{
+	struct btrfs_path path;
+	char buf[128];
+	int ret;
+	unsigned long oid;
+
+	ret = find_num(radix, &oid, 0);
+	if (ret < 0)
+		return 0;
+	sprintf(buf, "str-%lu", oid);
+	btrfs_init_path(&path);
+	ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf,
+				    strlen(buf), 0);
+	btrfs_release_path(&path);
+	if (!ret) {
+		printf("able to find key that should not exist %lu\n", oid);
+		return -EEXIST;
+	}
+	return 0;
+}
+
+/*
+ * Delete up to @nr + 1 directory entries of dir_oid, always taking the
+ * entry with the highest key offset that is still present.
+ *
+ * For each entry the "str-<n>" name is copied out of the leaf, <n> is
+ * parsed, and del_dir_item() removes the dir item, its inode and the radix
+ * entry <n>.  The name length is validated before the copy so that the
+ * name and its terminating NUL always fit in the local buffer.
+ *
+ * Stops early when the search lands on slot 0 without an exact match or
+ * when keep_running is cleared.  Returns 0 on success, otherwise the error
+ * from the search or from del_dir_item().
+ */
+static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct radix_tree_root *radix, int nr)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	unsigned long found = 0;
+	u32 found_len;
+	int ret;
+	int slot;
+	char buf[128];
+	struct btrfs_dir_item *di;
+
+	key.offset = (u64)-1;
+	key.flags = 0;
+	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+	key.objectid = dir_oid;
+	while (nr-- >= 0) {
+		btrfs_init_path(&path);
+		ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
+		if (ret < 0) {
+			btrfs_release_path(&path);
+			return ret;
+		}
+		if (ret != 0) {
+			if (path.slots[0] == 0) {
+				btrfs_release_path(&path);
+				break;
+			}
+			/* no exact match: step back to the previous item */
+			path.slots[0] -= 1;
+		}
+		slot = path.slots[0];
+		di = btrfs_item_ptr(&path.nodes[0]->leaf, slot,
+				    struct btrfs_dir_item);
+		found_len = btrfs_dir_name_len(di);
+		/* check before copying, and keep one byte for the NUL */
+		BUG_ON(found_len >= sizeof(buf));
+		memcpy(buf, (char *)(di + 1), found_len);
+		buf[found_len] = '\0';
+		/* skip the "str-" prefix; the rest is the radix index */
+		found = strtoul(buf + 4, NULL, 10);
+		ret = del_dir_item(trans, root, radix, found, &path);
+		if (ret) {
+			fprintf(stderr,
+				"failed to remove %lu from tree\n",
+				found);
+			return ret;
+		}
+		if (!keep_running)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * Insert @count fresh entries through ins_one().
+ *
+ * The transaction is committed on every 1000th iteration (including the
+ * first one) and a progress line is printed every 10000 entries.  The loop
+ * ends early once keep_running is cleared.  Returns 0 or the first error.
+ */
+static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		     struct radix_tree_root *radix, int count)
+{
+	int ret = 0;
+	int done = 0;
+
+	while (done < count) {
+		ret = ins_one(trans, root, radix);
+		if (ret) {
+			fprintf(stderr, "fill failed\n");
+			return ret;
+		}
+		if (done % 1000 == 0) {
+			ret = btrfs_commit_transaction(trans, root, &super);
+			if (ret) {
+				fprintf(stderr, "fill commit failed\n");
+				return ret;
+			}
+		}
+		if (done != 0 && done % 10000 == 0)
+			printf("bigfill %d\n", done);
+		if (!keep_running)
+			break;
+		done++;
+	}
+	return ret;
+}
+
+/*
+ * Bulk operation: delete a random number of entries and insert the same
+ * number again.  Only every 100th call does the work; a random count is
+ * drawn on every call regardless.
+ */
+static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct radix_tree_root *radix)
+{
+	static int run_nr = 0;
+	int nr = rand() % 5000;
+	int ret;
+
+	/* do the bulk op much less frequently */
+	if (run_nr++ % 100 != 0)
+		return 0;
+
+	ret = empty_tree(trans, root, radix, nr);
+	if (ret == 0)
+		ret = fill_tree(trans, root, radix, nr);
+	return ret;
+}
+
+
+
+int (*ops[])(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct
+ radix_tree_root *radix) =
+ { ins_one, insert_dup, del_one, lookup_item,
+ lookup_enoent, bulk_op }; /* test operations; main() picks one with rand() % ARRAY_SIZE(ops) and treats a non-zero return as failure */
+
+/*
+ * SIGTERM/SIGINT handler: clear keep_running so the test loops wind down.
+ *
+ * The notice goes out through write(2) rather than fprintf(): stdio is not
+ * async-signal-safe and must not be used inside a signal handler.
+ */
+void sigstopper(int ignored)
+{
+	static const char msg[] = "caught exit signal, stopping\n";
+
+	(void)ignored;
+	keep_running = 0;
+	if (write(STDERR_FILENO, msg, sizeof(msg) - 1) < 0) {
+		/* nothing sensible can be done about a lost notice here */
+	}
+}
+
+/*
+ * Print the command line help to stdout and terminate the process with
+ * exit status 1; this function does not return.
+ */
+int print_usage(void)
+{
+	static const char *const help[] = {
+		"usage: tester [-ih] [-c count] [-f count]\n",
+		"\t -c count -- iteration count after filling\n",
+		"\t -f count -- run this many random inserts before starting\n",
+		"\t -i -- only do initial fill\n",
+		"\t -h -- this help text\n",
+	};
+	size_t line;
+
+	for (line = 0; line < sizeof(help) / sizeof(help[0]); line++)
+		printf("%s", help[line]);
+	exit(1);
+}
+/*
+ * Directory item stress tester.
+ *
+ * The last argument names the filesystem image.  After an initial fill the
+ * program runs random operations from ops[] and cross-checks them against
+ * an in-memory radix tree.  Every 5000 iterations the image is closed and
+ * reopened; the reopen uses the same path that was given on the command
+ * line.
+ *
+ * Options (before the image path): -i (initial fill only), -c <count>
+ * (iterations), -f <count> (initial fill size).  Exit status is 0 on
+ * success and 1 on any failure.
+ */
+int main(int ac, char **av)
+{
+	RADIX_TREE(radix, GFP_KERNEL);
+	struct btrfs_root *root;
+	int i;
+	int ret;
+	int count;
+	int op;
+	int iterations = 20000;
+	int init_fill_count = 800000;
+	int err = 0;
+	int initial_only = 0;
+	struct btrfs_trans_handle *trans;
+	char *dbpath;
+
+	/* without an image path there is nothing to open */
+	if (ac < 2)
+		print_usage();
+	dbpath = av[ac - 1];
+
+	radix_tree_init();
+
+	root = open_ctree(dbpath, &super, 0);
+
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		return 1;
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+
+	dir_oid = btrfs_super_root_dir(&super);
+
+	signal(SIGTERM, sigstopper);
+	signal(SIGINT, sigstopper);
+
+	for (i = 1 ; i < ac - 1; i++) {
+		if (strcmp(av[i], "-i") == 0) {
+			initial_only = 1;
+		} else if (strcmp(av[i], "-c") == 0) {
+			iterations = atoi(av[i+1]);
+			i++;
+		} else if (strcmp(av[i], "-f") == 0) {
+			init_fill_count = atoi(av[i+1]);
+			i++;
+		} else {
+			print_usage();
+		}
+	}
+	printf("initial fill\n");
+	ret = fill_tree(trans, root, &radix, init_fill_count);
+	printf("starting run\n");
+	if (ret) {
+		err = ret;
+		goto out;
+	}
+	if (initial_only == 1) {
+		goto out;
+	}
+	for (i = 0; i < iterations; i++) {
+		op = rand() % ARRAY_SIZE(ops);
+		count = rand() % 128;
+		if (i % 2000 == 0) {
+			printf("%d\n", i);
+			fflush(stdout);
+		}
+		if (i && i % 5000 == 0) {
+			printf("open & close, root level %d nritems %d\n",
+				btrfs_header_level(&root->node->node.header),
+				btrfs_header_nritems(&root->node->node.header));
+			close_ctree(root, &super);
+			/* reopen the image we were started on */
+			root = open_ctree(dbpath, &super, 0);
+
+			if (!root) {
+				fprintf(stderr, "Open ctree failed\n");
+				return 1;
+			}
+		}
+		while (count--) {
+			ret = ops[op](trans, root, &radix);
+			if (ret) {
+				fprintf(stderr, "op %d failed %d:%d\n",
+					op, i, iterations);
+				btrfs_print_tree(root, root->node, 1);
+				fprintf(stderr, "op %d failed %d:%d\n",
+					op, i, iterations);
+				err = ret;
+				goto out;
+			}
+			/* a bulk op is run once per iteration, not count times */
+			if (ops[op] == bulk_op)
+				break;
+			if (keep_running == 0) {
+				err = 0;
+				goto out;
+			}
+		}
+	}
+out:
+	close_ctree(root, &super);
+	return !!err;
+}
+
diff --git a/disk-io.c b/disk-io.c
new file mode 100644
index 00000000..e520d80d
--- /dev/null
+++ b/disk-io.c
@@ -0,0 +1,1750 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "transaction.h"
+#include "crc32c.h"
+#include "utils.h"
+#include "print-tree.h"
+#include "rbtree-utils.h"
+
+/* error codes returned by check_tree_block() and decoded by print_tree_block_error() */
+#define BTRFS_BAD_BYTENR (-1) /* header bytenr differs from the buffer's start */
+#define BTRFS_BAD_FSID (-2) /* header fsid matches none of the fs_devices in the seed chain */
+#define BTRFS_BAD_LEVEL (-3) /* header level is >= BTRFS_MAX_LEVEL */
+#define BTRFS_BAD_NRITEMS (-4) /* header nritems exceeds max_nritems() for that level */
+
+/*
+ * Upper bound for the item count of a tree block of @nodesize bytes: the
+ * space behind the header divided by the per-entry size, which is a
+ * struct btrfs_item for leaves (@level 0) and a struct btrfs_key_ptr for
+ * nodes.
+ */
+static u32 max_nritems(u8 level, u32 nodesize)
+{
+	size_t payload = nodesize - sizeof(struct btrfs_header);
+	size_t entry_size;
+
+	if (level == 0)
+		entry_size = sizeof(struct btrfs_item);
+	else
+		entry_size = sizeof(struct btrfs_key_ptr);
+
+	return payload / entry_size;
+}
+
+/*
+ * Sanity check the header of tree block @buf.
+ *
+ * Returns 0 if the block looks sane, or one of BTRFS_BAD_BYTENR,
+ * BTRFS_BAD_LEVEL, BTRFS_BAD_NRITEMS, BTRFS_BAD_FSID.  The fsid is compared
+ * against every entry of the fs_devices seed chain; the comparison is
+ * skipped when fs_info->ignore_fsid_mismatch is set.
+ */
+static int check_tree_block(struct btrfs_fs_info *fs_info,
+			    struct extent_buffer *buf)
+{
+	struct btrfs_fs_devices *cur;
+	u32 leafsize = btrfs_super_leafsize(fs_info->super_copy);
+
+	if (buf->start != btrfs_header_bytenr(buf))
+		return BTRFS_BAD_BYTENR;
+	if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
+		return BTRFS_BAD_LEVEL;
+	if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
+						    leafsize))
+		return BTRFS_BAD_NRITEMS;
+
+	for (cur = fs_info->fs_devices; cur; cur = cur->seed) {
+		if (fs_info->ignore_fsid_mismatch ||
+		    !memcmp_extent_buffer(buf, cur->fsid,
+					  btrfs_header_fsid(),
+					  BTRFS_FSID_SIZE))
+			return 0;
+	}
+	return BTRFS_BAD_FSID;
+}
+
+/*
+ * Print a message to stderr for a check_tree_block() error code @err
+ * concerning @eb.  Unknown codes print nothing.
+ */
+static void print_tree_block_error(struct btrfs_fs_info *fs_info,
+				   struct extent_buffer *eb,
+				   int err)
+{
+	if (err == BTRFS_BAD_FSID) {
+		char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
+		char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
+		u8 buf[BTRFS_UUID_SIZE];
+
+		read_extent_buffer(eb, buf, btrfs_header_fsid(),
+				   BTRFS_UUID_SIZE);
+		uuid_unparse(buf, found_uuid);
+		uuid_unparse(fs_info->fsid, fs_uuid);
+		fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
+			fs_uuid, found_uuid);
+	} else if (err == BTRFS_BAD_BYTENR) {
+		fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
+			eb->start, btrfs_header_bytenr(eb));
+	} else if (err == BTRFS_BAD_LEVEL) {
+		fprintf(stderr, "bad level, %u > %u\n",
+			btrfs_header_level(eb), BTRFS_MAX_LEVEL);
+	} else if (err == BTRFS_BAD_NRITEMS) {
+		fprintf(stderr, "invalid nr_items: %u\n",
+			btrfs_header_nritems(eb));
+	}
+}
+
+/*
+ * Continue the crc32c checksum @seed over @len bytes of @data and return
+ * the new value.  @root is not used.
+ */
+u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
+{
+	u32 crc = crc32c(seed, data, len);
+
+	return crc;
+}
+
+/*
+ * Finish a checksum: store the bitwise complement of @crc, converted with
+ * cpu_to_le32(), in the first four bytes of @result.
+ */
+void btrfs_csum_final(u32 crc, char *result)
+{
+	__le32 *out = (__le32 *)result;
+
+	*out = ~cpu_to_le32(crc);
+}
+
+/*
+ * Checksum everything in @buf behind the first BTRFS_CSUM_SIZE bytes.
+ *
+ * @verify != 0: compare the first @csum_size bytes of @buf with the result;
+ *               return 1 on mismatch (with a message unless @silent).
+ * @verify == 0: write the result into the first @csum_size bytes of @buf.
+ *
+ * Returns 0 unless verification failed.
+ */
+static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
+				  int verify, int silent)
+{
+	char result[BTRFS_CSUM_SIZE];
+	u32 crc = ~(u32)0;
+	u32 len = buf->len - BTRFS_CSUM_SIZE;
+
+	crc = crc32c(crc, buf->data + BTRFS_CSUM_SIZE, len);
+	btrfs_csum_final(crc, result);
+
+	if (!verify) {
+		write_extent_buffer(buf, result, 0, csum_size);
+		return 0;
+	}
+
+	if (memcmp_extent_buffer(buf, result, 0, csum_size) == 0)
+		return 0;
+
+	if (!silent)
+		printk("checksum verify failed on %llu found %08X wanted %08X\n",
+		       (unsigned long long)buf->start,
+		       *((u32 *)result),
+		       *((u32*)(char *)buf->data));
+	return 1;
+}
+
+/*
+ * Verify (@verify != 0) or write (@verify == 0) the checksum of @buf;
+ * verification failures are reported.
+ */
+int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify)
+{
+	const int silent = 0;
+
+	return __csum_tree_block_size(buf, csum_size, verify, silent);
+}
+
+/* Verify the checksum of @buf without printing anything on mismatch. */
+int verify_tree_block_csum_silent(struct extent_buffer *buf, u16 csum_size)
+{
+	const int verify = 1;
+	const int silent = 1;
+
+	return __csum_tree_block_size(buf, csum_size, verify, silent);
+}
+
+/*
+ * Verify or write the checksum of @buf using the checksum size recorded in
+ * the super block copy.  Verification is silent when
+ * fs_info->suppress_check_block_errors is set.
+ */
+static int csum_tree_block_fs_info(struct btrfs_fs_info *fs_info,
+				   struct extent_buffer *buf, int verify)
+{
+	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	int silent = verify && fs_info->suppress_check_block_errors;
+
+	return __csum_tree_block_size(buf, csum_size, verify, silent);
+}
+
+/* Same as csum_tree_block_fs_info(), taking the fs_info from @root. */
+int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+		    int verify)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	return csum_tree_block_fs_info(fs_info, buf, verify);
+}
+
+/*
+ * Look up the extent buffer for @bytenr/@blocksize in the filesystem's
+ * extent cache via find_extent_buffer().
+ */
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+					    u64 bytenr, u32 blocksize)
+{
+	struct extent_io_tree *cache = &root->fs_info->extent_cache;
+
+	return find_extent_buffer(cache, bytenr, blocksize);
+}
+
+/*
+ * Obtain the extent buffer for @bytenr/@blocksize from the filesystem's
+ * extent cache via alloc_extent_buffer().
+ */
+struct extent_buffer* btrfs_find_create_tree_block(
+		struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize)
+{
+	struct extent_io_tree *cache = &fs_info->extent_cache;
+
+	return alloc_extent_buffer(cache, bytenr, blocksize);
+}
+
+/*
+ * Hint the kernel to read ahead the tree block at @bytenr.
+ *
+ * Nothing is issued when an up-to-date buffer is already cached or the
+ * block cannot be mapped.  The request goes to the first stripe and is
+ * capped at 64KiB.
+ */
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+			  u64 parent_transid)
+{
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	struct extent_buffer *eb;
+	u64 length;
+	int cached;
+
+	eb = btrfs_find_tree_block(root, bytenr, blocksize);
+	cached = eb && btrfs_buffer_uptodate(eb, parent_transid);
+
+	if (!cached &&
+	    btrfs_map_block(&root->fs_info->mapping_tree, READ,
+			    bytenr, &length, &multi, 0, NULL) == 0) {
+		device = multi->stripes[0].dev;
+		device->total_ios++;
+		blocksize = min(blocksize, (u32)(64 * 1024));
+		readahead(device->fd, multi->stripes[0].physical, blocksize);
+	}
+
+	free_extent_buffer(eb);
+	kfree(multi);
+}
+
+/*
+ * Check that the generation in @eb's header equals @parent_transid.
+ *
+ * Returns 0 when @parent_transid is 0 or matches.  On a mismatch a message
+ * is printed; with @ignore set the buffer is flagged EXTENT_BAD_TRANSID and
+ * 0 is returned, otherwise the buffer's uptodate state is cleared and 1 is
+ * returned.
+ */
+static int verify_parent_transid(struct extent_io_tree *io_tree,
+				 struct extent_buffer *eb, u64 parent_transid,
+				 int ignore)
+{
+	int ret = 1;
+
+	if (parent_transid == 0)
+		return 0;
+	if (btrfs_header_generation(eb) == parent_transid)
+		return 0;
+
+	/* re-check kept from the original; the match was ruled out above */
+	if (extent_buffer_uptodate(eb) &&
+	    btrfs_header_generation(eb) == parent_transid) {
+		ret = 0;
+	} else {
+		printk("parent transid verify failed on %llu wanted %llu found %llu\n",
+		       (unsigned long long)eb->start,
+		       (unsigned long long)parent_transid,
+		       (unsigned long long)btrfs_header_generation(eb));
+		if (ignore) {
+			eb->flags |= EXTENT_BAD_TRANSID;
+			printk("Ignoring transid failure\n");
+			return 0;
+		}
+	}
+
+	clear_extent_buffer_uptodate(io_tree, eb);
+	return ret;
+}
+
+
+
+/*
+ * Fill @eb from disk, one mapped piece at a time, using copy @mirror.
+ *
+ * Normally each piece is mapped through the chunk mapping tree and read
+ * from the first stripe.  While restoring, or for the super block offset,
+ * the mapping is bypassed: the device with devid 1 is used and the logical
+ * address doubles as the device offset.
+ *
+ * Returns 0 on success and -EIO on any mapping or read failure.
+ */
+int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
+{
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	unsigned long bytes_left = eb->len;
+	unsigned long offset = 0;
+	u64 read_len;
+	int ret = 0;
+
+	while (bytes_left != 0) {
+		read_len = bytes_left;
+		device = NULL;
+
+		if (info->on_restoring ||
+		    eb->start == BTRFS_SUPER_INFO_OFFSET) {
+			/* special case for restore metadump */
+			list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
+				if (device->devid == 1)
+					break;
+			}
+
+			eb->fd = device->fd;
+			eb->dev_bytenr = eb->start;
+			device->total_ios++;
+		} else {
+			ret = btrfs_map_block(&info->mapping_tree, READ,
+					      eb->start + offset, &read_len, &multi,
+					      mirror, NULL);
+			if (ret) {
+				printk("Couldn't map the block %Lu\n", eb->start + offset);
+				kfree(multi);
+				return -EIO;
+			}
+			device = multi->stripes[0].dev;
+
+			if (device->fd <= 0) {
+				kfree(multi);
+				return -EIO;
+			}
+
+			eb->fd = device->fd;
+			device->total_ios++;
+			eb->dev_bytenr = multi->stripes[0].physical;
+			kfree(multi);
+			multi = NULL;
+		}
+
+		/* the mapping may cover more than what is left to read */
+		if (read_len > bytes_left)
+			read_len = bytes_left;
+
+		if (read_extent_from_disk(eb, offset, read_len))
+			return -EIO;
+		offset += read_len;
+		bytes_left -= read_len;
+	}
+	return 0;
+}
+
+struct extent_buffer* read_tree_block_fs_info(
+ struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize,
+ u64 parent_transid)
+{ /*
+ * Return an up-to-date extent buffer for the tree block at @bytenr.
+ *
+ * A cached buffer that is already up to date is returned directly.
+ * Otherwise the block is read and accepted once the read, the checksum,
+ * check_tree_block() and verify_parent_transid() all succeed.  When a
+ * pass fails the next mirror is tried; after all mirrors (or at once if
+ * there is a single copy) one more pass runs with "ignore" set, which
+ * lets verify_parent_transid() tolerate a generation mismatch.
+ *
+ * Returns the buffer, ERR_PTR(-ENOMEM) if no buffer could be obtained,
+ * or ERR_PTR(-EIO) when the final pass fails as well.
+ */
+ int ret;
+ struct extent_buffer *eb;
+ u64 best_transid = 0;
+ int mirror_num = 0;
+ int good_mirror = 0;
+ int num_copies;
+ int ignore = 0;
+
+ eb = btrfs_find_create_tree_block(fs_info, bytenr, blocksize);
+ if (!eb)
+ return ERR_PTR(-ENOMEM);
+
+ if (btrfs_buffer_uptodate(eb, parent_transid))
+ return eb;
+
+ while (1) { /* one read attempt per pass; mirror_num selects the copy */
+ ret = read_whole_eb(fs_info, eb, mirror_num);
+ if (ret == 0 && csum_tree_block_fs_info(fs_info, eb, 1) == 0 &&
+ check_tree_block(fs_info, eb) == 0 &&
+ verify_parent_transid(eb->tree, eb, parent_transid, ignore)
+ == 0) {
+ if (eb->flags & EXTENT_BAD_TRANSID &&
+ list_empty(&eb->recow)) { /* accepted despite a bad transid: queue on recow_ebs, holding a reference */
+ list_add_tail(&eb->recow,
+ &fs_info->recow_ebs);
+ eb->refs++;
+ }
+ btrfs_set_buffer_uptodate(eb);
+ return eb;
+ }
+ if (ignore) { /* this was the last-chance pass: report and give up */
+ if (check_tree_block(fs_info, eb)) {
+ if (!fs_info->suppress_check_block_errors)
+ print_tree_block_error(fs_info, eb,
+ check_tree_block(fs_info, eb));
+ } else {
+ if (!fs_info->suppress_check_block_errors)
+ fprintf(stderr, "Csum didn't match\n");
+ }
+ ret = -EIO;
+ break;
+ }
+ num_copies = btrfs_num_copies(&fs_info->mapping_tree,
+ eb->start, eb->len);
+ if (num_copies == 1) { /* no other copy: retry once with ignore set */
+ ignore = 1;
+ continue;
+ }
+ if (btrfs_header_generation(eb) > best_transid && mirror_num) { /* remember the mirror with the highest generation; pass 0 is not recorded */
+ best_transid = btrfs_header_generation(eb);
+ good_mirror = mirror_num;
+ }
+ mirror_num++;
+ if (mirror_num > num_copies) { /* all mirrors tried: final pass on the remembered one */
+ mirror_num = good_mirror;
+ ignore = 1;
+ continue;
+ }
+ }
+ free_extent_buffer(eb);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Read file data at logical address @logical into @data.
+ *
+ * On entry *len is the number of bytes wanted; on return it holds the
+ * number of bytes that were requested from the device, which is limited to
+ * what the mapping of @logical covers and never exceeds the value passed
+ * in.  Data is read from the first stripe of copy @mirror.
+ *
+ * Returns 0 on success, the mapping error if @logical cannot be mapped, or
+ * -EIO when the device has no usable fd or the read is short or fails.
+ */
+int read_extent_data(struct btrfs_root *root, char *data,
+		     u64 logical, u64 *len, int mirror)
+{
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_device *device;
+	u64 max_len = *len;
+	ssize_t nread;
+	int ret;
+
+	ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
+			      &multi, mirror, NULL);
+	if (ret) {
+		fprintf(stderr, "Couldn't map the block %llu\n",
+			(unsigned long long)logical);
+		goto err;
+	}
+	device = multi->stripes[0].dev;
+
+	if (device->fd <= 0) {
+		/* do not report success when nothing could be read */
+		ret = -EIO;
+		goto err;
+	}
+	if (*len > max_len)
+		*len = max_len;
+
+	nread = pread64(device->fd, data, *len, multi->stripes[0].physical);
+	if (nread < 0 || (u64)nread != *len)
+		ret = -EIO;
+	else
+		ret = 0;
+err:
+	kfree(multi);
+	return ret;
+}
+
+/*
+ * Map @eb for writing and write it to every stripe of the mapping, or via
+ * write_raid56_with_parity() when the mapping supplies a raid map.
+ *
+ * The mapping result is checked before the mapping is used, so a failed
+ * map triggers BUG_ON() instead of dereferencing an unset stripe list.
+ * Write failures trigger BUG_ON() as well; the function only returns 0.
+ * @trans is not used.
+ */
+int write_and_map_eb(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct extent_buffer *eb)
+{
+	int ret;
+	int dev_nr;
+	u64 length;
+	u64 *raid_map = NULL;
+	struct btrfs_multi_bio *multi = NULL;
+
+	length = eb->len;
+	ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE,
+			      eb->start, &length, &multi, 0, &raid_map);
+	BUG_ON(ret);
+
+	if (raid_map) {
+		ret = write_raid56_with_parity(root->fs_info, eb, multi,
+					       length, raid_map);
+		BUG_ON(ret);
+	} else {
+		for (dev_nr = 0; dev_nr < multi->num_stripes; dev_nr++) {
+			eb->fd = multi->stripes[dev_nr].dev->fd;
+			eb->dev_bytenr = multi->stripes[dev_nr].physical;
+			multi->stripes[dev_nr].dev->total_ios++;
+			ret = write_extent_to_disk(eb);
+			BUG_ON(ret);
+		}
+	}
+	kfree(raid_map);
+	kfree(multi);
+	return 0;
+}
+
+/*
+ * Write tree block @eb to disk.
+ *
+ * The block must pass check_tree_block() and, when @trans is given, be up
+ * to date for trans->transid; otherwise BUG() is hit.  The WRITTEN header
+ * flag is set and the checksum refreshed before the block is written.
+ */
+int write_tree_block(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct extent_buffer *eb)
+{
+	int err = check_tree_block(root->fs_info, eb);
+
+	if (err) {
+		print_tree_block_error(root->fs_info, eb, err);
+		BUG();
+	}
+
+	if (trans && !btrfs_buffer_uptodate(eb, trans->transid))
+		BUG();
+
+	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+	csum_tree_block(root, eb, 0);
+
+	return write_and_map_eb(trans, root, eb);
+}
+
+/*
+ * Initialise the in-memory @root for tree @objectid: record the geometry
+ * and owning @fs_info, clear the node pointers, counters and flags, set up
+ * the list heads and zero root_key/root_item (root_key.objectid is then set
+ * to @objectid).  Always returns 0.
+ */
+int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
+		 u32 stripesize, struct btrfs_root *root,
+		 struct btrfs_fs_info *fs_info, u64 objectid)
+{
+	/* identity */
+	root->fs_info = fs_info;
+	root->objectid = objectid;
+
+	/* geometry */
+	root->nodesize = nodesize;
+	root->leafsize = leafsize;
+	root->sectorsize = sectorsize;
+	root->stripesize = stripesize;
+
+	/* no tree blocks attached yet */
+	root->node = NULL;
+	root->commit_root = NULL;
+
+	root->ref_cows = 0;
+	root->track_dirty = 0;
+	root->last_trans = 0;
+	root->highest_inode = 0;
+	root->last_inode_alloc = 0;
+
+	INIT_LIST_HEAD(&root->dirty_list);
+	INIT_LIST_HEAD(&root->orphan_data_extents);
+
+	memset(&root->root_item, 0, sizeof(root->root_item));
+	memset(&root->root_key, 0, sizeof(root->root_key));
+	root->root_key.objectid = objectid;
+	return 0;
+}
+
+/*
+ * Bring the root item of @root in the tree root up to date with the
+ * current root node.
+ *
+ * Dirty block groups are written first; then, as long as the recorded root
+ * bytenr differs from root->node->start, the root item is refreshed,
+ * stored via btrfs_update_root() and the block groups are written again.
+ * Always returns 0 (an update failure hits BUG_ON()).
+ */
+static int update_cowonly_root(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
+	int ret;
+
+	btrfs_write_dirty_block_groups(trans, root);
+	while (btrfs_root_bytenr(&root->root_item) != root->node->start) {
+		btrfs_set_root_bytenr(&root->root_item,
+				      root->node->start);
+		btrfs_set_root_generation(&root->root_item,
+					  trans->transid);
+		root->root_item.level = btrfs_header_level(root->node);
+		ret = btrfs_update_root(trans, tree_root,
+					&root->root_key,
+					&root->root_item);
+		BUG_ON(ret);
+		btrfs_write_dirty_block_groups(trans, root);
+	}
+	return 0;
+}
+
+/*
+ * COW the tree root node, then process every root queued on
+ * fs_info->dirty_cowonly_roots: update its root item and drop its
+ * commit_root.  Does nothing on a read-only fs_info.
+ *
+ * Returns 0, or the error from btrfs_cow_block().
+ */
+static int commit_tree_roots(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info)
+{
+	struct list_head *dirty = &fs_info->dirty_cowonly_roots;
+	struct list_head *entry;
+	struct extent_buffer *eb;
+	struct btrfs_root *root;
+	int ret;
+
+	if (fs_info->readonly)
+		return 0;
+
+	eb = fs_info->tree_root->node;
+	extent_buffer_get(eb);
+	ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
+	free_extent_buffer(eb);
+	if (ret)
+		return ret;
+
+	while (!list_empty(dirty)) {
+		entry = dirty->next;
+		list_del_init(entry);
+		root = list_entry(entry, struct btrfs_root, dirty_list);
+		update_cowonly_root(trans, root);
+		free_extent_buffer(root->commit_root);
+		root->commit_root = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Write out every dirty extent buffer of the filesystem's extent cache.
+ *
+ * Dirty ranges are looked up from offset 0 again and again until none is
+ * left; each buffer in a range is written, marked clean and released.
+ * Always returns 0 (failures hit BUG_ON()).
+ */
+static int __commit_transaction(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root)
+{
+	struct extent_io_tree *tree = &root->fs_info->extent_cache;
+	struct extent_buffer *eb;
+	u64 start;
+	u64 end;
+	int ret;
+
+	while (find_first_extent_bit(tree, 0, &start, &end,
+				     EXTENT_DIRTY) == 0) {
+		while (start <= end) {
+			eb = find_first_extent_buffer(tree, start);
+			BUG_ON(!eb || eb->start != start);
+			ret = write_tree_block(trans, root, eb);
+			BUG_ON(ret);
+			/* advance before the reference is dropped */
+			start += eb->len;
+			clear_extent_buffer_dirty(eb);
+			free_extent_buffer(eb);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Commit the running transaction @trans.
+ *
+ * Unless @root is unchanged since the last commit or is the tree/chunk
+ * root, its root item is refreshed and stored in the tree root first.
+ * Then the cow-only roots are committed, all dirty tree blocks and the
+ * super block are written, pinned extents are released and @trans is
+ * freed.  Always returns 0; intermediate failures hit BUG_ON().
+ */
+int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	u64 transid = trans->transid;
+	int ret = 0;
+
+	if (root->commit_root != root->node &&
+	    root != root->fs_info->tree_root &&
+	    root != root->fs_info->chunk_root) {
+		free_extent_buffer(root->commit_root);
+		root->commit_root = NULL;
+
+		btrfs_set_root_bytenr(&root->root_item, root->node->start);
+		btrfs_set_root_generation(&root->root_item, trans->transid);
+		root->root_item.level = btrfs_header_level(root->node);
+		ret = btrfs_update_root(trans, root->fs_info->tree_root,
+					&root->root_key, &root->root_item);
+		BUG_ON(ret);
+	}
+
+	ret = commit_tree_roots(trans, fs_info);
+	BUG_ON(ret);
+	ret = __commit_transaction(trans, root);
+	BUG_ON(ret);
+	write_ctree_super(trans, root);
+	btrfs_finish_extent_commit(trans, fs_info->extent_root,
+				   &fs_info->pinned_extents);
+	btrfs_free_transaction(root, trans);
+
+	free_extent_buffer(root->commit_root);
+	root->commit_root = NULL;
+	fs_info->running_transaction = NULL;
+	fs_info->last_trans_committed = transid;
+	return 0;
+}
+
+/*
+ * Initialise @root for tree @objectid from the root item that
+ * btrfs_find_last_root() finds in @tree_root, and read its root node.
+ *
+ * Returns 0 on success, the lookup error, or -EIO when the root node is
+ * not up to date after the read.
+ */
+static int find_and_setup_root(struct btrfs_root *tree_root,
+			       struct btrfs_fs_info *fs_info,
+			       u64 objectid, struct btrfs_root *root)
+{
+	u64 generation;
+	u32 blocksize;
+	int ret;
+
+	__setup_root(tree_root->nodesize, tree_root->leafsize,
+		     tree_root->sectorsize, tree_root->stripesize,
+		     root, fs_info, objectid);
+
+	ret = btrfs_find_last_root(tree_root, objectid,
+				   &root->root_item, &root->root_key);
+	if (ret)
+		return ret;
+
+	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
+	generation = btrfs_root_generation(&root->root_item);
+	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
+				     blocksize, generation);
+
+	return extent_buffer_uptodate(root->node) ? 0 : -EIO;
+}
+
+/*
+ * Set up fs_info->log_root_tree from the log root recorded in
+ * @disk_super.
+ *
+ * When the super block records no log root (bytenr 0) there is nothing to
+ * do and 0 is returned without allocating anything.  Otherwise a root is
+ * allocated and its node is read with the super block generation + 1 as
+ * the expected transid.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or -EIO when the
+ * log root node is not up to date after the read (log_root_tree is then
+ * left NULL).
+ */
+static int find_and_setup_log_root(struct btrfs_root *tree_root,
+				   struct btrfs_fs_info *fs_info,
+				   struct btrfs_super_block *disk_super)
+{
+	u32 blocksize;
+	u64 blocknr = btrfs_super_log_root(disk_super);
+	struct btrfs_root *log_root;
+
+	/* check first, so a missing log tree never costs an allocation */
+	if (blocknr == 0)
+		return 0;
+
+	log_root = malloc(sizeof(*log_root));
+	if (!log_root)
+		return -ENOMEM;
+
+	blocksize = btrfs_level_size(tree_root,
+				     btrfs_super_log_root_level(disk_super));
+
+	__setup_root(tree_root->nodesize, tree_root->leafsize,
+		     tree_root->sectorsize, tree_root->stripesize,
+		     log_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+
+	log_root->node = read_tree_block(tree_root, blocknr,
+					 blocksize,
+					 btrfs_super_generation(disk_super) + 1);
+
+	fs_info->log_root_tree = log_root;
+
+	if (!extent_buffer_uptodate(log_root->node)) {
+		free_extent_buffer(log_root->node);
+		free(log_root);
+		fs_info->log_root_tree = NULL;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Release @root: drop its node and commit_root buffers if set, then free
+ * the structure itself.  Always returns 0.
+ */
+int btrfs_free_fs_root(struct btrfs_root *root)
+{
+	struct extent_buffer *node = root->node;
+	struct extent_buffer *commit = root->commit_root;
+
+	if (node != NULL)
+		free_extent_buffer(node);
+	if (commit != NULL)
+		free_extent_buffer(commit);
+	kfree(root);
+	return 0;
+}
+
+/* rb-tree callback: free the btrfs_root that embeds @node as its rb_node. */
+static void __free_fs_root(struct rb_node *node)
+{
+	btrfs_free_fs_root(container_of(node, struct btrfs_root, rb_node));
+}
+
+FREE_RB_BASED_TREE(fs_roots, __free_fs_root); /* NOTE(review): presumably expands to a helper that frees a whole fs_roots rb-tree through __free_fs_root() - confirm in rbtree-utils.h */
+
+/*
+ * Allocate and read the root described by @location without consulting or
+ * updating the fs root cache.
+ *
+ * With location->offset == (u64)-1 the root item is located through
+ * find_and_setup_root(); otherwise the item is looked up at exactly
+ * @location in the tree root and the root node is read from it.
+ *
+ * Returns the new root (ref_cows set) or an ERR_PTR(): -ENOMEM, -ENOENT
+ * when no item sits at @location, -EIO when the root node cannot be read,
+ * or the error of the lookup.
+ */
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+					       struct btrfs_key *location)
+{
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	u64 generation;
+	u32 blocksize;
+	int ret = 0;
+
+	root = calloc(1, sizeof(*root));
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+
+	if (location->offset == (u64)-1) {
+		ret = find_and_setup_root(tree_root, fs_info,
+					  location->objectid, root);
+		if (ret)
+			goto fail;
+	} else {
+		__setup_root(tree_root->nodesize, tree_root->leafsize,
+			     tree_root->sectorsize, tree_root->stripesize,
+			     root, fs_info, location->objectid);
+
+		path = btrfs_alloc_path();
+		BUG_ON(!path);
+		ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
+		if (ret == 0) {
+			leaf = path->nodes[0];
+			read_extent_buffer(leaf, &root->root_item,
+				btrfs_item_ptr_offset(leaf, path->slots[0]),
+				sizeof(root->root_item));
+			memcpy(&root->root_key, location, sizeof(*location));
+		} else if (ret > 0) {
+			/* the search ended next to, not on, the wanted key */
+			ret = -ENOENT;
+		}
+		btrfs_free_path(path);
+		if (ret)
+			goto fail;
+
+		generation = btrfs_root_generation(&root->root_item);
+		blocksize = btrfs_level_size(root,
+				btrfs_root_level(&root->root_item));
+		root->node = read_tree_block(root,
+				btrfs_root_bytenr(&root->root_item),
+				blocksize, generation);
+		if (!extent_buffer_uptodate(root->node)) {
+			ret = -EIO;
+			goto fail;
+		}
+	}
+
+	root->ref_cows = 1;
+	return root;
+fail:
+	free(root);
+	return ERR_PTR(ret);
+}
+
+/*
+ * rb-tree search comparator: compare the u64 objectid that @data points to
+ * with the objectid of the root embedding @node.  Returns 1, -1 or 0 when
+ * the key is greater than, less than or equal to the root's objectid.
+ */
+static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
+					    void *data)
+{
+	struct btrfs_root *root = rb_entry(node, struct btrfs_root, rb_node);
+	u64 objectid = *((u64 *)data);
+
+	if (objectid == root->objectid)
+		return 0;
+	return objectid > root->objectid ? 1 : -1;
+}
+
+/*
+ * rb-tree insert comparator: order two roots by objectid by feeding the
+ * objectid of @node2's root to btrfs_fs_roots_compare_objectids().
+ */
+static int btrfs_fs_roots_compare_roots(struct rb_node *node1,
+					struct rb_node *node2)
+{
+	struct btrfs_root *other = rb_entry(node2, struct btrfs_root, rb_node);
+	void *key = (void *)&other->objectid;
+
+	return btrfs_fs_roots_compare_objectids(node1, key);
+}
+
+/*
+ * Return the root identified by @location.
+ *
+ * The roots stored directly in @fs_info (tree, extent, chunk, dev, csum and
+ * quota) are returned as they are.  Any other root is looked up in
+ * fs_info->fs_root_tree by objectid; on a miss it is read with
+ * btrfs_read_fs_root_no_cache() and inserted into that tree.
+ *
+ * For those other roots the caller must pass location->offset == (u64)-1 and
+ * must not ask for BTRFS_TREE_RELOC_OBJECTID, otherwise BUG_ON() fires.
+ *
+ * Returns the root, or the ERR_PTR() produced by
+ * btrfs_read_fs_root_no_cache().
+ */
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+				      struct btrfs_key *location)
+{
+	struct btrfs_root *root;
+	struct rb_node *cached;
+	u64 objectid = location->objectid;
+	int ret;
+
+	switch (objectid) {
+	case BTRFS_ROOT_TREE_OBJECTID:
+		return fs_info->tree_root;
+	case BTRFS_EXTENT_TREE_OBJECTID:
+		return fs_info->extent_root;
+	case BTRFS_CHUNK_TREE_OBJECTID:
+		return fs_info->chunk_root;
+	case BTRFS_DEV_TREE_OBJECTID:
+		return fs_info->dev_root;
+	case BTRFS_CSUM_TREE_OBJECTID:
+		return fs_info->csum_root;
+	case BTRFS_QUOTA_TREE_OBJECTID:
+		return fs_info->quota_root;
+	default:
+		break;
+	}
+
+	BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID ||
+	       location->offset != (u64)-1);
+
+	cached = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
+			   btrfs_fs_roots_compare_objectids, NULL);
+	if (cached)
+		return container_of(cached, struct btrfs_root, rb_node);
+
+	root = btrfs_read_fs_root_no_cache(fs_info, location);
+	if (!IS_ERR(root)) {
+		/* Remember the root so the next lookup finds it in the tree. */
+		ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
+				btrfs_fs_roots_compare_roots);
+		BUG_ON(ret);
+	}
+	return root;
+}
+
+/*
+ * Free the root structures and the super block copy owned by @fs_info, then
+ * @fs_info itself.  Members that are NULL are passed to free() unchanged.
+ */
+void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
+{
+	void *owned[] = {
+		fs_info->tree_root,
+		fs_info->extent_root,
+		fs_info->chunk_root,
+		fs_info->dev_root,
+		fs_info->csum_root,
+		fs_info->quota_root,
+		fs_info->free_space_root,
+		fs_info->super_copy,
+		fs_info->log_root_tree,
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
+		free(owned[i]);
+	free(fs_info);
+}
+
+/*
+ * Allocate a zeroed btrfs_fs_info together with its statically owned roots
+ * and super block copy, and initialise its trees, lists and mutex.
+ *
+ * @writable:  when zero the new fs_info is marked readonly
+ * @sb_bytenr: stored as fs_info->super_bytenr
+ *
+ * Returns the new fs_info, or NULL if any allocation failed (everything
+ * allocated so far is released through btrfs_free_fs_info()).
+ */
+struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr)
+{
+	struct btrfs_fs_info *fs_info = calloc(1, sizeof(*fs_info));
+
+	if (!fs_info)
+		return NULL;
+
+	fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->extent_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->dev_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->quota_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->free_space_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
+
+	if (!(fs_info->tree_root && fs_info->extent_root &&
+	      fs_info->chunk_root && fs_info->dev_root &&
+	      fs_info->csum_root && fs_info->quota_root &&
+	      fs_info->free_space_root && fs_info->super_copy)) {
+		btrfs_free_fs_info(fs_info);
+		return NULL;
+	}
+
+	/* Range trees. */
+	extent_io_tree_init(&fs_info->extent_cache);
+	extent_io_tree_init(&fs_info->free_space_cache);
+	extent_io_tree_init(&fs_info->block_group_cache);
+	extent_io_tree_init(&fs_info->pinned_extents);
+	extent_io_tree_init(&fs_info->pending_del);
+	extent_io_tree_init(&fs_info->extent_ins);
+	fs_info->excluded_extents = NULL;
+
+	/* Root cache and chunk mapping cache. */
+	fs_info->fs_root_tree = RB_ROOT;
+	cache_tree_init(&fs_info->mapping_tree.cache_tree);
+
+	mutex_init(&fs_info->fs_mutex);
+	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
+	INIT_LIST_HEAD(&fs_info->space_info);
+	INIT_LIST_HEAD(&fs_info->recow_ebs);
+
+	if (!writable)
+		fs_info->readonly = 1;
+
+	fs_info->super_bytenr = sb_bytenr;
+	fs_info->data_alloc_profile = (u64)-1;
+	fs_info->metadata_alloc_profile = (u64)-1;
+	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
+	return fs_info;
+}
+
+/*
+ * Check the feature flags recorded in @sb against what this code supports.
+ *
+ * Any incompat flag outside BTRFS_FEATURE_INCOMPAT_SUPP is rejected.  Any
+ * compat_ro flag outside BTRFS_FEATURE_COMPAT_RO_SUPP is rejected only when
+ * @writable is non-zero.
+ *
+ * Side effect: BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF is set in the incompat
+ * flags of @sb if it was not set already.
+ *
+ * Returns 0 if the super block can be used, -ENOTSUP otherwise.
+ */
+int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable)
+{
+	u64 features;
+
+	features = btrfs_super_incompat_flags(sb) &
+		~BTRFS_FEATURE_INCOMPAT_SUPP;
+	if (features) {
+		/* %llx matches the unsigned long long cast below. */
+		printk("couldn't open because of unsupported "
+		       "option features (%llx).\n",
+		       (unsigned long long)features);
+		return -ENOTSUP;
+	}
+
+	features = btrfs_super_incompat_flags(sb);
+	if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
+		features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+		btrfs_set_super_incompat_flags(sb, features);
+	}
+
+	features = btrfs_super_compat_ro_flags(sb) &
+		~BTRFS_FEATURE_COMPAT_RO_SUPP;
+	if (writable && features) {
+		printk("couldn't open RDWR because of unsupported "
+		       "option features (%llx).\n",
+		       (unsigned long long)features);
+		return -ENOTSUP;
+	}
+	return 0;
+}
+
+/*
+ * Pick the backup root slot in @super with the highest tree root generation
+ * that differs from the generation of the super block itself.
+ *
+ * Returns the slot index; 0 is returned when no slot qualifies.
+ */
+static int find_best_backup_root(struct btrfs_super_block *super)
+{
+	const u64 current_gen = btrfs_super_generation(super);
+	u64 newest_gen = 0;
+	int best = 0;
+	int slot;
+
+	for (slot = 0; slot < BTRFS_NUM_BACKUP_ROOTS; slot++) {
+		u64 slot_gen;
+
+		slot_gen = btrfs_backup_tree_root_gen(&super->super_roots[slot]);
+		if (slot_gen == current_gen || slot_gen <= newest_gen)
+			continue;
+		newest_gen = slot_gen;
+		best = slot;
+	}
+	return best;
+}
+
+/*
+ * Set up @info_root for @objectid from the tree root.
+ *
+ * If that fails and OPEN_CTREE_PARTIAL is set in @flags, @info_root gets a
+ * blank tree block (bytenr 0, leafsize bytes) that is marked not uptodate, so
+ * that code expecting a non-NULL ->node keeps working.
+ *
+ * @str names the tree in the error message.
+ *
+ * Returns 0 on success or when the blank node was installed, -EIO if setup
+ * failed without OPEN_CTREE_PARTIAL, -ENOMEM if the blank block could not be
+ * created.
+ */
+static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
+				      enum btrfs_open_ctree_flags flags,
+				      struct btrfs_root *info_root,
+				      u64 objectid, char *str)
+{
+	u32 leafsize = btrfs_super_leafsize(fs_info->super_copy);
+	struct extent_buffer *blank;
+
+	if (!find_and_setup_root(fs_info->tree_root, fs_info, objectid,
+				 info_root))
+		return 0;
+
+	printk("Couldn't setup %s tree\n", str);
+	if (!(flags & OPEN_CTREE_PARTIAL))
+		return -EIO;
+
+	blank = btrfs_find_create_tree_block(fs_info, 0, leafsize);
+	info_root->node = blank;
+	if (!blank)
+		return -ENOMEM;
+	clear_extent_buffer_uptodate(NULL, blank);
+	return 0;
+}
+
+/*
+ * Read the tree root and set up the roots stored in @fs_info from it.
+ *
+ * @root_tree_bytenr: bytenr of the tree root node to read.  When it is 0 and
+ *                    OPEN_CTREE_BACKUP_ROOT is not set, the bytenr from the
+ *                    super block is used.
+ * @flags:            OPEN_CTREE_BACKUP_ROOT selects the slot returned by
+ *                    find_best_backup_root() instead; OPEN_CTREE_PARTIAL
+ *                    tolerates a missing extent, csum or log root;
+ *                    OPEN_CTREE_NO_BLOCK_GROUPS skips reading block groups.
+ *
+ * The device root is mandatory.  The quota root is optional and only sets
+ * fs_info->quota_enabled when found.  The free space root is read only when
+ * the FREE_SPACE_TREE compat_ro feature is set.
+ *
+ * Returns 0 on success, -EIO on failure, or the error returned by
+ * setup_root_or_create_block().
+ */
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr,
+ enum btrfs_open_ctree_flags flags)
+{
+ struct btrfs_super_block *sb = fs_info->super_copy;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ u32 sectorsize;
+ u32 nodesize;
+ u32 leafsize;
+ u32 stripesize;
+ u64 generation;
+ u32 blocksize;
+ int ret;
+
+ nodesize = btrfs_super_nodesize(sb);
+ leafsize = btrfs_super_leafsize(sb);
+ sectorsize = btrfs_super_sectorsize(sb);
+ stripesize = btrfs_super_stripesize(sb);
+
+ root = fs_info->tree_root;
+ __setup_root(nodesize, leafsize, sectorsize, stripesize,
+ root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
+ /*
+ * NOTE(review): blocksize is derived from the root level in the super
+ * block even when a backup root is selected below -- confirm the level
+ * does not affect btrfs_level_size().
+ */
+ blocksize = btrfs_level_size(root, btrfs_super_root_level(sb));
+ generation = btrfs_super_generation(sb);
+
+ if (!root_tree_bytenr && !(flags & OPEN_CTREE_BACKUP_ROOT)) {
+ root_tree_bytenr = btrfs_super_root(sb);
+ } else if (flags & OPEN_CTREE_BACKUP_ROOT) {
+ struct btrfs_root_backup *backup;
+ int index = find_best_backup_root(sb);
+ /* find_best_backup_root() only returns in-range slots; defensive check */
+ if (index >= BTRFS_NUM_BACKUP_ROOTS) {
+ fprintf(stderr, "Invalid backup root number\n");
+ return -EIO;
+ }
+ backup = fs_info->super_copy->super_roots + index;
+ root_tree_bytenr = btrfs_backup_tree_root(backup);
+ generation = btrfs_backup_tree_root_gen(backup);
+ }
+
+ root->node = read_tree_block(root, root_tree_bytenr, blocksize,
+ generation);
+ if (!extent_buffer_uptodate(root->node)) {
+ fprintf(stderr, "Couldn't read tree root\n");
+ return -EIO;
+ }
+
+ ret = setup_root_or_create_block(fs_info, flags, fs_info->extent_root,
+ BTRFS_EXTENT_TREE_OBJECTID, "extent");
+ if (ret)
+ return ret;
+ fs_info->extent_root->track_dirty = 1;
+
+ ret = find_and_setup_root(root, fs_info, BTRFS_DEV_TREE_OBJECTID,
+ fs_info->dev_root);
+ if (ret) {
+ printk("Couldn't setup device tree\n");
+ return -EIO;
+ }
+ fs_info->dev_root->track_dirty = 1;
+
+ ret = setup_root_or_create_block(fs_info, flags, fs_info->csum_root,
+ BTRFS_CSUM_TREE_OBJECTID, "csum");
+ if (ret)
+ return ret;
+ fs_info->csum_root->track_dirty = 1;
+
+ /* A missing quota tree is not an error; quota just stays disabled. */
+ ret = find_and_setup_root(root, fs_info, BTRFS_QUOTA_TREE_OBJECTID,
+ fs_info->quota_root);
+ if (ret == 0)
+ fs_info->quota_enabled = 1;
+
+ if (btrfs_fs_compat_ro(fs_info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
+ ret = find_and_setup_root(root, fs_info, BTRFS_FREE_SPACE_TREE_OBJECTID,
+ fs_info->free_space_root);
+ if (ret) {
+ printk("Couldn't read free space tree\n");
+ return -EIO;
+ }
+ fs_info->free_space_root->track_dirty = 1;
+ }
+
+ ret = find_and_setup_log_root(root, fs_info, sb);
+ if (ret) {
+ printk("Couldn't setup log root tree\n");
+ if (!(flags & OPEN_CTREE_PARTIAL))
+ return -EIO;
+ }
+
+ fs_info->generation = generation;
+ fs_info->last_trans_committed = generation;
+ /* The return value of btrfs_read_block_groups() is not checked here. */
+ if (extent_buffer_uptodate(fs_info->extent_root->node) &&
+ !(flags & OPEN_CTREE_NO_BLOCK_GROUPS))
+ btrfs_read_block_groups(fs_info->tree_root);
+
+ key.objectid = BTRFS_FS_TREE_OBJECTID;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
+
+ /* fs_info->fs_root keeps the ERR_PTR value on this failure path. */
+ if (IS_ERR(fs_info->fs_root))
+ return -EIO;
+ return 0;
+}
+
+/*
+ * Drop the root node extent buffer of every root stored directly in
+ * @fs_info.  Roots whose pointer is NULL are skipped.
+ */
+void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *roots[] = {
+		fs_info->free_space_root,
+		fs_info->quota_root,
+		fs_info->csum_root,
+		fs_info->dev_root,
+		fs_info->extent_root,
+		fs_info->tree_root,
+		fs_info->log_root_tree,
+		fs_info->chunk_root,
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(roots) / sizeof(roots[0]); i++) {
+		if (!roots[i])
+			continue;
+		free_extent_buffer(roots[i]->node);
+	}
+}
+
+/* Free the map_lookup that embeds the cache extent @ce. */
+static void free_map_lookup(struct cache_extent *ce)
+{
+	kfree(container_of(ce, struct map_lookup, ce));
+}
+
+/*
+ * NOTE(review): this macro presumably expands to the definition of
+ * free_mapping_cache_tree(), which btrfs_cleanup_all_caches() calls, and
+ * releases each entry through free_map_lookup() -- confirm against the macro
+ * definition.
+ */
+FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
+
+/*
+ * Release the cached state hanging off @fs_info: every extent buffer still
+ * queued on recow_ebs, the chunk mapping cache and the extent io trees.
+ */
+void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
+{
+	struct list_head *pending = &fs_info->recow_ebs;
+	struct extent_io_tree *trees[] = {
+		&fs_info->extent_cache,
+		&fs_info->free_space_cache,
+		&fs_info->block_group_cache,
+		&fs_info->pinned_extents,
+		&fs_info->pending_del,
+		&fs_info->extent_ins,
+	};
+	size_t i;
+
+	for (;;) {
+		struct extent_buffer *eb;
+
+		if (list_empty(pending))
+			break;
+		eb = list_first_entry(pending, struct extent_buffer, recow);
+		list_del_init(&eb->recow);
+		free_extent_buffer(eb);
+	}
+
+	free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
+	for (i = 0; i < sizeof(trees) / sizeof(trees[0]); i++)
+		extent_io_tree_cleanup(trees[i]);
+}
+
+/*
+ * Scan the device behind @fd for a btrfs super block and, unless told not
+ * to, scan for the remaining devices of a multi-device filesystem.
+ *
+ * @path:          name used in the error message and passed to the scanner
+ * @fs_devices:    filled in by btrfs_scan_one_device()
+ * @sb_bytenr:     super block offset; 0 selects BTRFS_SUPER_INFO_OFFSET
+ * @super_recover: passed through to btrfs_scan_one_device()
+ * @skip_devices:  when non-zero, btrfs_scan_lblkid() is never called
+ *
+ * Returns 0 on success, -errno if the device size cannot be determined,
+ * -EINVAL if @sb_bytenr lies beyond the end of the device, or the error
+ * returned by the scan helpers.
+ */
+int btrfs_scan_fs_devices(int fd, const char *path,
+			  struct btrfs_fs_devices **fs_devices,
+			  u64 sb_bytenr, int super_recover,
+			  int skip_devices)
+{
+	u64 total_devs;
+	off_t end;
+	int ret;
+
+	if (sb_bytenr == 0)
+		sb_bytenr = BTRFS_SUPER_INFO_OFFSET;
+
+	/* The offset of the end of the file is the device size. */
+	end = lseek(fd, 0, SEEK_END);
+	if (end < 0)
+		return -errno;
+	lseek(fd, 0, SEEK_SET);
+
+	if (sb_bytenr > (u64)end) {
+		fprintf(stderr, "Superblock bytenr is larger than device size\n");
+		return -EINVAL;
+	}
+
+	ret = btrfs_scan_one_device(fd, path, fs_devices,
+				    &total_devs, sb_bytenr, super_recover);
+	if (ret) {
+		fprintf(stderr, "No valid Btrfs found on %s\n", path);
+		return ret;
+	}
+
+	if (skip_devices || total_devs == 1)
+		return 0;
+	return btrfs_scan_lblkid();
+}
+
+/*
+ * Set up fs_info->chunk_root and read the chunk tree.
+ *
+ * The chunk root is initialised with the sizes recorded in the super block,
+ * btrfs_read_sys_array() is run, the chunk root node is read, and finally
+ * btrfs_read_chunk_tree() is run unless the super block carries
+ * BTRFS_SUPER_FLAG_METADUMP.
+ *
+ * Returns 0 on success, -EIO if the chunk root node cannot be read, or the
+ * error returned by the helpers.  If the chunk root node cannot be read and
+ * fs_info->ignore_chunk_tree_error is set, fs_info->chunk_root is set to NULL
+ * and 0 is returned, so callers have to check chunk_root afterwards.
+ */
+int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_super_block *sb = fs_info->super_copy;
+ u32 sectorsize;
+ u32 nodesize;
+ u32 leafsize;
+ u32 blocksize;
+ u32 stripesize;
+ u64 generation;
+ int ret;
+
+ nodesize = btrfs_super_nodesize(sb);
+ leafsize = btrfs_super_leafsize(sb);
+ sectorsize = btrfs_super_sectorsize(sb);
+ stripesize = btrfs_super_stripesize(sb);
+
+ __setup_root(nodesize, leafsize, sectorsize, stripesize,
+ fs_info->chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+
+ ret = btrfs_read_sys_array(fs_info->chunk_root);
+ if (ret)
+ return ret;
+
+ blocksize = btrfs_level_size(fs_info->chunk_root,
+ btrfs_super_chunk_root_level(sb));
+ generation = btrfs_super_chunk_root_generation(sb);
+
+ fs_info->chunk_root->node = read_tree_block(fs_info->chunk_root,
+ btrfs_super_chunk_root(sb),
+ blocksize, generation);
+ if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
+ if (fs_info->ignore_chunk_tree_error) {
+ warning("cannot read chunk root, continue anyway");
+ /*
+ * NOTE(review): the btrfs_root allocated for chunk_root in
+ * btrfs_new_fs_info() is not freed before the pointer is
+ * cleared -- looks like a small leak, confirm.
+ */
+ fs_info->chunk_root = NULL;
+ return 0;
+ } else {
+ error("cannot read chunk root");
+ return -EIO;
+ }
+ }
+
+ if (!(btrfs_super_flags(sb) & BTRFS_SUPER_FLAG_METADUMP)) {
+ ret = btrfs_read_chunk_tree(fs_info->chunk_root);
+ if (ret) {
+ fprintf(stderr, "Couldn't read chunk tree\n");
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Open the filesystem on the already opened descriptor @fp and build a
+ * btrfs_fs_info for it.
+ *
+ * @path:             device or image name, passed to the device scan
+ * @sb_bytenr:        super block offset; 0 selects BTRFS_SUPER_INFO_OFFSET
+ * @root_tree_bytenr: passed to btrfs_setup_all_roots()
+ * @flags:            OPEN_CTREE_* flags
+ *
+ * Steps: allocate fs_info, scan and open the devices, read and check the
+ * super block, set up the chunk tree, then set up all other roots.
+ *
+ * Returns the fs_info or NULL on failure.  Two cases return a fs_info that
+ * is only partly set up: the chunk root could not be read but that error is
+ * ignored (fs_info->chunk_root is NULL), and btrfs_setup_all_roots() failed
+ * while __OPEN_CTREE_RETURN_CHUNK_ROOT or ignore_chunk_tree_error is set.
+ */
+static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path,
+ u64 sb_bytenr,
+ u64 root_tree_bytenr,
+ enum btrfs_open_ctree_flags flags)
+{
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_super_block *disk_super;
+ struct btrfs_fs_devices *fs_devices = NULL;
+ struct extent_buffer *eb;
+ int ret;
+ int oflags;
+
+ if (sb_bytenr == 0)
+ sb_bytenr = BTRFS_SUPER_INFO_OFFSET;
+
+ /* try to drop all the caches */
+ if (posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED))
+ fprintf(stderr, "Warning, could not drop caches\n");
+
+ fs_info = btrfs_new_fs_info(flags & OPEN_CTREE_WRITES, sb_bytenr);
+ if (!fs_info) {
+ fprintf(stderr, "Failed to allocate memory for fs_info\n");
+ return NULL;
+ }
+ /* Mirror the relevant open flags into fs_info for later use. */
+ if (flags & OPEN_CTREE_RESTORE)
+ fs_info->on_restoring = 1;
+ if (flags & OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS)
+ fs_info->suppress_check_block_errors = 1;
+ if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH)
+ fs_info->ignore_fsid_mismatch = 1;
+ if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR)
+ fs_info->ignore_chunk_tree_error = 1;
+
+ ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr,
+ (flags & OPEN_CTREE_RECOVER_SUPER),
+ (flags & OPEN_CTREE_NO_DEVICES));
+ if (ret)
+ goto out;
+
+ fs_info->fs_devices = fs_devices;
+ if (flags & OPEN_CTREE_WRITES)
+ oflags = O_RDWR;
+ else
+ oflags = O_RDONLY;
+
+ if (flags & OPEN_CTREE_EXCLUSIVE)
+ oflags |= O_EXCL;
+
+ ret = btrfs_open_devices(fs_devices, oflags);
+ if (ret)
+ goto out;
+
+ /*
+ * Normally the super block is read from the latest device with mirror
+ * recovery enabled; with OPEN_CTREE_RECOVER_SUPER it is read from @fp
+ * with mirror recovery disabled.
+ */
+ disk_super = fs_info->super_copy;
+ if (!(flags & OPEN_CTREE_RECOVER_SUPER))
+ ret = btrfs_read_dev_super(fs_devices->latest_bdev,
+ disk_super, sb_bytenr, 1);
+ else
+ ret = btrfs_read_dev_super(fp, disk_super, sb_bytenr, 0);
+ if (ret) {
+ printk("No valid btrfs found\n");
+ goto out_devices;
+ }
+
+ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID &&
+ !fs_info->ignore_fsid_mismatch) {
+ fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
+ goto out_devices;
+ }
+
+ memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE);
+
+ ret = btrfs_check_fs_compatibility(fs_info->super_copy,
+ flags & OPEN_CTREE_WRITES);
+ if (ret)
+ goto out_devices;
+
+ ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
+ if (ret)
+ goto out_chunk;
+
+ /* Chunk tree root is unable to read, return directly */
+ if (!fs_info->chunk_root)
+ return fs_info;
+
+ eb = fs_info->chunk_root->node;
+ read_extent_buffer(eb, fs_info->chunk_tree_uuid,
+ btrfs_header_chunk_tree_uuid(eb),
+ BTRFS_UUID_SIZE);
+
+ ret = btrfs_setup_all_roots(fs_info, root_tree_bytenr, flags);
+ if (ret && !(flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) &&
+ !fs_info->ignore_chunk_tree_error)
+ goto out_chunk;
+
+ return fs_info;
+
+ /* Cleanup labels fall through: each later label undoes an earlier step. */
+out_chunk:
+ btrfs_release_all_roots(fs_info);
+ btrfs_cleanup_all_caches(fs_info);
+out_devices:
+ btrfs_close_devices(fs_devices);
+out:
+ btrfs_free_fs_info(fs_info);
+ return NULL;
+}
+
+/*
+ * Open @filename and build a btrfs_fs_info for the filesystem found in it.
+ *
+ * The file is opened read-only unless OPEN_CTREE_WRITES is set, in which case
+ * it is opened with O_CREAT | O_RDWR and mode 0600.  The descriptor opened
+ * here is closed again before returning.
+ *
+ * Returns the fs_info, or NULL if the file cannot be opened or
+ * __open_ctree_fd() fails.
+ */
+struct btrfs_fs_info *open_ctree_fs_info(const char *filename,
+					 u64 sb_bytenr, u64 root_tree_bytenr,
+					 enum btrfs_open_ctree_flags flags)
+{
+	int oflags = (flags & OPEN_CTREE_WRITES) ? (O_CREAT | O_RDWR) : O_RDONLY;
+	int fp = open(filename, oflags, 0600);
+	struct btrfs_fs_info *info;
+
+	if (fp < 0) {
+		fprintf (stderr, "Could not open %s\n", filename);
+		return NULL;
+	}
+
+	info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr,
+			       flags);
+	close(fp);
+	return info;
+}
+
+/*
+ * Open the filesystem in @filename and return one of its roots: the chunk
+ * root when __OPEN_CTREE_RETURN_CHUNK_ROOT is set in @flags, the fs root
+ * otherwise.  Returns NULL if the filesystem cannot be opened.
+ *
+ * OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR must not be passed: with that flag the
+ * fs_info may come back without any valid root.
+ */
+struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr,
+			      enum btrfs_open_ctree_flags flags)
+{
+	struct btrfs_fs_info *info;
+
+	BUG_ON(flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR);
+
+	info = open_ctree_fs_info(filename, sb_bytenr, 0, flags);
+	if (!info)
+		return NULL;
+	return (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) ?
+		info->chunk_root : info->fs_root;
+}
+
+/*
+ * Same as open_ctree(), but works on the already opened descriptor @fp;
+ * @path is passed on to __open_ctree_fd().
+ *
+ * Returns the chunk root when __OPEN_CTREE_RETURN_CHUNK_ROOT is set in
+ * @flags, the fs root otherwise, or NULL if the filesystem cannot be opened.
+ * OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR must not be passed.
+ */
+struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
+				 enum btrfs_open_ctree_flags flags)
+{
+	struct btrfs_fs_info *info;
+
+	BUG_ON(flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR);
+
+	info = __open_ctree_fd(fp, path, sb_bytenr, 0, flags);
+	if (!info)
+		return NULL;
+	return (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) ?
+		info->chunk_root : info->fs_root;
+}
+
+/*
+ * Check if the super is valid:
+ * - magic, checksum algorithm and checksum
+ * - tree root levels - maximum
+ * - tree block starts - alignment
+ * - nodesize/sectorsize - minimum, alignment
+ * - dev_item fsid - must match the super block fsid
+ * - number of devices - something sane
+ * - sys array size - minimum, maximum
+ *
+ * A message is printed for the first check that fails and -EIO is returned.
+ * A suspiciously large number of devices only produces a warning.
+ * Returns 0 if every check passes.
+ */
+static int check_super(struct btrfs_super_block *sb)
+{
+	/* The sys array must hold at least one key and one chunk. */
+	const size_t min_sys_array = sizeof(struct btrfs_disk_key) +
+				     sizeof(struct btrfs_chunk);
+	char result[BTRFS_CSUM_SIZE];
+	u32 crc;
+	u16 csum_type;
+	int csum_size;
+
+	if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+		fprintf(stderr, "ERROR: superblock magic doesn't match\n");
+		return -EIO;
+	}
+
+	csum_type = btrfs_super_csum_type(sb);
+	if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
+		fprintf(stderr, "ERROR: unsupported checksum algorithm %u\n",
+			csum_type);
+		return -EIO;
+	}
+	csum_size = btrfs_csum_sizes[csum_type];
+
+	/* The checksum covers everything after the checksum field itself. */
+	crc = ~(u32)0;
+	crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc,
+			      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+	btrfs_csum_final(crc, result);
+
+	if (memcmp(result, sb->csum, csum_size)) {
+		fprintf(stderr, "ERROR: superblock checksum mismatch\n");
+		return -EIO;
+	}
+	if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		fprintf(stderr, "ERROR: tree_root level too big: %d >= %d\n",
+			btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
+		return -EIO;
+	}
+	if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		fprintf(stderr, "ERROR: chunk_root level too big: %d >= %d\n",
+			btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
+		return -EIO;
+	}
+	if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		fprintf(stderr, "ERROR: log_root level too big: %d >= %d\n",
+			btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
+		return -EIO;
+	}
+
+	if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
+		fprintf(stderr, "ERROR: tree_root block unaligned: %llu\n",
+			(unsigned long long)btrfs_super_root(sb));
+		return -EIO;
+	}
+	if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
+		fprintf(stderr, "ERROR: chunk_root block unaligned: %llu\n",
+			(unsigned long long)btrfs_super_chunk_root(sb));
+		return -EIO;
+	}
+	if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
+		fprintf(stderr, "ERROR: log_root block unaligned: %llu\n",
+			(unsigned long long)btrfs_super_log_root(sb));
+		return -EIO;
+	}
+	if (btrfs_super_nodesize(sb) < 4096) {
+		fprintf(stderr, "ERROR: nodesize too small: %u < 4096\n",
+			btrfs_super_nodesize(sb));
+		return -EIO;
+	}
+	if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
+		fprintf(stderr, "ERROR: nodesize unaligned: %u\n",
+			btrfs_super_nodesize(sb));
+		return -EIO;
+	}
+	if (btrfs_super_sectorsize(sb) < 4096) {
+		fprintf(stderr, "ERROR: sectorsize too small: %u < 4096\n",
+			btrfs_super_sectorsize(sb));
+		return -EIO;
+	}
+	if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
+		fprintf(stderr, "ERROR: sectorsize unaligned: %u\n",
+			btrfs_super_sectorsize(sb));
+		return -EIO;
+	}
+
+	if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
+		char fsid[BTRFS_UUID_UNPARSED_SIZE];
+		char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
+
+		uuid_unparse(sb->fsid, fsid);
+		uuid_unparse(sb->dev_item.fsid, dev_fsid);
+		printk(KERN_ERR
+			"ERROR: dev_item UUID does not match fsid: %s != %s\n",
+			dev_fsid, fsid);
+		return -EIO;
+	}
+
+	/*
+	 * Hint to catch really bogus numbers, bitflips or so
+	 */
+	if (btrfs_super_num_devices(sb) > (1UL << 31)) {
+		fprintf(stderr, "WARNING: suspicious number of devices: %llu\n",
+			(unsigned long long)btrfs_super_num_devices(sb));
+	}
+
+	if (btrfs_super_num_devices(sb) == 0) {
+		fprintf(stderr, "ERROR: number of devices is 0\n");
+		return -EIO;
+	}
+
+	/*
+	 * Obvious sys_chunk_array corruptions, it must hold at least one key
+	 * and one chunk
+	 */
+	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+		fprintf(stderr, "BTRFS: system chunk array too big %u > %u\n",
+			btrfs_super_sys_array_size(sb),
+			BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+		return -EIO;
+	}
+	if (btrfs_super_sys_array_size(sb) < min_sys_array) {
+		/* min_sys_array is a size_t, so it needs %zu rather than %lu. */
+		fprintf(stderr, "BTRFS: system chunk array too small %u < %zu\n",
+			btrfs_super_sys_array_size(sb), min_sys_array);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Read a valid super block from @fd into @sb.
+ *
+ * @sb_bytenr:     when it is not BTRFS_SUPER_INFO_OFFSET, only the super
+ *                 block at that offset is read and checked
+ * @super_recover: when non-zero, all BTRFS_SUPER_MIRROR_MAX copies are
+ *                 examined; otherwise only the first one
+ *
+ * In the default-offset case the copy with the highest generation that
+ * passes check_super() and has the same fsid as the first accepted copy is
+ * chosen.
+ *
+ * Returns 0 if a super block was copied into @sb, -1 otherwise.
+ */
+int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr,
+ int super_recover)
+{
+ u8 fsid[BTRFS_FSID_SIZE];
+ int fsid_is_initialized = 0;
+ char tmp[BTRFS_SUPER_INFO_SIZE];
+ struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
+ int i;
+ int ret;
+ int max_super = super_recover ? BTRFS_SUPER_MIRROR_MAX : 1;
+ u64 transid = 0;
+ u64 bytenr;
+
+ if (sb_bytenr != BTRFS_SUPER_INFO_OFFSET) {
+ ret = pread64(fd, buf, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
+ if (ret < BTRFS_SUPER_INFO_SIZE)
+ return -1;
+
+ /* The super block records its own location; it must match. */
+ if (btrfs_super_bytenr(buf) != sb_bytenr)
+ return -1;
+
+ if (check_super(buf))
+ return -1;
+ memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
+ return 0;
+ }
+
+ /*
+ * we would like to check all the supers, but that would make
+ * a btrfs mount succeed after a mkfs from a different FS.
+ * So, we need to add a special mount option to scan for
+ * later supers, using BTRFS_SUPER_MIRROR_MAX instead
+ */
+
+ for (i = 0; i < max_super; i++) {
+ bytenr = btrfs_sb_offset(i);
+ ret = pread64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
+ /* A failed or short read ends the scan, e.g. past the device end. */
+ if (ret < BTRFS_SUPER_INFO_SIZE)
+ break;
+
+ if (btrfs_super_bytenr(buf) != bytenr )
+ continue;
+ /* if magic is NULL, the device was removed */
+ if (btrfs_super_magic(buf) == 0 && i == 0)
+ break;
+ if (check_super(buf))
+ continue;
+
+ if (!fsid_is_initialized) {
+ memcpy(fsid, buf->fsid, sizeof(fsid));
+ fsid_is_initialized = 1;
+ } else if (memcmp(fsid, buf->fsid, sizeof(fsid))) {
+ /*
+ * the superblocks (the original one and
+ * its backups) contain data of different
+ * filesystems -> the super cannot be trusted
+ */
+ continue;
+ }
+
+ /* Keep the copy with the highest generation seen so far. */
+ if (btrfs_super_generation(buf) > transid) {
+ memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
+ transid = btrfs_super_generation(buf);
+ }
+ }
+
+ /* transid stays 0 when no copy was accepted. */
+ return transid > 0 ? 0 : -1;
+}
+
+/*
+ * Write the super block to @device.
+ *
+ * If fs_info->super_bytenr is not BTRFS_SUPER_INFO_OFFSET, only the copy at
+ * that offset is written.  Otherwise every mirror that fits within
+ * device->total_bytes is written.  The bytenr field and the checksum of @sb
+ * are updated before each write.
+ *
+ * NOTE(review): the checksum is computed over @sb while the bytes written
+ * come from root->fs_info->super_copy; write_all_supers() passes super_copy
+ * as @sb, so both are the same buffer there.
+ *
+ * Returns 0 on success, -EIO on a short write, or -errno if pwrite64() fails.
+ */
+static int write_dev_supers(struct btrfs_root *root,
+			    struct btrfs_super_block *sb,
+			    struct btrfs_device *device)
+{
+	u64 bytenr;
+	u32 crc;
+	int i, ret;
+
+	if (root->fs_info->super_bytenr != BTRFS_SUPER_INFO_OFFSET) {
+		btrfs_set_super_bytenr(sb, root->fs_info->super_bytenr);
+		crc = ~(u32)0;
+		crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc,
+				      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+		btrfs_csum_final(crc, (char *)&sb->csum[0]);
+
+		/*
+		 * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is
+		 * zero filled, we can use it directly
+		 */
+		ret = pwrite64(device->fd, root->fs_info->super_copy,
+			       BTRFS_SUPER_INFO_SIZE,
+			       root->fs_info->super_bytenr);
+		if (ret != BTRFS_SUPER_INFO_SIZE)
+			goto write_err;
+		return 0;
+	}
+
+	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+		bytenr = btrfs_sb_offset(i);
+		/* Mirrors beyond the end of the device are skipped. */
+		if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
+			break;
+
+		btrfs_set_super_bytenr(sb, bytenr);
+
+		crc = ~(u32)0;
+		crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc,
+				      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+		btrfs_csum_final(crc, (char *)&sb->csum[0]);
+
+		/*
+		 * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is
+		 * zero filled, we can use it directly
+		 */
+		ret = pwrite64(device->fd, root->fs_info->super_copy,
+			       BTRFS_SUPER_INFO_SIZE, bytenr);
+		if (ret != BTRFS_SUPER_INFO_SIZE)
+			goto write_err;
+	}
+
+	return 0;
+
+write_err:
+	if (ret > 0) {
+		/* Short write: report an error, not the byte count. */
+		fprintf(stderr, "WARNING: failed to write all sb data\n");
+		return -EIO;
+	}
+	/* Capture errno before any other call can overwrite it. */
+	ret = errno ? -errno : -EIO;
+	fprintf(stderr, "WARNING: failed to write sb: %s\n",
+		strerror(-ret));
+	return ret;
+}
+
+/*
+ * Write the super block to every writeable device of the filesystem.
+ *
+ * For each device the dev_item embedded in the super block copy is filled in
+ * from that device, BTRFS_HEADER_FLAG_WRITTEN is or-ed into the super flags,
+ * and write_dev_supers() is called.  A write failure triggers BUG_ON().
+ *
+ * Always returns 0.
+ */
+int write_all_supers(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct list_head *devices = &fs_info->fs_devices->devices;
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	struct btrfs_dev_item *item = &sb->dev_item;
+	struct list_head *pos;
+
+	list_for_each(pos, devices) {
+		struct btrfs_device *dev;
+		int ret;
+
+		dev = list_entry(pos, struct btrfs_device, dev_list);
+		if (!dev->writeable)
+			continue;
+
+		/* Describe this device in the super block about to be written. */
+		btrfs_set_stack_device_generation(item, 0);
+		btrfs_set_stack_device_type(item, dev->type);
+		btrfs_set_stack_device_id(item, dev->devid);
+		btrfs_set_stack_device_total_bytes(item, dev->total_bytes);
+		btrfs_set_stack_device_bytes_used(item, dev->bytes_used);
+		btrfs_set_stack_device_io_align(item, dev->io_align);
+		btrfs_set_stack_device_io_width(item, dev->io_width);
+		btrfs_set_stack_device_sector_size(item, dev->sector_size);
+		memcpy(item->uuid, dev->uuid, BTRFS_UUID_SIZE);
+		memcpy(item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
+
+		btrfs_set_super_flags(sb, btrfs_super_flags(sb) |
+				      BTRFS_HEADER_FLAG_WRITTEN);
+
+		ret = write_dev_supers(root, sb, dev);
+		BUG_ON(ret);
+	}
+	return 0;
+}
+
+/*
+ * Record the current tree root and chunk root in the super block copy and
+ * write it to all devices.
+ *
+ * The generation is taken from trans->transid; the root positions and levels
+ * come from the root nodes of the tree root and the chunk root.  Nothing is
+ * done on a readonly fs_info.
+ *
+ * Returns 0 on success or for a readonly fs_info, otherwise the value
+ * returned by write_all_supers().
+ */
+int write_ctree_super(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	struct extent_buffer *tree_node;
+	struct extent_buffer *chunk_node;
+	int ret;
+
+	if (fs_info->readonly)
+		return 0;
+
+	tree_node = fs_info->tree_root->node;
+	chunk_node = fs_info->chunk_root->node;
+
+	btrfs_set_super_generation(sb, trans->transid);
+	btrfs_set_super_root(sb, tree_node->start);
+	btrfs_set_super_root_level(sb, btrfs_header_level(tree_node));
+	btrfs_set_super_chunk_root(sb, chunk_node->start);
+	btrfs_set_super_chunk_root_level(sb, btrfs_header_level(chunk_node));
+	btrfs_set_super_chunk_root_generation(sb,
+					      btrfs_header_generation(chunk_node));
+
+	ret = write_all_supers(root);
+	if (ret)
+		fprintf(stderr, "failed to write new super block err %d\n", ret);
+	return ret;
+}
+
+/*
+ * Close a filesystem opened by one of the open_ctree variants and free
+ * @fs_info.
+ *
+ * If fs_info->generation differs from fs_info->last_trans_committed, the
+ * pending changes are committed and the super block is written first.
+ * After that the block groups, cached fs roots, root nodes, devices and
+ * caches are released.
+ *
+ * Always returns 0; failures while committing trigger BUG_ON().
+ */
+int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
+{
+ int ret;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = fs_info->tree_root;
+
+ if (fs_info->last_trans_committed !=
+ fs_info->generation) {
+ BUG_ON(!root);
+ /*
+ * NOTE(review): the results of btrfs_start_transaction(),
+ * btrfs_commit_transaction() and write_ctree_super() are not
+ * checked here.
+ */
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_commit_transaction(trans, root);
+ trans = btrfs_start_transaction(root, 1);
+ ret = commit_tree_roots(trans, fs_info);
+ BUG_ON(ret);
+ ret = __commit_transaction(trans, root);
+ BUG_ON(ret);
+ write_ctree_super(trans, root);
+ btrfs_free_transaction(root, trans);
+ }
+ btrfs_free_block_groups(fs_info);
+
+ free_fs_roots_tree(&fs_info->fs_root_tree);
+
+ btrfs_release_all_roots(fs_info);
+ btrfs_close_devices(fs_info->fs_devices);
+ btrfs_cleanup_all_caches(fs_info);
+ /* fs_info must not be used by the caller after this point. */
+ btrfs_free_fs_info(fs_info);
+ return 0;
+}
+
+/*
+ * Clear the dirty state of @eb.  @trans and @root are not used.
+ *
+ * Returns the value of clear_extent_buffer_dirty().
+ */
+int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		     struct extent_buffer *eb)
+{
+	int ret = clear_extent_buffer_dirty(eb);
+
+	return ret;
+}
+
+/* Does nothing in this implementation; always returns 0. */
+int wait_on_tree_block_writeback(struct btrfs_root *root,
+				 struct extent_buffer *eb)
+{
+	(void)root;
+	(void)eb;
+	return 0;
+}
+
+/* Mark @eb dirty; any result of set_extent_buffer_dirty() is discarded. */
+void btrfs_mark_buffer_dirty(struct extent_buffer *eb)
+{
+	(void)set_extent_buffer_dirty(eb);
+}
+
+/*
+ * Check whether @buf is uptodate and passes verify_parent_transid() for
+ * @parent_transid.
+ *
+ * Returns non-zero if both hold, 0 otherwise.
+ */
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
+{
+	if (!extent_buffer_uptodate(buf))
+		return 0;
+
+	return !verify_parent_transid(buf->tree, buf, parent_transid, 1);
+}
+
+/* Mark @eb uptodate; returns the value of set_extent_buffer_uptodate(). */
+int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
+{
+	int ret = set_extent_buffer_uptodate(eb);
+
+	return ret;
+}
diff --git a/disk-io.h b/disk-io.h
new file mode 100644
index 00000000..30ccb2bd
--- /dev/null
+++ b/disk-io.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_DISK_IO_H__
+#define __BTRFS_DISK_IO_H__
+
+#include "kerncompat.h"
+#include "ctree.h"
+
+#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
+#define BTRFS_SUPER_INFO_SIZE 4096
+
+#define BTRFS_SUPER_MIRROR_MAX 3
+#define BTRFS_SUPER_MIRROR_SHIFT 12
+
+/* Flags accepted by open_ctree() and its variants. */
+enum btrfs_open_ctree_flags {
+	OPEN_CTREE_WRITES		= 1 << 0,
+	OPEN_CTREE_PARTIAL		= 1 << 1,
+	OPEN_CTREE_BACKUP_ROOT		= 1 << 2,
+	OPEN_CTREE_RECOVER_SUPER	= 1 << 3,
+	OPEN_CTREE_RESTORE		= 1 << 4,
+	OPEN_CTREE_NO_BLOCK_GROUPS	= 1 << 5,
+	OPEN_CTREE_EXCLUSIVE		= 1 << 6,
+	OPEN_CTREE_NO_DEVICES		= 1 << 7,
+
+	/*
+	 * Don't print error messages if bytenr or checksums do not match in
+	 * tree block headers.
+	 */
+	OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS = 1 << 8,
+
+	/* Return the chunk root instead of the fs root. */
+	__OPEN_CTREE_RETURN_CHUNK_ROOT	= 1 << 9,
+
+	/* The three flags combined are distinct bits, so or-ing them is exact. */
+	OPEN_CTREE_CHUNK_ROOT_ONLY	= OPEN_CTREE_PARTIAL |
+					  OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS |
+					  __OPEN_CTREE_RETURN_CHUNK_ROOT,
+
+	/*
+	 * TODO: cleanup: split the open_ctree_flags into more independent
+	 * tree bits, e.g. split PARTIAL into SKIP_CSUM/SKIP_EXTENT.
+	 */
+
+	OPEN_CTREE_IGNORE_FSID_MISMATCH	= 1 << 10,
+
+	/*
+	 * Allow open_ctree_fs_info() to return an incomplete fs_info with
+	 * system chunks from the super block only.
+	 * Useful when the chunk tree is corrupted.
+	 * Makes no sense for the open_ctree variants returning a btrfs_root.
+	 */
+	OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR = 1 << 11
+};
+
+/*
+ * Return the byte offset of super block copy number @mirror.
+ *
+ * Copy 0 lives at BTRFS_SUPER_INFO_OFFSET; copy N (N > 0) lives at
+ * 16KiB << (BTRFS_SUPER_MIRROR_SHIFT * N).
+ */
+static inline u64 btrfs_sb_offset(int mirror)
+{
+	const u64 base = 16 * 1024;
+
+	if (mirror == 0)
+		return BTRFS_SUPER_INFO_OFFSET;
+	return base << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
+}
+
+struct btrfs_device;
+
+int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror);
+struct extent_buffer* read_tree_block_fs_info(
+ struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize,
+ u64 parent_transid);
+/*
+ * Convenience wrapper around read_tree_block_fs_info() that takes the
+ * fs_info from @root.
+ */
+static inline struct extent_buffer* read_tree_block(
+		struct btrfs_root *root, u64 bytenr, u32 blocksize,
+		u64 parent_transid)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	return read_tree_block_fs_info(fs_info, bytenr, blocksize,
+				       parent_transid);
+}
+
+int read_extent_data(struct btrfs_root *root, char *data, u64 logical,
+ u64 *len, int mirror);
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+ u64 parent_transid);
+struct extent_buffer* btrfs_find_create_tree_block(
+ struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize);
+
+int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
+ u32 stripesize, struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info, u64 objectid);
+int clean_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf);
+
+void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
+struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr);
+int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable);
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr,
+ enum btrfs_open_ctree_flags flags);
+void btrfs_release_all_roots(struct btrfs_fs_info *fs_info);
+void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info);
+int btrfs_scan_fs_devices(int fd, const char *path,
+ struct btrfs_fs_devices **fs_devices, u64 sb_bytenr,
+ int super_recover, int skip_devices);
+int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info);
+
+struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr,
+ enum btrfs_open_ctree_flags flags);
+struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
+ enum btrfs_open_ctree_flags flags);
+struct btrfs_fs_info *open_ctree_fs_info(const char *filename,
+ u64 sb_bytenr, u64 root_tree_bytenr,
+ enum btrfs_open_ctree_flags flags);
+int close_ctree_fs_info(struct btrfs_fs_info *fs_info);
+/*
+ * Close the filesystem that @root belongs to.  @root must not be NULL.
+ * Returns the value of close_ctree_fs_info().
+ */
+static inline int close_ctree(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info;
+
+	BUG_ON(!root);
+	fs_info = root->fs_info;
+	return close_ctree_fs_info(fs_info);
+}
+
+int write_all_supers(struct btrfs_root *root);
+int write_ctree_super(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr,
+ int super_recover);
+int btrfs_map_bh_to_logical(struct btrfs_root *root, struct extent_buffer *bh,
+ u64 logical);
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+ u64 bytenr, u32 blocksize);
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *location);
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *location);
+int btrfs_free_fs_root(struct btrfs_root *root);
+void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
+int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
+int wait_on_tree_block_writeback(struct btrfs_root *root,
+ struct extent_buffer *buf);
+u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
+void btrfs_csum_final(u32 crc, char *result);
+
+int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+int btrfs_open_device(struct btrfs_device *dev);
+int csum_tree_block_size(struct extent_buffer *buf, u16 csum_sectorsize,
+ int verify);
+int verify_tree_block_csum_silent(struct extent_buffer *buf, u16 csum_size);
+int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
+int write_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *eb);
+int write_and_map_eb(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct extent_buffer *eb);
+
+/* raid6.c */
+void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs);
+
+#endif
diff --git a/extent-cache.c b/extent-cache.c
new file mode 100644
index 00000000..38bed8b5
--- /dev/null
+++ b/extent-cache.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include "kerncompat.h"
+#include "extent-cache.h"
+#include "rbtree-utils.h"
+
+/*
+ * Search key handed to the rbtree comparators in this file: the byte range
+ * [start, start + size), optionally qualified by an objectid.
+ */
+struct cache_extent_search_range {
+ u64 objectid; /* only read by cache_tree_comp_range2() */
+ u64 start; /* first byte of the range */
+ u64 size; /* length of the range in bytes */
+};
+
+/*
+ * Comparator: position the extent stored in @node relative to the search
+ * range passed through @data (a struct cache_extent_search_range).
+ *
+ * Returns 1 when the extent ends at or before the range starts, -1 when the
+ * range ends at or before the extent starts, and 0 when the two overlap.
+ * The objectid of either side is not looked at.
+ */
+static int cache_tree_comp_range(struct rb_node *node, void *data)
+{
+	struct cache_extent_search_range *want = data;
+	struct cache_extent *ce = rb_entry(node, struct cache_extent, rb_node);
+	u64 ce_end = ce->start + ce->size;
+	u64 want_end = want->start + want->size;
+
+	if (ce_end <= want->start)
+		return 1;
+	if (want_end <= ce->start)
+		return -1;
+	return 0;
+}
+
+/*
+ * Node-vs-node comparator used when inserting: turns the extent in @node2
+ * into a search range and compares @node1 against it with
+ * cache_tree_comp_range().
+ */
+static int cache_tree_comp_nodes(struct rb_node *node1, struct rb_node *node2)
+{
+	struct cache_extent *other = rb_entry(node2, struct cache_extent,
+					      rb_node);
+	struct cache_extent_search_range key;
+
+	/* key.objectid stays unset; cache_tree_comp_range() never reads it */
+	key.size = other->size;
+	key.start = other->start;
+
+	return cache_tree_comp_range(node1, &key);
+}
+
+/*
+ * Comparator keyed on (objectid, byte range).
+ *
+ * The objectid decides first: 1 when the extent's objectid is smaller than
+ * the searched one, -1 when it is larger. For equal objectids the result is
+ * 1 when the extent ends at or before the range starts, -1 when the range
+ * ends at or before the extent starts, and 0 when they overlap.
+ */
+static int cache_tree_comp_range2(struct rb_node *node, void *data)
+{
+	struct cache_extent_search_range *want = data;
+	struct cache_extent *ce = rb_entry(node, struct cache_extent, rb_node);
+
+	if (ce->objectid != want->objectid)
+		return ce->objectid < want->objectid ? 1 : -1;
+
+	if (ce->start + ce->size <= want->start)
+		return 1;
+	if (want->start + want->size <= ce->start)
+		return -1;
+	return 0;
+}
+
+/*
+ * Node-vs-node comparator for trees keyed on (objectid, range): builds a
+ * search key from the extent in @node2 and compares @node1 against it with
+ * cache_tree_comp_range2().
+ */
+static int cache_tree_comp_nodes2(struct rb_node *node1, struct rb_node *node2)
+{
+	struct cache_extent *other = rb_entry(node2, struct cache_extent,
+					      rb_node);
+	struct cache_extent_search_range key;
+
+	key.size = other->size;
+	key.start = other->start;
+	key.objectid = other->objectid;
+
+	return cache_tree_comp_range2(node1, &key);
+}
+
+/*
+ * Set @tree's rbtree root to RB_ROOT. Nothing that may already be linked
+ * into the tree is freed here.
+ */
+void cache_tree_init(struct cache_tree *tree)
+{
+	struct rb_root fresh = RB_ROOT;
+
+	tree->root = fresh;
+}
+
+/*
+ * malloc() a cache_extent and fill in objectid/start/size.
+ *
+ * The embedded rb_node is left uninitialised. Returns NULL when malloc()
+ * fails.
+ */
+static struct cache_extent *
+alloc_cache_extent(u64 objectid, u64 start, u64 size)
+{
+	struct cache_extent *ce;
+
+	ce = malloc(sizeof(*ce));
+	if (ce) {
+		ce->objectid = objectid;
+		ce->start = start;
+		ce->size = size;
+	}
+	return ce;
+}
+
+/* Common signature of insert_cache_extent() and insert_cache_extent2(). */
+typedef int (*cache_extent_insert_fn)(struct cache_tree *tree,
+				      struct cache_extent *pe);
+
+/*
+ * Allocate a cache_extent describing (objectid, [start, start + size)) and
+ * link it into @tree through @insert.
+ *
+ * Allocation failure is fatal: a message is printed to stderr and the
+ * process exits with status 1. If @insert reports an error the new extent
+ * is freed again and that error is returned; otherwise 0 is returned.
+ */
+static int __add_cache_extent(struct cache_tree *tree,
+			      u64 objectid, u64 start, u64 size,
+			      cache_extent_insert_fn insert)
+{
+	struct cache_extent *pe = alloc_cache_extent(objectid, start, size);
+	int ret;
+
+	if (!pe) {
+		fprintf(stderr, "memory allocation failed\n");
+		exit(1);
+	}
+
+	ret = insert(tree, pe);
+	if (ret)
+		free(pe);
+
+	return ret;
+}
+
+/*
+ * Add [start, start + size) to a tree that is ordered by byte range only.
+ * The new extent gets objectid 0.
+ */
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size)
+{
+	return __add_cache_extent(tree, 0, start, size, insert_cache_extent);
+}
+
+/*
+ * Add (objectid, [start, start + size)) to a tree that is ordered by
+ * (objectid, byte range).
+ *
+ * The insertion goes through insert_cache_extent2() so that the ordering
+ * matches what lookup_cache_extent2()/search_cache_extent2() search with;
+ * inserting with the range-only comparator would ignore @objectid.
+ */
+int add_cache_extent2(struct cache_tree *tree,
+		      u64 objectid, u64 start, u64 size)
+{
+	return __add_cache_extent(tree, objectid, start, size,
+				  insert_cache_extent2);
+}
+
+/*
+ * Link the caller-provided extent @pe into @tree, ordering by byte range
+ * only (cache_tree_comp_nodes). Returns whatever rb_insert() returns.
+ */
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe)
+{
+	struct rb_root *root = &tree->root;
+	struct rb_node *node = &pe->rb_node;
+
+	return rb_insert(root, node, cache_tree_comp_nodes);
+}
+
+/*
+ * Link the caller-provided extent @pe into @tree, ordering by objectid first
+ * and byte range second (cache_tree_comp_nodes2). Returns whatever
+ * rb_insert() returns.
+ */
+int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe)
+{
+	struct rb_root *root = &tree->root;
+	struct rb_node *node = &pe->rb_node;
+
+	return rb_insert(root, node, cache_tree_comp_nodes2);
+}
+
+/*
+ * Search @tree for [start, start + size) with the range-only comparator.
+ *
+ * Returns the extent rb_search() reports as matching, or NULL when it
+ * reports none.
+ */
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+					 u64 start, u64 size)
+{
+	struct cache_extent_search_range key;
+	struct rb_node *hit;
+
+	key.start = start;
+	key.size = size;
+
+	hit = rb_search(&tree->root, &key, cache_tree_comp_range, NULL);
+	return hit ? rb_entry(hit, struct cache_extent, rb_node) : NULL;
+}
+
+/*
+ * Search @tree for (objectid, [start, start + size)) with the
+ * objectid-aware comparator.
+ *
+ * Returns the extent rb_search() reports as matching, or NULL when it
+ * reports none.
+ */
+struct cache_extent *lookup_cache_extent2(struct cache_tree *tree,
+					  u64 objectid, u64 start, u64 size)
+{
+	struct cache_extent_search_range key;
+	struct rb_node *hit;
+
+	key.objectid = objectid;
+	key.start = start;
+	key.size = size;
+
+	hit = rb_search(&tree->root, &key, cache_tree_comp_range2, NULL);
+	return hit ? rb_entry(hit, struct cache_extent, rb_node) : NULL;
+}
+
+/*
+ * Search @tree for the one-byte range starting at @start.
+ *
+ * When rb_search() finds no match, the node it hands back through its last
+ * argument is used instead. Returns NULL when neither is available.
+ */
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start)
+{
+	struct cache_extent_search_range key;
+	struct rb_node *fallback;
+	struct rb_node *hit;
+
+	key.start = start;
+	key.size = 1;
+
+	hit = rb_search(&tree->root, &key, cache_tree_comp_range, &fallback);
+	if (!hit)
+		hit = fallback;
+
+	return hit ? rb_entry(hit, struct cache_extent, rb_node) : NULL;
+}
+
+/*
+ * Search @tree for (objectid, one-byte range starting at @start) with the
+ * objectid-aware comparator.
+ *
+ * When rb_search() finds no match, the node it hands back through its last
+ * argument is used instead. Returns NULL when neither is available.
+ */
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+					  u64 objectid, u64 start)
+{
+	struct cache_extent_search_range key;
+	struct rb_node *fallback;
+	struct rb_node *hit;
+
+	key.objectid = objectid;
+	key.start = start;
+	key.size = 1;
+
+	hit = rb_search(&tree->root, &key, cache_tree_comp_range2, &fallback);
+	if (!hit)
+		hit = fallback;
+
+	return hit ? rb_entry(hit, struct cache_extent, rb_node) : NULL;
+}
+
+/* Return the extent at rb_first() of @tree, or NULL if there is none. */
+struct cache_extent *first_cache_extent(struct cache_tree *tree)
+{
+	struct rb_node *head;
+
+	head = rb_first(&tree->root);
+	return head ? rb_entry(head, struct cache_extent, rb_node) : NULL;
+}
+
+/* Return the extent at rb_last() of @tree, or NULL if there is none. */
+struct cache_extent *last_cache_extent(struct cache_tree *tree)
+{
+	struct rb_node *tail;
+
+	tail = rb_last(&tree->root);
+	return tail ? rb_entry(tail, struct cache_extent, rb_node) : NULL;
+}
+
+/* Return the extent at rb_prev() of @pe, or NULL if there is none. */
+struct cache_extent *prev_cache_extent(struct cache_extent *pe)
+{
+	struct rb_node *before;
+
+	before = rb_prev(&pe->rb_node);
+	return before ? rb_entry(before, struct cache_extent, rb_node) : NULL;
+}
+
+/* Return the extent at rb_next() of @pe, or NULL if there is none. */
+struct cache_extent *next_cache_extent(struct cache_extent *pe)
+{
+	struct rb_node *after;
+
+	after = rb_next(&pe->rb_node);
+	return after ? rb_entry(after, struct cache_extent, rb_node) : NULL;
+}
+
+/*
+ * Unlink @pe from @tree with rb_erase(). The extent itself is not freed;
+ * that is left to the caller.
+ */
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe)
+{
+	struct rb_node *node = &pe->rb_node;
+
+	rb_erase(node, &tree->root);
+}
+
+/*
+ * Empty @tree: repeatedly take the first extent, unlink it and pass it to
+ * @free_func, until first_cache_extent() returns NULL.
+ */
+void cache_tree_free_extents(struct cache_tree *tree,
+			     free_cache_extent free_func)
+{
+	struct cache_extent *victim;
+
+	for (victim = first_cache_extent(tree); victim;
+	     victim = first_cache_extent(tree)) {
+		remove_cache_extent(tree, victim);
+		free_func(victim);
+	}
+}
+
+/*
+ * free_cache_extent callback for extents that were allocated on their own
+ * with malloc(): simply free()s @pe.
+ */
+static void free_extent_cache(struct cache_extent *pe)
+{
+ free(pe);
+}
+
+/*
+ * Remove every extent from @tree and free() each one. Only suitable for
+ * trees whose cache_extent entries are standalone heap allocations, since
+ * the entry pointer itself is passed to free().
+ */
+void free_extent_cache_tree(struct cache_tree *tree)
+{
+ cache_tree_free_extents(tree, free_extent_cache);
+}
+
+/*
+ * Insert [start, start + size) into @tree, merging it with a directly
+ * adjacent predecessor and/or successor instead of adding a new entry.
+ *
+ * The new range must not overlap any existing extent; an overlap with the
+ * neighbours found here trips BUG_ON(). When a merge happens the existing
+ * neighbour is resized in place and 0 is returned; otherwise the result of
+ * add_cache_extent() is returned.
+ */
+int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size)
+{
+ struct cache_extent *cache;
+ struct cache_extent *next = NULL;
+ struct cache_extent *prev = NULL;
+ int next_merged = 0;
+ int prev_merged = 0;
+ int ret = 0;
+
+ /* Nothing to merge with: go straight to the plain insert. */
+ if (cache_tree_empty(tree))
+ goto insert;
+
+ cache = search_cache_extent(tree, start);
+ if (!cache) {
+ /*
+ * The search found no extent for start (the empty-tree case was
+ * already handled above), so the last cache_extent in the tree
+ * is taken as prev.
+ */
+ prev = last_cache_extent(tree);
+ } else if (start <= cache->start) {
+ next = cache;
+ prev = prev_cache_extent(cache);
+ } else {
+ prev = cache;
+ next = next_cache_extent(cache);
+ }
+
+ /*
+ * Ensure the range to be inserted does not overlap its neighbours;
+ * otherwise an extra loop would be needed to do the merge.
+ */
+ BUG_ON(next && start + size > next->start);
+ BUG_ON(prev && prev->start + prev->size > start);
+
+ /* New range ends exactly where next begins: grow next downwards. */
+ if (next && start + size == next->start) {
+ next_merged = 1;
+ next->size = next->start + next->size - start;
+ next->start = start;
+ }
+ /* New range begins exactly where prev ends. */
+ if (prev && prev->start + prev->size == start) {
+ prev_merged = 1;
+ if (next_merged) {
+ /* Both sides touch: fold prev into next and drop prev. */
+ next->size = next->start + next->size - prev->start;
+ next->start = prev->start;
+ remove_cache_extent(tree, prev);
+ free(prev);
+ } else {
+ prev->size = start + size - prev->start;
+ }
+ }
+insert:
+ if (!prev_merged && !next_merged)
+ ret = add_cache_extent(tree, start, size);
+ return ret;
+}
diff --git a/extent-cache.h b/extent-cache.h
new file mode 100644
index 00000000..f031fbfb
--- /dev/null
+++ b/extent-cache.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_EXTENT_CACHE_H__
+#define __BTRFS_EXTENT_CACHE_H__
+
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#include "rbtree.h"
+#else
+#include <btrfs/kerncompat.h>
+#include <btrfs/rbtree.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+/* A set of cache_extent entries, kept in an rbtree. */
+struct cache_tree {
+ struct rb_root root; /* root of the rbtree linking cache_extent.rb_node */
+};
+
+/* One entry of a cache_tree: the byte range [start, start + size). */
+struct cache_extent {
+ struct rb_node rb_node; /* linkage into cache_tree.root */
+ u64 objectid; /* extra key, used by the *2 variants of the API */
+ u64 start; /* first byte of the range */
+ u64 size; /* length of the range in bytes */
+};
+
+void cache_tree_init(struct cache_tree *tree);
+
+struct cache_extent *first_cache_extent(struct cache_tree *tree);
+struct cache_extent *last_cache_extent(struct cache_tree *tree);
+struct cache_extent *prev_cache_extent(struct cache_extent *pe);
+struct cache_extent *next_cache_extent(struct cache_extent *pe);
+
+/*
+ * Find a cache_extent which covers start.
+ *
+ * If not found, return next cache_extent if possible.
+ */
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start);
+
+/*
+ * Find a cache_extent which overlaps the range [start, start + size).
+ *
+ * If not found, return NULL.
+ */
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+ u64 start, u64 size);
+
+/*
+ * Add a non-overlapping extent to the cache tree.
+ *
+ * If [start, start + size) overlaps an existing extent, -EEXIST is returned.
+ */
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+
+/*
+ * Same as add_cache_extent, but takes a caller-provided cache_extent struct.
+ */
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+
+/* Return the result of RB_EMPTY_ROOT() on @tree's rbtree root. */
+static inline int cache_tree_empty(struct cache_tree *tree)
+{
+ return RB_EMPTY_ROOT(&tree->root);
+}
+
+typedef void (*free_cache_extent)(struct cache_extent *pe);
+
+void cache_tree_free_extents(struct cache_tree *tree,
+ free_cache_extent free_func);
+
+/*
+ * Define a static helper free_<name>_tree(tree) that empties @tree by
+ * calling cache_tree_free_extents() with @free_func as the per-extent
+ * destructor.
+ */
+#define FREE_EXTENT_CACHE_BASED_TREE(name, free_func) \
+static void free_##name##_tree(struct cache_tree *tree) \
+{ \
+ cache_tree_free_extents(tree, free_func); \
+}
+
+void free_extent_cache_tree(struct cache_tree *tree);
+
+/*
+ * Search a cache_extent with same objectid, and covers start.
+ *
+ * If not found, return next if possible.
+ */
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+ u64 objectid, u64 start);
+/*
+ * Search a cache_extent with same objectid, and overlaps the range
+ * [start, start + size)
+ *
+ * If not found, return NULL.
+ */
+struct cache_extent *lookup_cache_extent2(struct cache_tree *tree,
+ u64 objectid, u64 start, u64 size);
+int add_cache_extent2(struct cache_tree *tree,
+ u64 objectid, u64 start, u64 size);
+int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe);
+
+/*
+ * Insert a cache_extent range [start, start + size).
+ *
+ * This function may merge the range with adjacent existing cache_extents.
+ * NOTE: caller must ensure the inserted range won't overlap any existing
+ * range.
+ */
+int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+#endif
diff --git a/extent-tree.c b/extent-tree.c
new file mode 100644
index 00000000..b9b00f06
--- /dev/null
+++ b/extent-tree.c
@@ -0,0 +1,4122 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "crc32c.h"
+#include "volumes.h"
+#include "free-space-cache.h"
+#include "utils.h"
+
+#define PENDING_EXTENT_INSERT 0
+#define PENDING_EXTENT_DELETE 1
+#define PENDING_BACKREF_UPDATE 2
+
+/*
+ * Description of a deferred extent operation.
+ * NOTE(review): no user of this struct is visible in this part of the file;
+ * the field notes below are inferred from names only and need confirming.
+ */
+struct pending_extent_op {
+ int type; /* presumably one of the PENDING_* constants above - TODO confirm */
+ u64 bytenr;
+ u64 num_bytes;
+ u64 flags;
+ struct btrfs_disk_key key;
+ int level;
+};
+
+static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 root_objectid, u64 generation,
+ u64 flags, struct btrfs_disk_key *key,
+ int level, struct btrfs_key *ins);
+static int __free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 owner_objectid,
+ u64 owner_offset, int refs_to_drop);
+static int finish_current_insert(struct btrfs_trans_handle *trans, struct
+ btrfs_root *extent_root);
+static int del_pending_extents(struct btrfs_trans_handle *trans, struct
+ btrfs_root *extent_root);
+static struct btrfs_block_group_cache *
+btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
+ *hint, u64 search_start, int data, int owner);
+
+/*
+ * For every superblock mirror, map its offset back into the logical
+ * addresses of block group @cache (btrfs_rmap_block) and clear the dirty
+ * bit for each returned stripe in fs_info->free_space_cache.
+ *
+ * A failing btrfs_rmap_block() trips BUG_ON(). Always returns 0.
+ */
+static int remove_sb_from_cache(struct btrfs_root *root,
+				struct btrfs_block_group_cache *cache)
+{
+	struct extent_io_tree *free_tree = &root->fs_info->free_space_cache;
+	int mirror;
+
+	for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+		u64 sb_bytenr = btrfs_sb_offset(mirror);
+		u64 *logical;
+		int stripe_len;
+		int nr;
+		int ret;
+
+		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+				       cache->key.objectid, sb_bytenr, 0,
+				       &logical, &nr, &stripe_len);
+		BUG_ON(ret);
+
+		/* walk the returned addresses from last to first */
+		while (nr--) {
+			u64 first = logical[nr];
+
+			clear_extent_dirty(free_tree, first,
+					   first + stripe_len - 1, GFP_NOFS);
+		}
+		kfree(logical);
+	}
+	return 0;
+}
+
+/*
+ * Build the free-space information for @block_group, once.
+ *
+ * Walks the extent tree over the block group's key range and marks every gap
+ * between consecutive EXTENT_ITEM/METADATA_ITEM keys as dirty in
+ * fs_info->free_space_cache (find_search_start() later looks for
+ * EXTENT_DIRTY ranges there). The superblock mirror locations are then
+ * removed again and block_group->cached is set.
+ *
+ * Returns 0 when @block_group is NULL or already cached, -ENOMEM when no path
+ * can be allocated, and 0 otherwise.
+ * NOTE(review): errors from btrfs_search_slot()/btrfs_next_leaf() jump to
+ * "err" and are reported as 0 as well; the block group is just left
+ * uncached. Confirm whether callers rely on that before changing it.
+ */
+static int cache_block_group(struct btrfs_root *root,
+ struct btrfs_block_group_cache *block_group)
+{
+ struct btrfs_path *path;
+ int ret;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct extent_io_tree *free_space_cache;
+ int slot;
+ u64 last;
+ u64 hole_size;
+
+ if (!block_group)
+ return 0;
+
+ /* All searches below run against the extent tree. */
+ root = root->fs_info->extent_root;
+ free_space_cache = &root->fs_info->free_space_cache;
+
+ if (block_group->cached)
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ path->reada = 2;
+ /* Start at the block group start, but never below the superblock offset. */
+ last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+ key.objectid = last;
+ key.offset = 0;
+ key.type = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto err;
+
+ while(1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(leaf)) {
+ /* Ran off the current leaf: move on, or stop when there is no next leaf. */
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto err;
+ if (ret == 0) {
+ continue;
+ } else {
+ break;
+ }
+ }
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid < block_group->key.objectid) {
+ goto next;
+ }
+ /* Past the end of this block group: done scanning. */
+ if (key.objectid >= block_group->key.objectid +
+ block_group->key.offset) {
+ break;
+ }
+
+ if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY) {
+ /* Gap between the previous extent end and this extent start. */
+ if (key.objectid > last) {
+ hole_size = key.objectid - last;
+ set_extent_dirty(free_space_cache, last,
+ last + hole_size - 1,
+ GFP_NOFS);
+ }
+ /*
+ * Advance "last" past this extent: METADATA_ITEM extents are
+ * taken to be root->leafsize long, EXTENT_ITEM extents
+ * key.offset long.
+ */
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ last = key.objectid + root->leafsize;
+ else
+ last = key.objectid + key.offset;
+ }
+next:
+ path->slots[0]++;
+ }
+
+ /* Tail gap between the last extent and the end of the block group. */
+ if (block_group->key.objectid +
+ block_group->key.offset > last) {
+ hole_size = block_group->key.objectid +
+ block_group->key.offset - last;
+ set_extent_dirty(free_space_cache, last,
+ last + hole_size - 1, GFP_NOFS);
+ }
+ remove_sb_from_cache(root, block_group);
+ block_group->cached = 1;
+err:
+ btrfs_free_path(path);
+ return 0;
+}
+
+/*
+ * Return the first block group recorded in info->block_group_cache at or
+ * after @bytenr, where @bytenr is first raised to at least
+ * BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE.
+ *
+ * The block group pointer is read from the private value stored for the
+ * found range. Returns NULL when no range carrying a BLOCK_GROUP_* bit is
+ * found or it has no private value.
+ */
+struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
+							       btrfs_fs_info *info,
+							       u64 bytenr)
+{
+	struct extent_io_tree *bg_tree = &info->block_group_cache;
+	u64 found_start;
+	u64 found_end;
+	u64 priv;
+
+	bytenr = max_t(u64, bytenr,
+		       BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
+
+	if (find_first_extent_bit(bg_tree, bytenr, &found_start, &found_end,
+				  BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
+				  BLOCK_GROUP_SYSTEM))
+		return NULL;
+
+	if (get_state_private(bg_tree, found_start, &priv))
+		return NULL;
+
+	return (struct btrfs_block_group_cache *)(unsigned long)priv;
+}
+
+/*
+ * Return the block group that contains @bytenr, or NULL.
+ *
+ * The first range at or after @bytenr carrying a BLOCK_GROUP_* bit is looked
+ * up in info->block_group_cache; the block group stored as its private value
+ * is returned only if @bytenr lies inside
+ * [key.objectid, key.objectid + key.offset).
+ */
+struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
+							 btrfs_fs_info *info,
+							 u64 bytenr)
+{
+	struct extent_io_tree *bg_tree = &info->block_group_cache;
+	struct btrfs_block_group_cache *bg;
+	u64 found_start;
+	u64 found_end;
+	u64 priv;
+
+	if (find_first_extent_bit(bg_tree, bytenr, &found_start, &found_end,
+				  BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
+				  BLOCK_GROUP_SYSTEM))
+		return NULL;
+
+	if (get_state_private(bg_tree, found_start, &priv))
+		return NULL;
+
+	bg = (struct btrfs_block_group_cache *)(unsigned long)priv;
+	if (bg->key.objectid > bytenr)
+		return NULL;
+	if (bytenr >= bg->key.objectid + bg->key.offset)
+		return NULL;
+	return bg;
+}
+
+/* Return 1 when every bit of @bits is set in cache->flags, 0 otherwise. */
+static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
+{
+	u64 present = cache->flags & bits;
+
+	return present == bits;
+}
+
+/*
+ * Find a free range of at least @num bytes, starting the search at
+ * *start_ret in block group *cache_ret and moving on to later block groups
+ * (wrapping around once to the original start) when the current one does
+ * not qualify.
+ *
+ * A block group is skipped when it is read-only or its flags do not contain
+ * all bits of @data. Free space is taken from EXTENT_DIRTY ranges in
+ * fs_info->free_space_cache.
+ *
+ * On success returns 0 with the range start in *start_ret; *cache_ret is
+ * updated each time the search moves to another block group. On failure
+ * returns -ENOSPC with *start_ret set to the last position examined.
+ */
+static int noinline find_search_start(struct btrfs_root *root,
+ struct btrfs_block_group_cache **cache_ret,
+ u64 *start_ret, int num, int data)
+{
+ int ret;
+ struct btrfs_block_group_cache *cache = *cache_ret;
+ u64 last = *start_ret;
+ u64 start = 0;
+ u64 end = 0;
+ u64 search_start = *start_ret;
+ int wrapped = 0;
+
+ if (!cache)
+ goto out;
+again:
+ /* Make sure the free-space ranges of this block group are loaded. */
+ ret = cache_block_group(root, cache);
+ if (ret)
+ goto out;
+
+ last = max(search_start, cache->key.objectid);
+ if (cache->ro || !block_group_bits(cache, data))
+ goto new_group;
+
+ while(1) {
+ ret = find_first_extent_bit(&root->fs_info->free_space_cache,
+ last, &start, &end, EXTENT_DIRTY);
+ if (ret) {
+ goto new_group;
+ }
+
+ /* Clamp to the search position and remember where to continue. */
+ start = max(last, start);
+ last = end + 1;
+ if (last - start < num) {
+ continue;
+ }
+ /* The candidate must fit entirely inside the current block group. */
+ if (start + num > cache->key.objectid + cache->key.offset) {
+ goto new_group;
+ }
+ *start_ret = start;
+ return 0;
+ }
+out:
+ *start_ret = last;
+ /* Only used to warn when the original start is in no block group. */
+ cache = btrfs_lookup_block_group(root->fs_info, search_start);
+ if (!cache) {
+ printk("Unable to find block group for %llu\n",
+ (unsigned long long)search_start);
+ WARN_ON(1);
+ }
+ return -ENOSPC;
+
+new_group:
+ /* Continue right after the end of the current block group. */
+ last = cache->key.objectid + cache->key.offset;
+wrapped:
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ if (!cache) {
+ /* No later block group: retry once from the original start. */
+ if (!wrapped) {
+ wrapped = 1;
+ last = search_start;
+ goto wrapped;
+ }
+ goto out;
+ }
+ *cache_ret = cache;
+ goto again;
+}
+
+/*
+ * Translate BTRFS_BLOCK_GROUP_{DATA,METADATA,SYSTEM} bits in @flags into the
+ * matching BLOCK_GROUP_{DATA,METADATA,SYSTEM} state bits. Other bits in
+ * @flags are ignored.
+ */
+static int block_group_state_bits(u64 flags)
+{
+	int state = 0;
+
+	state |= (flags & BTRFS_BLOCK_GROUP_DATA) ? BLOCK_GROUP_DATA : 0;
+	state |= (flags & BTRFS_BLOCK_GROUP_METADATA) ?
+		 BLOCK_GROUP_METADATA : 0;
+	state |= (flags & BTRFS_BLOCK_GROUP_SYSTEM) ? BLOCK_GROUP_SYSTEM : 0;
+
+	return state;
+}
+
+/*
+ * Pick a writable block group whose flags contain all bits of @data and
+ * that still has room.
+ *
+ * Candidates are tried in this order:
+ *  1. the block group containing @search_start (if @search_start != 0),
+ *  2. @hint,
+ *  3. block groups found by scanning info->block_group_cache, starting
+ *     after @hint (or at max(hint start, @search_start), or at
+ *     @search_start when there is no hint).
+ *
+ * "Has room" means used + pinned bytes are below
+ * div_factor(group size, factor). If the scan finds nothing it is repeated
+ * once from @search_start with the threshold relaxed to the full group size.
+ *
+ * @owner is accepted for interface compatibility but is not consulted: the
+ * threshold factor is 10 regardless of its value.
+ *
+ * Returns the chosen block group, or NULL when none qualifies.
+ */
+static struct btrfs_block_group_cache *
+btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
+		       *hint, u64 search_start, int data, int owner)
+{
+	struct btrfs_block_group_cache *cache;
+	struct extent_io_tree *block_group_cache;
+	struct btrfs_block_group_cache *found_group = NULL;
+	struct btrfs_fs_info *info = root->fs_info;
+	u64 used;
+	u64 last;
+	u64 start;
+	u64 end;
+	u64 free_check;
+	u64 ptr;
+	int bit;
+	int ret;
+	int full_search = 0;
+	const int factor = 10;
+
+	block_group_cache = &info->block_group_cache;
+	bit = block_group_state_bits(data);
+
+	if (search_start) {
+		struct btrfs_block_group_cache *shint;
+
+		shint = btrfs_lookup_block_group(info, search_start);
+		if (shint && !shint->ro && block_group_bits(shint, data)) {
+			used = btrfs_block_group_used(&shint->item);
+			if (used + shint->pinned <
+			    div_factor(shint->key.offset, factor)) {
+				return shint;
+			}
+		}
+	}
+	if (hint && !hint->ro && block_group_bits(hint, data)) {
+		used = btrfs_block_group_used(&hint->item);
+		if (used + hint->pinned <
+		    div_factor(hint->key.offset, factor)) {
+			return hint;
+		}
+		/* usable hint is full: start scanning right behind it */
+		last = hint->key.objectid + hint->key.offset;
+	} else if (hint) {
+		last = max(hint->key.objectid, search_start);
+	} else {
+		last = search_start;
+	}
+again:
+	while(1) {
+		ret = find_first_extent_bit(block_group_cache, last,
+					    &start, &end, bit);
+		if (ret)
+			break;
+
+		ret = get_state_private(block_group_cache, start, &ptr);
+		if (ret)
+			break;
+
+		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
+		/* the next iteration continues after this block group */
+		last = cache->key.objectid + cache->key.offset;
+		used = btrfs_block_group_used(&cache->item);
+
+		if (!cache->ro && block_group_bits(cache, data)) {
+			if (full_search)
+				free_check = cache->key.offset;
+			else
+				free_check = div_factor(cache->key.offset,
+							factor);
+
+			if (used + cache->pinned < free_check) {
+				found_group = cache;
+				goto found;
+			}
+		}
+		cond_resched();
+	}
+	if (!full_search) {
+		/* second pass: from search_start, with the relaxed threshold */
+		last = search_start;
+		full_search = 1;
+		goto again;
+	}
+found:
+	return found_group;
+}
+
+/*
+ * Back reference rules. Back refs have three main goals:
+ *
+ * 1) differentiate between all holders of references to an extent so that
+ * when a reference is dropped we can make sure it was a valid reference
+ * before freeing the extent.
+ *
+ * 2) Provide enough information to quickly find the holders of an extent
+ * if we notice a given block is corrupted or bad.
+ *
+ * 3) Make it easy to migrate blocks for FS shrinking or storage pool
+ * maintenance. This is actually the same as #2, but with a slightly
+ * different use case.
+ *
+ * There are two kinds of back refs. The implicit back refs is optimized
+ * for pointers in non-shared tree blocks. For a given pointer in a block,
+ * back refs of this kind provide information about the block's owner tree
+ * and the pointer's key. This information allows us to find the block by
+ * b-tree searching. The full back refs is for pointers in tree blocks not
+ * referenced by their owner trees. The location of tree block is recorded
+ * in the back refs. Actually the full back refs is generic, and can be
+ * used in all cases the implicit back refs is used. The major shortcoming
+ * of the full back refs is its overhead. Every time a tree block gets
+ * COWed, we have to update back refs entry for all pointers in it.
+ *
+ * For a newly allocated tree block, we use implicit back refs for
+ * pointers in it. This means most tree related operations only involve
+ * implicit back refs. For a tree block created in old transaction, the
+ * only way to drop a reference to it is COW it. So we can detect the
+ * event that tree block loses its owner tree's reference and do the
+ * back refs conversion.
+ *
+ * When a tree block is COW'd through a tree, there are four cases:
+ *
+ * The reference count of the block is one and the tree is the block's
+ * owner tree. Nothing to do in this case.
+ *
+ * The reference count of the block is one and the tree is not the
+ * block's owner tree. In this case, full back refs is used for pointers
+ * in the block. Remove these full back refs, add implicit back refs for
+ * every pointers in the new block.
+ *
+ * The reference count of the block is greater than one and the tree is
+ * the block's owner tree. In this case, implicit back refs is used for
+ * pointers in the block. Add full back refs for every pointers in the
+ * block, increase lower level extents' reference counts. The original
+ * implicit back refs are entailed to the new block.
+ *
+ * The reference count of the block is greater than one and the tree is
+ * not the block's owner tree. Add implicit back refs for every pointer in
+ * the new block, increase lower level extents' reference count.
+ *
+ * Back Reference Key composing:
+ *
+ * The key objectid corresponds to the first byte in the extent,
+ * The key type is used to differentiate between types of back refs.
+ * There are different meanings of the key offset for different types
+ * of back refs.
+ *
+ * File extents can be referenced by:
+ *
+ * - multiple snapshots, subvolumes, or different generations in one subvol
+ * - different files inside a single subvolume
+ * - different offsets inside a file (bookend extents in file.c)
+ *
+ * The extent ref structure for the implicit back refs has fields for:
+ *
+ * - Objectid of the subvolume root
+ * - objectid of the file holding the reference
+ * - original offset in the file
+ * - how many bookend extents
+ *
+ * The key offset for the implicit back refs is hash of the first
+ * three fields.
+ *
+ * The extent ref structure for the full back refs has field for:
+ *
+ * - number of pointers in the tree leaf
+ *
+ * The key offset for the full back refs is the first byte of
+ * the tree leaf
+ *
+ * When a file extent is allocated, The implicit back refs is used.
+ * the fields are filled in:
+ *
+ * (root_key.objectid, inode objectid, offset in file, 1)
+ *
+ * When a file extent is removed by file truncation, we find the
+ * corresponding implicit back refs and check the following fields:
+ *
+ * (btrfs_header_owner(leaf), inode objectid, offset in file)
+ *
+ * Btree extents can be referenced by:
+ *
+ * - Different subvolumes
+ *
+ * Both the implicit back refs and the full back refs for tree blocks
+ * only consist of key. The key offset for the implicit back refs is
+ * objectid of block's owner tree. The key offset for the full back refs
+ * is the first byte of parent block.
+ *
+ * When implicit back refs is used, information about the lowest key and
+ * level of the tree block are required. This information is stored in
+ * tree block info structure.
+ */
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+/*
+ * Convert the v0 extent item that @path points at into a current
+ * btrfs_extent_item, in place.
+ *
+ * @owner:      owner objectid of the extent, or (u64)-1 to read it from the
+ *              first BTRFS_EXTENT_REF_V0_KEY item following the extent item.
+ * @extra_size: not used by this function.
+ *
+ * The item is grown to sizeof(struct btrfs_extent_item), plus a
+ * btrfs_tree_block_info when @owner < BTRFS_FIRST_FREE_OBJECTID. The ref
+ * count is carried over and the generation is set to 0. Owners below
+ * BTRFS_FIRST_FREE_OBJECTID get the TREE_BLOCK | FULL_BACKREF flags, all
+ * others the DATA flag.
+ *
+ * Returns 0 on success or a negative error from the tree search; several
+ * unexpected conditions trip BUG_ON() instead.
+ */
+static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 owner, u32 extra_size)
+{
+ struct btrfs_extent_item *item;
+ struct btrfs_extent_item_v0 *ei0;
+ struct btrfs_extent_ref_v0 *ref0;
+ struct btrfs_tree_block_info *bi;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ u32 new_size = sizeof(*item);
+ u64 refs;
+ int ret;
+
+ leaf = path->nodes[0];
+ BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
+
+ /* Remember the item's key and ref count before the path is released. */
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ ei0 = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_item_v0);
+ refs = btrfs_extent_refs_v0(leaf, ei0);
+
+ if (owner == (u64)-1) {
+ /* Walk forward to the first v0 ref of this extent to learn the owner. */
+ while (1) {
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ BUG_ON(ret > 0);
+ leaf = path->nodes[0];
+ }
+ btrfs_item_key_to_cpu(leaf, &found_key,
+ path->slots[0]);
+ BUG_ON(key.objectid != found_key.objectid);
+ if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
+ path->slots[0]++;
+ continue;
+ }
+ ref0 = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_ref_v0);
+ owner = btrfs_ref_objectid_v0(leaf, ref0);
+ break;
+ }
+ }
+ btrfs_release_path(path);
+
+ if (owner < BTRFS_FIRST_FREE_OBJECTID)
+ new_size += sizeof(*bi);
+
+ /* From here on new_size is the number of bytes the item has to grow by. */
+ new_size -= sizeof(*ei0);
+ ret = btrfs_search_slot(trans, root, &key, path, new_size, 1);
+ if (ret < 0)
+ return ret;
+ BUG_ON(ret);
+
+ ret = btrfs_extend_item(trans, root, path, new_size);
+ BUG_ON(ret);
+
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+ btrfs_set_extent_refs(leaf, item, refs);
+ /* FIXME: get real generation */
+ btrfs_set_extent_generation(leaf, item, 0);
+ if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_set_extent_flags(leaf, item,
+ BTRFS_EXTENT_FLAG_TREE_BLOCK |
+ BTRFS_BLOCK_FLAG_FULL_BACKREF);
+ /* The tree block info sits directly behind the extent item. */
+ bi = (struct btrfs_tree_block_info *)(item + 1);
+ /* FIXME: get first key of the block */
+ memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
+ /* The owner value is stored as the tree block level here. */
+ btrfs_set_tree_block_level(leaf, bi, (int)owner);
+ } else {
+ btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
+ }
+ btrfs_mark_buffer_dirty(leaf);
+ return 0;
+}
+#endif
+
+/*
+ * Hash (root_objectid, owner, offset) into the value used as key offset of
+ * an EXTENT_DATA_REF item.
+ *
+ * One crc32c (seeded with ~0) covers the little-endian root objectid; a
+ * second one (also seeded with ~0) covers the little-endian owner followed
+ * by the little-endian offset. The first is shifted left by 31 bits and
+ * XORed with the second.
+ * NOTE(review): the shift is 31, not 32; the result is used as a tree key,
+ * so presumably it must not be changed - confirm before touching.
+ */
+static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
+{
+	__le64 le_root = cpu_to_le64(root_objectid);
+	__le64 le_owner = cpu_to_le64(owner);
+	__le64 le_offset = cpu_to_le64(offset);
+	u32 crc_hi;
+	u32 crc_lo;
+
+	crc_hi = btrfs_crc32c(~(u32)0, &le_root, sizeof(le_root));
+
+	crc_lo = btrfs_crc32c(~(u32)0, &le_owner, sizeof(le_owner));
+	crc_lo = btrfs_crc32c(crc_lo, &le_offset, sizeof(le_offset));
+
+	return ((u64)crc_hi << 31) ^ (u64)crc_lo;
+}
+
+/*
+ * Compute hash_extent_data_ref() for the root, objectid and offset stored
+ * in the extent data ref @ref inside @leaf.
+ */
+static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
+				     struct btrfs_extent_data_ref *ref)
+{
+	u64 ref_root = btrfs_extent_data_ref_root(leaf, ref);
+	u64 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
+	u64 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
+
+	return hash_extent_data_ref(ref_root, ref_objectid, ref_offset);
+}
+
+/*
+ * Return 1 when the extent data ref @ref in @leaf records exactly
+ * (root_objectid, owner, offset), 0 otherwise.
+ */
+static int match_extent_data_ref(struct extent_buffer *leaf,
+				 struct btrfs_extent_data_ref *ref,
+				 u64 root_objectid, u64 owner, u64 offset)
+{
+	return btrfs_extent_data_ref_root(leaf, ref) == root_objectid &&
+	       btrfs_extent_data_ref_objectid(leaf, ref) == owner &&
+	       btrfs_extent_data_ref_offset(leaf, ref) == offset;
+}
+
+/*
+ * Position @path on the data back reference of extent @bytenr.
+ *
+ * With @parent != 0 a SHARED_DATA_REF keyed by @parent is looked up (and,
+ * with BTRFS_COMPAT_EXTENT_TREE_V0, an EXTENT_REF_V0 item as a fallback).
+ * Otherwise the EXTENT_DATA_REF items starting at the hash of
+ * (root_objectid, owner, offset) are scanned until one matches all three
+ * values.
+ *
+ * Returns 0 with @path pointing at the item, -ENOENT when no such reference
+ * exists, or a negative error from the tree search.
+ */
+static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, u64 parent,
+ u64 root_objectid,
+ u64 owner, u64 offset)
+{
+ struct btrfs_key key;
+ struct btrfs_extent_data_ref *ref;
+ struct extent_buffer *leaf;
+ u32 nritems;
+ int ret;
+ int recow;
+ int err = -ENOENT;
+
+ key.objectid = bytenr;
+ if (parent) {
+ key.type = BTRFS_SHARED_DATA_REF_KEY;
+ key.offset = parent;
+ } else {
+ key.type = BTRFS_EXTENT_DATA_REF_KEY;
+ key.offset = hash_extent_data_ref(root_objectid,
+ owner, offset);
+ }
+again:
+ recow = 0;
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0) {
+ err = ret;
+ goto fail;
+ }
+
+ if (parent) {
+ /* Shared refs are keyed exactly: either the key exists or it does not. */
+ if (!ret)
+ return 0;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+ key.type = BTRFS_EXTENT_REF_V0_KEY;
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0) {
+ err = ret;
+ goto fail;
+ }
+ if (!ret)
+ return 0;
+#endif
+ goto fail;
+ }
+
+ /*
+ * The hash is only the starting point: walk forward over the
+ * EXTENT_DATA_REF items of this extent and compare the stored fields.
+ */
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ while (1) {
+ if (path->slots[0] >= nritems) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ err = ret;
+ if (ret)
+ goto fail;
+
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ /* Remember that the match, if any, is on a later leaf. */
+ recow = 1;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid != bytenr ||
+ key.type != BTRFS_EXTENT_DATA_REF_KEY)
+ goto fail;
+
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_data_ref);
+
+ if (match_extent_data_ref(leaf, ref, root_objectid,
+ owner, offset)) {
+ /*
+ * The match sits on a leaf reached via btrfs_next_leaf():
+ * search again with the matching item's key (now in "key")
+ * so the path is set up by btrfs_search_slot() itself.
+ */
+ if (recow) {
+ btrfs_release_path(path);
+ goto again;
+ }
+ err = 0;
+ break;
+ }
+ path->slots[0]++;
+ }
+fail:
+ return err;
+}
+
+/*
+ * Add @refs_to_add references to the keyed data back reference of the extent
+ * at @bytenr, creating the item when it does not exist yet.
+ *
+ * A non-zero @parent selects a shared data ref keyed by @parent; otherwise an
+ * extent data ref keyed by hash(root, owner, offset) is used. If an item with
+ * that key exists but describes a different (root, owner, offset), the key
+ * offset is incremented and the insertion retried.
+ *
+ * @path is released before returning. Returns 0 on success or the error
+ * from btrfs_insert_empty_item().
+ */
+static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct btrfs_path *path,
+					   u64 bytenr, u64 parent,
+					   u64 root_objectid, u64 owner,
+					   u64 offset, int refs_to_add)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	u32 item_size;
+	u32 count;
+	int ret;
+
+	key.objectid = bytenr;
+	if (parent) {
+		key.type = BTRFS_SHARED_DATA_REF_KEY;
+		key.offset = parent;
+		item_size = sizeof(struct btrfs_shared_data_ref);
+	} else {
+		key.type = BTRFS_EXTENT_DATA_REF_KEY;
+		key.offset = hash_extent_data_ref(root_objectid,
+						  owner, offset);
+		item_size = sizeof(struct btrfs_extent_data_ref);
+	}
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, item_size);
+	if (ret && ret != -EEXIST)
+		goto fail;
+	leaf = path->nodes[0];
+
+	if (parent) {
+		struct btrfs_shared_data_ref *sref;
+
+		sref = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_shared_data_ref);
+		/* Fresh item: count starts at refs_to_add; else accumulate. */
+		count = refs_to_add;
+		if (ret == -EEXIST)
+			count += btrfs_shared_data_ref_count(leaf, sref);
+		btrfs_set_shared_data_ref_count(leaf, sref, count);
+	} else {
+		struct btrfs_extent_data_ref *dref;
+
+		while (ret == -EEXIST) {
+			dref = btrfs_item_ptr(leaf, path->slots[0],
+					      struct btrfs_extent_data_ref);
+			if (match_extent_data_ref(leaf, dref, root_objectid,
+						  owner, offset))
+				break;
+
+			/* Same key, different ref: try the next offset. */
+			btrfs_release_path(path);
+			key.offset++;
+			ret = btrfs_insert_empty_item(trans, root, path, &key,
+						      item_size);
+			if (ret && ret != -EEXIST)
+				goto fail;
+
+			leaf = path->nodes[0];
+		}
+
+		dref = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_data_ref);
+		if (ret == 0) {
+			btrfs_set_extent_data_ref_root(leaf, dref,
+						       root_objectid);
+			btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
+			btrfs_set_extent_data_ref_offset(leaf, dref, offset);
+			btrfs_set_extent_data_ref_count(leaf, dref,
+							refs_to_add);
+		} else {
+			count = refs_to_add;
+			count += btrfs_extent_data_ref_count(leaf, dref);
+			btrfs_set_extent_data_ref_count(leaf, dref, count);
+		}
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	ret = 0;
+fail:
+	btrfs_release_path(path);
+	return ret;
+}
+
+/*
+ * Drop @refs_to_drop references from the keyed data back reference that
+ * @path currently points at. The item is deleted when its count reaches
+ * zero; otherwise the reduced count is written back and the leaf is marked
+ * dirty.
+ *
+ * Returns 0, or the result of btrfs_del_item() when the item is deleted.
+ */
+static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct btrfs_path *path,
+					   int refs_to_drop)
+{
+	struct btrfs_key key;
+	struct btrfs_extent_data_ref *dref = NULL;
+	struct btrfs_shared_data_ref *sref = NULL;
+	struct extent_buffer *leaf = path->nodes[0];
+	u32 count = 0;
+
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+	/* Read the current count from whichever ref flavour this item is. */
+	switch (key.type) {
+	case BTRFS_EXTENT_DATA_REF_KEY:
+		dref = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_data_ref);
+		count = btrfs_extent_data_ref_count(leaf, dref);
+		break;
+	case BTRFS_SHARED_DATA_REF_KEY:
+		sref = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_shared_data_ref);
+		count = btrfs_shared_data_ref_count(leaf, sref);
+		break;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	case BTRFS_EXTENT_REF_V0_KEY: {
+		struct btrfs_extent_ref_v0 *ref0;
+
+		ref0 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_ref_v0);
+		count = btrfs_ref_count_v0(leaf, ref0);
+		break;
+	}
+#endif
+	default:
+		BUG();
+	}
+
+	BUG_ON(count < refs_to_drop);
+	count -= refs_to_drop;
+
+	if (count == 0)
+		return btrfs_del_item(trans, root, path);
+
+	if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
+		btrfs_set_extent_data_ref_count(leaf, dref, count);
+	else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
+		btrfs_set_shared_data_ref_count(leaf, sref, count);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	else {
+		struct btrfs_extent_ref_v0 *ref0;
+
+		ref0 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_ref_v0);
+		btrfs_set_ref_count_v0(leaf, ref0, count);
+	}
+#endif
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+/*
+ * Return the reference count stored in a data back reference.
+ *
+ * When @iref is non-NULL the count is read from that inline ref (extent data
+ * ref or, for any other inline type, shared data ref layout). Otherwise the
+ * count is read from the keyed item @path points at, according to its key
+ * type.
+ */
+static noinline u32 extent_data_ref_count(struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  struct btrfs_extent_inline_ref *iref)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_extent_data_ref *dref;
+	struct btrfs_shared_data_ref *sref;
+	u32 count = 0;
+
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+	if (iref) {
+		if (btrfs_extent_inline_ref_type(leaf, iref) ==
+		    BTRFS_EXTENT_DATA_REF_KEY) {
+			/* The data ref overlays the inline ref's offset. */
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			return btrfs_extent_data_ref_count(leaf, dref);
+		}
+		/* The shared data ref follows the inline ref header. */
+		sref = (struct btrfs_shared_data_ref *)(iref + 1);
+		return btrfs_shared_data_ref_count(leaf, sref);
+	}
+
+	switch (key.type) {
+	case BTRFS_EXTENT_DATA_REF_KEY:
+		dref = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_data_ref);
+		count = btrfs_extent_data_ref_count(leaf, dref);
+		break;
+	case BTRFS_SHARED_DATA_REF_KEY:
+		sref = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_shared_data_ref);
+		count = btrfs_shared_data_ref_count(leaf, sref);
+		break;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	case BTRFS_EXTENT_REF_V0_KEY: {
+		struct btrfs_extent_ref_v0 *ref0;
+
+		ref0 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_ref_v0);
+		count = btrfs_ref_count_v0(leaf, ref0);
+		break;
+	}
+#endif
+	default:
+		BUG();
+	}
+	return count;
+}
+
+/*
+ * Search @root for the keyed back reference of the tree block at @bytenr:
+ * (bytenr, SHARED_BLOCK_REF, parent) when @parent is non-zero, otherwise
+ * (bytenr, TREE_BLOCK_REF, root_objectid).
+ *
+ * Returns 0 when found, -ENOENT when the key is absent, or the negative
+ * value returned by btrfs_search_slot().
+ */
+static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 bytenr, u64 parent,
+					  u64 root_objectid)
+{
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = bytenr;
+	key.type = parent ? BTRFS_SHARED_BLOCK_REF_KEY :
+			    BTRFS_TREE_BLOCK_REF_KEY;
+	key.offset = parent ? parent : root_objectid;
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (ret == -ENOENT && parent) {
+		/* Retry with the V0 ref key type. */
+		btrfs_release_path(path);
+		key.type = BTRFS_EXTENT_REF_V0_KEY;
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret > 0)
+			ret = -ENOENT;
+	}
+#endif
+	return ret;
+}
+
+/*
+ * Insert an empty (zero-sized) keyed back reference item for the tree block
+ * at @bytenr: SHARED_BLOCK_REF keyed by @parent when it is non-zero,
+ * otherwise TREE_BLOCK_REF keyed by @root_objectid.
+ *
+ * @path is released before returning. Returns the result of
+ * btrfs_insert_empty_item().
+ */
+static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 bytenr, u64 parent,
+					  u64 root_objectid)
+{
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = bytenr;
+	key.type = parent ? BTRFS_SHARED_BLOCK_REF_KEY :
+			    BTRFS_TREE_BLOCK_REF_KEY;
+	key.offset = parent ? parent : root_objectid;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+	btrfs_release_path(path);
+
+	return ret;
+}
+
+/*
+ * Pick the back reference key type for an extent.
+ *
+ * Owners below BTRFS_FIRST_FREE_OBJECTID get a block ref type, all others a
+ * data ref type; a non-zero @parent selects the "shared" variant of either.
+ */
+static inline int extent_ref_type(u64 parent, u64 owner)
+{
+	if (owner < BTRFS_FIRST_FREE_OBJECTID)
+		return parent > 0 ? BTRFS_SHARED_BLOCK_REF_KEY :
+				    BTRFS_TREE_BLOCK_REF_KEY;
+
+	return parent > 0 ? BTRFS_SHARED_DATA_REF_KEY :
+			    BTRFS_EXTENT_DATA_REF_KEY;
+}
+
+/*
+ * Find the inline back reference for an extent inside its extent item.
+ *
+ * The extent item for @bytenr is looked up in @root, as a METADATA_ITEM keyed
+ * by @owner when the skinny-metadata feature is set and @owner is below
+ * BTRFS_FIRST_FREE_OBJECTID, otherwise as an EXTENT_ITEM keyed by @num_bytes.
+ * If the skinny lookup misses, the preceding slot and then a fresh search
+ * are tried with the EXTENT_ITEM key. The inline refs after the item header
+ * are then scanned for one of the type given by extent_ref_type(@parent,
+ * @owner) that matches the requested root/owner/offset or parent.
+ *
+ * @insert: non-zero when the caller intends to add an inline ref; the search
+ *          then reserves room for it and extra checks decide whether an
+ *          inline ref may be added at all.
+ *
+ * Returns:
+ *   0        - found; *ref_ret points at the matching inline ref.
+ *   -ENOENT  - no matching inline ref; *ref_ret points at the position where
+ *              the scan stopped. NOTE: when the extent item itself was not
+ *              found (or is a V0 item and !@insert), -ENOENT is returned
+ *              WITHOUT writing *ref_ret.
+ *   -EAGAIN  - @insert is set but the item would become too large, or the
+ *              next key is another item for the same @bytenr.
+ *   -EINVAL  - the item is smaller than struct btrfs_extent_item.
+ *   -EIO     - the item flags are neither TREE_BLOCK nor DATA.
+ *   other <0 - error from the tree search or the V0 conversion.
+ */
+static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref **ref_ret,
+				 u64 bytenr, u64 num_bytes,
+				 u64 parent, u64 root_objectid,
+				 u64 owner, u64 offset, int insert)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
+	u64 flags;
+	u32 item_size;
+	unsigned long ptr;
+	unsigned long end;
+	int extra_size;
+	int type;
+	int want;
+	int ret;
+	int err = 0;
+	int skinny_metadata =
+		btrfs_fs_incompat(root->fs_info,
+				  BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = num_bytes;
+
+	want = extent_ref_type(parent, owner);
+	if (insert)
+		extra_size = btrfs_extent_inline_ref_size(want);
+	else
+		extra_size = -1;
+
+	/* Skinny keys are only used for tree blocks. */
+	if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) {
+		skinny_metadata = 1;
+		key.type = BTRFS_METADATA_ITEM_KEY;
+		key.offset = owner;
+	} else if (skinny_metadata) {
+		skinny_metadata = 0;
+	}
+
+again:
+	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+
+	/*
+	 * We may be a newly converted file system which still has the old fat
+	 * extent entries for metadata, so try and see if we have one of those.
+	 */
+	if (ret > 0 && skinny_metadata) {
+		skinny_metadata = 0;
+		if (path->slots[0]) {
+			path->slots[0]--;
+			btrfs_item_key_to_cpu(path->nodes[0], &key,
+					      path->slots[0]);
+			if (key.objectid == bytenr &&
+			    key.type == BTRFS_EXTENT_ITEM_KEY &&
+			    key.offset == num_bytes)
+				ret = 0;
+		}
+		if (ret) {
+			/*
+			 * key may have been overwritten with the previous
+			 * item's key above, so rebuild every field, including
+			 * objectid, before searching again.
+			 */
+			key.objectid = bytenr;
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			key.offset = num_bytes;
+			btrfs_release_path(path);
+			goto again;
+		}
+	}
+
+	if (ret) {
+		printf("Failed to find [%llu, %u, %llu]\n",
+		       (unsigned long long)key.objectid,
+		       (unsigned)key.type,
+		       (unsigned long long)key.offset);
+		return -ENOENT;
+	}
+
+	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		if (!insert) {
+			err = -ENOENT;
+			goto out;
+		}
+		ret = convert_extent_item_v0(trans, root, path, owner,
+					     extra_size);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+	}
+#endif
+	if (item_size < sizeof(*ei)) {
+		printf("Size is %u, needs to be %u, slot %d\n",
+		       (unsigned)item_size,
+		       (unsigned)sizeof(*ei), path->slots[0]);
+		btrfs_print_leaf(root, leaf);
+		return -EINVAL;
+	}
+
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	flags = btrfs_extent_flags(leaf, ei);
+
+	/* Inline refs occupy [ptr, end) after the fixed item header(s). */
+	ptr = (unsigned long)(ei + 1);
+	end = (unsigned long)ei + item_size;
+
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
+		ptr += sizeof(struct btrfs_tree_block_info);
+		BUG_ON(ptr > end);
+	} else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+		if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
+			return -EIO;
+		}
+	}
+
+	err = -ENOENT;
+	while (1) {
+		if (ptr >= end) {
+			WARN_ON(ptr > end);
+			break;
+		}
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		type = btrfs_extent_inline_ref_type(leaf, iref);
+		/* Stop at the first ref whose type is past the wanted one. */
+		if (want < type)
+			break;
+		if (want > type) {
+			ptr += btrfs_extent_inline_ref_size(type);
+			continue;
+		}
+
+		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+			struct btrfs_extent_data_ref *dref;
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			if (match_extent_data_ref(leaf, dref, root_objectid,
+						  owner, offset)) {
+				err = 0;
+				break;
+			}
+			if (hash_extent_data_ref_item(leaf, dref) <
+			    hash_extent_data_ref(root_objectid, owner, offset))
+				break;
+		} else {
+			u64 ref_offset;
+			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
+			if (parent > 0) {
+				if (parent == ref_offset) {
+					err = 0;
+					break;
+				}
+				if (ref_offset < parent)
+					break;
+			} else {
+				if (root_objectid == ref_offset) {
+					err = 0;
+					break;
+				}
+				if (ref_offset < root_objectid)
+					break;
+			}
+		}
+		ptr += btrfs_extent_inline_ref_size(type);
+	}
+	if (err == -ENOENT && insert) {
+		if (item_size + extra_size >=
+		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
+			err = -EAGAIN;
+			goto out;
+		}
+		/*
+		 * To add new inline back ref, we have to make sure
+		 * there is no corresponding back ref item.
+		 * For simplicity, we just do not add new inline back
+		 * ref if there is any back ref item.
+		 */
+		if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
+			err = -EAGAIN;
+			goto out;
+		}
+	}
+	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
+out:
+	return err;
+}
+
+/*
+ * Insert a new inline back reference at position @iref inside the extent
+ * item @path points at.
+ *
+ * The item is extended by the size of the new ref, the extent's total
+ * reference count is increased by @refs_to_add, any refs located at or after
+ * the insertion point are shifted towards the end of the item, and the new
+ * ref is filled in according to extent_ref_type(@parent, @owner).
+ *
+ * Always returns 0; a failing btrfs_extend_item() trips BUG_ON().
+ */
+static int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_path *path,
+				struct btrfs_extent_inline_ref *iref,
+				u64 parent, u64 root_objectid,
+				u64 owner, u64 offset, int refs_to_add)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_extent_item *ei;
+	unsigned long insert_at;
+	unsigned long item_end;
+	unsigned long rel_offset;
+	u64 total_refs;
+	int ref_size;
+	int ref_type;
+	int ret;
+
+	/* Remember where @iref sits relative to the start of the item. */
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	rel_offset = (unsigned long)iref - (unsigned long)ei;
+
+	ref_type = extent_ref_type(parent, owner);
+	ref_size = btrfs_extent_inline_ref_size(ref_type);
+
+	ret = btrfs_extend_item(trans, root, path, ref_size);
+	BUG_ON(ret);
+
+	/* Re-derive the item pointer after extending the item. */
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	total_refs = btrfs_extent_refs(leaf, ei);
+	total_refs += refs_to_add;
+	btrfs_set_extent_refs(leaf, ei, total_refs);
+
+	insert_at = (unsigned long)ei + rel_offset;
+	item_end = (unsigned long)ei +
+		   btrfs_item_size_nr(leaf, path->slots[0]);
+	/* Shift the refs that follow the insertion point out of the way. */
+	if (insert_at < item_end - ref_size)
+		memmove_extent_buffer(leaf, insert_at + ref_size, insert_at,
+				      item_end - ref_size - insert_at);
+
+	iref = (struct btrfs_extent_inline_ref *)insert_at;
+	btrfs_set_extent_inline_ref_type(leaf, iref, ref_type);
+
+	switch (ref_type) {
+	case BTRFS_EXTENT_DATA_REF_KEY: {
+		struct btrfs_extent_data_ref *dref;
+
+		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
+		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
+		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
+		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
+		break;
+	}
+	case BTRFS_SHARED_DATA_REF_KEY: {
+		struct btrfs_shared_data_ref *sref;
+
+		sref = (struct btrfs_shared_data_ref *)(iref + 1);
+		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+		break;
+	}
+	case BTRFS_SHARED_BLOCK_REF_KEY:
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+		break;
+	default:
+		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+		break;
+	}
+
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+/*
+ * Find the back reference for an extent, trying the inline form first and
+ * falling back to the keyed item form.
+ *
+ * On an inline hit *ref_ret points at the inline ref. When the inline lookup
+ * reports -ENOENT, @path is released, *ref_ret is set to NULL and the keyed
+ * tree-block or data ref lookup (chosen by @owner) provides the result.
+ */
+static int lookup_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref **ref_ret,
+				 u64 bytenr, u64 num_bytes, u64 parent,
+				 u64 root_objectid, u64 owner, u64 offset)
+{
+	int ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
+					       bytenr, num_bytes, parent,
+					       root_objectid, owner, offset,
+					       0);
+
+	if (ret != -ENOENT)
+		return ret;
+
+	/* No inline ref: look for a separate back reference item. */
+	btrfs_release_path(path);
+	*ref_ret = NULL;
+
+	if (owner < BTRFS_FIRST_FREE_OBJECTID)
+		return lookup_tree_block_ref(trans, root, path, bytenr,
+					     parent, root_objectid);
+
+	return lookup_extent_data_ref(trans, root, path, bytenr, parent,
+				      root_objectid, owner, offset);
+}
+
+/*
+ * Apply @refs_to_mod (positive or negative) to the inline back reference
+ * @iref and to the total reference count of the extent item @path points at.
+ *
+ * Data and shared-data refs carry their own count; any other inline type is
+ * treated as holding exactly one reference and may only be modified by -1.
+ * When the ref's count drops to zero the ref is cut out of the item and the
+ * item is truncated accordingly.
+ *
+ * Always returns 0; inconsistencies trip BUG_ON().
+ */
+static int update_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref *iref,
+				 int refs_to_mod)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_data_ref *dref = NULL;
+	struct btrfs_shared_data_ref *sref = NULL;
+	unsigned long ptr;
+	unsigned long end;
+	u32 item_size;
+	int size;
+	int type;
+	int ret;
+	u64 refs;
+
+	/* First adjust the extent-wide reference count. */
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, ei);
+	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
+	refs += refs_to_mod;
+	btrfs_set_extent_refs(leaf, ei, refs);
+
+	/* Then fetch the count held by this particular inline ref. */
+	type = btrfs_extent_inline_ref_type(leaf, iref);
+	switch (type) {
+	case BTRFS_EXTENT_DATA_REF_KEY:
+		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		refs = btrfs_extent_data_ref_count(leaf, dref);
+		break;
+	case BTRFS_SHARED_DATA_REF_KEY:
+		sref = (struct btrfs_shared_data_ref *)(iref + 1);
+		refs = btrfs_shared_data_ref_count(leaf, sref);
+		break;
+	default:
+		refs = 1;
+		BUG_ON(refs_to_mod != -1);
+		break;
+	}
+
+	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
+	refs += refs_to_mod;
+
+	if (refs == 0) {
+		/* Close the gap left by the ref and shrink the item. */
+		size = btrfs_extent_inline_ref_size(type);
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+		ptr = (unsigned long)iref;
+		end = (unsigned long)ei + item_size;
+		if (ptr + size < end)
+			memmove_extent_buffer(leaf, ptr, ptr + size,
+					      end - ptr - size);
+		item_size -= size;
+		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
+		BUG_ON(ret);
+	} else if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+		btrfs_set_extent_data_ref_count(leaf, dref, refs);
+	} else {
+		btrfs_set_shared_data_ref_count(leaf, sref, refs);
+	}
+
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+/*
+ * Add @refs_to_add references for an extent as an inline back reference.
+ *
+ * If a matching inline ref already exists its count is bumped (only valid
+ * for data extents, enforced with BUG_ON); if none exists a new inline ref is
+ * set up at the position reported by the lookup. Any other lookup result
+ * (e.g. -EAGAIN) is passed back unchanged.
+ *
+ * Returns 0 on success, -ENOENT when the extent item itself could not be
+ * located, or the error from lookup_inline_extent_backref().
+ */
+static int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 bytenr, u64 num_bytes, u64 parent,
+				 u64 root_objectid, u64 owner,
+				 u64 offset, int refs_to_add)
+{
+	/*
+	 * Start from NULL: the lookup can return -ENOENT without writing the
+	 * insertion position (when the extent item was not found at all).
+	 */
+	struct btrfs_extent_inline_ref *iref = NULL;
+	int ret;
+
+	ret = lookup_inline_extent_backref(trans, root, path, &iref,
+					   bytenr, num_bytes, parent,
+					   root_objectid, owner, offset, 1);
+	if (ret == 0) {
+		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
+		ret = update_inline_extent_backref(trans, root, path, iref,
+						   refs_to_add);
+	} else if (ret == -ENOENT) {
+		/* No insertion point means there is no item to extend. */
+		if (!iref)
+			return -ENOENT;
+		ret = setup_inline_extent_backref(trans, root, path, iref,
+						  parent, root_objectid,
+						  owner, offset, refs_to_add);
+	}
+	return ret;
+}
+
+/*
+ * Insert a keyed (non-inline) back reference for an extent. Owners at or
+ * above BTRFS_FIRST_FREE_OBJECTID get a data ref carrying @refs_to_add;
+ * lower owners get a tree block ref, for which @refs_to_add must be 1.
+ */
+static int insert_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 bytenr, u64 parent, u64 root_objectid,
+				 u64 owner, u64 offset, int refs_to_add)
+{
+	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
+		return insert_extent_data_ref(trans, root, path, bytenr,
+					      parent, root_objectid,
+					      owner, offset, refs_to_add);
+
+	BUG_ON(refs_to_add != 1);
+	return insert_tree_block_ref(trans, root, path, bytenr,
+				     parent, root_objectid);
+}
+
+/*
+ * Drop @refs_to_drop references from a back reference.
+ *
+ * A non-NULL @iref is an inline ref and is updated in place; otherwise @path
+ * points at a keyed item, which is handled as a data ref when @is_data is
+ * set and simply deleted when it is not. Non-data refs may only be dropped
+ * one at a time.
+ */
+static int remove_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref *iref,
+				 int refs_to_drop, int is_data)
+{
+	BUG_ON(!is_data && refs_to_drop != 1);
+
+	if (iref)
+		return update_inline_extent_backref(trans, root, path, iref,
+						    -refs_to_drop);
+	if (is_data)
+		return remove_extent_data_ref(trans, root, path,
+					      refs_to_drop);
+
+	return btrfs_del_item(trans, root, path);
+}
+
+/*
+ * Add one reference to the extent [@bytenr, @num_bytes) on behalf of
+ * (@parent, @root_objectid, @owner, @offset).
+ *
+ * An inline back reference in the extent tree is tried first. When that
+ * reports -EAGAIN, the extent item's reference count is bumped directly and
+ * a separate keyed back reference item is inserted instead.
+ *
+ * finish_current_insert() and del_pending_extents() are run on the extent
+ * root before returning. Returns 0 on success or -ENOMEM if no path could be
+ * allocated; any other failure trips BUG_ON().
+ */
+int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 bytenr, u64 num_bytes, u64 parent,
+			 u64 root_objectid, u64 owner, u64 offset)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	u64 refs;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	path->reada = 1;
+
+	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
+					   path, bytenr, num_bytes, parent,
+					   root_objectid, owner, offset, 1);
+	if (ret != -EAGAIN) {
+		/* Either done (0) or a hard error. */
+		err = ret;
+		goto out;
+	}
+
+	/* The path still points at the extent item: bump its count by one. */
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, ei);
+	btrfs_set_extent_refs(leaf, ei, refs + 1);
+
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(path);
+
+	path->reada = 1;
+
+	/* now insert the actual backref */
+	err = insert_extent_backref(trans, root->fs_info->extent_root,
+				    path, bytenr, parent, root_objectid,
+				    owner, offset, 1);
+out:
+	btrfs_free_path(path);
+	finish_current_insert(trans, root->fs_info->extent_root);
+	del_pending_extents(trans, root->fs_info->extent_root);
+	BUG_ON(err);
+	return err;
+}
+
+/*
+ * Run finish_current_insert() and then del_pending_extents() on the extent
+ * root of @root's filesystem. Their results are not checked; always
+ * returns 0.
+ */
+int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root)
+{
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+
+	finish_current_insert(trans, extent_root);
+	del_pending_extents(trans, extent_root);
+
+	return 0;
+}
+
+/*
+ * Read the reference count and flags of the extent at @bytenr from the
+ * extent tree.
+ *
+ * @offset:   key offset to search with (replaced by root->leafsize when
+ *            @metadata is set but the skinny-metadata feature is not).
+ * @metadata: non-zero to search for a METADATA_ITEM key; on a miss, the
+ *            preceding slot and then a fresh search are tried with an
+ *            EXTENT_ITEM key of size root->leafsize.
+ * @refs:     if non-NULL, receives the reference count.
+ * @flags:    if non-NULL, receives the extent flags.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, -EIO if the
+ * extent item was not found, or the negative value from btrfs_search_slot().
+ */
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 bytenr,
+			     u64 offset, int metadata, u64 *refs, u64 *flags)
+{
+	struct btrfs_path *path;
+	int ret;
+	struct btrfs_key key;
+	struct extent_buffer *l;
+	struct btrfs_extent_item *item;
+	u32 item_size;
+	u64 num_refs;
+	u64 extent_flags;
+
+	if (metadata &&
+	    !btrfs_fs_incompat(root->fs_info,
+			       BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) {
+		offset = root->leafsize;
+		metadata = 0;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->reada = 1;
+
+	key.objectid = bytenr;
+	key.offset = offset;
+	if (metadata)
+		key.type = BTRFS_METADATA_ITEM_KEY;
+	else
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+
+again:
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
+				0, 0);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * Deal with the fact that we may have mixed SKINNY and normal refs.  If
+	 * we didn't find what we wanted check and see if we have a normal ref
+	 * right next to us, or re-search if we are on the edge of the leaf just
+	 * to make sure.
+	 */
+	if (ret > 0 && metadata) {
+		if (path->slots[0]) {
+			path->slots[0]--;
+			btrfs_item_key_to_cpu(path->nodes[0], &key,
+					      path->slots[0]);
+			if (key.objectid == bytenr &&
+			    key.type == BTRFS_EXTENT_ITEM_KEY &&
+			    key.offset == root->leafsize)
+				ret = 0;
+		}
+
+		if (ret) {
+			btrfs_release_path(path);
+			/*
+			 * key may now hold the previous item's key, so reset
+			 * objectid as well as type/offset before retrying.
+			 */
+			key.objectid = bytenr;
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			key.offset = root->leafsize;
+			metadata = 0;
+			goto again;
+		}
+	}
+
+	if (ret != 0) {
+		ret = -EIO;
+		goto out;
+	}
+
+	l = path->nodes[0];
+	item_size = btrfs_item_size_nr(l, path->slots[0]);
+	if (item_size >= sizeof(*item)) {
+		item = btrfs_item_ptr(l, path->slots[0],
+				      struct btrfs_extent_item);
+		num_refs = btrfs_extent_refs(l, item);
+		extent_flags = btrfs_extent_flags(l, item);
+	} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		struct btrfs_extent_item_v0 *ei0;
+		BUG_ON(item_size != sizeof(*ei0));
+		ei0 = btrfs_item_ptr(l, path->slots[0],
+				     struct btrfs_extent_item_v0);
+		num_refs = btrfs_extent_refs_v0(l, ei0);
+		/* FIXME: this isn't correct for data */
+		extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+#else
+		BUG();
+#endif
+	}
+	if (refs)
+		*refs = num_refs;
+	if (flags)
+		*flags = extent_flags;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * OR @flags into the extent flags of the tree block at @bytenr.
+ *
+ * The extent item is searched as a METADATA_ITEM keyed by @level when the
+ * skinny-metadata feature is set, otherwise as an EXTENT_ITEM keyed by
+ * root->leafsize; a skinny miss falls back to the preceding slot and then to
+ * a fresh EXTENT_ITEM search. Not finding the item at all is fatal (BUG()).
+ *
+ * finish_current_insert() and del_pending_extents() are run on the extent
+ * root before returning. Returns 0 on success, -ENOMEM if no path could be
+ * allocated, or a negative value from the search / V0 conversion.
+ *
+ * NOTE(review): the leaf is modified after a search with cow == 0 and is not
+ * marked dirty in this function - confirm that callers rely on this.
+ */
+int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  u64 bytenr, int level, u64 flags)
+{
+	struct btrfs_path *path;
+	int ret;
+	struct btrfs_key key;
+	struct extent_buffer *l;
+	struct btrfs_extent_item *item;
+	u32 item_size;
+	int skinny_metadata =
+		btrfs_fs_incompat(root->fs_info,
+				  BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->reada = 1;
+
+	key.objectid = bytenr;
+	if (skinny_metadata) {
+		key.offset = level;
+		key.type = BTRFS_METADATA_ITEM_KEY;
+	} else {
+		key.offset = root->leafsize;
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+	}
+
+again:
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
+				0, 0);
+	if (ret < 0)
+		goto out;
+
+	if (ret > 0 && skinny_metadata) {
+		skinny_metadata = 0;
+		if (path->slots[0]) {
+			path->slots[0]--;
+			btrfs_item_key_to_cpu(path->nodes[0], &key,
+					      path->slots[0]);
+			if (key.objectid == bytenr &&
+			    key.offset == root->leafsize &&
+			    key.type == BTRFS_EXTENT_ITEM_KEY)
+				ret = 0;
+		}
+		if (ret) {
+			btrfs_release_path(path);
+			/*
+			 * key may now hold the previous item's key, so reset
+			 * objectid as well as offset/type before retrying.
+			 */
+			key.objectid = bytenr;
+			key.offset = root->leafsize;
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			goto again;
+		}
+	}
+
+	if (ret != 0) {
+		btrfs_print_leaf(root, path->nodes[0]);
+		printk("failed to find block number %Lu\n",
+			(unsigned long long)bytenr);
+		BUG();
+	}
+	l = path->nodes[0];
+	item_size = btrfs_item_size_nr(l, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*item)) {
+		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
+					     path, (u64)-1, 0);
+		if (ret < 0)
+			goto out;
+
+		l = path->nodes[0];
+		item_size = btrfs_item_size_nr(l, path->slots[0]);
+	}
+#endif
+	BUG_ON(item_size < sizeof(*item));
+	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
+	flags |= btrfs_extent_flags(l, item);
+	btrfs_set_extent_flags(l, item, flags);
+out:
+	btrfs_free_path(path);
+	finish_current_insert(trans, root->fs_info->extent_root);
+	del_pending_extents(trans, root->fs_info->extent_root);
+	return ret;
+}
+
+/*
+ * Add (@inc != 0) or drop (@inc == 0) one reference on everything @buf
+ * points to.
+ *
+ * For a leaf, every EXTENT_DATA item that is not inline and has a non-zero
+ * disk bytenr is processed; for a node, every child block pointer is. The
+ * reference is recorded against the owner stored in @buf's header, with
+ * @buf->start as parent when @record_parent is set and 0 otherwise.
+ *
+ * Leaves are skipped entirely when root->ref_cows is not set. Returns 0 on
+ * success or the first error from btrfs_inc_extent_ref()/btrfs_free_extent().
+ */
+static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct extent_buffer *buf,
+			   int record_parent, int inc)
+{
+	u64 bytenr;
+	u64 num_bytes;
+	u64 parent;
+	u64 ref_root;
+	u32 nritems;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	int i;
+	int level;
+	int ret = 0;
+	int (*process_func)(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    u64, u64, u64, u64, u64, u64);
+
+	ref_root = btrfs_header_owner(buf);
+	nritems = btrfs_header_nritems(buf);
+	level = btrfs_header_level(buf);
+
+	if (!root->ref_cows && level == 0)
+		return 0;
+
+	process_func = inc ? btrfs_inc_extent_ref : btrfs_free_extent;
+	parent = record_parent ? buf->start : 0;
+
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		if (level != 0) {
+			/* Node: one reference per child block. */
+			bytenr = btrfs_node_blockptr(buf, i);
+			num_bytes = btrfs_level_size(root, level - 1);
+			ret = process_func(trans, root, bytenr, num_bytes,
+					   parent, ref_root, level - 1, 0);
+		} else {
+			/* Leaf: only regular/prealloc file extents count. */
+			btrfs_item_key_to_cpu(buf, &key, i);
+			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+				continue;
+			fi = btrfs_item_ptr(buf, i,
+					    struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(buf, fi) ==
+			    BTRFS_FILE_EXTENT_INLINE)
+				continue;
+			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
+			if (bytenr == 0)
+				continue;
+
+			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
+			key.offset -= btrfs_file_extent_offset(buf, fi);
+			ret = process_func(trans, root, bytenr, num_bytes,
+					   parent, ref_root, key.objectid,
+					   key.offset);
+		}
+		if (ret) {
+			WARN_ON(1);
+			goto fail;
+		}
+	}
+	return 0;
+fail:
+	WARN_ON(1);
+	return ret;
+}
+
+/*
+ * Add one reference to every extent or child block that @buf points to.
+ * Thin wrapper around __btrfs_mod_ref() with inc = 1.
+ */
+int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *buf, int record_parent)
+{
+	const int inc = 1;
+
+	return __btrfs_mod_ref(trans, root, buf, record_parent, inc);
+}
+
+/*
+ * Drop one reference from every extent or child block that @buf points to.
+ * Thin wrapper around __btrfs_mod_ref() with inc = 0.
+ */
+int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *buf, int record_parent)
+{
+	const int inc = 0;
+
+	return __btrfs_mod_ref(trans, root, buf, record_parent, inc);
+}
+
+/*
+ * Copy the in-memory block group item of @cache over its item in the extent
+ * tree and mark the leaf dirty. The item must already exist (BUG_ON
+ * otherwise).
+ *
+ * finish_current_insert() and del_pending_extents() are always run
+ * afterwards. Returns the search error if there was one, else the result of
+ * del_pending_extents().
+ */
+static int write_one_cache_group(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_block_group_cache *cache)
+{
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct extent_buffer *leaf;
+	unsigned long item_off;
+	int pending_ret;
+	int ret;
+
+	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
+	if (ret >= 0) {
+		BUG_ON(ret);
+
+		leaf = path->nodes[0];
+		item_off = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		write_extent_buffer(leaf, &cache->item, item_off,
+				    sizeof(cache->item));
+		btrfs_mark_buffer_dirty(leaf);
+		btrfs_release_path(path);
+	}
+
+	finish_current_insert(trans, extent_root);
+	pending_ret = del_pending_extents(trans, extent_root);
+
+	return ret ? ret : pending_ret;
+}
+
+/*
+ * Write every block group flagged BLOCK_GROUP_DIRTY back to the extent tree.
+ *
+ * The dirty ranges are scanned repeatedly, wrapping around to offset 0,
+ * until a scan starting from 0 finds none left. Each group's dirty bit is
+ * cleared before it is written.
+ *
+ * A failure to write one group does not stop the remaining groups from being
+ * written; the first such error is remembered and returned at the end.
+ * Returns 0 on success, -ENOMEM if no path could be allocated.
+ */
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root)
+{
+	struct extent_io_tree *block_group_cache;
+	struct btrfs_block_group_cache *cache;
+	int ret;
+	int err = 0;
+	struct btrfs_path *path;
+	u64 last = 0;
+	u64 start;
+	u64 end;
+	u64 ptr;
+
+	block_group_cache = &root->fs_info->block_group_cache;
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	while(1) {
+		ret = find_first_extent_bit(block_group_cache, last,
+					    &start, &end, BLOCK_GROUP_DIRTY);
+		if (ret) {
+			/* Nothing dirty at or after 'last'. */
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+
+		last = end + 1;
+		ret = get_state_private(block_group_cache, start, &ptr);
+		BUG_ON(ret);
+
+		clear_extent_bits(block_group_cache, start, end,
+				  BLOCK_GROUP_DIRTY, GFP_NOFS);
+
+		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
+		ret = write_one_cache_group(trans, root, path, cache);
+		/* Keep going, but do not lose the first failure. */
+		if (ret && !err)
+			err = ret;
+	}
+	btrfs_free_path(path);
+	return err;
+}
+
+/*
+ * Return the first space_info on @info's list whose flags share a bit with
+ * the block group type bits of @flags, or NULL when there is none.
+ */
+static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
+						  u64 flags)
+{
+	struct btrfs_space_info *sinfo;
+	u64 type_bits = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+	list_for_each_entry(sinfo, &info->space_info, list) {
+		if (sinfo->flags & type_bits)
+			return sinfo;
+	}
+
+	return NULL;
+}
+
+/*
+ * Subtract @total_bytes from the space_info matching @flags.
+ *
+ * Only empty block groups may be released this way, so a non-zero
+ * @bytes_used is rejected. If @space_info is non-NULL it receives the
+ * space_info that was adjusted.
+ *
+ * Returns 0 on success, -ENOTEMPTY if @bytes_used is non-zero, -ENOENT if
+ * no matching space_info exists, or -EINVAL if it holds fewer than
+ * @total_bytes.
+ */
+static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags,
+			   u64 total_bytes, u64 bytes_used,
+			   struct btrfs_space_info **space_info)
+{
+	struct btrfs_space_info *found;
+
+	/* only support free block group which is empty */
+	if (bytes_used)
+		return -ENOTEMPTY;
+
+	found = __find_space_info(fs_info, flags);
+	if (!found)
+		return -ENOENT;
+	if (found->total_bytes < total_bytes) {
+		/* Cast explicitly: %llu requires unsigned long long. */
+		fprintf(stderr,
+			"WARNING: bad space info to free %llu only have %llu\n",
+			(unsigned long long)total_bytes,
+			(unsigned long long)found->total_bytes);
+		return -EINVAL;
+	}
+	found->total_bytes -= total_bytes;
+	if (space_info)
+		*space_info = found;
+	return 0;
+}
+
+/*
+ * Account @total_bytes / @bytes_used to the space_info matching @flags,
+ * creating and linking a new space_info when none exists yet.
+ *
+ * *@space_info receives the updated or newly created entry. A warning is
+ * printed if an existing entry ends up with less total than used space.
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int update_space_info(struct btrfs_fs_info *info, u64 flags,
+			     u64 total_bytes, u64 bytes_used,
+			     struct btrfs_space_info **space_info)
+{
+	struct btrfs_space_info *sinfo = __find_space_info(info, flags);
+
+	if (!sinfo) {
+		/* First block group of this type: start a fresh entry. */
+		sinfo = kmalloc(sizeof(*sinfo), GFP_NOFS);
+		if (!sinfo)
+			return -ENOMEM;
+
+		sinfo->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+		sinfo->total_bytes = total_bytes;
+		sinfo->bytes_used = bytes_used;
+		sinfo->bytes_pinned = 0;
+		sinfo->full = 0;
+		list_add(&sinfo->list, &info->space_info);
+
+		*space_info = sinfo;
+		return 0;
+	}
+
+	sinfo->total_bytes += total_bytes;
+	sinfo->bytes_used += bytes_used;
+	if (sinfo->total_bytes < sinfo->bytes_used) {
+		fprintf(stderr, "warning, bad space info total_bytes "
+			"%llu used %llu\n",
+			(unsigned long long)sinfo->total_bytes,
+			(unsigned long long)sinfo->bytes_used);
+	}
+	*space_info = sinfo;
+	return 0;
+}
+
+
+/*
+ * Record the RAID/DUP profile bits found in @flags as available for each
+ * block group type (data, metadata, system) that @flags also carries.
+ * Does nothing when @flags has no profile bit set.
+ */
+static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+	const u64 profile_mask = BTRFS_BLOCK_GROUP_RAID0 |
+				 BTRFS_BLOCK_GROUP_RAID1 |
+				 BTRFS_BLOCK_GROUP_RAID10 |
+				 BTRFS_BLOCK_GROUP_RAID5 |
+				 BTRFS_BLOCK_GROUP_RAID6 |
+				 BTRFS_BLOCK_GROUP_DUP;
+	u64 profile = flags & profile_mask;
+
+	if (!profile)
+		return;
+
+	if (flags & BTRFS_BLOCK_GROUP_DATA)
+		fs_info->avail_data_alloc_bits |= profile;
+	if (flags & BTRFS_BLOCK_GROUP_METADATA)
+		fs_info->avail_metadata_alloc_bits |= profile;
+	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+		fs_info->avail_system_alloc_bits |= profile;
+}
+
+/*
+ * Allocate a new chunk and block group for the space type in @flags when
+ * the existing space is getting short.
+ *
+ * Nothing is done if the space_info is already marked full, or if used +
+ * pinned + @alloc_bytes stays below div_factor(total_bytes, 7). When
+ * btrfs_alloc_chunk() reports -ENOSPC the space_info is marked full.
+ *
+ * Always returns 0; any other failure trips BUG_ON().
+ */
+static int do_chunk_alloc(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *extent_root, u64 alloc_bytes,
+			  u64 flags)
+{
+	struct btrfs_space_info *sinfo;
+	u64 threshold;
+	u64 chunk_start;
+	u64 chunk_bytes;
+	int ret;
+
+	sinfo = __find_space_info(extent_root->fs_info, flags);
+	if (!sinfo) {
+		/* No accounting for this type yet: create an empty entry. */
+		ret = update_space_info(extent_root->fs_info, flags,
+					0, 0, &sinfo);
+		BUG_ON(ret);
+	}
+	BUG_ON(!sinfo);
+
+	if (sinfo->full)
+		return 0;
+
+	threshold = div_factor(sinfo->total_bytes, 7);
+	if ((sinfo->bytes_used + sinfo->bytes_pinned + alloc_bytes) <
+	    threshold)
+		return 0;
+
+	ret = btrfs_alloc_chunk(trans, extent_root, &chunk_start,
+				&chunk_bytes, sinfo->flags);
+	if (ret == -ENOSPC) {
+		sinfo->full = 1;
+		return 0;
+	}
+	BUG_ON(ret);
+
+	ret = btrfs_make_block_group(trans, extent_root, 0, sinfo->flags,
+				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+				     chunk_start, chunk_bytes);
+	BUG_ON(ret);
+
+	return 0;
+}
+
+/*
+ * Account an allocation (@alloc != 0) or a free (@alloc == 0) of
+ * [@bytenr, @bytenr + @num_bytes) in the super block, in @root's root item
+ * and in every block group the range touches.
+ *
+ * Each touched block group is flagged BLOCK_GROUP_DIRTY. On a free with
+ * @mark_free set, the freed range is also marked dirty in the free space
+ * cache.
+ *
+ * Returns 0 on success, or -1 if part of the range is not covered by any
+ * block group (the super block and root item have been updated by then).
+ */
+static int update_block_group(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      u64 bytenr, u64 num_bytes, int alloc,
+			      int mark_free)
+{
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *info = root->fs_info;
+	u64 remaining = num_bytes;
+	u64 used;
+	u64 group_off;
+	u64 group_start;
+	u64 group_end;
+
+	/* block accounting for super block */
+	used = btrfs_super_bytes_used(info->super_copy);
+	used = alloc ? used + num_bytes : used - num_bytes;
+	btrfs_set_super_bytes_used(info->super_copy, used);
+
+	/* block accounting for root item */
+	used = btrfs_root_used(&root->root_item);
+	used = alloc ? used + num_bytes : used - num_bytes;
+	btrfs_set_root_used(&root->root_item, used);
+
+	/* Walk the range one block group at a time. */
+	while (remaining) {
+		cache = btrfs_lookup_block_group(info, bytenr);
+		if (!cache)
+			return -1;
+
+		group_off = bytenr - cache->key.objectid;
+		WARN_ON(group_off > cache->key.offset);
+		group_start = cache->key.objectid;
+		group_end = group_start + cache->key.offset - 1;
+		set_extent_bits(&info->block_group_cache, group_start,
+				group_end, BLOCK_GROUP_DIRTY, GFP_NOFS);
+
+		used = btrfs_block_group_used(&cache->item);
+		/* Portion of the range that lies inside this group. */
+		num_bytes = min(remaining, cache->key.offset - group_off);
+
+		if (alloc) {
+			used += num_bytes;
+			cache->space_info->bytes_used += num_bytes;
+		} else {
+			used -= num_bytes;
+			cache->space_info->bytes_used -= num_bytes;
+			if (mark_free) {
+				set_extent_dirty(&info->free_space_cache,
+						 bytenr,
+						 bytenr + num_bytes - 1,
+						 GFP_NOFS);
+			}
+		}
+		btrfs_set_block_group_used(&cache->item, used);
+
+		remaining -= num_bytes;
+		bytenr += num_bytes;
+	}
+	return 0;
+}
+
+/*
+ * Mark (@pin != 0) or unmark (@pin == 0) the range [@bytenr, @bytenr + @num)
+ * in fs_info->pinned_extents and adjust the pinned-byte counters of every
+ * block group overlapping the range.
+ *
+ * Parts of the range that no block group covers are stepped over, at most
+ * one sector at a time, without touching any counter.  Always returns 0.
+ */
+static int update_pinned_extents(struct btrfs_root *root,
+				 u64 bytenr, u64 num, int pin)
+{
+	u64 len;
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	if (pin) {
+		set_extent_dirty(&fs_info->pinned_extents,
+				 bytenr, bytenr + num - 1, GFP_NOFS);
+	} else {
+		clear_extent_dirty(&fs_info->pinned_extents,
+				   bytenr, bytenr + num - 1, GFP_NOFS);
+	}
+	while (num > 0) {
+		cache = btrfs_lookup_block_group(fs_info, bytenr);
+		if (!cache) {
+			/* hole between block groups: nothing to account */
+			len = min((u64)root->sectorsize, num);
+			goto next;
+		}
+		/* clamp to the end of the block group that holds bytenr */
+		len = min(num, cache->key.offset -
+			  (bytenr - cache->key.objectid));
+		if (pin) {
+			cache->pinned += len;
+			cache->space_info->bytes_pinned += len;
+			fs_info->total_pinned += len;
+		} else {
+			cache->pinned -= len;
+			cache->space_info->bytes_pinned -= len;
+			fs_info->total_pinned -= len;
+		}
+next:
+		bytenr += len;
+		num -= len;
+	}
+	return 0;
+}
+
+/*
+ * Release every range that is EXTENT_DIRTY in @unpin: drop its pinned
+ * accounting, clear it from @unpin and mark it dirty in
+ * fs_info->free_space_cache.  @trans is not used.  Always returns 0.
+ */
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct extent_io_tree *unpin)
+{
+	struct extent_io_tree *free_space_cache;
+	u64 first;
+	u64 last;
+
+	free_space_cache = &root->fs_info->free_space_cache;
+
+	/* find_first_extent_bit() returns non-zero once nothing is left */
+	while (find_first_extent_bit(unpin, 0, &first, &last,
+				     EXTENT_DIRTY) == 0) {
+		update_pinned_extents(root, first, last + 1 - first, 0);
+		clear_extent_dirty(unpin, first, last, GFP_NOFS);
+		set_extent_dirty(free_space_cache, first, last, GFP_NOFS);
+	}
+	return 0;
+}
+
+/*
+ * Report whether the extent root still has deferred work queued.
+ *
+ * Returns 1 when either fs_info->extent_ins or fs_info->pending_del holds
+ * an EXTENT_LOCKED range, 0 when both are empty.
+ *
+ * The previous version consulted pending_del only when extent_ins already
+ * had an entry, so it returned 1 only if *both* trees were non-empty and
+ * missed deletions queued on their own.
+ */
+static int extent_root_pending_ops(struct btrfs_fs_info *info)
+{
+	u64 start;
+	u64 end;
+	int ret;
+
+	/* find_first_extent_bit() returns 0 when a matching range exists */
+	ret = find_first_extent_bit(&info->extent_ins, 0, &start,
+				    &end, EXTENT_LOCKED);
+	if (ret) {
+		ret = find_first_extent_bit(&info->pending_del, 0, &start,
+					    &end, EXTENT_LOCKED);
+	}
+	return ret == 0;
+}
+/*
+ * Create the extent items for every tree block queued in
+ * fs_info->extent_ins.
+ *
+ * Each EXTENT_LOCKED range carries a pending_extent_op as private state.
+ * A PENDING_EXTENT_INSERT op is turned into an extent item through
+ * alloc_reserved_tree_block(), keyed as METADATA_ITEM when the filesystem
+ * has the SKINNY_METADATA incompat flag and as EXTENT_ITEM otherwise; any
+ * other op type is a BUG.  The range is then unlocked and the op freed.
+ * Always returns 0.
+ */
+static int finish_current_insert(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *extent_root)
+{
+	struct btrfs_fs_info *info = extent_root->fs_info;
+	struct pending_extent_op *op;
+	struct btrfs_key ins_key;
+	u64 first;
+	u64 last;
+	u64 state_priv;
+	int ret;
+	int skinny_metadata =
+		btrfs_fs_incompat(extent_root->fs_info,
+				  BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
+
+	for (;;) {
+		ret = find_first_extent_bit(&info->extent_ins, 0, &first,
+					    &last, EXTENT_LOCKED);
+		if (ret)
+			break;
+
+		ret = get_state_private(&info->extent_ins, first,
+					&state_priv);
+		BUG_ON(ret);
+		op = (struct pending_extent_op *)(unsigned long)state_priv;
+
+		if (op->type != PENDING_EXTENT_INSERT) {
+			BUG_ON(1);
+		} else {
+			ins_key.objectid = first;
+			if (skinny_metadata) {
+				ins_key.offset = op->level;
+				ins_key.type = BTRFS_METADATA_ITEM_KEY;
+			} else {
+				ins_key.offset = op->num_bytes;
+				ins_key.type = BTRFS_EXTENT_ITEM_KEY;
+			}
+			ret = alloc_reserved_tree_block(trans, extent_root,
+						extent_root->root_key.objectid,
+						trans->transid,
+						op->flags,
+						&op->key,
+						op->level, &ins_key);
+			BUG_ON(ret);
+		}
+
+		clear_extent_bits(&info->extent_ins, first, last,
+				  EXTENT_LOCKED, GFP_NOFS);
+		kfree(op);
+	}
+	return 0;
+}
+
+/*
+ * Decide what happens to a freed extent: reuse it right away or pin it.
+ *
+ * For a tree block (@is_data == 0) whose buffer is cached, up to date,
+ * not owned by the tree log, created in this transaction and not yet
+ * flagged BTRFS_HEADER_FLAG_WRITTEN, the buffer is cleaned and 1 is
+ * returned.  In every other case the range is pinned through
+ * update_pinned_extents() and 0 is returned.
+ *
+ * The former local "err" was never assigned after its initialisation to 0,
+ * so it and its BUG_ON(err < 0) were dead code and have been dropped.
+ */
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  u64 bytenr, u64 num_bytes, int is_data)
+{
+	struct extent_buffer *buf;
+
+	if (is_data)
+		goto pinit;
+
+	buf = btrfs_find_tree_block(root, bytenr, num_bytes);
+	if (!buf)
+		goto pinit;
+
+	/* we can reuse a block if it hasn't been written
+	 * and it is from this transaction.  We can't
+	 * reuse anything from the tree log root because
+	 * it has tiny sub-transactions.
+	 */
+	if (btrfs_buffer_uptodate(buf, 0)) {
+		u64 header_owner = btrfs_header_owner(buf);
+		u64 header_transid = btrfs_header_generation(buf);
+		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
+		    header_transid == trans->transid &&
+		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+			clean_tree_block(NULL, root, buf);
+			free_extent_buffer(buf);
+			return 1;
+		}
+	}
+	free_extent_buffer(buf);
+pinit:
+	update_pinned_extents(root, bytenr, num_bytes, 1);
+	return 0;
+}
+
+/*
+ * Pin @num_bytes starting at @bytenr, accounted through the extent root.
+ */
+void btrfs_pin_extent(struct btrfs_fs_info *fs_info,
+		      u64 bytenr, u64 num_bytes)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+
+	update_pinned_extents(extent_root, bytenr, num_bytes, 1);
+}
+
+/*
+ * Undo a pin of @num_bytes starting at @bytenr, accounted through the
+ * extent root.
+ */
+void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
+			u64 bytenr, u64 num_bytes)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+
+	update_pinned_extents(extent_root, bytenr, num_bytes, 0);
+}
+
+/*
+ * Drop @refs_to_drop references from the extent [@bytenr, @bytenr +
+ * @num_bytes) for the back reference described by @parent, @root_objectid,
+ * @owner_objectid and @owner_offset.
+ *
+ * fs_info->free_extent_hook, when set, is called first.  An @owner_objectid
+ * of BTRFS_FIRST_FREE_OBJECTID or above is treated as data; anything below
+ * is a tree block, for which @refs_to_drop must be 1.
+ *
+ * While references remain, only the count and/or the back reference are
+ * updated.  When the count reaches zero the extent is pinned (or handed to
+ * extent_ops->free_extent), its item(s) are deleted, checksums are removed
+ * for data, and the block group accounting is updated.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated and -EIO if
+ * the back reference is not found.  Other internal failures trip BUG_ON().
+ * Except on the -ENOMEM path, finish_current_insert() runs before
+ * returning.
+ */
+static int __free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 owner_objectid,
+ u64 owner_offset, int refs_to_drop)
+{
+
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct btrfs_extent_ops *ops = root->fs_info->extent_ops;
+ struct btrfs_root *extent_root = root->fs_info->extent_root;
+ struct extent_buffer *leaf;
+ struct btrfs_extent_item *ei;
+ struct btrfs_extent_inline_ref *iref;
+ int ret;
+ int is_data;
+ int extent_slot = 0;
+ int found_extent = 0;
+ int num_to_del = 1;
+ u32 item_size;
+ u64 refs;
+ int skinny_metadata =
+ btrfs_fs_incompat(extent_root->fs_info,
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
+
+ if (root->fs_info->free_extent_hook) {
+ root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes,
+ parent, root_objectid, owner_objectid,
+ owner_offset, refs_to_drop);
+
+ }
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ path->reada = 1;
+
+ /* data extents never use the skinny METADATA_ITEM key */
+ is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
+ if (is_data)
+ skinny_metadata = 0;
+ BUG_ON(!is_data && refs_to_drop != 1);
+
+ ret = lookup_extent_backref(trans, extent_root, path, &iref,
+ bytenr, num_bytes, parent,
+ root_objectid, owner_objectid,
+ owner_offset);
+ if (ret == 0) {
+ /*
+ * Walk backwards from the back reference (at most a few slots)
+ * looking for the extent item of the same bytenr in this leaf.
+ */
+ extent_slot = path->slots[0];
+ while (extent_slot >= 0) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ extent_slot);
+ if (key.objectid != bytenr)
+ break;
+ if (key.type == BTRFS_EXTENT_ITEM_KEY &&
+ key.offset == num_bytes) {
+ found_extent = 1;
+ break;
+ }
+ if (key.type == BTRFS_METADATA_ITEM_KEY &&
+ key.offset == owner_objectid) {
+ found_extent = 1;
+ break;
+ }
+ if (path->slots[0] - extent_slot > 5)
+ break;
+ extent_slot--;
+ }
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+ /*
+ * NOTE(review): extent_slot can be -1 here when the loop above ran
+ * off the front of the leaf; the size read is then only consumed
+ * with found_extent == 0 -- confirm the read itself is harmless.
+ */
+ item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
+ if (found_extent && item_size < sizeof(*ei))
+ found_extent = 0;
+#endif
+ if (!found_extent) {
+ /*
+ * The extent item is not next to the back reference: remove the
+ * keyed back reference first, then search for the extent item.
+ */
+ BUG_ON(iref);
+ ret = remove_extent_backref(trans, extent_root, path,
+ NULL, refs_to_drop,
+ is_data);
+ BUG_ON(ret);
+ btrfs_release_path(path);
+
+ key.objectid = bytenr;
+
+ if (skinny_metadata) {
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ key.offset = owner_objectid;
+ } else {
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = num_bytes;
+ }
+
+ ret = btrfs_search_slot(trans, extent_root,
+ &key, path, -1, 1);
+ /* skinny lookup missed: the previous slot may hold a full EXTENT_ITEM */
+ if (ret > 0 && skinny_metadata && path->slots[0]) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0],
+ &key,
+ path->slots[0]);
+ if (key.objectid == bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY &&
+ key.offset == num_bytes)
+ ret = 0;
+ }
+
+ /* still not found: retry the search with the EXTENT_ITEM key */
+ if (ret > 0 && skinny_metadata) {
+ skinny_metadata = 0;
+ btrfs_release_path(path);
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = num_bytes;
+ ret = btrfs_search_slot(trans, extent_root,
+ &key, path, -1, 1);
+ }
+
+ if (ret) {
+ printk(KERN_ERR "umm, got %d back from search"
+ ", was looking for %llu\n", ret,
+ (unsigned long long)bytenr);
+ btrfs_print_leaf(extent_root, path->nodes[0]);
+ }
+ BUG_ON(ret);
+ extent_slot = path->slots[0];
+ }
+ } else {
+ printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
+ "parent %llu root %llu owner %llu offset %llu\n",
+ (unsigned long long)bytenr,
+ (unsigned long long)parent,
+ (unsigned long long)root_objectid,
+ (unsigned long long)owner_objectid,
+ (unsigned long long)owner_offset);
+ ret = -EIO;
+ goto fail;
+ }
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, extent_slot);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+ /* item smaller than the current struct: convert the v0 item, then re-search */
+ if (item_size < sizeof(*ei)) {
+ BUG_ON(found_extent || extent_slot != path->slots[0]);
+ ret = convert_extent_item_v0(trans, extent_root, path,
+ owner_objectid, 0);
+ BUG_ON(ret < 0);
+
+ btrfs_release_path(path);
+
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = num_bytes;
+
+ ret = btrfs_search_slot(trans, extent_root, &key, path,
+ -1, 1);
+ if (ret) {
+ printk(KERN_ERR "umm, got %d back from search"
+ ", was looking for %llu\n", ret,
+ (unsigned long long)bytenr);
+ btrfs_print_leaf(extent_root, path->nodes[0]);
+ }
+ BUG_ON(ret);
+ extent_slot = path->slots[0];
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, extent_slot);
+ }
+#endif
+ BUG_ON(item_size < sizeof(*ei));
+ ei = btrfs_item_ptr(leaf, extent_slot,
+ struct btrfs_extent_item);
+ /* for a tree block with a full EXTENT_ITEM, owner_objectid is its level */
+ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
+ key.type == BTRFS_EXTENT_ITEM_KEY) {
+ struct btrfs_tree_block_info *bi;
+ BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
+ bi = (struct btrfs_tree_block_info *)(ei + 1);
+ WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
+ }
+
+ refs = btrfs_extent_refs(leaf, ei);
+ BUG_ON(refs < refs_to_drop);
+ refs -= refs_to_drop;
+
+ if (refs > 0) {
+ /*
+ * In the case of inline back ref, reference count will
+ * be updated by remove_extent_backref
+ */
+ if (iref) {
+ BUG_ON(!found_extent);
+ } else {
+ btrfs_set_extent_refs(leaf, ei, refs);
+ btrfs_mark_buffer_dirty(leaf);
+ }
+ if (found_extent) {
+ ret = remove_extent_backref(trans, extent_root, path,
+ iref, refs_to_drop,
+ is_data);
+ BUG_ON(ret);
+ }
+ } else {
+ /* last reference gone: the extent itself is released */
+ int mark_free = 0;
+ int pin = 1;
+
+ if (found_extent) {
+ BUG_ON(is_data && refs_to_drop !=
+ extent_data_ref_count(root, path, iref));
+ if (iref) {
+ BUG_ON(path->slots[0] != extent_slot);
+ } else {
+ /* keyed back reference sits right after the extent item: delete both */
+ BUG_ON(path->slots[0] != extent_slot + 1);
+ path->slots[0] = extent_slot;
+ num_to_del = 2;
+ }
+ }
+
+ /* a positive return from the free_extent op skips pinning */
+ if (ops && ops->free_extent) {
+ ret = ops->free_extent(root, bytenr, num_bytes);
+ if (ret > 0) {
+ pin = 0;
+ mark_free = 0;
+ }
+ }
+
+ if (pin) {
+ ret = pin_down_bytes(trans, root, bytenr, num_bytes,
+ is_data);
+ if (ret > 0)
+ mark_free = 1;
+ BUG_ON(ret < 0);
+ }
+
+ ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
+ num_to_del);
+ BUG_ON(ret);
+ btrfs_release_path(path);
+
+ if (is_data) {
+ ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
+ BUG_ON(ret);
+ }
+
+ /* NOTE(review): the return value of update_block_group() is ignored */
+ update_block_group(trans, root, bytenr, num_bytes, 0, mark_free);
+ }
+fail:
+ btrfs_free_path(path);
+ finish_current_insert(trans, extent_root);
+ return ret;
+}
+
+/*
+ * Process every extent-root block queued for deletion in
+ * fs_info->pending_del (an extent_io_tree keyed by byte range).
+ *
+ * Each EXTENT_LOCKED range is unlocked and then handled one of two ways:
+ *  - if the same range is also locked in fs_info->extent_ins, the queued
+ *    insert is dropped as well and neither operation touches the tree;
+ *  - otherwise the block is released through __free_extent().
+ *
+ * Returns 0, or the last non-zero value returned by __free_extent().
+ */
+static int del_pending_extents(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *extent_root)
+{
+	struct extent_io_tree *del_tree;
+	struct extent_io_tree *ins_tree;
+	struct pending_extent_op *op;
+	u64 first;
+	u64 last;
+	u64 state_priv;
+	int ret;
+	int err = 0;
+
+	ins_tree = &extent_root->fs_info->extent_ins;
+	del_tree = &extent_root->fs_info->pending_del;
+
+	for (;;) {
+		ret = find_first_extent_bit(del_tree, 0, &first, &last,
+					    EXTENT_LOCKED);
+		if (ret)
+			break;
+
+		ret = get_state_private(del_tree, first, &state_priv);
+		BUG_ON(ret);
+		op = (struct pending_extent_op *)(unsigned long)state_priv;
+
+		clear_extent_bits(del_tree, first, last, EXTENT_LOCKED,
+				  GFP_NOFS);
+
+		if (test_range_bit(ins_tree, first, last,
+				   EXTENT_LOCKED, 0)) {
+			/* an insert is still queued for this range */
+			kfree(op);
+			ret = get_state_private(ins_tree, first, &state_priv);
+			BUG_ON(ret);
+			op = (struct pending_extent_op *)
+				(unsigned long)state_priv;
+
+			clear_extent_bits(ins_tree, first, last,
+					  EXTENT_LOCKED, GFP_NOFS);
+
+			if (op->type == PENDING_BACKREF_UPDATE)
+				BUG_ON(1);
+
+			kfree(op);
+		} else {
+			ret = __free_extent(trans, extent_root,
+					    first, last + 1 - first, 0,
+					    extent_root->root_key.objectid,
+					    op->level, 0, 1);
+			kfree(op);
+		}
+		if (ret)
+			err = ret;
+	}
+	return err;
+}
+
+/*
+ * Drop one reference on the extent [@bytenr, @bytenr + @num_bytes).
+ *
+ * When @root is the extent root itself the free is only queued: a
+ * PENDING_EXTENT_DELETE op (with @owner stored as the level) is attached to
+ * the range in fs_info->pending_del and 0 is returned.
+ *
+ * For any other root the reference is dropped through __free_extent() and
+ * the queued deletions are processed afterwards.  Returns the
+ * __free_extent() result if non-zero, else that of del_pending_extents().
+ */
+int btrfs_free_extent(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root,
+		      u64 bytenr, u64 num_bytes, u64 parent,
+		      u64 root_objectid, u64 owner, u64 offset)
+{
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct pending_extent_op *op;
+	int ret;
+	int pending_ret;
+
+	WARN_ON(num_bytes < root->sectorsize);
+
+	if (root != extent_root) {
+		ret = __free_extent(trans, root, bytenr, num_bytes, parent,
+				    root_objectid, owner, offset, 1);
+		pending_ret = del_pending_extents(trans,
+						  root->fs_info->extent_root);
+		if (ret)
+			return ret;
+		return pending_ret;
+	}
+
+	op = kmalloc(sizeof(*op), GFP_NOFS);
+	BUG_ON(!op);
+
+	op->type = PENDING_EXTENT_DELETE;
+	op->bytenr = bytenr;
+	op->num_bytes = num_bytes;
+	op->level = (int)owner;
+
+	set_extent_bits(&root->fs_info->pending_del,
+			bytenr, bytenr + num_bytes - 1,
+			EXTENT_LOCKED, GFP_NOFS);
+	set_state_private(&root->fs_info->pending_del,
+			  bytenr, (unsigned long)op);
+	return 0;
+}
+
+/*
+ * Round @val up to the next multiple of root->stripesize.  The mask
+ * arithmetic assumes stripesize is a power of two.
+ */
+static u64 stripe_align(struct btrfs_root *root, u64 val)
+{
+	u64 mask = ((u64)root->stripesize - 1);
+
+	return (val + mask) & ~mask;
+}
+
+/*
+ * Search the block groups for a free range of @num_bytes bytes.
+ * The key @ins is changed to record the result:
+ * ins->objectid == start of the range (byte number)
+ * ins->type == BTRFS_EXTENT_ITEM_KEY
+ * ins->offset == @num_bytes
+ * Any available space before @search_start is skipped.
+ *
+ * A candidate is rejected when it crosses the end of its block group, is
+ * locked in extent_ins, is pinned, lies in fs_info->excluded_extents, or
+ * overlaps [@exclude_start, @exclude_start + @exclude_nr).  For non-data
+ * allocations it must also not cross a stripe boundary.
+ *
+ * @empty_size is extra room requested after the extent; it is dropped from
+ * the requirement once the search has wrapped around twice.
+ * @search_end is not used by this function.
+ *
+ * Returns 0 on success or -ENOSPC when a full scan finds nothing.
+ */
+static int noinline find_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *orig_root,
+ u64 num_bytes, u64 empty_size,
+ u64 search_start, u64 search_end,
+ u64 hint_byte, struct btrfs_key *ins,
+ u64 exclude_start, u64 exclude_nr,
+ int data)
+{
+ int ret;
+ u64 orig_search_start = search_start;
+ struct btrfs_root * root = orig_root->fs_info->extent_root;
+ struct btrfs_fs_info *info = root->fs_info;
+ u64 total_needed = num_bytes;
+ struct btrfs_block_group_cache *block_group;
+ int full_scan = 0;
+ int wrapped = 0;
+
+ WARN_ON(num_bytes < root->sectorsize);
+ btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
+
+ search_start = stripe_align(root, search_start);
+
+ /* pick the starting block group from the hint, else from the transaction */
+ if (hint_byte) {
+ block_group = btrfs_lookup_first_block_group(info, hint_byte);
+ if (!block_group)
+ hint_byte = search_start;
+ block_group = btrfs_find_block_group(root, block_group,
+ hint_byte, data, 1);
+ } else {
+ block_group = btrfs_find_block_group(root,
+ trans->block_group,
+ search_start, data, 1);
+ }
+
+ total_needed += empty_size;
+
+check_failed:
+ /* (re)start the search at search_start within block_group */
+ search_start = stripe_align(root, search_start);
+ if (!block_group) {
+ block_group = btrfs_lookup_first_block_group(info,
+ search_start);
+ if (!block_group)
+ block_group = btrfs_lookup_first_block_group(info,
+ orig_search_start);
+ }
+ ret = find_search_start(root, &block_group, &search_start,
+ total_needed, data);
+ if (ret)
+ goto new_group;
+
+ ins->objectid = search_start;
+ ins->offset = num_bytes;
+
+ /* candidate runs past the end of its block group */
+ if (ins->objectid + num_bytes >
+ block_group->key.objectid + block_group->key.offset) {
+ search_start = block_group->key.objectid +
+ block_group->key.offset;
+ goto new_group;
+ }
+
+ /* candidate overlaps a range locked in extent_ins */
+ if (test_range_bit(&info->extent_ins, ins->objectid,
+ ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
+ search_start = ins->objectid + num_bytes;
+ goto new_group;
+ }
+
+ /* candidate overlaps a pinned range */
+ if (test_range_bit(&info->pinned_extents, ins->objectid,
+ ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
+ search_start = ins->objectid + num_bytes;
+ goto new_group;
+ }
+
+ if (info->excluded_extents &&
+ test_range_bit(info->excluded_extents, ins->objectid,
+ ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
+ search_start = ins->objectid + num_bytes;
+ goto new_group;
+ }
+
+ /* candidate overlaps the caller's exclusion window */
+ if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
+ ins->objectid < exclude_start + exclude_nr)) {
+ search_start = exclude_start + exclude_nr;
+ goto new_group;
+ }
+
+ if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
+ /* non-data extents must not cross a stripe boundary */
+ if (check_crossing_stripes(ins->objectid, num_bytes)) {
+ search_start = round_down(ins->objectid + num_bytes,
+ BTRFS_STRIPE_LEN);
+ goto new_group;
+ }
+ /* remember the block group in the transaction handle */
+ block_group = btrfs_lookup_block_group(info, ins->objectid);
+ if (block_group)
+ trans->block_group = block_group;
+ }
+ ins->offset = num_bytes;
+ return 0;
+
+new_group:
+ /*
+ * No block group at or after search_start: wrap around to the
+ * original start.  First wrap sets "wrapped"; the second drops
+ * empty_size and sets "full_scan"; the third gives up with -ENOSPC.
+ */
+ block_group = btrfs_lookup_first_block_group(info, search_start);
+ if (!block_group) {
+ search_start = orig_search_start;
+ if (full_scan) {
+ ret = -ENOSPC;
+ goto error;
+ }
+ if (wrapped) {
+ if (!full_scan)
+ total_needed -= empty_size;
+ full_scan = 1;
+ } else
+ wrapped = 1;
+ }
+ cond_resched();
+ block_group = btrfs_find_block_group(root, block_group,
+ search_start, data, 0);
+ goto check_failed;
+
+error:
+ return ret;
+}
+
+/*
+ * Reserve @num_bytes of free space and describe it in @ins.
+ *
+ * When fs_info->extent_ops is set, the allocation is delegated to its
+ * alloc_extent() hook.  Otherwise @data selects the block group type:
+ * non-zero means data, zero means system (for the chunk root or while
+ * system_allocs is positive) or metadata.  The type is combined with the
+ * matching allocation profile, new chunks are allocated as needed for
+ * roots with ref_cows set, and find_free_extent() performs the search.
+ *
+ * On success the reserved range is cleared from the free space cache and 0
+ * is returned.  If find_free_extent() fails (e.g. -ENOSPC) the error is
+ * returned to the caller and @ins must not be used; this used to trip
+ * BUG_ON(), aborting the program on a full filesystem.
+ */
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 num_bytes, u64 empty_size,
+			 u64 hint_byte, u64 search_end,
+			 struct btrfs_key *ins, int data)
+{
+	int ret;
+	u64 search_start = 0;
+	u64 alloc_profile;
+	struct btrfs_fs_info *info = root->fs_info;
+
+	if (info->extent_ops) {
+		struct btrfs_extent_ops *ops = info->extent_ops;
+		ret = ops->alloc_extent(root, num_bytes, hint_byte, ins, !data);
+		BUG_ON(ret);
+		goto found;
+	}
+
+	/* from here on "data" holds block group flags, not a boolean */
+	if (data) {
+		alloc_profile = info->avail_data_alloc_bits &
+				info->data_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
+	} else if ((info->system_allocs > 0 || root == info->chunk_root) &&
+		   info->system_allocs >= 0) {
+		alloc_profile = info->avail_system_alloc_bits &
+				info->system_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
+	} else {
+		alloc_profile = info->avail_metadata_alloc_bits &
+				info->metadata_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
+	}
+
+	if (root->ref_cows) {
+		/* a non-metadata allocation also needs metadata room */
+		if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
+			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+					     num_bytes,
+					     BTRFS_BLOCK_GROUP_METADATA);
+			BUG_ON(ret);
+		}
+		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+				     num_bytes + 2 * 1024 * 1024, data);
+		BUG_ON(ret);
+	}
+
+	WARN_ON(num_bytes < root->sectorsize);
+	ret = find_free_extent(trans, root, num_bytes, empty_size,
+			       search_start, search_end, hint_byte, ins,
+			       trans->alloc_exclude_start,
+			       trans->alloc_exclude_nr, data);
+	/* nothing was reserved: report it instead of aborting */
+	if (ret)
+		return ret;
+found:
+	clear_extent_dirty(&root->fs_info->free_space_cache,
+			   ins->objectid, ins->objectid + ins->offset - 1,
+			   GFP_NOFS);
+	return ret;
+}
+
+/*
+ * Insert the extent item for an already reserved tree block.
+ *
+ * The item is created at key @ins in the extent tree with a reference
+ * count of 1, the given @generation, and @flags plus
+ * BTRFS_EXTENT_FLAG_TREE_BLOCK.  Without the SKINNY_METADATA incompat flag
+ * a btrfs_tree_block_info carrying @key and @level follows the extent
+ * item.  The item ends with an inline TREE_BLOCK_REF pointing at
+ * @root_objectid.
+ *
+ * Finally root->leafsize bytes at ins->objectid are accounted as allocated.
+ * Returns the result of update_block_group(); insertion failures trip
+ * BUG_ON().
+ */
+static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 root_objectid, u64 generation,
+				     u64 flags, struct btrfs_disk_key *key,
+				     int level, struct btrfs_key *ins)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_extent_item *extent_item;
+	struct btrfs_tree_block_info *block_info;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	int ret;
+	u32 size = sizeof(*extent_item) + sizeof(*iref);
+	int skinny_metadata =
+		btrfs_fs_incompat(fs_info,
+				  BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
+
+	/* the full format carries the block info between item and ref */
+	if (!skinny_metadata)
+		size += sizeof(*block_info);
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
+				      ins, size);
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	extent_item = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_item);
+	btrfs_set_extent_refs(leaf, extent_item, 1);
+	btrfs_set_extent_generation(leaf, extent_item, generation);
+	btrfs_set_extent_flags(leaf, extent_item,
+			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
+
+	if (!skinny_metadata) {
+		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
+		btrfs_set_tree_block_key(leaf, block_info, key);
+		btrfs_set_tree_block_level(leaf, block_info, level);
+		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
+	} else {
+		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+	}
+
+	btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
+	btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_free_path(path);
+
+	return update_block_group(trans, root, ins->objectid, root->leafsize,
+				  1, 0);
+}
+
+/*
+ * Reserve space for a tree block and record its extent item.
+ *
+ * For blocks of the extent tree itself (@root_objectid ==
+ * BTRFS_EXTENT_TREE_OBJECTID) the insert is only queued as a
+ * PENDING_EXTENT_INSERT op on fs_info->extent_ins.  For all other trees the
+ * extent item is inserted right away (keyed as METADATA_ITEM with @level as
+ * offset on SKINNY_METADATA filesystems) and the queued extent-root
+ * inserts and deletions are processed afterwards.
+ *
+ * Returns 0 for the queued case, otherwise the result of
+ * alloc_reserved_tree_block().  A failed reservation trips BUG_ON().
+ */
+static int alloc_tree_block(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, u64 num_bytes,
+			    u64 root_objectid, u64 generation,
+			    u64 flags, struct btrfs_disk_key *key,
+			    int level, u64 empty_size, u64 hint_byte,
+			    u64 search_end, struct btrfs_key *ins)
+{
+	struct pending_extent_op *op;
+	int ret;
+
+	ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
+				   hint_byte, search_end, ins, 0);
+	BUG_ON(ret);
+
+	if (root_objectid != BTRFS_EXTENT_TREE_OBJECTID) {
+		if (btrfs_fs_incompat(root->fs_info,
+				BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) {
+			ins->offset = level;
+			ins->type = BTRFS_METADATA_ITEM_KEY;
+		}
+		ret = alloc_reserved_tree_block(trans, root, root_objectid,
+						generation, flags,
+						key, level, ins);
+		finish_current_insert(trans, root->fs_info->extent_root);
+		del_pending_extents(trans, root->fs_info->extent_root);
+		return ret;
+	}
+
+	/* extent tree block: defer the insert */
+	op = kmalloc(sizeof(*op), GFP_NOFS);
+	BUG_ON(!op);
+
+	op->type = PENDING_EXTENT_INSERT;
+	op->bytenr = ins->objectid;
+	op->num_bytes = ins->offset;
+	op->level = level;
+	op->flags = flags;
+	memcpy(&op->key, key, sizeof(*key));
+
+	set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
+			ins->objectid + ins->offset - 1,
+			EXTENT_LOCKED, GFP_NOFS);
+	set_state_private(&root->fs_info->extent_ins,
+			  ins->objectid, (unsigned long)op);
+	return ret;
+}
+
+/*
+ * helper function to allocate a block for a given tree
+ *
+ * Returns the new tree buffer, marked up to date, with
+ * trans->blocks_used incremented.  On failure an ERR_PTR() is returned
+ * (not NULL): the error from alloc_tree_block() is passed through, and a
+ * failure to get the buffer frees the extent and then hits BUG_ON(1).
+ */
+struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					u32 blocksize, u64 root_objectid,
+					struct btrfs_disk_key *key, int level,
+					u64 hint, u64 empty_size)
+{
+	struct extent_buffer *eb;
+	struct btrfs_key ins;
+	int ret;
+
+	ret = alloc_tree_block(trans, root, blocksize, root_objectid,
+			       trans->transid, 0, key, level,
+			       empty_size, hint, (u64)-1, &ins);
+	if (ret) {
+		BUG_ON(ret > 0);
+		return ERR_PTR(ret);
+	}
+
+	eb = btrfs_find_create_tree_block(root->fs_info, ins.objectid,
+					  blocksize);
+	if (eb) {
+		btrfs_set_buffer_uptodate(eb);
+		trans->blocks_used++;
+		return eb;
+	}
+
+	/* no buffer: give the extent back before bailing out */
+	btrfs_free_extent(trans, root, ins.objectid, ins.offset,
+			  0, root->root_key.objectid, level, 0);
+	BUG_ON(1);
+	return ERR_PTR(-ENOMEM);
+}
+
+#if 0
+
+/*
+ * Compiled out (inside "#if 0").
+ *
+ * Walks every item of @leaf and, for each non-inline file extent item
+ * with a non-zero disk bytenr, drops a reference on the data extent it
+ * points to.  Returns 0; a failing btrfs_free_extent() trips BUG_ON().
+ *
+ * NOTE(review): the btrfs_free_extent() call below passes nine arguments
+ * while the btrfs_free_extent() defined earlier in this file takes eight,
+ * so this code would need updating before it could be re-enabled.
+ */
+static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *leaf)
+{
+ u64 leaf_owner;
+ u64 leaf_generation;
+ struct btrfs_key key;
+ struct btrfs_file_extent_item *fi;
+ int i;
+ int nritems;
+ int ret;
+
+ BUG_ON(!btrfs_is_leaf(leaf));
+ nritems = btrfs_header_nritems(leaf);
+ leaf_owner = btrfs_header_owner(leaf);
+ leaf_generation = btrfs_header_generation(leaf);
+
+ for (i = 0; i < nritems; i++) {
+ u64 disk_bytenr;
+
+ btrfs_item_key_to_cpu(leaf, &key, i);
+ if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+ continue;
+ fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+ /* inline extents have no separate data extent to free */
+ if (btrfs_file_extent_type(leaf, fi) ==
+ BTRFS_FILE_EXTENT_INLINE)
+ continue;
+ /*
+ * FIXME make sure to insert a trans record that
+ * repeats the snapshot del on crash
+ */
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ /* a zero disk bytenr has no extent behind it; skip */
+ if (disk_bytenr == 0)
+ continue;
+ ret = btrfs_free_extent(trans, root, disk_bytenr,
+ btrfs_file_extent_disk_num_bytes(leaf, fi),
+ leaf->start, leaf_owner, leaf_generation,
+ key.objectid, 0);
+ BUG_ON(ret);
+ }
+ return 0;
+}
+
+/*
+ * Compiled out (inside "#if 0").
+ *
+ * Issues readahead for the blocks referenced from @node, starting at
+ * @slot.  Blocks more than 32KiB away from the end of the previously read
+ * block, and blocks (other than the one at @slot) whose reference count is
+ * not 1, are skipped; the loop stops after 32 skips or on a readahead
+ * error.  fs_mutex is dropped around each readahead call.
+ *
+ * NOTE(review): the function returns immediately when the node level is
+ * non-zero, yet the loop body uses "level - 1"; the test looks inverted --
+ * confirm before re-enabling.
+ */
+static void noinline reada_walk_down(struct btrfs_root *root,
+ struct extent_buffer *node,
+ int slot)
+{
+ u64 bytenr;
+ u64 last = 0;
+ u32 nritems;
+ u32 refs;
+ u32 blocksize;
+ int ret;
+ int i;
+ int level;
+ int skipped = 0;
+
+ nritems = btrfs_header_nritems(node);
+ level = btrfs_header_level(node);
+ if (level)
+ return;
+
+ for (i = slot; i < nritems && skipped < 32; i++) {
+ bytenr = btrfs_node_blockptr(node, i);
+ /* too far from the last block read: not worth reading ahead */
+ if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
+ (last > bytenr && last - bytenr > 32 * 1024))) {
+ skipped++;
+ continue;
+ }
+ blocksize = btrfs_level_size(root, level - 1);
+ if (i != slot) {
+ ret = btrfs_lookup_extent_ref(NULL, root, bytenr,
+ blocksize, &refs);
+ BUG_ON(ret);
+ if (refs != 1) {
+ skipped++;
+ continue;
+ }
+ }
+ mutex_unlock(&root->fs_info->fs_mutex);
+ ret = readahead_tree_block(root, bytenr, blocksize,
+ btrfs_node_ptr_generation(node, i));
+ last = bytenr + blocksize;
+ cond_resched();
+ mutex_lock(&root->fs_info->fs_mutex);
+ if (ret)
+ break;
+ }
+}
+
+/*
+ * helper function for drop_snapshot, this walks down the tree dropping ref
+ * counts as it goes.
+ *
+ * Compiled out (inside "#if 0").
+ *
+ * Starting at path->nodes[*level], descends while each child has exactly
+ * one reference; children with any other count only have one reference
+ * dropped and are not entered.  At level 0 the leaf's file extents are
+ * released via drop_leaf_ref().  On the way out the block at *level is
+ * freed, removed from the path, and *level is incremented.  Returns 0.
+ */
+static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int *level)
+{
+ u64 root_owner;
+ u64 root_gen;
+ u64 bytenr;
+ u64 ptr_gen;
+ struct extent_buffer *next;
+ struct extent_buffer *cur;
+ struct extent_buffer *parent;
+ u32 blocksize;
+ int ret;
+ u32 refs;
+
+ WARN_ON(*level < 0);
+ WARN_ON(*level >= BTRFS_MAX_LEVEL);
+ ret = btrfs_lookup_extent_ref(trans, root,
+ path->nodes[*level]->start,
+ path->nodes[*level]->len, &refs);
+ BUG_ON(ret);
+ /* shared block: just drop our reference below, do not descend */
+ if (refs > 1)
+ goto out;
+
+ /*
+ * walk down to the last node level and free all the leaves
+ */
+ while(*level >= 0) {
+ WARN_ON(*level < 0);
+ WARN_ON(*level >= BTRFS_MAX_LEVEL);
+ cur = path->nodes[*level];
+
+ if (btrfs_header_level(cur) != *level)
+ WARN_ON(1);
+
+ if (path->slots[*level] >=
+ btrfs_header_nritems(cur))
+ break;
+ if (*level == 0) {
+ ret = drop_leaf_ref(trans, root, cur);
+ BUG_ON(ret);
+ break;
+ }
+ bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
+ ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+ blocksize = btrfs_level_size(root, *level - 1);
+ ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
+ &refs);
+ BUG_ON(ret);
+ /* child not exclusively ours: drop one ref and move to the next slot */
+ if (refs != 1) {
+ parent = path->nodes[*level];
+ root_owner = btrfs_header_owner(parent);
+ root_gen = btrfs_header_generation(parent);
+ path->slots[*level]++;
+ ret = btrfs_free_extent(trans, root, bytenr, blocksize,
+ parent->start, root_owner,
+ root_gen, *level - 1, 1);
+ BUG_ON(ret);
+ continue;
+ }
+ next = btrfs_find_tree_block(root, bytenr, blocksize);
+ if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
+ free_extent_buffer(next);
+ reada_walk_down(root, cur, path->slots[*level]);
+ mutex_unlock(&root->fs_info->fs_mutex);
+ next = read_tree_block(root, bytenr, blocksize,
+ ptr_gen);
+ mutex_lock(&root->fs_info->fs_mutex);
+ if (!extent_buffer_uptodate(next)) {
+ if (IS_ERR(next))
+ ret = PTR_ERR(next);
+ else
+ ret = -EIO;
+ break;
+ }
+ }
+ WARN_ON(*level <= 0);
+ if (path->nodes[*level-1])
+ free_extent_buffer(path->nodes[*level-1]);
+ path->nodes[*level-1] = next;
+ *level = btrfs_header_level(next);
+ path->slots[*level] = 0;
+ }
+out:
+ WARN_ON(*level < 0);
+ WARN_ON(*level >= BTRFS_MAX_LEVEL);
+
+ if (path->nodes[*level] == root->node) {
+ root_owner = root->root_key.objectid;
+ parent = path->nodes[*level];
+ } else {
+ parent = path->nodes[*level + 1];
+ root_owner = btrfs_header_owner(parent);
+ }
+
+ root_gen = btrfs_header_generation(parent);
+ /* NOTE(review): this overwrites any read error stored in ret by the loop above */
+ ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
+ path->nodes[*level]->len, parent->start,
+ root_owner, root_gen, *level, 1);
+ free_extent_buffer(path->nodes[*level]);
+ path->nodes[*level] = NULL;
+ *level += 1;
+ BUG_ON(ret);
+ return 0;
+}
+
+/*
+ * helper for dropping snapshots. This walks back up the tree in the path
+ * to find the first node higher up where we haven't yet gone through
+ * all the slots
+ *
+ * Compiled out (inside "#if 0").
+ *
+ * When such a node is found its slot is advanced, *level is set to its
+ * level, the key of the new slot is recorded in root_item->drop_progress
+ * (with drop_level) and 0 is returned.  Fully processed nodes are freed and
+ * removed from the path on the way up.  Returns 1 when the path is
+ * exhausted.
+ */
+static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int *level)
+{
+ u64 root_owner;
+ u64 root_gen;
+ struct btrfs_root_item *root_item = &root->root_item;
+ int i;
+ int slot;
+ int ret;
+
+ for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
+ slot = path->slots[i];
+ if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
+ struct extent_buffer *node;
+ struct btrfs_disk_key disk_key;
+ node = path->nodes[i];
+ path->slots[i]++;
+ *level = i;
+ WARN_ON(*level == 0);
+ /* remember how far the drop has progressed */
+ btrfs_node_key(node, &disk_key, path->slots[i]);
+ memcpy(&root_item->drop_progress,
+ &disk_key, sizeof(disk_key));
+ root_item->drop_level = i;
+ return 0;
+ } else {
+ /* every slot of this node is done: free the node itself */
+ struct extent_buffer *parent;
+ if (path->nodes[*level] == root->node)
+ parent = path->nodes[*level];
+ else
+ parent = path->nodes[*level + 1];
+
+ root_owner = btrfs_header_owner(parent);
+ root_gen = btrfs_header_generation(parent);
+ ret = btrfs_free_extent(trans, root,
+ path->nodes[*level]->start,
+ path->nodes[*level]->len,
+ parent->start, root_owner,
+ root_gen, *level, 1);
+ BUG_ON(ret);
+ free_extent_buffer(path->nodes[*level]);
+ path->nodes[*level] = NULL;
+ *level = i + 1;
+ }
+ }
+ return 1;
+}
+
+#endif
+
+/*
+ * Tear down the in-memory block group state of @info:
+ *  - free every block group cache attached to info->block_group_cache
+ *    (including its free space ctl, if any) and clear all bits there,
+ *  - clear every EXTENT_DIRTY range in info->free_space_cache,
+ *  - unlink and free every entry of the info->space_info list.
+ * Always returns 0.
+ */
+int btrfs_free_block_groups(struct btrfs_fs_info *info)
+{
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_space_info *sinfo;
+	u64 first;
+	u64 last;
+	u64 state_priv;
+
+	/* find_first_extent_bit() returns non-zero once the tree is empty */
+	while (find_first_extent_bit(&info->block_group_cache, 0,
+				     &first, &last, (unsigned int)-1) == 0) {
+		if (get_state_private(&info->block_group_cache, first,
+				      &state_priv) == 0) {
+			cache = u64_to_ptr(state_priv);
+			if (cache->free_space_ctl) {
+				btrfs_remove_free_space_cache(cache);
+				kfree(cache->free_space_ctl);
+			}
+			kfree(cache);
+		}
+		clear_extent_bits(&info->block_group_cache, first,
+				  last, (unsigned int)-1, GFP_NOFS);
+	}
+
+	while (find_first_extent_bit(&info->free_space_cache, 0,
+				     &first, &last, EXTENT_DIRTY) == 0) {
+		clear_extent_dirty(&info->free_space_cache, first,
+				   last, GFP_NOFS);
+	}
+
+	while (!list_empty(&info->space_info)) {
+		sinfo = list_entry(info->space_info.next,
+				   struct btrfs_space_info, list);
+		list_del_init(&sinfo->list);
+		kfree(sinfo);
+	}
+	return 0;
+}
+
+/*
+ * Position @path at the first BLOCK_GROUP_ITEM whose objectid is at least
+ * key->objectid, scanning forward from the slot btrfs_search_slot()
+ * returns for @key.
+ *
+ * Returns 0 with @path pointing at the item, -ENOENT when the tree is
+ * exhausted first, or a negative error from the search or from
+ * btrfs_next_leaf().
+ */
+static int find_first_block_group(struct btrfs_root *root,
+		struct btrfs_path *path, struct btrfs_key *key)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_key found_key;
+	int slot;
+	int ret;
+
+	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	for (;;) {
+		slot = path->slots[0];
+		leaf = path->nodes[0];
+		if (slot >= btrfs_header_nritems(leaf)) {
+			/* ran off this leaf: move on to the next one */
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				return ret;
+			if (ret != 0)
+				return -ENOENT;
+			continue;
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+		if (found_key.objectid >= key->objectid &&
+		    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
+			return 0;
+		path->slots[0]++;
+	}
+}
+
+/*
+ * Add to cache->bytes_super the number of bytes of block group @cache that
+ * are occupied by superblock copies.
+ *
+ * The area below BTRFS_SUPER_INFO_OFFSET is counted when the block group
+ * starts before it.  Then, for each superblock mirror, btrfs_rmap_block()
+ * yields the logical addresses mapping to it and the part of each stripe
+ * that falls inside the block group is added.  The function stops silently
+ * if btrfs_rmap_block() fails.
+ */
+static void account_super_bytes(struct btrfs_fs_info *fs_info,
+				struct btrfs_block_group_cache *cache)
+{
+	u64 group_start = cache->key.objectid;
+	u64 group_end = cache->key.objectid + cache->key.offset;
+	u64 sb_bytenr;
+	u64 *logical;
+	int stripe_len;
+	int mirror, nr, ret;
+
+	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
+		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
+		cache->bytes_super += stripe_len;
+	}
+
+	for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+		sb_bytenr = btrfs_sb_offset(mirror);
+		ret = btrfs_rmap_block(&fs_info->mapping_tree,
+				       cache->key.objectid, sb_bytenr,
+				       0, &logical, &nr, &stripe_len);
+		if (ret)
+			return;
+
+		while (nr--) {
+			u64 start, len;
+
+			/* stripe lies entirely after or before the group */
+			if (logical[nr] > group_end)
+				continue;
+			if (logical[nr] + stripe_len <= group_start)
+				continue;
+
+			start = logical[nr];
+			if (start >= group_start) {
+				len = min_t(u64, stripe_len,
+					    group_end - start);
+			} else {
+				/* stripe begins before the group: clip it */
+				start = group_start;
+				len = (logical[nr] + stripe_len) - start;
+			}
+
+			cache->bytes_super += len;
+		}
+
+		kfree(logical);
+	}
+}
+
+/*
+ * Walk the extent tree and build an in-memory btrfs_block_group_cache for
+ * every block group item found, registering each one in
+ * info->block_group_cache and in the matching space_info.
+ *
+ * The scan loop has no 'break': it only leaves through 'goto error', so the
+ * value returned is whatever ended the scan. With find_first_block_group() as
+ * defined in this file that is -ENOENT once no further block group item
+ * exists, another negative error from the tree search, or -ENOMEM when a
+ * cache entry cannot be allocated.
+ */
+int btrfs_read_block_groups(struct btrfs_root *root)
+{
+ struct btrfs_path *path;
+ int ret;
+ int bit;
+ struct btrfs_block_group_cache *cache;
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_space_info *space_info;
+ struct extent_io_tree *block_group_cache;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+
+ block_group_cache = &info->block_group_cache;
+
+ /* Block group items are looked up in the extent tree, whatever root was passed. */
+ root = info->extent_root;
+ key.objectid = 0;
+ key.offset = 0;
+ btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ while(1) {
+ ret = find_first_block_group(root, path, &key);
+ if (ret > 0) {
+ ret = 0;
+ goto error;
+ }
+ if (ret != 0) {
+ goto error;
+ }
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ cache = kzalloc(sizeof(*cache), GFP_NOFS);
+ if (!cache) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ /* Copy the on-disk item and its key into the cache entry. */
+ read_extent_buffer(leaf, &cache->item,
+ btrfs_item_ptr_offset(leaf, path->slots[0]),
+ sizeof(cache->item));
+ memcpy(&cache->key, &found_key, sizeof(found_key));
+ cache->cached = 0;
+ cache->pinned = 0;
+ /* The next search resumes right after the end of this block group. */
+ key.objectid = found_key.objectid + found_key.offset;
+ btrfs_release_path(path);
+ cache->flags = btrfs_block_group_flags(&cache->item);
+ /* Only one state bit is chosen, in DATA, SYSTEM, METADATA priority. */
+ bit = 0;
+ if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
+ bit = BLOCK_GROUP_DATA;
+ } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+ bit = BLOCK_GROUP_SYSTEM;
+ } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ bit = BLOCK_GROUP_METADATA;
+ }
+ set_avail_alloc_bits(info, cache->flags);
+ if (btrfs_chunk_readonly(root, cache->key.objectid))
+ cache->ro = 1;
+
+ account_super_bytes(info, cache);
+
+ ret = update_space_info(info, cache->flags, found_key.offset,
+ btrfs_block_group_used(&cache->item),
+ &space_info);
+ BUG_ON(ret);
+ cache->space_info = space_info;
+
+ /* use EXTENT_LOCKED to prevent merging */
+ set_extent_bits(block_group_cache, found_key.objectid,
+ found_key.objectid + found_key.offset - 1,
+ bit | EXTENT_LOCKED, GFP_NOFS);
+ /* Attach the cache entry to the state that starts at the group start. */
+ set_state_private(block_group_cache, found_key.objectid,
+ (unsigned long)cache);
+ }
+ /* Not reached: the loop above only exits via 'goto error'. */
+ ret = 0;
+error:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Create the in-memory cache entry for a new block group covering
+ * [chunk_offset, chunk_offset + size) and register it with the space info
+ * and with fs_info->block_group_cache.
+ *
+ * Nothing is written to the extent tree here. Allocation or registration
+ * failures trip BUG_ON(), so the returned pointer is the new entry.
+ */
+struct btrfs_block_group_cache *
+btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
+		      u64 chunk_objectid, u64 chunk_offset, u64 size)
+{
+	struct extent_io_tree *bg_tree = &fs_info->block_group_cache;
+	struct btrfs_block_group_cache *bg;
+	int state_bits;
+	int err;
+
+	bg = kzalloc(sizeof(*bg), GFP_NOFS);
+	BUG_ON(!bg);
+
+	/* Key of the block group item: (start, BLOCK_GROUP_ITEM, length). */
+	bg->key.objectid = chunk_offset;
+	bg->key.offset = size;
+	btrfs_set_key_type(&bg->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+
+	/* Item payload, mirrored by the cached flags. */
+	btrfs_set_block_group_used(&bg->item, bytes_used);
+	btrfs_set_block_group_chunk_objectid(&bg->item, chunk_objectid);
+	bg->flags = type;
+	btrfs_set_block_group_flags(&bg->item, type);
+
+	account_super_bytes(fs_info, bg);
+	err = update_space_info(fs_info, bg->flags, size, bytes_used,
+				&bg->space_info);
+	BUG_ON(err);
+
+	/* EXTENT_LOCKED is added alongside the type bits, as elsewhere in this file. */
+	state_bits = block_group_state_bits(type);
+	err = set_extent_bits(bg_tree, chunk_offset, chunk_offset + size - 1,
+			      state_bits | EXTENT_LOCKED, GFP_NOFS);
+	BUG_ON(err);
+
+	err = set_state_private(bg_tree, chunk_offset, (unsigned long)bg);
+	BUG_ON(err);
+	set_avail_alloc_bits(fs_info, type);
+
+	return bg;
+}
+
+/*
+ * Create a block group: set up its in-memory cache entry through
+ * btrfs_add_block_group(), insert the block group item into the extent tree
+ * and then run the pending extent tree insert/delete work.
+ *
+ * Always returns 0; any failure trips BUG_ON().
+ */
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, u64 bytes_used,
+			   u64 type, u64 chunk_objectid, u64 chunk_offset,
+			   u64 size)
+{
+	struct btrfs_block_group_cache *bg;
+	struct btrfs_root *extent_root;
+	int err;
+
+	bg = btrfs_add_block_group(root->fs_info, bytes_used, type,
+				   chunk_objectid, chunk_offset, size);
+	extent_root = root->fs_info->extent_root;
+
+	err = btrfs_insert_item(trans, extent_root, &bg->key, &bg->item,
+				sizeof(bg->item));
+	BUG_ON(err);
+
+	err = finish_current_insert(trans, extent_root);
+	BUG_ON(err);
+
+	err = del_pending_extents(trans, extent_root);
+	BUG_ON(err);
+
+	return 0;
+}
+
+/*
+ * This is for converter use only.
+ *
+ * In that case we do not know where the free blocks are located, so every
+ * block group cache entry must be set up properly before any block
+ * allocation is done.
+ *
+ * The range [0, total_bytes) from the super block is carved into block
+ * groups: the first one is a SYSTEM group, the following ones alternate
+ * between METADATA and DATA so that data stays at roughly twice the
+ * metadata. All cache entries are created first; only then are the block
+ * group items inserted into the extent tree.
+ *
+ * Always returns 0; failures trip BUG_ON().
+ */
+int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root)
+{
+	u64 total_bytes;
+	u64 cur_start;
+	u64 group_type;
+	u64 group_size;
+	u64 group_align;
+	u64 total_data = 0;
+	u64 total_metadata = 0;
+	u64 chunk_objectid;
+	int ret;
+	int bit;
+	struct btrfs_root *extent_root;
+	struct btrfs_block_group_cache *cache;
+	struct extent_io_tree *block_group_cache;
+
+	extent_root = root->fs_info->extent_root;
+	block_group_cache = &root->fs_info->block_group_cache;
+	chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
+	group_align = 64 * root->sectorsize;
+
+	cur_start = 0;
+	while (cur_start < total_bytes) {
+		group_size = total_bytes / 12;
+		group_size = min_t(u64, group_size, total_bytes - cur_start);
+		if (cur_start == 0) {
+			bit = BLOCK_GROUP_SYSTEM;
+			group_type = BTRFS_BLOCK_GROUP_SYSTEM;
+			group_size /= 4;
+			group_size &= ~(group_align - 1);
+			group_size = max_t(u64, group_size, 8 * 1024 * 1024);
+			group_size = min_t(u64, group_size, 32 * 1024 * 1024);
+		} else {
+			group_size &= ~(group_align - 1);
+			/*
+			 * The state bit has to follow the group type. It used
+			 * to be assigned only for the first (SYSTEM) group, so
+			 * every later group was tagged BLOCK_GROUP_SYSTEM too.
+			 */
+			if (total_data >= total_metadata * 2) {
+				bit = BLOCK_GROUP_METADATA;
+				group_type = BTRFS_BLOCK_GROUP_METADATA;
+				group_size = min_t(u64, group_size,
+						   1ULL * 1024 * 1024 * 1024);
+				total_metadata += group_size;
+			} else {
+				bit = BLOCK_GROUP_DATA;
+				group_type = BTRFS_BLOCK_GROUP_DATA;
+				group_size = min_t(u64, group_size,
+						   5ULL * 1024 * 1024 * 1024);
+				total_data += group_size;
+			}
+			/* Absorb a short tail instead of leaving a tiny last group. */
+			if ((total_bytes - cur_start) * 4 < group_size * 5)
+				group_size = total_bytes - cur_start;
+		}
+
+		cache = kzalloc(sizeof(*cache), GFP_NOFS);
+		BUG_ON(!cache);
+
+		cache->key.objectid = cur_start;
+		cache->key.offset = group_size;
+		btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+
+		btrfs_set_block_group_used(&cache->item, 0);
+		btrfs_set_block_group_chunk_objectid(&cache->item,
+						     chunk_objectid);
+		btrfs_set_block_group_flags(&cache->item, group_type);
+
+		cache->flags = group_type;
+
+		ret = update_space_info(root->fs_info, group_type, group_size,
+					0, &cache->space_info);
+		BUG_ON(ret);
+		set_avail_alloc_bits(extent_root->fs_info, group_type);
+
+		/* Checked the same way btrfs_add_block_group() checks them. */
+		ret = set_extent_bits(block_group_cache, cur_start,
+				      cur_start + group_size - 1,
+				      bit | EXTENT_LOCKED, GFP_NOFS);
+		BUG_ON(ret);
+		ret = set_state_private(block_group_cache, cur_start,
+					(unsigned long)cache);
+		BUG_ON(ret);
+		cur_start += group_size;
+	}
+	/* then insert all the items */
+	cur_start = 0;
+	while (cur_start < total_bytes) {
+		cache = btrfs_lookup_block_group(root->fs_info, cur_start);
+		BUG_ON(!cache);
+
+		ret = btrfs_insert_item(trans, extent_root, &cache->key,
+					&cache->item, sizeof(cache->item));
+		BUG_ON(ret);
+
+		/* Checked as in btrfs_make_block_group(). */
+		ret = finish_current_insert(trans, extent_root);
+		BUG_ON(ret);
+		ret = del_pending_extents(trans, extent_root);
+		BUG_ON(ret);
+
+		cur_start = cache->key.objectid + cache->key.offset;
+	}
+	return 0;
+}
+
+/*
+ * Exported entry point that forwards all of its arguments unchanged to
+ * update_block_group() and returns that function's result.
+ */
+int btrfs_update_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int alloc,
+ int mark_free)
+{
+ return update_block_group(trans, root, bytenr, num_bytes,
+ alloc, mark_free);
+}
+
+/*
+ * Delete the block group item (bytenr, BLOCK_GROUP_ITEM, len) from the
+ * extent tree and nothing else.
+ *
+ * The caller must ensure that the block group is empty and that all of its
+ * space is pinned; otherwise new tree blocks or data may be allocated into it.
+ *
+ * Returns -ENOENT when no item with that exact key exists, -ENOMEM when the
+ * path cannot be allocated, otherwise the result of the search/delete.
+ */
+static int free_block_group_item(struct btrfs_trans_handle *trans,
+				 struct btrfs_fs_info *fs_info,
+				 u64 bytenr, u64 len)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_key bg_key;
+	struct btrfs_path *path;
+	int ret;
+
+	bg_key.objectid = bytenr;
+	bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	bg_key.offset = len;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(trans, extent_root, &bg_key, path, -1, 1);
+	if (ret == 0)
+		ret = btrfs_del_item(trans, extent_root, path);
+	else if (ret > 0)
+		ret = -ENOENT;
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Delete the device extent item (devid, DEV_EXTENT, dev_offset) from the
+ * device tree.
+ *
+ * Returns -ENOENT when no item with that exact key exists, -ENOMEM when the
+ * path cannot be allocated, otherwise the result of the search/delete.
+ */
+static int free_dev_extent_item(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *fs_info,
+				u64 devid, u64 dev_offset)
+{
+	struct btrfs_root *dev_root = fs_info->dev_root;
+	struct btrfs_key ext_key;
+	struct btrfs_path *path;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ext_key.objectid = devid;
+	ext_key.type = BTRFS_DEV_EXTENT_KEY;
+	ext_key.offset = dev_offset;
+
+	ret = btrfs_search_slot(trans, dev_root, &ext_key, path, -1, 1);
+	if (ret == 0)
+		ret = btrfs_del_item(trans, dev_root, path);
+	else if (ret > 0)
+		ret = -ENOENT;
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Delete the device extent item of every stripe of the chunk that starts at
+ * @chunk_offset. The chunk item itself is only read here, not modified.
+ *
+ * Returns -ENOENT when the chunk item is missing, -ENOMEM when the path
+ * cannot be allocated, the first negative result of
+ * free_dev_extent_item(), or otherwise the result of the last call made.
+ */
+static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
+				       struct btrfs_fs_info *fs_info,
+				       u64 chunk_offset)
+{
+	struct btrfs_root *chunk_root = fs_info->chunk_root;
+	struct btrfs_chunk *chunk = NULL;
+	struct btrfs_path *path;
+	struct btrfs_key chunk_key;
+	u16 nr_stripes;
+	int stripe;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
+	chunk_key.offset = chunk_offset;
+
+	ret = btrfs_search_slot(trans, chunk_root, &chunk_key, path, 0, 0);
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		goto out;
+
+	chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			       struct btrfs_chunk);
+	nr_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk);
+
+	for (stripe = 0; stripe < nr_stripes; stripe++) {
+		ret = free_dev_extent_item(trans, fs_info,
+			btrfs_stripe_devid_nr(path->nodes[0], chunk, stripe),
+			btrfs_stripe_offset_nr(path->nodes[0], chunk, stripe));
+		if (ret < 0)
+			break;
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Remove the entry matching @key from the system chunk array embedded in
+ * the super block and shrink the recorded array size accordingly.
+ *
+ * The array is a packed sequence of (disk key, chunk item) pairs; the size
+ * of each pair depends on the chunk's stripe count.
+ *
+ * Returns 0 when the entry was removed, -ENOENT when no entry matches and
+ * -EIO when the array looks corrupted (an entry that is not a chunk item, or
+ * an entry whose computed length runs past the end of the array).
+ */
+static int free_system_chunk_item(struct btrfs_super_block *super,
+				  struct btrfs_key *key)
+{
+	u32 array_size = btrfs_super_sys_array_size(super);
+	char *ptr = (char *)super->sys_chunk_array;
+	u32 cur = 0;
+
+	while (cur < array_size) {
+		struct btrfs_disk_key *disk_key;
+		struct btrfs_key cpu_key;
+		struct btrfs_chunk *chunk;
+		u32 num_stripes;
+		u32 chunk_len;
+
+		disk_key = (struct btrfs_disk_key *)(ptr + cur);
+		btrfs_disk_key_to_cpu(&cpu_key, disk_key);
+		if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) {
+			/* just in case */
+			return -EIO;
+		}
+
+		chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key));
+		num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+		chunk_len = btrfs_chunk_item_size(num_stripes) +
+			    sizeof(*disk_key);
+
+		/*
+		 * A bogus stripe count must not make the memmove() length
+		 * below underflow, nor push the cursor past the array.
+		 */
+		if (chunk_len > array_size - cur)
+			return -EIO;
+
+		if (key->objectid == cpu_key.objectid &&
+		    key->offset == cpu_key.offset &&
+		    key->type == cpu_key.type) {
+			/* Close the gap by sliding the remaining entries down. */
+			memmove(ptr + cur, ptr + cur + chunk_len,
+				array_size - cur - chunk_len);
+			array_size -= chunk_len;
+			btrfs_set_super_sys_array_size(super, array_size);
+			return 0;
+		}
+
+		cur += chunk_len;
+	}
+	return -ENOENT;
+}
+
+/*
+ * Delete the chunk item that starts at @bytenr from the chunk tree and, when
+ * the chunk is a SYSTEM chunk, also drop its copy from the super block's
+ * system chunk array. @len is not used.
+ *
+ * Returns -ENOENT when the chunk item does not exist, -ENOMEM when the path
+ * cannot be allocated, otherwise the result of the delete steps.
+ */
+static int free_chunk_item(struct btrfs_trans_handle *trans,
+			   struct btrfs_fs_info *fs_info,
+			   u64 bytenr, u64 len)
+{
+	struct btrfs_root *chunk_root = fs_info->chunk_root;
+	struct btrfs_chunk *chunk;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	u64 chunk_type;
+	int ret;
+
+	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+	key.offset = bytenr;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(trans, chunk_root, &key, path, -1, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		goto out;
+
+	/* The type has to be read before the item is deleted. */
+	chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			       struct btrfs_chunk);
+	chunk_type = btrfs_chunk_type(path->nodes[0], chunk);
+
+	ret = btrfs_del_item(trans, chunk_root, path);
+	if (ret < 0)
+		goto out;
+
+	if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
+		ret = free_system_chunk_item(fs_info->super_copy, &key);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Return the length of one device extent of the chunk described by @map:
+ * the chunk size (map->ce.size) divided by a profile-dependent divisor.
+ *
+ *   single / DUP / RAID1:  1
+ *   RAID5:                 num_stripes - 1
+ *   RAID6:                 num_stripes - 2
+ *   RAID10:                num_stripes / sub_stripes
+ *
+ * Any other profile value trips BUG_ON().
+ */
+static u64 get_dev_extent_len(struct map_lookup *map)
+{
+	u64 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+	int div;
+
+	if (profile == 0 || profile == BTRFS_BLOCK_GROUP_DUP ||
+	    profile == BTRFS_BLOCK_GROUP_RAID1) {
+		div = 1;
+	} else if (profile == BTRFS_BLOCK_GROUP_RAID5) {
+		div = (map->num_stripes - 1);
+	} else if (profile == BTRFS_BLOCK_GROUP_RAID6) {
+		div = (map->num_stripes - 2);
+	} else if (profile == BTRFS_BLOCK_GROUP_RAID10) {
+		div = (map->num_stripes / map->sub_stripes);
+	} else {
+		/* normally, read chunk security hook should handled it */
+		BUG_ON(1);
+	}
+	return map->ce.size / div;
+}
+
+/*
+ * Free the in-memory caches that belong to the block group/chunk
+ * [bytenr, bytenr + len):
+ *   - the block group cache entry, its free space ctl and its states in
+ *     fs_info->block_group_cache,
+ *   - the space info accounting for @len bytes,
+ *   - the chunk mapping, after giving the device extent length back to
+ *     every stripe's device via btrfs_update_device().
+ *
+ * Returns 0 on success, -ENOENT when the block group or the chunk mapping
+ * cannot be found, or the negative error of free_space_info() /
+ * btrfs_update_device().
+ */
+static int free_block_group_cache(struct btrfs_trans_handle *trans,
+				  struct btrfs_fs_info *fs_info,
+				  u64 bytenr, u64 len)
+{
+	struct btrfs_block_group_cache *cache;
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	int ret;
+	int i;
+	u64 flags;
+
+	/* Free block group cache first */
+	cache = btrfs_lookup_block_group(fs_info, bytenr);
+	if (!cache)
+		return -ENOENT;
+	flags = cache->flags;
+	if (cache->free_space_ctl) {
+		btrfs_remove_free_space_cache(cache);
+		kfree(cache->free_space_ctl);
+	}
+	/*
+	 * The end of the range is inclusive: passing bytenr + len would also
+	 * clear the first byte of the following block group and split off
+	 * the start of its state, which is where its private pointer lives.
+	 */
+	clear_extent_bits(&fs_info->block_group_cache, bytenr,
+			  bytenr + len - 1, (unsigned int)-1, GFP_NOFS);
+	/*
+	 * The entry can no longer be found through the tree, so free it
+	 * before anything below can fail and leak it.
+	 */
+	kfree(cache);
+
+	ret = free_space_info(fs_info, flags, len, 0, NULL);
+	if (ret < 0)
+		goto out;
+
+	/* Then free mapping info and dev usage info */
+	ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr);
+	if (!ce || ce->start != bytenr) {
+		ret = -ENOENT;
+		goto out;
+	}
+	map = container_of(ce, struct map_lookup, ce);
+	for (i = 0; i < map->num_stripes; i++) {
+		struct btrfs_device *device;
+
+		device = map->stripes[i].dev;
+		device->bytes_used -= get_dev_extent_len(map);
+		ret = btrfs_update_device(trans, device);
+		if (ret < 0)
+			goto out;
+	}
+	remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce);
+	free(map);
+out:
+	return ret;
+}
+
+/*
+ * Remove the empty block group [bytenr, bytenr + len) together with its
+ * chunk: the block group item, the device extents of all stripes, the chunk
+ * item and finally the in-memory caches.
+ *
+ * The range stays pinned while the items are deleted and is unpinned again
+ * before returning, whether or not the deletion succeeded.
+ *
+ * Returns 0 on success, -ENOENT when the block group item does not exist,
+ * -EINVAL when the block group still has used bytes, -ENOMEM when the path
+ * cannot be allocated, or the negative error of the failing step.
+ */
+int btrfs_free_block_group(struct btrfs_trans_handle *trans,
+			   struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_path *path;
+	struct btrfs_block_group_item *bgi;
+	struct btrfs_key key;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	key.offset = len;
+
+	/* Double check the block group to ensure it's empty */
+	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
+	if (ret > 0) {
+		/* Was -ENONET ("machine is not on the network"), a typo. */
+		ret = -ENOENT;
+		goto out;
+	}
+	if (ret < 0)
+		goto out;
+
+	bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			     struct btrfs_block_group_item);
+	if (btrfs_disk_block_group_used(path->nodes[0], bgi)) {
+		fprintf(stderr,
+			"WARNING: block group [%llu,%llu) is not empty\n",
+			bytenr, bytenr + len);
+		ret = -EINVAL;
+		goto out;
+	}
+	btrfs_release_path(path);
+
+	/*
+	 * Pin all space in the block group so that the tree modifications
+	 * below cannot allocate from the range that is being removed.
+	 */
+	btrfs_pin_extent(fs_info, bytenr, len);
+
+	/* delete block group item and chunk item */
+	ret = free_block_group_item(trans, fs_info, bytenr, len);
+	if (ret < 0) {
+		fprintf(stderr,
+			"failed to free block group item for [%llu,%llu)\n",
+			bytenr, bytenr + len);
+		goto unpin;
+	}
+
+	ret = free_chunk_dev_extent_items(trans, fs_info, bytenr);
+	if (ret < 0) {
+		fprintf(stderr,
+			"failed to dev extents belongs to [%llu,%llu)\n",
+			bytenr, bytenr + len);
+		goto unpin;
+	}
+	ret = free_chunk_item(trans, fs_info, bytenr, len);
+	if (ret < 0) {
+		fprintf(stderr,
+			"failed to free chunk for [%llu,%llu)\n",
+			bytenr, bytenr + len);
+		goto unpin;
+	}
+
+	/* Now release the block_group_cache */
+	ret = free_block_group_cache(trans, fs_info, bytenr, len);
+
+unpin:
+	btrfs_unpin_extent(fs_info, bytenr, len);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Fix up block accounting. The initial block accounting created by
+ * btrfs_make_block_groups() is not accurate in this case, so it is rebuilt
+ * from the extent tree:
+ *   1. run all pending extent tree operations,
+ *   2. reset the used bytes of every block group and of its space info,
+ *   3. walk the extent tree and account every EXTENT_ITEM (key.offset
+ *      bytes) and METADATA_ITEM (one leafsize) to its block group,
+ *   4. store the grand total in the super block copy.
+ *
+ * Returns 0 on success or the negative error of the failing step. The
+ * search path is released on every exit that follows its initialization.
+ */
+int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	int ret;
+	int slot;
+	u64 start = 0;
+	u64 bytes_used = 0;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	root = root->fs_info->extent_root;
+
+	while (extent_root_pending_ops(fs_info)) {
+		ret = finish_current_insert(trans, root);
+		if (ret)
+			return ret;
+		ret = del_pending_extents(trans, root);
+		if (ret)
+			return ret;
+	}
+
+	/* Zero the counters of every block group and mark it dirty. */
+	while (1) {
+		cache = btrfs_lookup_first_block_group(fs_info, start);
+		if (!cache)
+			break;
+		start = cache->key.objectid + cache->key.offset;
+		btrfs_set_block_group_used(&cache->item, 0);
+		cache->space_info->bytes_used = 0;
+		set_extent_bits(&root->fs_info->block_group_cache,
+				cache->key.objectid,
+				cache->key.objectid + cache->key.offset - 1,
+				BLOCK_GROUP_DIRTY, GFP_NOFS);
+	}
+
+	btrfs_init_path(&path);
+	key.offset = 0;
+	key.objectid = 0;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+				&key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+	while (1) {
+		leaf = path.nodes[0];
+		slot = path.slots[0];
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, &path);
+			/* Do not return with the path still held. */
+			if (ret < 0)
+				goto out;
+			if (ret > 0)
+				break;
+			leaf = path.nodes[0];
+			slot = path.slots[0];
+		}
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+			bytes_used += key.offset;
+			ret = btrfs_update_block_group(trans, root,
+				  key.objectid, key.offset, 1, 0);
+			BUG_ON(ret);
+		} else if (key.type == BTRFS_METADATA_ITEM_KEY) {
+			bytes_used += root->leafsize;
+			ret = btrfs_update_block_group(trans, root,
+				  key.objectid, root->leafsize, 1, 0);
+			BUG_ON(ret);
+		}
+		path.slots[0]++;
+	}
+	btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used);
+	ret = 0;
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Record a file extent. Do all the required work: insert the file extent
+ * item into @root, insert the extent item into the extent tree (unless it
+ * already exists), update block group accounting and add the back reference.
+ *
+ * A @disk_bytenr of 0 only inserts the file extent item through
+ * btrfs_insert_file_extent() and touches nothing else.
+ *
+ * @inode is a stack inode item; its nbytes field is increased by @num_bytes.
+ *
+ * Returns 0 on success or the error of the first failing step.
+ */
+int btrfs_record_file_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid,
+ struct btrfs_inode_item *inode,
+ u64 file_pos, u64 disk_bytenr,
+ u64 num_bytes)
+{
+ int ret;
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_root *extent_root = info->extent_root;
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key ins_key;
+ struct btrfs_path path;
+ struct btrfs_extent_item *ei;
+ u64 nbytes;
+
+ if (disk_bytenr == 0) {
+ ret = btrfs_insert_file_extent(trans, root, objectid,
+ file_pos, disk_bytenr,
+ num_bytes, num_bytes);
+ return ret;
+ }
+
+ btrfs_init_path(&path);
+
+ /* Step 1: the file extent item (objectid, EXTENT_DATA, file_pos). */
+ ins_key.objectid = objectid;
+ ins_key.offset = file_pos;
+ btrfs_set_key_type(&ins_key, BTRFS_EXTENT_DATA_KEY);
+ ret = btrfs_insert_empty_item(trans, root, &path, &ins_key,
+ sizeof(*fi));
+ if (ret)
+ goto fail;
+ leaf = path.nodes[0];
+ fi = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_file_extent_item);
+ /* Regular extent, offset 0, with compression/encryption/encoding all 0. */
+ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+ btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_compression(leaf, fi, 0);
+ btrfs_set_file_extent_encryption(leaf, fi, 0);
+ btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+ btrfs_mark_buffer_dirty(leaf);
+
+ nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
+ btrfs_set_stack_inode_nbytes(inode, nbytes);
+
+ btrfs_release_path(&path);
+
+ /* Step 2: the extent item (disk_bytenr, EXTENT_ITEM, num_bytes). */
+ ins_key.objectid = disk_bytenr;
+ ins_key.offset = num_bytes;
+ ins_key.type = BTRFS_EXTENT_ITEM_KEY;
+
+ ret = btrfs_insert_empty_item(trans, extent_root, &path,
+ &ins_key, sizeof(*ei));
+ if (ret == 0) {
+ leaf = path.nodes[0];
+ ei = btrfs_item_ptr(leaf, path.slots[0],
+ struct btrfs_extent_item);
+
+ /* Inserted with 0 refs; btrfs_inc_extent_ref() is called further down. */
+ btrfs_set_extent_refs(leaf, ei, 0);
+ btrfs_set_extent_generation(leaf, ei, 0);
+ btrfs_set_extent_flags(leaf, ei, BTRFS_EXTENT_FLAG_DATA);
+
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = btrfs_update_block_group(trans, root, disk_bytenr,
+ num_bytes, 1, 0);
+ if (ret)
+ goto fail;
+ } else if (ret != -EEXIST) {
+ /* -EEXIST is tolerated: the extent item is already there. */
+ goto fail;
+ }
+ btrfs_extent_post_op(trans, extent_root);
+
+ /* Step 3: back reference from (root, objectid, file_pos) to the extent. */
+ ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, 0,
+ root->root_key.objectid,
+ objectid, file_pos);
+ if (ret)
+ goto fail;
+ ret = 0;
+fail:
+ btrfs_release_path(&path);
+ return ret;
+}
+
+
+/*
+ * Set EXTENT_UPTODATE on [start, start + num_bytes - 1] in the pinned
+ * extents tree. Always returns 0.
+ */
+static int add_excluded_extent(struct btrfs_root *root,
+			       u64 start, u64 num_bytes)
+{
+	struct extent_io_tree *pinned = &root->fs_info->pinned_extents;
+
+	set_extent_bits(pinned, start, start + num_bytes - 1,
+			EXTENT_UPTODATE, GFP_NOFS);
+	return 0;
+}
+
+/*
+ * Clear EXTENT_UPTODATE over the whole range of @cache in the pinned
+ * extents tree.
+ */
+void free_excluded_extents(struct btrfs_root *root,
+			   struct btrfs_block_group_cache *cache)
+{
+	struct extent_io_tree *pinned = &root->fs_info->pinned_extents;
+	u64 first = cache->key.objectid;
+	u64 last = first + cache->key.offset - 1;
+
+	clear_extent_bits(pinned, first, last, EXTENT_UPTODATE, GFP_NOFS);
+}
+
+/*
+ * Mark the parts of block group @cache that are attributed to superblock
+ * copies as excluded (via add_excluded_extent()) and add their length to
+ * cache->bytes_super.
+ *
+ * The ranges are computed exactly as in account_super_bytes(); the
+ * difference is that each range is also recorded and that errors are
+ * returned to the caller.
+ *
+ * Returns 0 on success, otherwise the non-zero result of
+ * btrfs_rmap_block() or add_excluded_extent().
+ */
+int exclude_super_stripes(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ u64 bytenr;
+ u64 *logical;
+ int stripe_len;
+ int i, nr, ret;
+
+ /* Group starts below the superblock offset: exclude the bytes up to it. */
+ if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
+ stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
+ cache->bytes_super += stripe_len;
+ ret = add_excluded_extent(root, cache->key.objectid,
+ stripe_len);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+ cache->key.objectid, bytenr,
+ 0, &logical, &nr, &stripe_len);
+ if (ret)
+ return ret;
+
+ while (nr--) {
+ u64 start, len;
+
+ /* Stripe begins beyond the end of the block group. */
+ if (logical[nr] > cache->key.objectid +
+ cache->key.offset)
+ continue;
+
+ /* Stripe ends at or before the start of the block group. */
+ if (logical[nr] + stripe_len <= cache->key.objectid)
+ continue;
+
+ /* Clamp the stripe to the block group. */
+ start = logical[nr];
+ if (start < cache->key.objectid) {
+ start = cache->key.objectid;
+ len = (logical[nr] + stripe_len) - start;
+ } else {
+ len = min_t(u64, stripe_len,
+ cache->key.objectid +
+ cache->key.offset - start);
+ }
+
+ cache->bytes_super += len;
+ ret = add_excluded_extent(root, start, len);
+ if (ret) {
+ /* logical[] must be freed on this early exit as well. */
+ kfree(logical);
+ return ret;
+ }
+ }
+
+ kfree(logical);
+ }
+ return 0;
+}
+
+/*
+ * Add the parts of [start, end) that carry neither EXTENT_DIRTY nor
+ * EXTENT_UPTODATE in info->pinned_extents to the free space ctl of
+ * @block_group.
+ *
+ * Returns the total number of bytes added. A failure of
+ * btrfs_add_free_space() trips BUG_ON().
+ */
+u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+		       struct btrfs_fs_info *info, u64 start, u64 end)
+{
+	u64 hole_start, hole_end;
+	u64 added = 0;
+	u64 gap;
+	int ret;
+
+	while (start < end) {
+		ret = find_first_extent_bit(&info->pinned_extents, start,
+					    &hole_start, &hole_end,
+					    EXTENT_DIRTY | EXTENT_UPTODATE);
+		if (ret)
+			break;
+
+		if (hole_start > start) {
+			/* The next marked extent lies past our range. */
+			if (hole_start >= end)
+				break;
+
+			/* Everything up to the marked extent is free. */
+			gap = hole_start - start;
+			added += gap;
+			ret = btrfs_add_free_space(block_group->free_space_ctl,
+						   start, gap);
+			BUG_ON(ret); /* -ENOMEM or logic error */
+		}
+
+		/* Continue right behind the marked extent. */
+		start = hole_end + 1;
+	}
+
+	/* Whatever remains after the last marked extent is free as well. */
+	if (start < end) {
+		gap = end - start;
+		added += gap;
+		ret = btrfs_add_free_space(block_group->free_space_ctl, start,
+					   gap);
+		BUG_ON(ret); /* -ENOMEM or logic error */
+	}
+
+	return added;
+}
diff --git a/extent_io.c b/extent_io.c
new file mode 100644
index 00000000..88e92736
--- /dev/null
+++ b/extent_io.c
@@ -0,0 +1,892 @@
+
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "kerncompat.h"
+#include "extent_io.h"
+#include "list.h"
+#include "ctree.h"
+#include "volumes.h"
+
+/*
+ * Put @tree into its empty initial state: empty state and buffer cache
+ * trees, empty LRU list and a cache size of 0.
+ */
+void extent_io_tree_init(struct extent_io_tree *tree)
+{
+	tree->cache_size = 0;
+	INIT_LIST_HEAD(&tree->lru);
+	cache_tree_init(&tree->state);
+	cache_tree_init(&tree->cache);
+}
+
+/*
+ * Allocate an extent_state holding one reference and no bits.
+ * start/end are left unset; the caller fills them in.
+ *
+ * Returns NULL when malloc() fails.
+ */
+static struct extent_state *alloc_extent_state(void)
+{
+	struct extent_state *es = malloc(sizeof(*es));
+
+	if (es) {
+		es->cache_node.objectid = 0;
+		es->refs = 1;
+		es->state = 0;
+		es->xprivate = 0;
+	}
+	return es;
+}
+
+/*
+ * Drop one reference on @state and free it once the count reaches zero.
+ * A negative count trips BUG_ON().
+ */
+static void btrfs_free_extent_state(struct extent_state *state)
+{
+	state->refs -= 1;
+	BUG_ON(state->refs < 0);
+	if (state->refs != 0)
+		return;
+	free(state);
+}
+
+/*
+ * cache_tree_free_extents() callback: drop the reference on the
+ * extent_state that embeds @cache.
+ */
+static void free_extent_state_func(struct cache_extent *cache)
+{
+	btrfs_free_extent_state(container_of(cache, struct extent_state,
+					     cache_node));
+}
+
+/*
+ * Tear down @tree: report every extent buffer still on the LRU list as a
+ * leak on stderr and release it, then free all extent states.
+ */
+void extent_io_tree_cleanup(struct extent_io_tree *tree)
+{
+	struct list_head *head = &tree->lru;
+
+	while (!list_empty(head)) {
+		struct extent_buffer *leaked;
+
+		leaked = list_entry(head->next, struct extent_buffer, lru);
+		fprintf(stderr, "extent buffer leak: "
+			"start %llu len %u\n",
+			(unsigned long long)leaked->start, leaked->len);
+		free_extent_buffer(leaked);
+	}
+
+	cache_tree_free_extents(&tree->state, free_extent_state_func);
+}
+
+/*
+ * Refresh the embedded cache node of @state from its inclusive
+ * [start, end] range.
+ */
+static inline void update_extent_state(struct extent_state *state)
+{
+	struct cache_extent *node = &state->cache_node;
+
+	node->start = state->start;
+	node->size = state->end - state->start + 1;
+}
+
+/*
+ * Utility function to look for merge candidates next to a given state.
+ * A neighbour that is directly adjacent and carries exactly the same bits is
+ * merged with @state into a single extent in the tree. States with any of
+ * the EXTENT_IOBITS set are not merged.
+ *
+ * Note that when @state is merged into its successor, @state itself is
+ * removed from the tree and its reference dropped, so the caller must not
+ * rely on @state afterwards. Always returns 0.
+ */
+static int merge_state(struct extent_io_tree *tree,
+ struct extent_state *state)
+{
+ struct extent_state *other;
+ struct cache_extent *other_node;
+
+ if (state->state & EXTENT_IOBITS)
+ return 0;
+
+ /* Predecessor: @state grows downwards and the predecessor is dropped. */
+ other_node = prev_cache_extent(&state->cache_node);
+ if (other_node) {
+ other = container_of(other_node, struct extent_state,
+ cache_node);
+ if (other->end == state->start - 1 &&
+ other->state == state->state) {
+ state->start = other->start;
+ update_extent_state(state);
+ remove_cache_extent(&tree->state, &other->cache_node);
+ btrfs_free_extent_state(other);
+ }
+ }
+ /* Successor: the successor grows downwards and @state is dropped. */
+ other_node = next_cache_extent(&state->cache_node);
+ if (other_node) {
+ other = container_of(other_node, struct extent_state,
+ cache_node);
+ if (other->start == state->end + 1 &&
+ other->state == state->state) {
+ other->start = state->start;
+ update_extent_state(other);
+ remove_cache_extent(&tree->state, &state->cache_node);
+ btrfs_free_extent_state(state);
+ }
+ }
+ return 0;
+}
+
+/*
+ * Insert an extent_state covering the inclusive range [start, end] into the
+ * tree. 'bits' are OR-ed into the state before it is inserted, and the new
+ * state is then offered to merge_state().
+ *
+ * Always returns 0; an inverted range or a failed insertion trips BUG_ON().
+ */
+static int insert_state(struct extent_io_tree *tree,
+			struct extent_state *state, u64 start, u64 end,
+			int bits)
+{
+	int err;
+
+	BUG_ON(end < start);
+
+	state->start = start;
+	state->end = end;
+	state->state |= bits;
+	update_extent_state(state);
+
+	err = insert_cache_extent(&tree->state, &state->cache_node);
+	BUG_ON(err);
+
+	merge_state(tree, state);
+	return 0;
+}
+
+/*
+ * Split an extent state in two at offset 'split', which lies inside 'orig'.
+ *
+ * After the call the preallocated struct 'prealloc' covers the FIRST part,
+ * [orig->start, split - 1], with a copy of orig's bits, and has been
+ * inserted into the tree; 'orig' is shrunk to the second part and now
+ * starts at 'split'.
+ *
+ * Always returns 0; a failed insertion trips BUG_ON().
+ */
+static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
+		       struct extent_state *prealloc, u64 split)
+{
+	int err;
+
+	/* First part goes to the preallocated state ... */
+	prealloc->state = orig->state;
+	prealloc->start = orig->start;
+	prealloc->end = split - 1;
+	update_extent_state(prealloc);
+
+	/* ... and the original keeps the rest. */
+	orig->start = split;
+	update_extent_state(orig);
+
+	err = insert_cache_extent(&tree->state, &prealloc->cache_node);
+	BUG_ON(err);
+	return 0;
+}
+
+/*
+ * Clear 'bits' on a single extent state. A state left without any bit is
+ * removed from the tree and released; otherwise it is offered to
+ * merge_state().
+ *
+ * Returns the subset of 'bits' that was set before the call.
+ */
+static int clear_state_bit(struct extent_io_tree *tree,
+			   struct extent_state *state, int bits)
+{
+	int was_set = state->state & bits;
+
+	state->state &= ~bits;
+	if (state->state != 0) {
+		merge_state(tree, state);
+		return was_set;
+	}
+
+	remove_cache_extent(&tree->state, &state->cache_node);
+	btrfs_free_extent_state(state);
+	return was_set;
+}
+
+/*
+ * Clear 'bits' on the inclusive range [start, end] of the tree, splitting
+ * states that straddle either boundary so that nothing outside the range is
+ * touched.
+ *
+ * Returns the OR of all requested bits that were actually set somewhere in
+ * the range (0 if none were), or -ENOMEM when a state for splitting cannot
+ * be allocated. 'mask' is not used.
+ */
+int clear_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, int bits, gfp_t mask)
+{
+ struct extent_state *state;
+ struct extent_state *prealloc = NULL;
+ struct cache_extent *node;
+ u64 last_end;
+ int err;
+ int set = 0;
+
+again:
+ /* Keep one spare state ready in case a split is needed this round. */
+ if (!prealloc) {
+ prealloc = alloc_extent_state();
+ if (!prealloc)
+ return -ENOMEM;
+ }
+
+ /*
+ * this search will find the extents that end after
+ * our range starts
+ */
+ node = search_cache_extent(&tree->state, start);
+ if (!node)
+ goto out;
+ state = container_of(node, struct extent_state, cache_node);
+ if (state->start > end)
+ goto out;
+ /* Remembered now because 'state' may be freed by clear_state_bit(). */
+ last_end = state->end;
+
+ /*
+ * | ---- desired range ---- |
+ * | state | or
+ * | ------------- state -------------- |
+ *
+ * We need to split the extent we found, and may flip
+ * bits on second half.
+ *
+ * If the extent we found extends past our range, we
+ * just split and search again. It'll get split again
+ * the next time though.
+ *
+ * If the extent we found is inside our range, we clear
+ * the desired bit on it.
+ */
+ if (state->start < start) {
+ err = split_state(tree, state, prealloc, start);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ if (err)
+ goto out;
+ if (state->end <= end) {
+ set |= clear_state_bit(tree, state, bits);
+ /* Avoid wrapping 'start' around past the end of the address space. */
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
+ } else {
+ start = state->start;
+ }
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * We need to split the extent, and clear the bit
+ * on the first half
+ */
+ if (state->start <= end && state->end > end) {
+ err = split_state(tree, state, prealloc, end + 1);
+ BUG_ON(err == -EEXIST);
+
+ /* After the split 'prealloc' is the first half, ending at 'end'. */
+ set |= clear_state_bit(tree, prealloc, bits);
+ prealloc = NULL;
+ goto out;
+ }
+
+ /* The state lies completely inside the range. */
+ start = state->end + 1;
+ set |= clear_state_bit(tree, state, bits);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
+ goto search_again;
+out:
+ if (prealloc)
+ btrfs_free_extent_state(prealloc);
+ return set;
+
+search_again:
+ if (start > end)
+ goto out;
+ goto again;
+}
+
+/*
+ * Set 'bits' on the inclusive range [start, end] of the tree.
+ *
+ * Holes in the range get new states, states that straddle a range boundary
+ * are split so that nothing outside [start, end] is changed, and adjacent
+ * states with equal bits are merged again where merge_state() allows it.
+ *
+ * Returns 0 on success or -ENOMEM when a state cannot be allocated.
+ * 'mask' is not used.
+ */
+int set_extent_bits(struct extent_io_tree *tree, u64 start,
+		    u64 end, int bits, gfp_t mask)
+{
+	struct extent_state *state;
+	struct extent_state *prealloc = NULL;
+	struct cache_extent *node;
+	int err = 0;
+	u64 last_start;
+	u64 last_end;
+again:
+	/* Keep one spare state ready for an insert or a split this round. */
+	if (!prealloc) {
+		prealloc = alloc_extent_state();
+		if (!prealloc)
+			return -ENOMEM;
+	}
+
+	/*
+	 * this search will find the extents that end after
+	 * our range starts
+	 */
+	node = search_cache_extent(&tree->state, start);
+	if (!node) {
+		err = insert_state(tree, prealloc, start, end, bits);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		goto out;
+	}
+
+	state = container_of(node, struct extent_state, cache_node);
+	/* Remembered now because merge_state() may free 'state'. */
+	last_start = state->start;
+	last_end = state->end;
+
+	/*
+	 * | ---- desired range ---- |
+	 * | state |
+	 *
+	 * Just lock what we found and keep going
+	 */
+	if (state->start == start && state->end <= end) {
+		state->state |= bits;
+		merge_state(tree, state);
+		/* Avoid wrapping 'start' around past the end of the address space. */
+		if (last_end == (u64)-1)
+			goto out;
+		start = last_end + 1;
+		goto search_again;
+	}
+	/*
+	 *     | ---- desired range ---- |
+	 * | state |
+	 *   or
+	 * | ------------- state -------------- |
+	 *
+	 * We need to split the extent we found, and may flip bits on
+	 * second half.
+	 *
+	 * If the extent we found extends past our
+	 * range, we just split and search again.  It'll get split
+	 * again the next time though.
+	 *
+	 * If the extent we found is inside our range, we set the
+	 * desired bit on it.
+	 */
+	if (state->start < start) {
+		err = split_state(tree, state, prealloc, start);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		if (err)
+			goto out;
+		if (state->end <= end) {
+			state->state |= bits;
+			merge_state(tree, state);
+			if (last_end == (u64)-1)
+				goto out;
+			start = last_end + 1;
+		} else {
+			start = state->start;
+		}
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *     | state | or               | state |
+	 *
+	 * There's a hole, we need to insert something in it and
+	 * ignore the extent we found.
+	 */
+	if (state->start > start) {
+		u64 this_end;
+		if (end < last_start)
+			this_end = end;
+		else
+			this_end = last_start - 1;
+		err = insert_state(tree, prealloc, start, this_end,
+				   bits);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		if (err)
+			goto out;
+		start = this_end + 1;
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 * | ---------- state ---------- |
+	 *
+	 * The state starts where our range starts but extends past 'end'.
+	 * We need to split the extent, and set the bit on the first half.
+	 *
+	 * split_state() turns 'prealloc' into the first half [start, end]
+	 * and leaves 'state' as the part beyond 'end', so the bits belong on
+	 * 'prealloc'. Setting them on 'state' would flag the range after
+	 * 'end' and leave the requested range untouched.
+	 */
+	err = split_state(tree, state, prealloc, end + 1);
+	BUG_ON(err == -EEXIST);
+
+	prealloc->state |= bits;
+	merge_state(tree, prealloc);
+	prealloc = NULL;
+out:
+	if (prealloc)
+		btrfs_free_extent_state(prealloc);
+	return err;
+search_again:
+	if (start > end)
+		goto out;
+	goto again;
+}
+
+/* Set EXTENT_DIRTY on the range [@start, @end] via set_extent_bits(). */
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask)
+{
+	int ret;
+
+	ret = set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
+	return ret;
+}
+
+/* Clear EXTENT_DIRTY on the range [@start, @end] via clear_extent_bits(). */
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask)
+{
+	int ret;
+
+	ret = clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
+	return ret;
+}
+
+/*
+ * Find the first state, starting the search at @start, that ends at or
+ * after @start and has at least one of @bits set.
+ *
+ * On a match the state's bounds are stored in @start_ret/@end_ret and 0 is
+ * returned; if no state matches, 1 is returned and the outputs are left
+ * untouched.
+ */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+			  u64 *start_ret, u64 *end_ret, int bits)
+{
+	struct cache_extent *cur;
+	struct extent_state *es;
+
+	/* the search yields the extents that end after our range starts */
+	for (cur = search_cache_extent(&tree->state, start); cur;
+	     cur = next_cache_extent(cur)) {
+		es = container_of(cur, struct extent_state, cache_node);
+		if (es->end < start || !(es->state & bits))
+			continue;
+
+		*start_ret = es->start;
+		*end_ret = es->end;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Test whether any of @bits is set in the inclusive range [@start, @end].
+ *
+ * With @filled non-zero, return 1 only when the whole range is covered,
+ * without gaps, by states that each have one of @bits set.  With @filled
+ * zero, return 1 as soon as one state overlapping the range has one of
+ * @bits set.  Return 0 otherwise.
+ */
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   int bits, int filled)
+{
+	struct extent_state *state = NULL;
+	struct cache_extent *node;
+	int bitset = 0;
+
+	node = search_cache_extent(&tree->state, start);
+	while (node && start <= end) {
+		state = container_of(node, struct extent_state, cache_node);
+
+		/* a gap before this state means the range is not filled */
+		if (filled && state->start > start) {
+			bitset = 0;
+			break;
+		}
+		if (state->start > end)
+			break;
+		if (state->state & bits) {
+			bitset = 1;
+			if (!filled)
+				break;
+		} else if (filled) {
+			bitset = 0;
+			break;
+		}
+		/*
+		 * This state reaches the end of the address space, so the
+		 * rest of the range is covered by it.  Stop here: computing
+		 * state->end + 1 would wrap to 0 and the loop would then
+		 * wrongly report an unfilled range.
+		 */
+		if (state->end == (u64)-1)
+			break;
+		start = state->end + 1;
+		if (start > end)
+			break;
+		node = next_cache_extent(node);
+		if (!node) {
+			/* ran out of states before reaching @end */
+			if (filled)
+				bitset = 0;
+			break;
+		}
+	}
+	return bitset;
+}
+
+/*
+ * Store @private in the xprivate field of the state that starts exactly at
+ * @start.  Returns 0 on success or -ENOENT when no such state exists.
+ */
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+{
+	struct cache_extent *found = search_cache_extent(&tree->state, start);
+	struct extent_state *es;
+
+	if (!found)
+		return -ENOENT;
+
+	es = container_of(found, struct extent_state, cache_node);
+	if (es->start != start)
+		return -ENOENT;
+
+	es->xprivate = private;
+	return 0;
+}
+
+/*
+ * Load into @private the xprivate field of the state that starts exactly
+ * at @start.  Returns 0 on success or -ENOENT when no such state exists,
+ * in which case @private is not written.
+ */
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+{
+	struct cache_extent *found = search_cache_extent(&tree->state, start);
+	struct extent_state *es;
+
+	if (!found)
+		return -ENOENT;
+
+	es = container_of(found, struct extent_state, cache_node);
+	if (es->start != start)
+		return -ENOENT;
+
+	*private = es->xprivate;
+	return 0;
+}
+
+/*
+ * Allocate a zeroed extent buffer with @blocksize bytes of data and
+ * initialise its bookkeeping fields.
+ *
+ * The buffer starts with one reference, no flags, fd -1 and dev_bytenr
+ * (u64)-1.  It is not inserted into @tree's cache or LRU list; callers do
+ * that themselves.  @tree may be NULL (btrfs_clone_extent_buffer() passes
+ * NULL).
+ */
+static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
+						   u64 bytenr, u32 blocksize)
+{
+	struct extent_buffer *eb;
+
+	eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
+	if (!eb) {
+		BUG();
+		return NULL;
+	}
+
+	eb->start = bytenr;
+	eb->len = blocksize;
+	eb->refs = 1;
+	eb->flags = 0;
+	eb->tree = tree;
+	eb->fd = -1;
+	eb->dev_bytenr = (u64)-1;
+	eb->cache_node.start = bytenr;
+	eb->cache_node.size = blocksize;
+	/*
+	 * free_extent_buffer() unconditionally calls list_del_init() on
+	 * both list heads, including for buffers that were never linked on
+	 * an LRU list (clones), so both must be valid empty lists rather
+	 * than the NULL pointers calloc() leaves behind.
+	 */
+	INIT_LIST_HEAD(&eb->lru);
+	INIT_LIST_HEAD(&eb->recow);
+
+	return eb;
+}
+
+/*
+ * Create a private copy of @src: same start and length, data copied, not
+ * attached to any tree and flagged EXTENT_BUFFER_DUMMY.  Returns NULL if
+ * the allocation fails.
+ */
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
+{
+	struct extent_buffer *copy;
+
+	copy = __alloc_extent_buffer(NULL, src->start, src->len);
+	if (!copy)
+		return NULL;
+
+	copy_extent_buffer(copy, src, 0, 0, src->len);
+	copy->flags |= EXTENT_BUFFER_DUMMY;
+
+	return copy;
+}
+
+/*
+ * Drop one reference on @eb; NULL and ERR_PTR values are ignored.
+ *
+ * When the last reference goes away the buffer is unlinked from its lists,
+ * removed from the owning tree's cache (unless it is a DUMMY buffer) and
+ * freed.  Freeing a buffer that is still EXTENT_DIRTY is a BUG.
+ */
+void free_extent_buffer(struct extent_buffer *eb)
+{
+	struct extent_io_tree *owner;
+
+	if (!eb || IS_ERR(eb))
+		return;
+
+	eb->refs--;
+	BUG_ON(eb->refs < 0);
+	if (eb->refs != 0)
+		return;
+
+	owner = eb->tree;
+	BUG_ON(eb->flags & EXTENT_DIRTY);
+	list_del_init(&eb->lru);
+	list_del_init(&eb->recow);
+	if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
+		/* only non-dummy buffers are accounted in the tree cache */
+		BUG_ON(owner->cache_size < eb->len);
+		remove_cache_extent(&owner->cache, &eb->cache_node);
+		owner->cache_size -= eb->len;
+	}
+	free(eb);
+}
+
+/*
+ * Look up the cached buffer that covers exactly [@bytenr, @bytenr +
+ * @blocksize).  On a hit the buffer is moved to the tail of the tree's LRU
+ * list and returned with an extra reference; otherwise NULL is returned.
+ */
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+					 u64 bytenr, u32 blocksize)
+{
+	struct cache_extent *hit;
+	struct extent_buffer *eb;
+
+	hit = lookup_cache_extent(&tree->cache, bytenr, blocksize);
+	if (!hit || hit->start != bytenr || hit->size != blocksize)
+		return NULL;
+
+	eb = container_of(hit, struct extent_buffer, cache_node);
+	list_move_tail(&eb->lru, &tree->lru);
+	eb->refs++;
+	return eb;
+}
+
+/*
+ * Return the cached buffer that search_cache_extent() finds for @start,
+ * moved to the tail of the LRU list and with an extra reference, or NULL
+ * if the search finds nothing.
+ */
+struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
+					       u64 start)
+{
+	struct cache_extent *hit = search_cache_extent(&tree->cache, start);
+	struct extent_buffer *eb;
+
+	if (!hit)
+		return NULL;
+
+	eb = container_of(hit, struct extent_buffer, cache_node);
+	list_move_tail(&eb->lru, &tree->lru);
+	eb->refs++;
+	return eb;
+}
+
+/*
+ * Get the buffer for [@bytenr, @bytenr + @blocksize), creating it if
+ * needed.
+ *
+ * An exact cache hit is returned with an extra reference.  Otherwise a
+ * reference is dropped on any overlapping cached buffer, a new buffer is
+ * allocated, inserted into the cache and the LRU list, and returned.
+ * Returns NULL if the allocation or the cache insertion fails.
+ */
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+					  u64 bytenr, u32 blocksize)
+{
+	struct extent_buffer *eb;
+	struct cache_extent *hit;
+
+	hit = lookup_cache_extent(&tree->cache, bytenr, blocksize);
+	if (hit && hit->start == bytenr && hit->size == blocksize) {
+		eb = container_of(hit, struct extent_buffer, cache_node);
+		list_move_tail(&eb->lru, &tree->lru);
+		eb->refs++;
+		return eb;
+	}
+
+	if (hit) {
+		/* overlapping buffer with different bounds: drop a reference */
+		eb = container_of(hit, struct extent_buffer, cache_node);
+		free_extent_buffer(eb);
+	}
+
+	eb = __alloc_extent_buffer(tree, bytenr, blocksize);
+	if (!eb)
+		return NULL;
+
+	if (insert_cache_extent(&tree->cache, &eb->cache_node)) {
+		free(eb);
+		return NULL;
+	}
+
+	list_add_tail(&eb->lru, &tree->lru);
+	tree->cache_size += blocksize;
+	return eb;
+}
+
+/*
+ * Read @len bytes from eb->fd at eb->dev_bytenr into eb->data + @offset.
+ * Returns 0 on success, -errno if pread() fails, or -EIO on a short read.
+ */
+int read_extent_from_disk(struct extent_buffer *eb,
+			  unsigned long offset, unsigned long len)
+{
+	int nread;
+
+	nread = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
+	if (nread < 0)
+		return -errno;
+	if (nread != len)
+		return -EIO;
+	return 0;
+}
+
+/*
+ * Write the whole buffer (eb->len bytes of eb->data) to eb->fd at
+ * eb->dev_bytenr.
+ * Returns 0 on success, -errno if pwrite() fails, or -EIO on a short
+ * write.
+ */
+int write_extent_to_disk(struct extent_buffer *eb)
+{
+	int ret;
+
+	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
+	if (ret < 0) {
+		/* report the real cause, as read_extent_from_disk() does */
+		ret = -errno;
+		goto out;
+	}
+	if (ret != eb->len) {
+		ret = -EIO;
+		goto out;
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * Read @bytes bytes into @buf starting at @offset.
+ *
+ * @offset is translated with btrfs_map_block() (READ, copy @mirror) one
+ * mapped chunk at a time; each chunk is read from the first stripe the
+ * mapping returns.
+ *
+ * Returns 0 on success, -EIO if a block cannot be mapped, the device has no
+ * usable fd or a read comes up short, or -errno if pread() fails.
+ */
+int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
+			u64 bytes, int mirror)
+{
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	u64 bytes_left = bytes;
+	u64 read_len;
+	u64 total_read = 0;
+	int ret;
+
+	while (bytes_left) {
+		read_len = bytes_left;
+		ret = btrfs_map_block(&info->mapping_tree, READ, offset,
+				      &read_len, &multi, mirror, NULL);
+		if (ret) {
+			fprintf(stderr, "Couldn't map the block %Lu\n",
+				offset);
+			return -EIO;
+		}
+		device = multi->stripes[0].dev;
+
+		read_len = min(bytes_left, read_len);
+		if (device->fd <= 0) {
+			kfree(multi);
+			return -EIO;
+		}
+
+		ret = pread(device->fd, buf + total_read, read_len,
+			    multi->stripes[0].physical);
+		/* capture errno before kfree()/fprintf() can overwrite it */
+		if (ret < 0)
+			ret = -errno;
+		kfree(multi);
+		multi = NULL;
+		if (ret < 0) {
+			fprintf(stderr, "Error reading %Lu, %d\n", offset,
+				ret);
+			return ret;
+		}
+		if (ret != read_len) {
+			fprintf(stderr, "Short read for %Lu, read %d, "
+				"read_len %Lu\n", offset, ret, read_len);
+			return -EIO;
+		}
+
+		bytes_left -= read_len;
+		offset += read_len;
+		total_read += read_len;
+	}
+
+	return 0;
+}
+
+/*
+ * Write @bytes bytes from @buf starting at @offset.
+ *
+ * @offset is translated with btrfs_map_block() (WRITE) one mapped chunk at
+ * a time.  When the mapping returns a raid_map, the chunk (capped at the
+ * tree root's leafsize) is copied into a temporary extent buffer and
+ * handed to write_raid56_with_parity(); otherwise the same data is written
+ * to every stripe of the mapping.
+ *
+ * Returns 0 on success, -EIO if a block cannot be mapped, a device has no
+ * usable fd or a write comes up short, or -errno if pwrite() fails.
+ */
+int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
+		       u64 bytes, int mirror)
+{
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	u64 bytes_left = bytes;
+	u64 this_len;
+	u64 total_write = 0;
+	u64 *raid_map = NULL;
+	u64 dev_bytenr;
+	int dev_nr;
+	int ret = 0;
+
+	while (bytes_left > 0) {
+		this_len = bytes_left;
+		dev_nr = 0;
+
+		ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
+				      &this_len, &multi, mirror, &raid_map);
+		if (ret) {
+			fprintf(stderr, "Couldn't map the block %Lu\n",
+				offset);
+			return -EIO;
+		}
+
+		if (raid_map) {
+			struct extent_buffer *eb;
+			u64 stripe_len = this_len;
+
+			this_len = min(this_len, bytes_left);
+			this_len = min(this_len, (u64)info->tree_root->leafsize);
+
+			eb = malloc(sizeof(struct extent_buffer) + this_len);
+			BUG_ON(!eb);
+
+			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
+			eb->start = offset;
+			eb->len = this_len;
+
+			memcpy(eb->data, buf + total_write, this_len);
+			ret = write_raid56_with_parity(info, eb, multi,
+						       stripe_len, raid_map);
+			BUG_ON(ret);
+
+			free(eb);
+			kfree(raid_map);
+			raid_map = NULL;
+		} else while (dev_nr < multi->num_stripes) {
+			device = multi->stripes[dev_nr].dev;
+			if (device->fd <= 0) {
+				kfree(multi);
+				return -EIO;
+			}
+
+			dev_bytenr = multi->stripes[dev_nr].physical;
+			this_len = min(this_len, bytes_left);
+			dev_nr++;
+
+			ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
+			if (ret != this_len) {
+				if (ret < 0) {
+					/*
+					 * Save errno before fprintf() can
+					 * clobber it, and return it negated
+					 * like every other error path.
+					 */
+					ret = -errno;
+					fprintf(stderr, "Error writing to "
+						"device %d\n", -ret);
+					kfree(multi);
+					return ret;
+				} else {
+					fprintf(stderr, "Short write\n");
+					kfree(multi);
+					return -EIO;
+				}
+			}
+		}
+
+		BUG_ON(bytes_left < this_len);
+
+		bytes_left -= this_len;
+		offset += this_len;
+		total_write += this_len;
+
+		kfree(multi);
+		multi = NULL;
+	}
+	return 0;
+}
+
+/*
+ * Mark @eb dirty.  On the clean-to-dirty transition the buffer's byte range
+ * is also marked EXTENT_DIRTY in its tree and an extra reference is taken.
+ * Always returns 0.
+ */
+int set_extent_buffer_dirty(struct extent_buffer *eb)
+{
+	struct extent_io_tree *tree = eb->tree;
+
+	if (eb->flags & EXTENT_DIRTY)
+		return 0;
+
+	eb->flags |= EXTENT_DIRTY;
+	set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
+	extent_buffer_get(eb);
+	return 0;
+}
+
+/*
+ * Mark @eb clean.  On the dirty-to-clean transition the buffer's byte range
+ * has EXTENT_DIRTY cleared in its tree and one reference is dropped.
+ * Always returns 0.
+ */
+int clear_extent_buffer_dirty(struct extent_buffer *eb)
+{
+	struct extent_io_tree *tree = eb->tree;
+
+	if (!(eb->flags & EXTENT_DIRTY))
+		return 0;
+
+	eb->flags &= ~EXTENT_DIRTY;
+	clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
+	free_extent_buffer(eb);
+	return 0;
+}
+
+/* memcmp() @len bytes of @eb's data at byte offset @start against @ptrv. */
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+			 unsigned long start, unsigned long len)
+{
+	const char *in_eb = eb->data + start;
+
+	return memcmp(in_eb, ptrv, len);
+}
+
+/* Copy @len bytes of @eb's data, starting at byte offset @start, to @dst. */
+void read_extent_buffer(struct extent_buffer *eb, void *dst,
+			unsigned long start, unsigned long len)
+{
+	const char *from = eb->data + start;
+
+	memcpy(dst, from, len);
+}
+
+/* Copy @len bytes from @src into @eb's data at byte offset @start. */
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+			 unsigned long start, unsigned long len)
+{
+	char *to = eb->data + start;
+
+	memcpy(to, src, len);
+}
+
+/*
+ * Copy @len bytes from @src's data at @src_offset into @dst's data at
+ * @dst_offset.  Uses memcpy().
+ */
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+			unsigned long dst_offset, unsigned long src_offset,
+			unsigned long len)
+{
+	char *to = dst->data + dst_offset;
+	const char *from = src->data + src_offset;
+
+	memcpy(to, from, len);
+}
+
+/*
+ * Move @len bytes within @dst's data from @src_offset to @dst_offset using
+ * memmove().
+ */
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len)
+{
+	char *to = dst->data + dst_offset;
+	const char *from = dst->data + src_offset;
+
+	memmove(to, from, len);
+}
+
+/* Fill @len bytes of @eb's data, starting at byte offset @start, with @c. */
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+			  unsigned long start, unsigned long len)
+{
+	char *region = eb->data + start;
+
+	memset(region, c, len);
+}
+
+/*
+ * Test bit @nr of the bitmap stored in @eb's data at byte offset @start.
+ * Returns 1 if the bit is set, 0 otherwise.
+ *
+ * Bits are numbered byte by byte, least significant bit first (bit 0 is
+ * the low bit of the byte at @start).  The buffer is read one byte at a
+ * time rather than through a cast to unsigned long *.  That keeps the
+ * access aligned for any @start, never reads past the byte that holds the
+ * bit, and gives the same numbering on big- and little-endian hosts.
+ * NOTE(review): on little-endian hosts this should match the previous
+ * test_bit() based result - confirm against test_bit() in kerncompat.h.
+ */
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+			   unsigned long nr)
+{
+	const unsigned char *bitmap = (const unsigned char *)eb->data + start;
+
+	return (bitmap[nr / 8] >> (nr % 8)) & 1;
+}
diff --git a/extent_io.h b/extent_io.h
new file mode 100644
index 00000000..a9a73535
--- /dev/null
+++ b/extent_io.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_EXTENT_IO_H__
+#define __BTRFS_EXTENT_IO_H__
+
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#include "extent-cache.h"
+#include "list.h"
+#else
+#include <btrfs/kerncompat.h>
+#include <btrfs/extent-cache.h>
+#include <btrfs/list.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+/*
+ * Bit flags.  They are used both as range bits in extent_state::state
+ * (set_extent_dirty() passes EXTENT_DIRTY to set_extent_bits()) and as
+ * extent_buffer::flags values (EXTENT_DIRTY, EXTENT_UPTODATE and
+ * EXTENT_BUFFER_DUMMY are tested on buffers).
+ */
+#define EXTENT_DIRTY 1
+#define EXTENT_WRITEBACK (1 << 1)
+#define EXTENT_UPTODATE (1 << 2)
+#define EXTENT_LOCKED (1 << 3)
+#define EXTENT_NEW (1 << 4)
+#define EXTENT_DELALLOC (1 << 5)
+#define EXTENT_DEFRAG (1 << 6)
+#define EXTENT_DEFRAG_DONE (1 << 7)
+#define EXTENT_BUFFER_FILLED (1 << 8)
+#define EXTENT_CSUM (1 << 9)
+#define EXTENT_BAD_TRANSID (1 << 10)
+#define EXTENT_BUFFER_DUMMY (1 << 11)
+/* combined mask of the LOCKED and WRITEBACK bits */
+#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+
+/* block group flags are aliases of the extent bit values above */
+#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
+#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
+#define BLOCK_GROUP_SYSTEM EXTENT_NEW
+
+#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
+
+struct btrfs_fs_info;
+
+/*
+ * Per-tree bookkeeping: range state bits plus a cache of extent buffers.
+ */
+struct extent_io_tree {
+ /* extent_state records, searched by set_extent_bits() and friends */
+ struct cache_tree state;
+ /* cached extent_buffers, linked through extent_buffer::cache_node */
+ struct cache_tree cache;
+ /* cached buffers linked via extent_buffer::lru; lookups move hits to the tail */
+ struct list_head lru;
+ /* grows by blocksize on cache insert, shrinks by eb->len on free */
+ u64 cache_size;
+};
+
+/*
+ * One contiguous byte range of an extent_io_tree together with the bits
+ * set on it.
+ */
+struct extent_state {
+ /* links the state into extent_io_tree::state */
+ struct cache_extent cache_node;
+ /* first byte of the range */
+ u64 start;
+ /* last byte of the range (set_extent_bits() treats it as inclusive) */
+ u64 end;
+ /* presumably a reference count; its users are not visible in this header */
+ int refs;
+ /* mask of EXTENT_* bits set on the range */
+ unsigned long state;
+ /* opaque value stored/loaded by set_state_private()/get_state_private() */
+ u64 xprivate;
+};
+
+/*
+ * An in-memory block: bookkeeping header followed by len bytes of data.
+ */
+struct extent_buffer {
+ /* links the buffer into extent_io_tree::cache; start/size mirror start/len */
+ struct cache_extent cache_node;
+ /* bytenr the buffer was allocated for */
+ u64 start;
+ /* file offset used with fd for pread()/pwrite(); (u64)-1 after allocation */
+ u64 dev_bytenr;
+ /* number of bytes in data[] */
+ u32 len;
+ /* owning tree; NULL for buffers made by btrfs_clone_extent_buffer() */
+ struct extent_io_tree *tree;
+ /* entry in extent_io_tree::lru */
+ struct list_head lru;
+ /* list entry; the list it joins is not visible in this header */
+ struct list_head recow;
+ /* reference count; the buffer is freed when it drops to 0 */
+ int refs;
+ /* EXTENT_* bits (DIRTY, UPTODATE, BUFFER_DUMMY, ...) */
+ int flags;
+ /* descriptor used for disk I/O; -1 after allocation */
+ int fd;
+ /* block contents, len bytes */
+ char data[];
+};
+
+/* Take an additional reference on @eb. */
+static inline void extent_buffer_get(struct extent_buffer *eb)
+{
+	eb->refs += 1;
+}
+
+/* tree setup and teardown */
+void extent_io_tree_init(struct extent_io_tree *tree);
+void extent_io_tree_cleanup(struct extent_io_tree *tree);
+/*
+ * Range bit manipulation.  Ranges are given as [start, end] with an
+ * inclusive end; the dirty helpers are wrappers that pass EXTENT_DIRTY.
+ */
+int set_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, int bits, gfp_t mask);
+int clear_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, int bits, gfp_t mask);
+/* returns 0 and fills start_ret/end_ret on a match, 1 if nothing matches */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, int bits);
+/* returns 1 if bits are set in the range (the whole range when filled), else 0 */
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int filled);
+int set_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask);
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask);
+/* Set EXTENT_UPTODATE in @eb's flags.  Always returns 0. */
+static inline int set_extent_buffer_uptodate(struct extent_buffer *eb)
+{
+	eb->flags = eb->flags | EXTENT_UPTODATE;
+	return 0;
+}
+
+/*
+ * Clear EXTENT_UPTODATE in @eb's flags.  @tree is not used.  Always
+ * returns 0.
+ */
+static inline int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
+					       struct extent_buffer *eb)
+{
+	eb->flags = eb->flags & ~EXTENT_UPTODATE;
+	return 0;
+}
+
+/*
+ * Return 1 if @eb is a valid buffer with EXTENT_UPTODATE set, 0 otherwise
+ * (including for NULL and ERR_PTR values).
+ */
+static inline int extent_buffer_uptodate(struct extent_buffer *eb)
+{
+	if (!eb || IS_ERR(eb))
+		return 0;
+
+	return (eb->flags & EXTENT_UPTODATE) ? 1 : 0;
+}
+
+/* per-state private value; both return -ENOENT if no state starts at start */
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 xprivate);
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *xprivate);
+/*
+ * Extent buffer lookup and lifetime.  The find/alloc functions return a
+ * buffer with an extra reference that is released with
+ * free_extent_buffer().
+ */
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 bytenr, u32 blocksize);
+struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
+ u64 start);
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+ u64 bytenr, u32 blocksize);
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
+void free_extent_buffer(struct extent_buffer *eb);
+/* disk I/O on a single buffer through eb->fd at eb->dev_bytenr */
+int read_extent_from_disk(struct extent_buffer *eb,
+ unsigned long offset, unsigned long len);
+int write_extent_to_disk(struct extent_buffer *eb);
+/* accessors for the buffer's data; start/offset arguments are byte offsets */
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len);
+void read_extent_buffer(struct extent_buffer *eb, void *dst,
+ unsigned long start, unsigned long len);
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+ unsigned long start, unsigned long len);
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+ unsigned long dst_offset, unsigned long src_offset,
+ unsigned long len);
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+ unsigned long src_offset, unsigned long len);
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+ unsigned long start, unsigned long len);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ unsigned long nr);
+/* dirty tracking; both always return 0 */
+int set_extent_buffer_dirty(struct extent_buffer *eb);
+int clear_extent_buffer_dirty(struct extent_buffer *eb);
+/* I/O on a byte range that is mapped to devices with btrfs_map_block() */
+int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
+ u64 bytes, int mirror);
+int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
+ u64 bytes, int mirror);
+#endif
diff --git a/file-item.c b/file-item.c
new file mode 100644
index 00000000..b46d7f10
--- /dev/null
+++ b/file-item.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "crc32c.h"
+
+/*
+ * Upper bound on the number of checksums of @size bytes kept in one csum
+ * item: the leaf data size of @r less two item headers, divided by @size,
+ * minus one.  @size is parenthesised so that an expression argument
+ * divides as a whole.
+ */
+#define MAX_CSUM_ITEMS(r, size) \
+	((((BTRFS_LEAF_DATA_SIZE(r) - \
+	    sizeof(struct btrfs_item) * 2) / \
+	   (size)) - 1))
+/*
+ * Insert a regular (BTRFS_FILE_EXTENT_REG) file extent item for inode
+ * @objectid at file position @pos.
+ *
+ * @offset is recorded as the extent's disk bytenr, @disk_num_bytes as its
+ * on-disk size, and @num_bytes as both num_bytes and ram_bytes.  The
+ * item's own offset, compression, encryption and other_encoding fields
+ * are set to 0 and the generation to the transaction's transid.
+ *
+ * Returns 0 on success, -ENOMEM if no path can be allocated, or the
+ * negative error from btrfs_insert_empty_item().
+ */
+int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     u64 objectid, u64 pos, u64 offset,
+			     u64 disk_num_bytes, u64 num_bytes)
+{
+	int ret = 0;
+	struct btrfs_file_extent_item *item;
+	struct btrfs_key file_key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+
+	path = btrfs_alloc_path();
+	/* report allocation failure like btrfs_insert_inline_extent() does */
+	if (!path)
+		return -ENOMEM;
+	file_key.objectid = objectid;
+	file_key.offset = pos;
+	btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
+
+	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
+				      sizeof(*item));
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret);
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0],
+			      struct btrfs_file_extent_item);
+	btrfs_set_file_extent_disk_bytenr(leaf, item, offset);
+	btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
+	btrfs_set_file_extent_offset(leaf, item, 0);
+	btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
+	btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes);
+	btrfs_set_file_extent_generation(leaf, item, trans->transid);
+	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+	btrfs_set_file_extent_compression(leaf, item, 0);
+	btrfs_set_file_extent_encryption(leaf, item, 0);
+	btrfs_set_file_extent_other_encoding(leaf, item, 0);
+	btrfs_mark_buffer_dirty(leaf);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Insert an inline (BTRFS_FILE_EXTENT_INLINE) file extent item for inode
+ * @objectid at file offset @offset, holding @size bytes copied from
+ * @buffer.  Compression, encryption and other_encoding are set to 0.
+ *
+ * Returns 0 on success, -ENOMEM if no path can be allocated, or the
+ * non-zero result of btrfs_insert_empty_item().
+ */
+int btrfs_insert_inline_extent(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root, u64 objectid,
+			       u64 offset, char *buffer, size_t size)
+{
+	struct btrfs_file_extent_item *ei;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	unsigned long ptr;
+	u32 datasize;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = objectid;
+	key.offset = offset;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+
+	datasize = btrfs_file_extent_calc_inline_size(size);
+	ret = btrfs_insert_empty_item(trans, root, path, &key, datasize);
+	if (ret)
+		goto done;
+
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
+	btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
+	btrfs_set_file_extent_ram_bytes(leaf, ei, size);
+	btrfs_set_file_extent_compression(leaf, ei, 0);
+	btrfs_set_file_extent_encryption(leaf, ei, 0);
+	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
+
+	/* the payload goes right at the start of the inline data area */
+	ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset;
+	write_extent_buffer(leaf, buffer, ptr, size);
+	btrfs_mark_buffer_dirty(leaf);
+done:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Locate the checksum slot for @bytenr in @root.
+ *
+ * On success @path points at the csum item and the returned pointer is the
+ * position of the checksum for @bytenr inside it.  On failure an ERR_PTR is
+ * returned: the negative search error, -ENOENT when no csum item precedes
+ * @bytenr, or -EFBIG when the preceding csum item ends before @bytenr.
+ */
+static struct btrfs_csum_item *
+btrfs_lookup_csum(struct btrfs_trans_handle *trans,
+		  struct btrfs_root *root,
+		  struct btrfs_path *path,
+		  u64 bytenr, int cow)
+{
+	struct btrfs_csum_item *item;
+	struct extent_buffer *leaf;
+	struct btrfs_key found_key;
+	struct btrfs_key file_key;
+	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+	u64 csum_offset = 0;
+	int csums_in_item;
+	int ret;
+
+	file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+	file_key.offset = bytenr;
+	btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
+
+	ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	leaf = path->nodes[0];
+	if (ret > 0) {
+		/* no exact key: the csum may live in the previous item */
+		if (path->slots[0] == 0)
+			return ERR_PTR(-ENOENT);
+		path->slots[0]--;
+
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
+			return ERR_PTR(-ENOENT);
+
+		csum_offset = (bytenr - found_key.offset) / root->sectorsize;
+		csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
+		csums_in_item /= csum_size;
+
+		if (csum_offset >= csums_in_item)
+			return ERR_PTR(-EFBIG);
+	}
+
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
+	return (struct btrfs_csum_item *)((unsigned char *)item +
+					  csum_offset * csum_size);
+}
+
+/*
+ * Checksum @len bytes at @data and store the result in @root as the
+ * checksum for @bytenr.
+ *
+ * The checksum goes into an existing csum item when one already covers
+ * @bytenr, into the preceding item extended by one checksum when @bytenr
+ * directly follows it, and into a newly inserted item otherwise.  When the
+ * next csum item is known, a new item is sized to reach up to
+ * min(@alloc_end, start of the next csum item), capped at MAX_CSUM_ITEMS.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 alloc_end,
+ u64 bytenr, char *data, size_t len)
+{
+ int ret = 0;
+ struct btrfs_key file_key;
+ struct btrfs_key found_key;
+ u64 next_offset = (u64)-1;
+ int found_next = 0;
+ struct btrfs_path *path;
+ struct btrfs_csum_item *item;
+ struct extent_buffer *leaf = NULL;
+ u64 csum_offset;
+ u32 csum_result = ~(u32)0;
+ u32 nritems;
+ u32 ins_size;
+ u16 csum_size =
+ btrfs_super_csum_size(root->fs_info->super_copy);
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+
+ file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+ file_key.offset = bytenr;
+ file_key.type = BTRFS_EXTENT_CSUM_KEY;
+
+ /* fast path: a slot for this bytenr already exists */
+ item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
+ if (!IS_ERR(item)) {
+ leaf = path->nodes[0];
+ ret = 0;
+ goto found;
+ }
+ ret = PTR_ERR(item);
+ if (ret == -EFBIG) {
+ u32 item_size;
+ /* we found one, but it isn't big enough yet */
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ if ((item_size / csum_size) >= MAX_CSUM_ITEMS(root, csum_size)) {
+ /* already at max size, make a new one */
+ goto insert;
+ }
+ } else {
+ int slot = path->slots[0] + 1;
+ /* we didn't find a csum item, insert one */
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ if (path->slots[0] >= nritems - 1) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret == 1)
+ found_next = 1;
+ if (ret != 0)
+ goto insert;
+ slot = 0;
+ }
+ /* peek at the following item to learn where the next csum starts */
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+ if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+ found_key.type != BTRFS_EXTENT_CSUM_KEY) {
+ found_next = 1;
+ goto insert;
+ }
+ next_offset = found_key.offset;
+ found_next = 1;
+ goto insert;
+ }
+
+ /*
+ * at this point, we know the tree has an item, but it isn't big
+ * enough yet to put our csum in. Grow it
+ */
+ btrfs_release_path(path);
+ /* search again, this time reserving csum_size bytes in the leaf */
+ ret = btrfs_search_slot(trans, root, &file_key, path,
+ csum_size, 1);
+ if (ret < 0)
+ goto fail;
+ if (ret == 0) {
+ BUG();
+ }
+ if (path->slots[0] == 0) {
+ goto insert;
+ }
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ csum_offset = (file_key.offset - found_key.offset) / root->sectorsize;
+ if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+ found_key.type != BTRFS_EXTENT_CSUM_KEY ||
+ csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
+ goto insert;
+ }
+ if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
+ csum_size) {
+ u32 diff = (csum_offset + 1) * csum_size;
+ diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
+ /* only extend when exactly one more checksum is needed */
+ if (diff != csum_size)
+ goto insert;
+ ret = btrfs_extend_item(trans, root, path, diff);
+ BUG_ON(ret);
+ goto csum;
+ }
+
+insert:
+ btrfs_release_path(path);
+ csum_offset = 0;
+ if (found_next) {
+ /* size the new item in whole sectors, at least one checksum */
+ u64 tmp = min(alloc_end, next_offset);
+ tmp -= file_key.offset;
+ tmp /= root->sectorsize;
+ tmp = max((u64)1, tmp);
+ tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
+ ins_size = csum_size * tmp;
+ } else {
+ ins_size = csum_size;
+ }
+ ret = btrfs_insert_empty_item(trans, root, path, &file_key,
+ ins_size);
+ if (ret < 0)
+ goto fail;
+ if (ret != 0) {
+ WARN_ON(1);
+ goto fail;
+ }
+csum:
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
+ ret = 0;
+ item = (struct btrfs_csum_item *)((unsigned char *)item +
+ csum_offset * csum_size);
+found:
+ /* item now addresses the slot for bytenr inside leaf */
+ csum_result = btrfs_csum_data(root, data, csum_result, len);
+ btrfs_csum_final(csum_result, (char *)&csum_result);
+ if (csum_result == 0) {
+ printk("csum result is 0 for block %llu\n",
+ (unsigned long long)bytenr);
+ }
+
+ write_extent_buffer(leaf, &csum_result, (unsigned long)item,
+ csum_size);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+fail:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Helper for csum removal.  @key must describe the csum item @path points
+ * at, and that item must overlap the byte range [@bytenr, @bytenr + @len)
+ * on exactly one side: the item is neither entirely inside the range nor
+ * does it entirely contain it.  Any other layout is a BUG.
+ *
+ * Calls btrfs_truncate_item() with arguments that match the overlap and
+ * fixes up the item key when the front of the item is removed.  Returns 0.
+ */
+static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      struct btrfs_key *key,
+				      u64 bytenr, u64 len)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+	u32 blocksize = root->sectorsize;
+	u64 end_byte = bytenr + len;
+	u64 csum_end;
+	u32 new_size;
+	int ret;
+
+	/* first byte past the range this csum item covers */
+	csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
+	csum_end *= root->sectorsize;
+	csum_end += key->offset;
+
+	if (key->offset < bytenr && csum_end <= end_byte) {
+		/*
+		 *         [ bytenr - len ]
+		 *         [   ]
+		 *   [csum     ]
+		 *   A simple truncate off the end of the item
+		 */
+		new_size = (bytenr - key->offset) / blocksize;
+		new_size *= csum_size;
+		ret = btrfs_truncate_item(trans, root, path, new_size, 1);
+		BUG_ON(ret);
+		return 0;
+	}
+
+	if (key->offset >= bytenr && csum_end > end_byte &&
+	    end_byte > key->offset) {
+		/*
+		 *         [ bytenr - len ]
+		 *                 [ ]
+		 *                 [csum     ]
+		 * we need to truncate from the beginning of the csum
+		 */
+		new_size = (csum_end - end_byte) / blocksize;
+		new_size *= csum_size;
+
+		ret = btrfs_truncate_item(trans, root, path, new_size, 0);
+		BUG_ON(ret);
+
+		key->offset = end_byte;
+		ret = btrfs_set_item_key_safe(root, path, key);
+		BUG_ON(ret);
+		return 0;
+	}
+
+	BUG();
+	return 0;
+}
+
+/*
+ * Delete the checksums for the byte range [@bytenr, @bytenr + @len) from
+ * the csum tree of @root's filesystem.
+ *
+ * Csum items are visited from the end of the range backwards.  Items
+ * entirely inside the range are deleted, items overlapping one edge are
+ * truncated, and an item that contains the whole range is split in place
+ * and handled on the next pass.
+ *
+ * Returns 0 on success, -ENOMEM if no path can be allocated, or the
+ * negative error from btrfs_search_slot().
+ */
+int btrfs_del_csums(struct btrfs_trans_handle *trans,
+		    struct btrfs_root *root, u64 bytenr, u64 len)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	u64 end_byte = bytenr + len;
+	u64 csum_end;
+	struct extent_buffer *leaf;
+	int ret;
+	u16 csum_size =
+		btrfs_super_csum_size(root->fs_info->super_copy);
+	int blocksize = root->sectorsize;
+
+	root = root->fs_info->csum_root;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	while (1) {
+		key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+		key.offset = end_byte - 1;
+		key.type = BTRFS_EXTENT_CSUM_KEY;
+
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		/*
+		 * A failed search leaves no usable leaf in the path; bail
+		 * out and report the error instead of dereferencing it.
+		 */
+		if (ret < 0)
+			goto out;
+		if (ret > 0) {
+			if (path->slots[0] == 0)
+				goto out;
+			path->slots[0]--;
+		}
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+		    key.type != BTRFS_EXTENT_CSUM_KEY) {
+			break;
+		}
+
+		if (key.offset >= end_byte)
+			break;
+
+		csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
+		csum_end *= blocksize;
+		csum_end += key.offset;
+
+		/* this csum ends before we start, we're done */
+		if (csum_end <= bytenr)
+			break;
+
+		/* delete the entire item, it is inside our range */
+		if (key.offset >= bytenr && csum_end <= end_byte) {
+			ret = btrfs_del_item(trans, root, path);
+			BUG_ON(ret);
+		} else if (key.offset < bytenr && csum_end > end_byte) {
+			unsigned long offset;
+			unsigned long shift_len;
+			unsigned long item_offset;
+			/*
+			 *        [ bytenr - len ]
+			 *     [csum                ]
+			 *
+			 * Our bytes are in the middle of the csum,
+			 * we need to split this item and insert a new one.
+			 *
+			 * But we can't drop the path because the
+			 * csum could change, get removed, extended etc.
+			 *
+			 * The trick here is the max size of a csum item leaves
+			 * enough room in the tree block for a single
+			 * item header.  So, we split the item in place,
+			 * adding a new header pointing to the existing
+			 * bytes.  Then we loop around again and we have
+			 * a nicely formed csum item that we can neatly
+			 * truncate.
+			 */
+			offset = (bytenr - key.offset) / blocksize;
+			offset *= csum_size;
+
+			shift_len = (len / blocksize) * csum_size;
+
+			item_offset = btrfs_item_ptr_offset(leaf,
+							    path->slots[0]);
+
+			memset_extent_buffer(leaf, 0, item_offset + offset,
+					     shift_len);
+			key.offset = bytenr;
+
+			/*
+			 * btrfs_split_item returns -EAGAIN when the
+			 * item changed size or key
+			 */
+			ret = btrfs_split_item(trans, root, path, &key, offset);
+			BUG_ON(ret && ret != -EAGAIN);
+
+			key.offset = end_byte - 1;
+		} else {
+			ret = truncate_one_csum(trans, root, path,
+						&key, bytenr, len);
+			BUG_ON(ret);
+		}
+		btrfs_release_path(path);
+	}
+out:
+	btrfs_free_path(path);
+	/*
+	 * Every exit leaves ret holding the last search result; a positive
+	 * value only means "no exact match" and is not an error.
+	 */
+	return ret < 0 ? ret : 0;
+}
diff --git a/file.c b/file.c
new file mode 100644
index 00000000..0e9253ee
--- /dev/null
+++ b/file.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2014 Fujitsu. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <sys/stat.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "kerncompat.h"
+
+/*
+ * Get the first file extent of inode @ino that covers (part of) the range
+ * [@offset, @offset + @len).
+ *
+ * Unlike the kernel, which uses extent_map to handle holes even when no-holes
+ * is enabled, progs don't have such infrastructure, so the caller must take
+ * extra care for no-holes.
+ *
+ * @trans and @ins_len are handed to btrfs_search_slot() unchanged; its last
+ * argument is set to 1 whenever @ins_len is non-zero.
+ *
+ * Return 0 if found, and @path points to the file extent.
+ * Return >0 if not found, and @path points to the insert position.
+ * Return <0 on error.
+ */
+int btrfs_get_extent(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_path *path,
+		     u64 ino, u64 offset, u64 len, int ins_len)
+{
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_file_extent_item *fi_item;
+	u64 end = 0;
+	int ret = 0;
+	int not_found = 1;
+
+	key.objectid = ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = offset;
+
+	ret = btrfs_search_slot(trans, root, &key, path, ins_len,
+				ins_len ? 1 : 0);
+	if (ret < 0)
+		goto out;
+	if (ret == 0) {
+		/*
+		 * Exact key match: a file extent item starts right at
+		 * @offset, so report it as found instead of falling through
+		 * to "out" with not_found still set.
+		 */
+		not_found = 0;
+		goto out;
+	}
+
+	/* No exact match, check the previous file extent */
+	ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY);
+	if (ret < 0)
+		goto out;
+	if (ret > 0)
+		goto check_next;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+	if (found_key.objectid != ino ||
+	    found_key.type != BTRFS_EXTENT_DATA_KEY)
+		goto check_next;
+
+	fi_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				 struct btrfs_file_extent_item);
+	end = found_key.offset +
+	      btrfs_file_extent_ram_bytes(path->nodes[0], fi_item);
+	/*
+	 * existing file extent
+	 * |--------|	  |----|
+	 *      |-------|
+	 *      offset + len
+	 * OR
+	 * |---------------|
+	 *      |-------|
+	 */
+	if (end > offset) {
+		not_found = 0;
+		goto out;
+	}
+check_next:
+	ret = btrfs_next_item(root, path);
+	if (ret)
+		goto out;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+	if (found_key.objectid != ino ||
+	    found_key.type != BTRFS_EXTENT_DATA_KEY) {
+		ret = 1;
+		goto out;
+	}
+	if (found_key.offset < offset + len)
+		/*
+		 * existing file extent
+		 * |---|	|------|
+		 *	|-------|
+		 *	offset + len
+		 */
+		not_found = 0;
+	else
+		/*
+		 * existing file extent
+		 * |----|		|----|
+		 *		|----|
+		 *		offset + len
+		 */
+		not_found = 1;
+
+	/*
+	 * To keep the search behavior consistent with search_slot(),
+	 * we need to go back to the prev leaf's nritem slot if
+	 * we are at the first slot of the leaf.
+	 */
+	if (path->slots[0] == 0) {
+		ret = btrfs_prev_leaf(root, path);
+		/* Not possible */
+		if (ret)
+			goto out;
+		path->slots[0] = btrfs_header_nritems(path->nodes[0]);
+	}
+
+out:
+	/* A clean walk (ret == 0) reports whether an overlap was seen */
+	if (ret == 0)
+		ret = not_found;
+	return ret;
+}
+
+/*
+ * Punch a hole covering [@offset, @offset + @len) in the file given by @ino
+ * and @root.
+ *
+ * Unlike the kernel punch_hole, this will not zero/free existing extents;
+ * instead it returns -EEXIST if there is any extent in the hole range.
+ *
+ * Return -ENOMEM if no path can be allocated, otherwise the result of
+ * btrfs_get_extent() (on error) or of btrfs_insert_file_extent().
+ */
+int btrfs_punch_hole(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     u64 ino, u64 offset, u64 len)
+{
+	struct btrfs_path *path = btrfs_alloc_path();
+	int ret;
+
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_get_extent(NULL, root, path, ino, offset, len, 0);
+	if (ret > 0) {
+		/* Nothing overlaps the range, record the hole extent */
+		ret = btrfs_insert_file_extent(trans, root, ino, offset,
+					       0, 0, len);
+	} else if (ret == 0) {
+		/* An extent already lives inside the requested range */
+		ret = -EEXIST;
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
diff --git a/find-root.c b/find-root.c
new file mode 100644
index 00000000..823db6ad
--- /dev/null
+++ b/find-root.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2015 Fujitsu. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "kerncompat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ctree.h"
+#include "utils.h"
+#include "find-root.h"
+#include "volumes.h"
+#include "disk-io.h"
+#include "extent-cache.h"
+
+/*
+ * Record tree block @eb in @result when it passes @filter (owner matches,
+ * level and generation are not below the filter's minimums).
+ *
+ * @result holds one btrfs_find_root_gen_cache per generation; each of them
+ * keeps only the blocks of the highest level seen so far for that generation.
+ * If @eb matches filter->match_gen / filter->match_level and search_all is
+ * not set, 1 is returned and, when @match is not NULL, *match is pointed at
+ * the cache entry found for @eb.
+ *
+ * Return value is the same as btrfs_find_root_search(); -ENOMEM is returned
+ * when a new generation cache cannot be allocated.
+ */
+static int add_eb_to_result(struct extent_buffer *eb,
+			    struct cache_tree *result,
+			    u32 leafsize,
+			    struct btrfs_find_root_filter *filter,
+			    struct cache_extent **match)
+{
+	u64 generation = btrfs_header_generation(eb);
+	u64 level = btrfs_header_level(eb);
+	u64 owner = btrfs_header_owner(eb);
+	u64 start = eb->start;
+	struct cache_extent *cache;
+	struct btrfs_find_root_gen_cache *gen_cache = NULL;
+	int ret = 0;
+
+	if (owner != filter->objectid || level < filter->level ||
+	    generation < filter->generation)
+		return ret;
+
+	/*
+	 * Get the generation cache or create one
+	 *
+	 * NOTE: search_cache_extent() may return cache that doesn't cover
+	 * the range. So we need an extra check to make sure it's the right one.
+	 */
+	cache = search_cache_extent(result, generation);
+	if (!cache || cache->start != generation) {
+		gen_cache = malloc(sizeof(*gen_cache));
+		if (!gen_cache)
+			return -ENOMEM;
+		cache = &gen_cache->cache;
+		cache->start = generation;
+		cache->size = 1;
+		cache->objectid = 0;
+		gen_cache->highest_level = 0;
+		cache_tree_init(&gen_cache->eb_tree);
+
+		ret = insert_cache_extent(result, cache);
+		if (ret < 0) {
+			/* Not linked into @result, so nobody else frees it */
+			free(gen_cache);
+			return ret;
+		}
+	}
+	gen_cache = container_of(cache, struct btrfs_find_root_gen_cache,
+				 cache);
+
+	/* Higher level, clean tree and insert the new one */
+	if (level > gen_cache->highest_level) {
+		free_extent_cache_tree(&gen_cache->eb_tree);
+		gen_cache->highest_level = level;
+		/* Fall into the insert routine */
+	}
+
+	/* Same level, insert it into the eb_tree */
+	if (level == gen_cache->highest_level) {
+		ret = add_cache_extent(&gen_cache->eb_tree,
+				       start, leafsize);
+		/* A block that is already recorded is not an error */
+		if (ret < 0 && ret != -EEXIST)
+			return ret;
+		ret = 0;
+	}
+	if (generation == filter->match_gen &&
+	    level == filter->match_level &&
+	    !filter->search_all) {
+		ret = 1;
+		if (match)
+			*match = search_cache_extent(&gen_cache->eb_tree,
+						     start);
+	}
+	return ret;
+}
+
+/*
+ * Walk the chunks returned by btrfs_next_bg_metadata() (or by
+ * btrfs_next_bg_system() when the filter asks for the chunk tree) in
+ * leafsize steps and hand every readable tree block to add_eb_to_result().
+ *
+ * Return 0 if iterating all the metadata extents.
+ * Return 1 if found root with given gen/level and set *match to it.
+ * Return <0 if error happens
+ */
+int btrfs_find_root_search(struct btrfs_fs_info *fs_info,
+			   struct btrfs_find_root_filter *filter,
+			   struct cache_tree *result,
+			   struct cache_extent **match)
+{
+	struct extent_buffer *eb;
+	u64 chunk_offset = 0;
+	u64 chunk_size = 0;
+	u64 bytenr = 0;
+	u32 leafsize = btrfs_super_leafsize(fs_info->super_copy);
+	int saved_suppress = fs_info->suppress_check_block_errors;
+	int ret = 0;
+
+	/* Silence block check errors for the scan, restored on exit */
+	fs_info->suppress_check_block_errors = 1;
+	for (;;) {
+		if (filter->objectid == BTRFS_CHUNK_TREE_OBJECTID)
+			ret = btrfs_next_bg_system(&fs_info->mapping_tree,
+						   &chunk_offset,
+						   &chunk_size);
+		else
+			ret = btrfs_next_bg_metadata(&fs_info->mapping_tree,
+						     &chunk_offset,
+						     &chunk_size);
+		if (ret) {
+			/* -ENOENT only means there are no more chunks */
+			ret = (ret == -ENOENT) ? 0 : ret;
+			break;
+		}
+
+		bytenr = chunk_offset;
+		while (bytenr < chunk_offset + chunk_size) {
+			eb = read_tree_block_fs_info(fs_info, bytenr,
+						     leafsize, 0);
+			bytenr += leafsize;
+			/* Skip anything that cannot be read as a tree block */
+			if (!eb || IS_ERR(eb))
+				continue;
+
+			ret = add_eb_to_result(eb, result, leafsize, filter,
+					       match);
+			free_extent_buffer(eb);
+			if (ret)
+				goto out;
+		}
+	}
+out:
+	fs_info->suppress_check_block_errors = saved_suppress;
+	return ret;
+}
diff --git a/find-root.h b/find-root.h
new file mode 100644
index 00000000..60d1111d
--- /dev/null
+++ b/find-root.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 Fujitsu. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_FIND_ROOT_H__
+#define __BTRFS_FIND_ROOT_H__
+
+#include "kerncompat.h"
+
+#include "ctree.h"
+#include "list.h"
+#include "extent-cache.h"
+
+/*
+ * Find-root stores the search result in a 2-level tree.
+ * The search result is a cache_tree consisting of generation_caches.
+ * Each generation cache records the highest level of this generation
+ * and all the tree blocks with this generation.
+ *
+ * <result>
+ * cache_tree ----> generation_cache: gen:1 level: 2 eb_tree ----> eb1
+ * | |-> eb2
+ * | ......
+ * |-> generation_cache: gen:2 level: 3 eb_tree ---> eb3
+ *
+ * In the above example, generation 1's highest level is 2, but there are
+ * multiple ebs with the same generation, so the root of generation 1 must be
+ * missing; possibly it has already been overwritten.
+ * On the other hand, generation 2's highest level is 3 and we find only one
+ * eb for it, so it may be the root of generation 2.
+ */
+
+struct btrfs_find_root_gen_cache {
+ struct cache_extent cache; /* cache->start is generation */
+ u64 highest_level; /* highest tree level recorded for this generation */
+ struct cache_tree eb_tree; /* tree blocks of this generation, keyed by their start bytenr */
+};
+
+struct btrfs_find_root_filter {
+ u64 objectid; /* Only search tree with this objectid */
+ u64 generation; /* Only record tree block with higher or
+ equal generation */
+ u8 level; /* Only record tree block with higher or
+ equal level */
+ u8 match_level; /* level a block must have to count as the match */
+ u64 match_gen; /* generation a block must have to count as the match */
+ int search_all;
+ /*
+ * If search_all is set, keep searching even when a tree block
+ * matches match_gen, match_level and objectid.
+ * This *WILL* take *TONS* of extra time.
+ */
+};
+int btrfs_find_root_search(struct btrfs_fs_info *fs_info,
+ struct btrfs_find_root_filter *filter,
+ struct cache_tree *result,
+ struct cache_extent **match);
+/*
+ * Tear down a search result built by btrfs_find_root_search(): for every
+ * generation cache in @result, free its eb_tree, unlink it from @result and
+ * free the generation cache itself.
+ */
+static inline void btrfs_find_root_free(struct cache_tree *result)
+{
+	struct btrfs_find_root_gen_cache *gen_cache;
+	struct cache_extent *entry;
+
+	/* Always take the first entry, it is removed on each iteration */
+	while ((entry = first_cache_extent(result)) != NULL) {
+		gen_cache = container_of(entry,
+				struct btrfs_find_root_gen_cache, cache);
+		free_extent_cache_tree(&gen_cache->eb_tree);
+		remove_cache_extent(result, entry);
+		free(gen_cache);
+	}
+}
+#endif
diff --git a/free-space-cache.c b/free-space-cache.c
new file mode 100644
index 00000000..d10a5f51
--- /dev/null
+++ b/free-space-cache.c
@@ -0,0 +1,878 @@
+/*
+ * Copyright (C) 2008 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "free-space-cache.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "extent_io.h"
+#include "crc32c.h"
+#include "bitops.h"
+
+/*
+ * Kernel always uses PAGE_CACHE_SIZE for sectorsize, but we don't have
+ * anything like that in userspace and have to get the value from the
+ * filesystem.
+ *
+ * BITS_PER_BITMAP() is the number of bits in a bitmap of @sectorsize bytes.
+ * The result keeps the (promoted) type of @sectorsize, so callers that
+ * multiply it by a unit size should widen it first.
+ */
+#define BITS_PER_BITMAP(sectorsize) ((sectorsize) * 8)
+#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) /* not referenced anywhere else in this file */
+
+static int link_free_space(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *info);
+static void merge_space_tree(struct btrfs_free_space_ctl *ctl);
+
+struct io_ctl {
+ void *cur, *orig; /* read cursor inside / start of the currently mapped page; NULL when none is mapped */
+ void *buffer; /* in-memory copy of the cache file, filled by io_ctl_prepare_pages() */
+ struct btrfs_root *root; /* root whose sectorsize is used as the page size */
+ unsigned long size; /* bytes left in the currently mapped page */
+ u64 total_size; /* size passed to io_ctl_init() */
+ int index; /* index of the next page io_ctl_map_page() will map */
+ int num_pages; /* total_size rounded up to whole sectorsize pages */
+ unsigned check_crcs:1; /* set unless the inode is BTRFS_FREE_INO_OBJECTID */
+};
+
+/*
+ * Initialize @io_ctl for a cache file of @size bytes belonging to inode @ino.
+ *
+ * The buffer is allocated in whole sectorsize pages: io_ctl_map_page() and
+ * its users always touch a full page, so a @size that is not a multiple of
+ * the sectorsize must not leave the last page short.
+ *
+ * CRC checking is enabled unless @ino is BTRFS_FREE_INO_OBJECTID.
+ *
+ * Return 0 on success, -ENOMEM if the buffer cannot be allocated.
+ */
+static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino,
+		       struct btrfs_root *root)
+{
+	memset(io_ctl, 0, sizeof(struct io_ctl));
+	io_ctl->num_pages = (size + root->sectorsize - 1) / root->sectorsize;
+	io_ctl->buffer = kzalloc((u64)io_ctl->num_pages * root->sectorsize,
+				 GFP_NOFS);
+	if (!io_ctl->buffer)
+		return -ENOMEM;
+	io_ctl->total_size = size;
+	io_ctl->root = root;
+	if (ino != BTRFS_FREE_INO_OBJECTID)
+		io_ctl->check_crcs = 1;
+	return 0;
+}
+
+/* Release the buffer allocated by io_ctl_init(). */
+static void io_ctl_free(struct io_ctl *io_ctl)
+{
+	void *buf = io_ctl->buffer;
+
+	kfree(buf);
+}
+
+/* Forget the currently mapped page, if any, by clearing cur and orig. */
+static void io_ctl_unmap_page(struct io_ctl *io_ctl)
+{
+	if (!io_ctl->cur)
+		return;
+
+	io_ctl->orig = NULL;
+	io_ctl->cur = NULL;
+}
+
+/*
+ * Map the next sectorsize page of the buffer: point cur/orig at it, reset
+ * the remaining size to one sectorsize and advance the page index.
+ * The page is zeroed first when @clear is non-zero.
+ * Mapping past the last page is a BUG.
+ */
+static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
+{
+	BUG_ON(io_ctl->index >= io_ctl->num_pages);
+
+	io_ctl->orig = io_ctl->buffer +
+		       (io_ctl->index * io_ctl->root->sectorsize);
+	io_ctl->index++;
+	io_ctl->cur = io_ctl->orig;
+	io_ctl->size = io_ctl->root->sectorsize;
+
+	if (clear)
+		memset(io_ctl->cur, 0, io_ctl->root->sectorsize);
+}
+
+/*
+ * Drop the page state of @io_ctl. In this userspace version that only means
+ * unmapping the current page; the buffer itself is freed by io_ctl_free().
+ */
+static void io_ctl_drop_pages(struct io_ctl *io_ctl)
+{
+	io_ctl_unmap_page(io_ctl);
+}
+
+/*
+ * Read the content of free space inode @ino into io_ctl->buffer by walking
+ * its file extent items in @root, starting at file offset 0.
+ *
+ * Only regular file extents are accepted. Every extent must lie inside
+ * [0, io_ctl->total_size); the extent items come from disk, so anything
+ * larger is treated as corruption rather than being copied past the buffer.
+ *
+ * @path is used for the tree walk and is released before returning.
+ *
+ * Return 0 on success, -EINVAL on a missing or unexpected item, or the error
+ * returned by read_data_from_disk().
+ */
+static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root,
+				struct btrfs_path *path, u64 ino)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	u64 bytenr, len;
+	u64 total_read = 0;
+	int ret = 0;
+
+	key.objectid = ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret) {
+		fprintf(stderr,
+		       "Couldn't find file extent item for free space inode"
+		       " %Lu\n", ino);
+		btrfs_release_path(path);
+		return -EINVAL;
+	}
+
+	while (total_read < io_ctl->total_size) {
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret) {
+				ret = -EINVAL;
+				break;
+			}
+		}
+		leaf = path->nodes[0];
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid != ino) {
+			ret = -EINVAL;
+			break;
+		}
+
+		if (key.type != BTRFS_EXTENT_DATA_KEY) {
+			ret = -EINVAL;
+			break;
+		}
+
+		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(path->nodes[0], fi) !=
+		    BTRFS_FILE_EXTENT_REG) {
+			fprintf(stderr, "Not the file extent type we wanted\n");
+			ret = -EINVAL;
+			break;
+		}
+
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi) +
+			btrfs_file_extent_offset(leaf, fi);
+		len = btrfs_file_extent_num_bytes(leaf, fi);
+
+		/* Written so that key.offset + len cannot wrap around */
+		if (key.offset > io_ctl->total_size ||
+		    len > io_ctl->total_size - key.offset) {
+			fprintf(stderr,
+				"File extent of free space inode %llu exceeds the inode size\n",
+				(unsigned long long)ino);
+			ret = -EINVAL;
+			break;
+		}
+
+		ret = read_data_from_disk(root->fs_info,
+					  io_ctl->buffer + key.offset, bytenr,
+					  len, 0);
+		if (ret)
+			break;
+		total_read += len;
+		path->slots[0]++;
+	}
+
+	btrfs_release_path(path);
+	return ret;
+}
+
+/*
+ * Verify that the generation stored at the front of the first page equals
+ * @generation and move the cursor past it.
+ *
+ * The first page must already be mapped. On a mismatch the page is unmapped
+ * and -EIO is returned; otherwise 0 is returned.
+ */
+static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
+{
+	__le64 *gen;
+
+	/*
+	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
+	 * chunk at the front of the first page.
+	 */
+	if (io_ctl->check_crcs) {
+		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
+		io_ctl->size -= sizeof(u64) +
+			(sizeof(u32) * io_ctl->num_pages);
+	} else {
+		io_ctl->cur += sizeof(u64);
+		io_ctl->size -= sizeof(u64) * 2;
+	}
+
+	gen = io_ctl->cur;
+	if (le64_to_cpu(*gen) != generation) {
+		/* Print the CPU-order value, not the raw on-disk bytes */
+		printk("btrfs: space cache generation "
+		       "(%llu) does not match inode (%llu)\n",
+		       (unsigned long long)le64_to_cpu(*gen),
+		       (unsigned long long)generation);
+		io_ctl_unmap_page(io_ctl);
+		return -EIO;
+	}
+	io_ctl->cur += sizeof(u64);
+	return 0;
+}
+
+/*
+ * Map the next page and, when crc checking is enabled, compare its crc32c
+ * with the value stored in slot @index of the u32 array at the start of the
+ * buffer. For page 0 that array itself is excluded from the checksum.
+ *
+ * Return 0 if the page is mapped and valid; on a mismatch the page is
+ * unmapped again and -EIO is returned.
+ */
+static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
+{
+	u32 expected;
+	u32 crc = ~(u32)0;
+	unsigned offset = 0;
+
+	if (!io_ctl->check_crcs) {
+		io_ctl_map_page(io_ctl, 0);
+		return 0;
+	}
+
+	if (index == 0)
+		offset = sizeof(u32) * io_ctl->num_pages;
+
+	/* Fetch the stored crc before the page gets mapped */
+	expected = ((u32 *)io_ctl->buffer)[index];
+
+	io_ctl_map_page(io_ctl, 0);
+	crc = crc32c(crc, io_ctl->orig + offset, io_ctl->root->sectorsize - offset);
+	btrfs_csum_final(crc, (char *)&crc);
+	if (expected == crc)
+		return 0;
+
+	printk("btrfs: csum mismatch on free space cache\n");
+	io_ctl_unmap_page(io_ctl);
+	return -EIO;
+}
+
+/*
+ * Decode the next on-disk free space entry at the cursor into @entry
+ * (offset and bytes) and store its type in *@type.
+ *
+ * A new page is mapped (and crc checked) first when none is mapped. Once
+ * the current page has no room left for another entry it is unmapped, so
+ * that the next call moves on to the following page.
+ *
+ * Return 0 on success or the error from io_ctl_check_crc().
+ */
+static int io_ctl_read_entry(struct io_ctl *io_ctl,
+			    struct btrfs_free_space *entry, u8 *type)
+{
+	struct btrfs_free_space_entry *disk_entry;
+	int ret;
+
+	if (!io_ctl->cur) {
+		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
+		if (ret)
+			return ret;
+	}
+
+	disk_entry = io_ctl->cur;
+	entry->offset = le64_to_cpu(disk_entry->offset);
+	entry->bytes = le64_to_cpu(disk_entry->bytes);
+	*type = disk_entry->type;
+
+	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
+	io_ctl->size -= sizeof(struct btrfs_free_space_entry);
+
+	/* Page exhausted: drop it so the next read maps a fresh one */
+	if (io_ctl->size < sizeof(struct btrfs_free_space_entry))
+		io_ctl_unmap_page(io_ctl);
+
+	return 0;
+}
+
+/*
+ * Map the next page (crc checked) and copy one sectorsize worth of bitmap
+ * data from it into entry->bitmap, then unmap the page.
+ *
+ * Return 0 on success or the error from io_ctl_check_crc().
+ */
+static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
+			      struct btrfs_free_space *entry)
+{
+	int ret = io_ctl_check_crc(io_ctl, io_ctl->index);
+
+	if (!ret) {
+		memcpy(entry->bitmap, io_ctl->cur, io_ctl->root->sectorsize);
+		io_ctl_unmap_page(io_ctl);
+	}
+
+	return ret;
+}
+
+
+/*
+ * Load the free space cache whose header item is keyed
+ * (BTRFS_FREE_SPACE_OBJECTID, 0, @offset) in @root into @ctl.
+ *
+ * The header names the cache inode and gives the entry/bitmap counts and
+ * the generation. The inode content is read into memory, its crc and
+ * generation are verified, all entries are linked into @ctl, the bitmap
+ * pages are read, and finally everything is merged into plain extents.
+ *
+ * All of that data comes from disk, so inconsistencies are reported as a
+ * failed load instead of aborting the program.
+ *
+ * Return 1 if the cache was loaded. Any other value (0 or <0) means no
+ * usable cache; whatever had been linked into @ctl is removed again.
+ */
+static int __load_free_space_cache(struct btrfs_root *root,
+			    struct btrfs_free_space_ctl *ctl,
+			    struct btrfs_path *path, u64 offset)
+{
+	struct btrfs_free_space_header *header;
+	struct btrfs_inode_item *inode_item;
+	struct extent_buffer *leaf;
+	struct io_ctl io_ctl;
+	struct btrfs_key key;
+	struct btrfs_key inode_location;
+	struct btrfs_disk_key disk_key;
+	struct btrfs_free_space *e, *n;
+	struct list_head bitmaps;
+	u64 num_entries;
+	u64 num_bitmaps;
+	u64 generation;
+	u64 inode_size;
+	u8 type;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&bitmaps);
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = offset;
+	key.type = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		return 0;
+	} else if (ret > 0) {
+		btrfs_release_path(path);
+		return 0;
+	}
+
+	leaf = path->nodes[0];
+	header = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_free_space_header);
+	num_entries = btrfs_free_space_entries(leaf, header);
+	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+	generation = btrfs_free_space_generation(leaf, header);
+	btrfs_free_space_key(leaf, header, &disk_key);
+	btrfs_disk_key_to_cpu(&inode_location, &disk_key);
+	btrfs_release_path(path);
+
+	ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0);
+	if (ret) {
+		fprintf(stderr, "Couldn't find free space inode %d\n", ret);
+		return 0;
+	}
+
+	leaf = path->nodes[0];
+	inode_item = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_inode_item);
+
+	inode_size = btrfs_inode_size(leaf, inode_item);
+	if (!inode_size || !btrfs_inode_generation(leaf, inode_item)) {
+		btrfs_release_path(path);
+		return 0;
+	}
+
+	if (btrfs_inode_generation(leaf, inode_item) != generation) {
+		fprintf(stderr,
+		       "free space inode generation (%llu) did not match "
+		       "free space cache generation (%llu)\n",
+		       (unsigned long long)btrfs_inode_generation(leaf,
+								  inode_item),
+		       (unsigned long long)generation);
+		btrfs_release_path(path);
+		return 0;
+	}
+
+	btrfs_release_path(path);
+
+	if (!num_entries)
+		return 0;
+
+	ret = io_ctl_init(&io_ctl, inode_size, inode_location.objectid, root);
+	if (ret)
+		return ret;
+
+	ret = io_ctl_prepare_pages(&io_ctl, root, path,
+				   inode_location.objectid);
+	if (ret)
+		goto out;
+
+	ret = io_ctl_check_crc(&io_ctl, 0);
+	if (ret)
+		goto free_cache;
+
+	ret = io_ctl_check_generation(&io_ctl, generation);
+	if (ret)
+		goto free_cache;
+
+	while (num_entries) {
+		e = calloc(1, sizeof(*e));
+		if (!e)
+			goto free_cache;
+
+		ret = io_ctl_read_entry(&io_ctl, e, &type);
+		if (ret) {
+			free(e);
+			goto free_cache;
+		}
+
+		if (!e->bytes) {
+			free(e);
+			goto free_cache;
+		}
+
+		if (type == BTRFS_FREE_SPACE_EXTENT) {
+			ret = link_free_space(ctl, e);
+			if (ret) {
+				fprintf(stderr,
+				       "Duplicate entries in free space cache\n");
+				free(e);
+				goto free_cache;
+			}
+		} else {
+			/*
+			 * The header promised fewer bitmaps than the entries
+			 * carry: the cache is inconsistent, give up on it.
+			 */
+			if (!num_bitmaps) {
+				fprintf(stderr,
+				       "Unexpected bitmap entry in free space cache\n");
+				free(e);
+				ret = -EINVAL;
+				goto free_cache;
+			}
+			num_bitmaps--;
+			e->bitmap = kzalloc(ctl->sectorsize, GFP_NOFS);
+			if (!e->bitmap) {
+				free(e);
+				goto free_cache;
+			}
+			ret = link_free_space(ctl, e);
+			if (ret) {
+				fprintf(stderr,
+				       "Duplicate entries in free space cache\n");
+				free(e->bitmap);
+				free(e);
+				goto free_cache;
+			}
+			/* Count only bitmaps that really made it into @ctl */
+			ctl->total_bitmaps++;
+			list_add_tail(&e->list, &bitmaps);
+		}
+
+		num_entries--;
+	}
+
+	io_ctl_unmap_page(&io_ctl);
+
+	/*
+	 * The bitmap pages are stored after all the entries, so read them
+	 * now, in the order their entries were seen.
+	 */
+	list_for_each_entry_safe(e, n, &bitmaps, list) {
+		list_del_init(&e->list);
+		ret = io_ctl_read_bitmap(&io_ctl, e);
+		if (ret)
+			goto free_cache;
+	}
+
+	io_ctl_drop_pages(&io_ctl);
+	merge_space_tree(ctl);
+	ret = 1;
+out:
+	io_ctl_free(&io_ctl);
+	return ret;
+free_cache:
+	/* Every entry linked so far is owned by @ctl and freed with it */
+	io_ctl_drop_pages(&io_ctl);
+	__btrfs_remove_free_space_cache(ctl);
+	goto out;
+}
+
+/*
+ * Load the free space cache of @block_group from the tree root into
+ * block_group->free_space_ctl and check that the amount of free space found
+ * equals block group size - used bytes - bytes_super.
+ *
+ * Return 1 if the cache was loaded and the amount matches, 0 otherwise.
+ * Failures are reported on stderr and are not passed on to the caller; a
+ * cache with the wrong amount of free space is removed again.
+ */
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+			  struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+	struct btrfs_path *path;
+	u64 used = btrfs_block_group_used(&block_group->item);
+	int ret = 0;
+	int matched;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return 0;
+
+	ret = __load_free_space_cache(fs_info->tree_root, ctl, path,
+				      block_group->key.objectid);
+	btrfs_free_path(path);
+
+	matched = (ctl->free_space == (block_group->key.offset - used -
+				       block_group->bytes_super));
+	if (ret == 1 && !matched) {
+		__btrfs_remove_free_space_cache(ctl);
+		/* u64 is cast explicitly, like the other %llu users here */
+		fprintf(stderr,
+		       "block group %llu has wrong amount of free space\n",
+		       (unsigned long long)block_group->key.objectid);
+		ret = -1;
+	}
+
+	if (ret < 0) {
+		ret = 0;
+
+		fprintf(stderr,
+		       "failed to load free space cache for block group %llu\n",
+		       (unsigned long long)block_group->key.objectid);
+	}
+
+	return ret;
+}
+
+/*
+ * Convert byte @offset into a bit index relative to @bitmap_start, with one
+ * bit standing for @unit bytes. @offset below @bitmap_start is a BUG.
+ */
+static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
+					  u64 offset)
+{
+	BUG_ON(offset < bitmap_start);
+	return (unsigned long)((offset - bitmap_start) / unit);
+}
+
+/* Number of whole @unit sized bits needed to describe @bytes (rounds down). */
+static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
+{
+	u64 nr_bits = bytes / unit;
+
+	return (unsigned long)nr_bits;
+}
+
+/*
+ * Insert @node, describing an entry that starts at @offset, into the offset
+ * indexed rb-tree @root. @bitmap is non-zero when the new entry is a bitmap.
+ *
+ * One extent entry and one bitmap entry may share an offset; a second entry
+ * of the same kind at that offset is rejected.
+ *
+ * Return 0 on success, -EEXIST if such an entry already exists.
+ */
+static int tree_insert_offset(struct rb_root *root, u64 offset,
+			      struct rb_node *node, int bitmap)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct btrfs_free_space *cur;
+
+	while (*link) {
+		parent = *link;
+		cur = rb_entry(parent, struct btrfs_free_space, offset_index);
+
+		if (offset < cur->offset) {
+			link = &(*link)->rb_left;
+			continue;
+		}
+		if (offset > cur->offset) {
+			link = &(*link)->rb_right;
+			continue;
+		}
+
+		/*
+		 * Same offset. We want the extent entry to always be found
+		 * first by a linear walk of the tree, since allocating from
+		 * an extent is faster than allocating from a bitmap. So a
+		 * bitmap goes right of (after) an extent entry, and an extent
+		 * goes left of (before) a bitmap entry. Two entries of the
+		 * same kind cannot coexist.
+		 */
+		if ((bitmap != 0) == (cur->bitmap != NULL))
+			return -EEXIST;
+
+		if (bitmap)
+			link = &(*link)->rb_right;
+		else
+			link = &(*link)->rb_left;
+	}
+
+	rb_link_node(node, parent, link);
+	rb_insert_color(node, root);
+
+	return 0;
+}
+
+/*
+ * searches the tree for the given offset.
+ *
+ * bitmap_only - If this is set, only a bitmap entry that starts exactly at
+ * offset is returned.
+ *
+ * fuzzy - If this is set, then we are trying to make an allocation, and we just
+ * want a section that has at least bytes size and comes at or after the given
+ * offset.
+ *
+ * Returns the matching entry or NULL if there is none.
+ */
+static struct btrfs_free_space *
+tree_search_offset(struct btrfs_free_space_ctl *ctl,
+		   u64 offset, int bitmap_only, int fuzzy)
+{
+	struct rb_node *n = ctl->free_space_offset.rb_node;
+	struct btrfs_free_space *entry, *prev = NULL;
+	u32 sectorsize = ctl->sectorsize;
+	/*
+	 * Bytes covered by one bitmap. Widen before multiplying: with a 64K
+	 * sectorsize the product no longer fits in 32 bits.
+	 */
+	u64 bitmap_bytes = (u64)BITS_PER_BITMAP(sectorsize) * ctl->unit;
+
+	/* find entry that is closest to the 'offset' */
+	while (1) {
+		if (!n) {
+			entry = NULL;
+			break;
+		}
+
+		entry = rb_entry(n, struct btrfs_free_space, offset_index);
+		prev = entry;
+
+		if (offset < entry->offset)
+			n = n->rb_left;
+		else if (offset > entry->offset)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	if (bitmap_only) {
+		if (!entry)
+			return NULL;
+		if (entry->bitmap)
+			return entry;
+
+		/*
+		 * bitmap entry and extent entry may share same offset,
+		 * in that case, bitmap entry comes after extent entry.
+		 */
+		n = rb_next(n);
+		if (!n)
+			return NULL;
+		entry = rb_entry(n, struct btrfs_free_space, offset_index);
+		if (entry->offset != offset)
+			return NULL;
+
+		WARN_ON(!entry->bitmap);
+		return entry;
+	} else if (entry) {
+		if (entry->bitmap) {
+			/*
+			 * if previous extent entry covers the offset,
+			 * we should return it instead of the bitmap entry
+			 */
+			n = rb_prev(&entry->offset_index);
+			if (n) {
+				prev = rb_entry(n, struct btrfs_free_space,
+						offset_index);
+				if (!prev->bitmap &&
+				    prev->offset + prev->bytes > offset)
+					entry = prev;
+			}
+		}
+		return entry;
+	}
+
+	/* No entry starts exactly at 'offset'; prev is the last node visited */
+	if (!prev)
+		return NULL;
+
+	/* find last entry before the 'offset' */
+	entry = prev;
+	if (entry->offset > offset) {
+		n = rb_prev(&entry->offset_index);
+		if (n) {
+			entry = rb_entry(n, struct btrfs_free_space,
+					offset_index);
+			BUG_ON(entry->offset > offset);
+		} else {
+			if (fuzzy)
+				return entry;
+			else
+				return NULL;
+		}
+	}
+
+	if (entry->bitmap) {
+		n = rb_prev(&entry->offset_index);
+		if (n) {
+			prev = rb_entry(n, struct btrfs_free_space,
+					offset_index);
+			if (!prev->bitmap &&
+			    prev->offset + prev->bytes > offset)
+				return prev;
+		}
+		if (entry->offset + bitmap_bytes > offset)
+			return entry;
+	} else if (entry->offset + entry->bytes > offset)
+		return entry;
+
+	if (!fuzzy)
+		return NULL;
+
+	/* fuzzy: walk forward to the first entry that ends after 'offset' */
+	while (1) {
+		if (entry->bitmap) {
+			if (entry->offset + bitmap_bytes > offset)
+				break;
+		} else {
+			if (entry->offset + entry->bytes > offset)
+				break;
+		}
+
+		n = rb_next(&entry->offset_index);
+		if (!n)
+			return NULL;
+		entry = rb_entry(n, struct btrfs_free_space, offset_index);
+	}
+	return entry;
+}
+
+/*
+ * Remove @info from the offset tree of @ctl and take it out of the free
+ * space and entry counters. @info itself is not freed.
+ */
+void unlink_free_space(struct btrfs_free_space_ctl *ctl,
+		       struct btrfs_free_space *info)
+{
+	struct rb_root *tree = &ctl->free_space_offset;
+
+	rb_erase(&info->offset_index, tree);
+	ctl->free_space -= info->bytes;
+	ctl->free_extents--;
+}
+
+/*
+ * Insert @info into the offset tree of @ctl and account its bytes.
+ * An extent entry (no bitmap) with zero bytes is a BUG.
+ *
+ * Return 0 on success or the error from tree_insert_offset(), in which case
+ * the counters are left untouched.
+ */
+static int link_free_space(struct btrfs_free_space_ctl *ctl,
+			   struct btrfs_free_space *info)
+{
+	int is_bitmap = (info->bitmap != NULL);
+	int ret;
+
+	BUG_ON(!info->bitmap && !info->bytes);
+
+	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
+				 &info->offset_index, is_bitmap);
+	if (ret)
+		return ret;
+
+	ctl->free_extents++;
+	ctl->free_space += info->bytes;
+	return 0;
+}
+
+/*
+ * Look in @bitmap_info for a run of set bits that is at least *@bytes long,
+ * starting the scan at *@offset (or at the start of the bitmap if *@offset
+ * lies before it).
+ *
+ * On success *@offset and *@bytes are updated to the byte offset and byte
+ * length of the whole run found and 0 is returned. -1 is returned when no
+ * such run exists.
+ */
+static int search_bitmap(struct btrfs_free_space_ctl *ctl,
+			 struct btrfs_free_space *bitmap_info, u64 *offset,
+			 u64 *bytes)
+{
+	unsigned long found_bits = 0;
+	unsigned long bits, i;
+	unsigned long next_zero;
+	u32 sectorsize = ctl->sectorsize;
+
+	i = offset_to_bit(bitmap_info->offset, ctl->unit,
+			  max_t(u64, *offset, bitmap_info->offset));
+	bits = bytes_to_bits(*bytes, ctl->unit);
+
+	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP(sectorsize)) {
+		next_zero = find_next_zero_bit(bitmap_info->bitmap,
+					       BITS_PER_BITMAP(sectorsize), i);
+		if ((next_zero - i) >= bits) {
+			found_bits = next_zero - i;
+			break;
+		}
+		/* Run too short, continue after it */
+		i = next_zero;
+	}
+
+	if (found_bits) {
+		/* Widen before multiplying so the product cannot wrap */
+		*offset = (u64)i * ctl->unit + bitmap_info->offset;
+		*bytes = (u64)(found_bits) * ctl->unit;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Look up the free space entry for @offset in @ctl with a plain (non-bitmap
+ * only, non-fuzzy) tree_search_offset(). @bytes is currently not used.
+ */
+struct btrfs_free_space *
+btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes)
+{
+	struct btrfs_free_space *found;
+
+	found = tree_search_offset(ctl, offset, 0, 0);
+	return found;
+}
+
+/*
+ * Absorb the extent entries next to the not yet linked @info into it: the
+ * merged neighbours are unlinked from @ctl and freed, and @info grows (and
+ * possibly moves its start) to cover them. Bitmap entries are never merged.
+ */
+static void try_merge_free_space(struct btrfs_free_space_ctl *ctl,
+				 struct btrfs_free_space *info)
+{
+	struct btrfs_free_space *before;
+	struct btrfs_free_space *after;
+	struct rb_node *prev_node = NULL;
+	u64 start = info->offset;
+	u64 length = info->bytes;
+
+	/*
+	 * Both neighbours are looked up before anything is unlinked: the
+	 * entry at the end of our range first, then either its predecessor
+	 * in the tree or whatever covers the byte right before our range.
+	 */
+	after = tree_search_offset(ctl, start + length, 0, 0);
+	if (after)
+		prev_node = rb_prev(&after->offset_index);
+
+	if (prev_node)
+		before = rb_entry(prev_node, struct btrfs_free_space,
+				  offset_index);
+	else
+		before = tree_search_offset(ctl, start - 1, 0, 0);
+
+	if (after && !after->bitmap) {
+		unlink_free_space(ctl, after);
+		info->bytes += after->bytes;
+		free(after);
+	}
+
+	/* The left neighbour only counts if it ends exactly where we start */
+	if (before && !before->bitmap &&
+	    before->offset + before->bytes == start) {
+		unlink_free_space(ctl, before);
+		info->offset = before->offset;
+		info->bytes += before->bytes;
+		free(before);
+	}
+}
+
+/*
+ * Print every free space entry of @block_group and, at the end, how many
+ * entries hold at least @bytes (entries of a read-only block group are not
+ * counted).
+ */
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+			   u64 bytes)
+{
+	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+	struct btrfs_free_space *info;
+	struct rb_node *n;
+	int big_enough = 0;
+
+	n = rb_first(&ctl->free_space_offset);
+	while (n) {
+		info = rb_entry(n, struct btrfs_free_space, offset_index);
+		if (info->bytes >= bytes && !block_group->ro)
+			big_enough++;
+		printk("entry offset %llu, bytes %llu, bitmap %s\n",
+		       (unsigned long long)info->offset,
+		       (unsigned long long)info->bytes,
+		       (info->bitmap) ? "yes" : "no");
+		n = rb_next(n);
+	}
+	printk("%d blocks of free space at or bigger than bytes is \n", big_enough);
+}
+
+/*
+ * Allocate a zeroed free space ctl for @block_group and attach it to the
+ * block group. Both the sector size and the bitmap unit are set to
+ * @sectorsize, and the ctl starts at the block group's objectid.
+ *
+ * Return 0 on success, -ENOMEM if the allocation fails.
+ */
+int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
+			      int sectorsize)
+{
+	struct btrfs_free_space_ctl *new_ctl = calloc(1, sizeof(*new_ctl));
+
+	if (!new_ctl)
+		return -ENOMEM;
+
+	new_ctl->private = block_group;
+	new_ctl->start = block_group->key.objectid;
+	new_ctl->unit = sectorsize;
+	new_ctl->sectorsize = sectorsize;
+
+	block_group->free_space_ctl = new_ctl;
+	return 0;
+}
+
+/*
+ * Empty @ctl: unlink every entry, last one first, and free it together with
+ * its bitmap (if any). @ctl itself stays allocated.
+ */
+void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
+{
+	struct btrfs_free_space *entry;
+	struct rb_node *last;
+
+	for (;;) {
+		last = rb_last(&ctl->free_space_offset);
+		if (last == NULL)
+			break;
+
+		entry = rb_entry(last, struct btrfs_free_space, offset_index);
+		unlink_free_space(ctl, entry);
+		free(entry->bitmap);
+		free(entry);
+	}
+}
+
+/* Drop all free space entries recorded for @block_group. */
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+
+	__btrfs_remove_free_space_cache(ctl);
+}
+
+/*
+ * Add the free range [@offset, @offset + @bytes) to @ctl as an extent
+ * entry, merging it with adjacent extent entries first.
+ *
+ * Return 0 on success, -ENOMEM if the entry cannot be allocated, or the
+ * error from link_free_space(); an -EEXIST from there is a BUG.
+ */
+int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset,
+			 u64 bytes)
+{
+	struct btrfs_free_space *entry = calloc(1, sizeof(*entry));
+	int ret;
+
+	if (!entry)
+		return -ENOMEM;
+
+	entry->offset = offset;
+	entry->bytes = bytes;
+
+	try_merge_free_space(ctl, entry);
+
+	ret = link_free_space(ctl, entry);
+	if (!ret)
+		return 0;
+
+	printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
+	BUG_ON(ret == -EEXIST);
+	return ret;
+}
+
+/*
+ * Merges all the free space cache and kills the bitmap entries since we just
+ * want to use the free space cache to verify it's correct, no reason to keep
+ * the bitmaps around to confuse things.
+ *
+ * Every bitmap entry is replaced by extent entries for its runs of set
+ * bits, and extent entries that touch each other are folded into one. The
+ * walk restarts from the first entry after each change to the tree.
+ */
+static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
+{
+	struct btrfs_free_space *e, *prev = NULL;
+	struct rb_node *n;
+	int ret;
+	u32 sectorsize = ctl->sectorsize;
+
+again:
+	prev = NULL;
+	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
+		e = rb_entry(n, struct btrfs_free_space, offset_index);
+		if (e->bitmap) {
+			u64 offset = e->offset, bytes = ctl->unit;
+			u64 end;
+
+			/*
+			 * Widen before multiplying: bits * unit overflows
+			 * 32 bits with a 64K sectorsize.
+			 */
+			end = e->offset +
+			      (u64)BITS_PER_BITMAP(sectorsize) * ctl->unit;
+
+			unlink_free_space(ctl, e);
+			while (!(search_bitmap(ctl, e, &offset, &bytes))) {
+				ret = btrfs_add_free_space(ctl, offset,
+							   bytes);
+				BUG_ON(ret);
+				offset += bytes;
+				if (offset >= end)
+					break;
+				/* search_bitmap() grew bytes, ask for one unit again */
+				bytes = ctl->unit;
+			}
+			free(e->bitmap);
+			free(e);
+			goto again;
+		}
+		if (!prev)
+			goto next;
+		if (prev->offset + prev->bytes == e->offset) {
+			/* Both are unlinked so the byte accounting stays right */
+			unlink_free_space(ctl, prev);
+			unlink_free_space(ctl, e);
+			prev->bytes += e->bytes;
+			free(e);
+			link_free_space(ctl, prev);
+			goto again;
+		}
+next:
+		prev = e;
+	}
+}
diff --git a/free-space-cache.h b/free-space-cache.h
new file mode 100644
index 00000000..9214077a
--- /dev/null
+++ b/free-space-cache.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2009 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_FREE_SPACE_CACHE_H__
+#define __BTRFS_FREE_SPACE_CACHE_H__
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "rbtree.h"
+
+/*
+ * One entry of the in-memory free space cache: a plain extent, or a bitmap
+ * entry when @bitmap is non-NULL.
+ */
+struct btrfs_free_space {
+ /* node linking this entry into btrfs_free_space_ctl::free_space_offset */
+ struct rb_node offset_index;
+ /* start of the range described by this entry */
+ u64 offset;
+ /* length of the range; offset + bytes is the first byte past it */
+ u64 bytes;
+ /* non-NULL marks a bitmap entry; the buffer is released with free() */
+ unsigned long *bitmap;
+ /* NOTE(review): users of this list head are not visible here */
+ struct list_head list;
+};
+
+/*
+ * Per block group free space cache state (reached through
+ * btrfs_block_group_cache::free_space_ctl).
+ */
+struct btrfs_free_space_ctl {
+ /* rb-tree of struct btrfs_free_space, linked through offset_index */
+ struct rb_root free_space_offset;
+ /* NOTE(review): presumably the total free bytes tracked; confirm in free-space-cache.c */
+ u64 free_space;
+ int extents_thresh;
+ int free_extents;
+ int total_bitmaps;
+ /*
+  * Multiplied by BITS_PER_BITMAP() to get the span of one bitmap entry
+  * and used as the minimum search size in merge_space_tree(); appears to
+  * be the number of bytes covered by one bitmap bit.
+  */
+ int unit;
+ u64 start;
+ void *private;
+ /* argument handed to BITS_PER_BITMAP() */
+ u32 sectorsize;
+};
+
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group);
+
+void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
+/* Calls __btrfs_remove_free_space_cache() on block_group->free_space_ctl. */
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
+ *block_group);
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+ u64 bytes);
+struct btrfs_free_space *
+btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes);
+/*
+ * NOTE(review): sectorsize is an int here while
+ * btrfs_free_space_ctl::sectorsize is u32 - confirm the conversion is benign.
+ */
+int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
+ int sectorsize);
+void unlink_free_space(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *info);
+/*
+ * Adds the range [offset, offset + bytes) to @ctl.  Returns 0, -ENOMEM when
+ * the entry cannot be allocated, or the error from linking the new entry.
+ */
+int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset,
+ u64 bytes);
+#endif
diff --git a/free-space-tree.c b/free-space-tree.c
new file mode 100644
index 00000000..3c7a2463
--- /dev/null
+++ b/free-space-tree.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2015 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "free-space-cache.h"
+#include "free-space-tree.h"
+
+/*
+ * Position @path on the FREE_SPACE_INFO item of @block_group in the free
+ * space tree and return a pointer to that item.
+ *
+ * Returns ERR_PTR() carrying the search error, or ERR_PTR(-ENOENT) when the
+ * search finds no exact match.
+ */
+static struct btrfs_free_space_info *
+search_free_space_info(struct btrfs_trans_handle *trans,
+		       struct btrfs_fs_info *fs_info,
+		       struct btrfs_block_group_cache *block_group,
+		       struct btrfs_path *path, int cow)
+{
+	struct btrfs_key info_key;
+	int err;
+
+	info_key.objectid = block_group->key.objectid;
+	info_key.type = BTRFS_FREE_SPACE_INFO_KEY;
+	info_key.offset = block_group->key.offset;
+
+	err = btrfs_search_slot(trans, fs_info->free_space_root, &info_key,
+				path, 0, cow);
+	if (err)
+		return ERR_PTR(err < 0 ? err : -ENOENT);
+
+	return btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_free_space_info);
+}
+
+/*
+ * Return 1 if the bit covering @offset is set in the free space bitmap item
+ * that @path points at, 0 otherwise.  One bit stands for @sectorsize bytes,
+ * counted from the item's key.objectid.  @block_group is not used.
+ */
+static int free_space_test_bit(struct btrfs_block_group_cache *block_group,
+			       struct btrfs_path *path, u64 offset,
+			       u64 sectorsize)
+{
+	struct extent_buffer *eb = path->nodes[0];
+	struct btrfs_key key;
+	u64 found_start, found_end;
+	unsigned long bitmap_ptr;
+	unsigned long bit_nr;
+
+	btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
+	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+
+	found_start = key.objectid;
+	found_end = found_start + key.offset;
+	ASSERT(offset >= found_start && offset < found_end);
+
+	bit_nr = (offset - found_start) / sectorsize;
+	bitmap_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
+	return !!extent_buffer_test_bit(eb, bitmap_ptr, bit_nr);
+}
+
+/*
+ * Load the free space of @block_group from the bitmap items that follow its
+ * FREE_SPACE_INFO item.
+ *
+ * Items are consumed with btrfs_next_item() starting from the position of
+ * @path, until the next info item or the end of the tree.  Each run of set
+ * bits is handed to add_new_free_space() as one extent.  Structural problems
+ * (unexpected key type, bitmap outside the block group, extent count that
+ * differs from @expected_extent_count) are reported on stderr and counted in
+ * *@errors.
+ *
+ * Returns 0, or the negative error from btrfs_next_item().
+ */
+static int load_free_space_bitmaps(struct btrfs_fs_info *fs_info,
+				   struct btrfs_block_group_cache *block_group,
+				   struct btrfs_path *path,
+				   u32 expected_extent_count,
+				   int *errors)
+{
+	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_key key;
+	int prev_bit = 0, bit;
+	u64 extent_start = 0;
+	u64 start, end, offset;
+	u64 bitmap_end;
+	u32 extent_count = 0;
+	int ret;
+
+	start = block_group->key.objectid;
+	end = block_group->key.objectid + block_group->key.offset;
+
+	while (1) {
+		ret = btrfs_next_item(root, path);
+		if (ret < 0)
+			goto out;
+		if (ret)
+			break;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+		/* the next block group's info item ends this one's items */
+		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
+			break;
+
+		if (key.type != BTRFS_FREE_SPACE_BITMAP_KEY) {
+			fprintf(stderr, "unexpected key of type %u\n", key.type);
+			(*errors)++;
+			break;
+		}
+		if (key.objectid >= end) {
+			fprintf(stderr,
+	"free space bitmap starts at %llu, beyond end of block group %llu-%llu\n",
+				key.objectid, start, end);
+			(*errors)++;
+			break;
+		}
+		bitmap_end = key.objectid + key.offset;
+		if (bitmap_end > end) {
+			/* report the offending end, not the start */
+			fprintf(stderr,
+	"free space bitmap ends at %llu, beyond end of block group %llu-%llu\n",
+				bitmap_end, start, end);
+			(*errors)++;
+			break;
+		}
+
+		/* a 0->1 transition opens an extent, a 1->0 one closes it */
+		offset = key.objectid;
+		while (offset < bitmap_end) {
+			bit = free_space_test_bit(block_group, path, offset,
+						  root->sectorsize);
+			if (prev_bit == 0 && bit == 1) {
+				extent_start = offset;
+			} else if (prev_bit == 1 && bit == 0) {
+				add_new_free_space(block_group, fs_info, extent_start, offset);
+				extent_count++;
+			}
+			prev_bit = bit;
+			offset += root->sectorsize;
+		}
+	}
+
+	/* an extent still open after the last item runs to the group end */
+	if (prev_bit == 1) {
+		add_new_free_space(block_group, fs_info, extent_start, end);
+		extent_count++;
+	}
+
+	if (extent_count != expected_extent_count) {
+		fprintf(stderr, "free space info recorded %u extents, counted %u\n",
+			expected_extent_count, extent_count);
+		(*errors)++;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * Load the free space of @block_group from the FREE_SPACE_EXTENT items that
+ * follow its FREE_SPACE_INFO item.
+ *
+ * Items are consumed with btrfs_next_item() starting from the position of
+ * @path, until the next info item or the end of the tree.  Every extent is
+ * handed to add_new_free_space().  Problems (unexpected key type, extent
+ * outside the block group, extents that overlap or touch their predecessor,
+ * extent count that differs from @expected_extent_count) are reported on
+ * stderr and counted in *@errors.
+ *
+ * Returns 0, or the negative error from btrfs_next_item().
+ */
+static int load_free_space_extents(struct btrfs_fs_info *fs_info,
+				   struct btrfs_block_group_cache *block_group,
+				   struct btrfs_path *path,
+				   u32 expected_extent_count,
+				   int *errors)
+{
+	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_key key, prev_key;
+	int have_prev = 0;
+	u64 start, end;
+	u64 extent_end;
+	u32 extent_count = 0;
+	int ret;
+
+	start = block_group->key.objectid;
+	end = block_group->key.objectid + block_group->key.offset;
+
+	while (1) {
+		ret = btrfs_next_item(root, path);
+		if (ret < 0)
+			goto out;
+		if (ret)
+			break;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+		/* the next block group's info item ends this one's items */
+		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
+			break;
+
+		if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+			fprintf(stderr, "unexpected key of type %u\n", key.type);
+			(*errors)++;
+			break;
+		}
+		if (key.objectid >= end) {
+			fprintf(stderr,
+	"free space extent starts at %llu, beyond end of block group %llu-%llu\n",
+				key.objectid, start, end);
+			(*errors)++;
+			break;
+		}
+		extent_end = key.objectid + key.offset;
+		if (extent_end > end) {
+			/* report the offending end, not the start */
+			fprintf(stderr,
+	"free space extent ends at %llu, beyond end of block group %llu-%llu\n",
+				extent_end, start, end);
+			(*errors)++;
+			break;
+		}
+
+		if (have_prev) {
+			u64 cur_start = key.objectid;
+			u64 cur_end = cur_start + key.offset;
+			u64 prev_start = prev_key.objectid;
+			u64 prev_end = prev_start + prev_key.offset;
+
+			if (cur_start < prev_end) {
+				fprintf(stderr,
+	"free space extent %llu-%llu overlaps with previous %llu-%llu\n",
+					cur_start, cur_end,
+					prev_start, prev_end);
+				(*errors)++;
+			} else if (cur_start == prev_end) {
+				fprintf(stderr,
+	"free space extent %llu-%llu is unmerged with previous %llu-%llu\n",
+					cur_start, cur_end,
+					prev_start, prev_end);
+				(*errors)++;
+			}
+		}
+
+		add_new_free_space(block_group, fs_info, key.objectid, extent_end);
+		extent_count++;
+
+		prev_key = key;
+		have_prev = 1;
+	}
+
+	if (extent_count != expected_extent_count) {
+		fprintf(stderr, "free space info recorded %u extents, counted %u\n",
+			expected_extent_count, extent_count);
+		(*errors)++;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * Load the free space of @block_group from the free space tree.
+ *
+ * The block group's FREE_SPACE_INFO item selects the loader: bitmaps when
+ * BTRFS_FREE_SPACE_USING_BITMAPS is set in its flags, extents otherwise.
+ *
+ * Returns the loader's or lookup's non-zero result if there is one (-ENOMEM
+ * when no path can be allocated), otherwise the number of inconsistencies
+ * the loader counted.
+ */
+int load_free_space_tree(struct btrfs_fs_info *fs_info,
+			 struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_free_space_info *info;
+	struct btrfs_path *path;
+	u32 nr_extents, info_flags;
+	int nr_errors = 0;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->reada = 1;
+
+	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+	if (IS_ERR(info)) {
+		ret = PTR_ERR(info);
+	} else {
+		nr_extents = btrfs_free_space_extent_count(path->nodes[0],
+							   info);
+		info_flags = btrfs_free_space_flags(path->nodes[0], info);
+
+		if (info_flags & BTRFS_FREE_SPACE_USING_BITMAPS)
+			ret = load_free_space_bitmaps(fs_info, block_group,
+						      path, nr_extents,
+						      &nr_errors);
+		else
+			ret = load_free_space_extents(fs_info, block_group,
+						      path, nr_extents,
+						      &nr_errors);
+	}
+
+	btrfs_free_path(path);
+	return ret ? ret : nr_errors;
+}
diff --git a/free-space-tree.h b/free-space-tree.h
new file mode 100644
index 00000000..7529a468
--- /dev/null
+++ b/free-space-tree.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2015 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_FREE_SPACE_TREE_H__
+#define __BTRFS_FREE_SPACE_TREE_H__
+
+/*
+ * Load the free space of @block_group from the free space tree.  Returns a
+ * negative error on failure, otherwise the number of inconsistencies found
+ * while loading (0 when the tree is consistent).
+ */
+int load_free_space_tree(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group);
+
+#endif
diff --git a/fsck.btrfs b/fsck.btrfs
new file mode 100755
index 00000000..e1eff2ca
--- /dev/null
+++ b/fsck.btrfs
@@ -0,0 +1,39 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2013 SUSE
+#
+# copied from fsck.xfs
+# Copyright (c) 2006 Silicon Graphics, Inc. All Rights Reserved.
+#
+
+# fsck.btrfs is a type of utility that should exist for any filesystem and is
+# called during system setup when the corresponding /etc/fstab entries contain
+# non-zero value for fs_passno. (See fstab(5) for more.)
+#
+# Traditional filesystems need to run their respective fsck utility in case the
+# filesystem was not unmounted cleanly and the log needs to be replayed before
+# mount. This is not needed for BTRFS. You should set fs_passno to 0.
+#
+# If you wish to check the consistency of a BTRFS filesystem or repair a
+# damaged filesystem, see btrfs(8) subcommand 'check'. By default the
+# filesystem consistency is checked, the repair mode is enabled via --repair
+# option (use with care!).
+
+# Any of -a, -A, -p, -y selects automatic mode, which suppresses the hint
+# printed at the end.  Other options are accepted and ignored.
+AUTO=false
+while getopts ":aApy" c
+do
+	case $c in
+	a|A|p|y)	AUTO=true;;
+	esac
+done
+shift $(($OPTIND - 1))
+# The device is the last remaining argument.
+eval DEV=\${$#}
+# Quote the path so a device name containing whitespace stays one operand.
+if [ ! -e "$DEV" ]; then
+	echo "$0: $DEV does not exist"
+	exit 8
+fi
+if ! $AUTO; then
+	echo "If you wish to check the consistency of a BTRFS filesystem or"
+	echo "repair a damaged filesystem, see btrfs(8) subcommand 'check'."
+fi
+exit 0
diff --git a/hash.h b/hash.h
new file mode 100644
index 00000000..ac4c4117
--- /dev/null
+++ b/hash.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_HASH_H__
+#define __BTRFS_HASH_H__
+
+#include "crc32c.h"
+
+/*
+ * Hash @len bytes of @name with crc32c(), seeded with (u32)~1, and return
+ * the result as a u64.
+ */
+static inline u64 btrfs_name_hash(const char *name, int len)
+{
+	const u32 seed = (u32)~1;
+
+	return crc32c(seed, name, len);
+}
+#endif
diff --git a/hasher.c b/hasher.c
new file mode 100644
index 00000000..b80407cf
--- /dev/null
+++ b/hasher.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "kerncompat.h"
+#include "hash.h"
+
+/*
+ * Read names from stdin, one per line, strip the trailing newline and print
+ * the btrfs name hash of each.
+ */
+int main(void) {
+	u64 result;
+	char line[255];
+	size_t len;
+
+	while (fgets(line, sizeof(line), stdin)) {
+		/* measure once; every later step works from this length */
+		len = strlen(line);
+		if (len == 0)
+			continue;
+		if (line[len - 1] == '\n')
+			line[--len] = '\0';
+		result = btrfs_name_hash(line, len);
+		printf("hash returns %llu\n", (unsigned long long)result);
+	}
+	return 0;
+}
diff --git a/help.c b/help.c
new file mode 100644
index 00000000..c8bb7204
--- /dev/null
+++ b/help.c
@@ -0,0 +1,271 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#include "commands.h"
+#include "utils.h"
+
+/* Flags for do_usage_one_command(); combined with bitwise OR. */
+/* print the one-line description after the synopsis */
+#define USAGE_SHORT 1U
+/* also print the long description, if there is one */
+#define USAGE_LONG 2U
+/* also print the options section, if there is one */
+#define USAGE_OPTIONS 4U
+/* listing layout: no "usage: " prefix, wider padding */
+#define USAGE_LISTING 8U
+
+/*
+ * Print the usage text of one command to @outf.
+ *
+ * @usagestr is a NULL-terminated string array read in order: the synopsis,
+ * the one-line description, the lines of the long description up to an empty
+ * string, then the option lines up to the terminating NULL.  @flags (USAGE_*)
+ * selects how much of that is printed and in which layout.
+ *
+ * Returns 0 on success, -1 if there is no usage text at all, -2 if the short
+ * description was requested but is missing.
+ */
+static int do_usage_one_command(const char * const *usagestr,
+ unsigned int flags, FILE *outf)
+{
+ int pad = 4;
+
+ if (!usagestr || !*usagestr)
+ return -1;
+
+ /* synopsis; note that no newline is written after it at this point */
+ fprintf(outf, "%s%s", (flags & USAGE_LISTING) ? " " : "usage: ",
+ *usagestr++);
+
+ /* a short one-line description (mandatory) */
+ if ((flags & USAGE_SHORT) == 0)
+ return 0;
+ else if (!*usagestr)
+ return -2;
+ fputc('\n', outf);
+
+ if (flags & USAGE_LISTING)
+ pad = 8;
+ else
+ fputc('\n', outf);
+
+ fprintf(outf, "%*s%s\n", pad, "", *usagestr++);
+
+ /* a long (possibly multi-line) description (optional) */
+ if (!*usagestr || ((flags & USAGE_LONG) == 0))
+ return 0;
+
+ /* an empty string here means there is no long description */
+ if (**usagestr)
+ fputc('\n', outf);
+ while (*usagestr && **usagestr)
+ fprintf(outf, "%*s%s\n", pad, "", *usagestr++);
+
+ /* options (optional) */
+ if (!*usagestr || ((flags & USAGE_OPTIONS) == 0))
+ return 0;
+
+ /*
+ * options (if present) should always (even if there is no long
+ * description) be prepended with an empty line, skip it
+ */
+ usagestr++;
+
+ fputc('\n', outf);
+ while (*usagestr)
+ fprintf(outf, "%*s%s\n", pad, "", *usagestr++);
+
+ return 0;
+}
+
+/*
+ * Translate the boolean switches @full, @lst and @alias into USAGE_* flags,
+ * print the usage of one command through do_usage_one_command() and report
+ * a missing usage text or short description for @token on @outf.
+ *
+ * Returns the result of do_usage_one_command().
+ */
+static int usage_command_internal(const char * const *usagestr,
+				  const char *token, int full, int lst,
+				  int alias, FILE *outf)
+{
+	/* aliases are listed without their one-line description */
+	unsigned int flags = alias ? 0 : USAGE_SHORT;
+	int ret;
+
+	if (full)
+		flags |= USAGE_LONG | USAGE_OPTIONS;
+	if (lst)
+		flags |= USAGE_LISTING;
+
+	ret = do_usage_one_command(usagestr, flags, outf);
+	if (ret == -1)
+		fprintf(outf, "No usage for '%s'\n", token);
+	else if (ret == -2)
+		fprintf(outf, "No short description for '%s'\n", token);
+
+	return ret;
+}
+
+/*
+ * Print the usage of a single command (not in listing layout) to stderr when
+ * @err is set and to stdout otherwise, followed by an empty line if the
+ * usage text could be printed.
+ */
+static void usage_command_usagestr(const char * const *usagestr,
+				   const char *token, int full, int err)
+{
+	FILE *outf = stdout;
+
+	if (err)
+		outf = stderr;
+
+	if (usage_command_internal(usagestr, token, full, 0, 0, outf) == 0)
+		fputc('\n', outf);
+}
+
+/*
+ * Print the usage of @cmd: with long description and options when @full is
+ * set, to stderr when @err is set and to stdout otherwise.
+ */
+void usage_command(const struct cmd_struct *cmd, int full, int err)
+{
+ usage_command_usagestr(cmd->usagestr, cmd->token, full, err);
+}
+
+/*
+ * Print the full usage text @usagestr to stderr and exit with status 1.
+ * Does not return.
+ */
+void usage(const char * const *usagestr)
+{
+ /* no command token is available here, hence the NULL */
+ usage_command_usagestr(usagestr, NULL, 1, 1);
+ exit(1);
+}
+
+/*
+ * Print the usage of every non-hidden command in @grp to @outf in listing
+ * layout, descending recursively into nested command groups (entries whose
+ * ->next is set).  @full selects the long form; it also decides where the
+ * empty separator lines go.
+ */
+static void usage_command_group_internal(const struct cmd_group *grp, int full,
+ FILE *outf)
+{
+ const struct cmd_struct *cmd = grp->commands;
+ /* set after a nested group in short mode: separate the next command */
+ int do_sep = 0;
+
+ for (; cmd->token; cmd++) {
+ if (cmd->flags & CMD_HIDDEN)
+ continue;
+
+ /* full mode: an empty line before every entry except the first */
+ if (full && cmd != grp->commands)
+ fputc('\n', outf);
+
+ if (!cmd->next) {
+ if (do_sep) {
+ fputc('\n', outf);
+ do_sep = 0;
+ }
+
+ usage_command_internal(cmd->usagestr, cmd->token, full,
+ 1, cmd->flags & CMD_ALIAS, outf);
+ continue;
+ }
+
+ /* this is an entry point to a nested command group */
+
+ if (!full && cmd != grp->commands)
+ fputc('\n', outf);
+
+ usage_command_group_internal(cmd->next, full, outf);
+
+ if (!full)
+ do_sep = 1;
+ }
+}
+
+/*
+ * Print the short top-level help for @grp to stdout: the usage lines, the
+ * nested command groups with their info string, the plain commands with
+ * their one-line description, and a closing hint on how to get more help.
+ * Hidden entries are skipped.
+ */
+void usage_command_group_short(const struct cmd_group *grp)
+{
+	const char * const *usagestr = grp->usagestr;
+	FILE *outf = stdout;
+	const struct cmd_struct *cmd;
+
+	if (usagestr && *usagestr) {
+		fprintf(outf, "usage: %s\n", *usagestr++);
+		while (*usagestr)
+			fprintf(outf, " or: %s\n", *usagestr++);
+	}
+
+	fputc('\n', outf);
+
+	fprintf(outf, "Command groups:\n");
+	for (cmd = grp->commands; cmd->token; cmd++) {
+		if (cmd->flags & CMD_HIDDEN)
+			continue;
+
+		if (!cmd->next)
+			continue;
+
+		fprintf(outf, " %-16s %s\n", cmd->token, cmd->next->infostr);
+	}
+
+	fprintf(outf, "\nCommands:\n");
+	for (cmd = grp->commands; cmd->token; cmd++) {
+		if (cmd->flags & CMD_HIDDEN)
+			continue;
+
+		if (cmd->next)
+			continue;
+
+		/* usagestr[1] is the one-line description */
+		fprintf(outf, " %-16s %s\n", cmd->token, cmd->usagestr[1]);
+	}
+
+	fputc('\n', outf);
+	/*
+	 * The footer belongs to the same help text, so it goes to the same
+	 * stream; otherwise piping the help would lose it.
+	 */
+	fprintf(outf, "For an overview of a given command use 'btrfs command --help'\n");
+	fprintf(outf, "or 'btrfs [command...] --help --full' to print all available options.\n");
+	fprintf(outf, "Any command name can be shortened as far as it stays unambiguous,\n");
+	fprintf(outf, "however it is recommended to use full command names in scripts.\n");
+	fprintf(outf, "All command groups have their manual page named 'btrfs-<group>'.\n");
+}
+
+/*
+ * Print the help of command group @grp: its usage lines, the usage of all
+ * its commands (long form when @full is set) and, if present, its info
+ * string.  Output goes to stderr when @err is set and to stdout otherwise.
+ */
+void usage_command_group(const struct cmd_group *grp, int full, int err)
+{
+	const char * const *line = grp->usagestr;
+	FILE *outf = err ? stderr : stdout;
+
+	if (line && *line) {
+		fprintf(outf, "usage: %s\n", *line);
+		for (line++; *line; line++)
+			fprintf(outf, " or: %s\n", *line);
+	}
+
+	fputc('\n', outf);
+	usage_command_group_internal(grp, full, outf);
+	fputc('\n', outf);
+
+	if (grp->infostr)
+		fprintf(outf, "%s\n", grp->infostr);
+}
+
+/*
+ * Report @arg as an unknown token on stderr, print the short help of @grp
+ * to stderr as well and exit with status 1.  Does not return.
+ */
+void help_unknown_token(const char *arg, const struct cmd_group *grp)
+{
+ fprintf(stderr, "%s: unknown token '%s'\n", get_argv0_buf(), arg);
+ usage_command_group(grp, 0, 1);
+ exit(1);
+}
+
+/*
+ * Report @arg as ambiguous on stderr, list every command of @grp whose
+ * token starts with @arg and exit with status 1.  Does not return.
+ */
+void help_ambiguous_token(const char *arg, const struct cmd_group *grp)
+{
+	const struct cmd_struct *cmd;
+
+	fprintf(stderr, "%s: ambiguous token '%s'\n", get_argv0_buf(), arg);
+	fprintf(stderr, "\nDid you mean one of these ?\n");
+
+	for (cmd = grp->commands; cmd->token; cmd++) {
+		if (prefixcmp(cmd->token, arg) == 0)
+			fprintf(stderr, "\t%s\n", cmd->token);
+	}
+
+	exit(1);
+}
+
+/*
+ * Print the help of @grp to stdout; the long form is used when the first
+ * argument after the command name is "--full".
+ */
+void help_command_group(const struct cmd_group *grp, int argc, char **argv)
+{
+	int full = (argc > 1 && strcmp(argv[1], "--full") == 0);
+
+	usage_command_group(grp, full, 0);
+}
+
+/*
+ * Return 0 if @str starts with @prefix (an empty prefix always matches).
+ * Otherwise return the difference, as unsigned char values, between the
+ * first mismatching character of @prefix and that of @str.
+ */
+int prefixcmp(const char *str, const char *prefix)
+{
+	while (*prefix) {
+		if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+		str++;
+		prefix++;
+	}
+	return 0;
+}
+
diff --git a/inode-item.c b/inode-item.c
new file mode 100644
index 00000000..522d25a4
--- /dev/null
+++ b/inode-item.c
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "crc32c.h"
+
+/*
+ * Scan the packed inode refs of the item @path points at for one whose name
+ * equals the @name_len bytes at @name.
+ *
+ * Returns 1 and stores the matching ref in *@ref_ret, or returns 0 and
+ * leaves *@ref_ret untouched.
+ */
+static int find_name_in_backref(struct btrfs_path *path, const char * name,
+				int name_len, struct btrfs_inode_ref **ref_ret)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	u32 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+	unsigned long item_ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+	struct btrfs_inode_ref *ref;
+	u32 pos;
+	int len;
+
+	/* each record is a ref header followed directly by its name */
+	for (pos = 0; pos < item_size; pos += len + sizeof(*ref)) {
+		ref = (struct btrfs_inode_ref *)(item_ptr + pos);
+		len = btrfs_inode_ref_name_len(leaf, ref);
+		if (len != name_len)
+			continue;
+		if (memcmp_extent_buffer(leaf, name, (unsigned long)(ref + 1),
+					 name_len) == 0) {
+			*ref_ret = ref;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Add an INODE_REF back reference (name and index) for inode
+ * @inode_objectid under parent @ref_objectid.
+ *
+ * If an item with that key already exists, the new reference is appended to
+ * it, unless the same name is already recorded, in which case -EEXIST is
+ * returned.  -EOVERFLOW from the insertion is mapped to -EMLINK, and on
+ * -EMLINK the reference is stored as an extended ref instead when the
+ * filesystem has the EXTENDED_IREF incompat feature.
+ *
+ * Returns 0 on success or a negative error.
+ */
+int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, u64 index)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_inode_ref *ref;
+ unsigned long ptr;
+ int ret;
+ int ins_len = name_len + sizeof(*ref);
+
+ key.objectid = inode_objectid;
+ key.offset = ref_objectid;
+ btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ ins_len);
+ if (ret == -EEXIST) {
+ u32 old_size;
+
+ /* name already recorded: leave with ret == -EEXIST */
+ if (find_name_in_backref(path, name, name_len, &ref))
+ goto out;
+
+ /* grow the item and place the new ref right after the old data */
+ old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
+ ret = btrfs_extend_item(trans, root, path, ins_len);
+ BUG_ON(ret);
+ ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_ref);
+ ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
+ btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+ btrfs_set_inode_ref_index(path->nodes[0], ref, index);
+ ptr = (unsigned long)(ref + 1);
+ ret = 0;
+ } else if (ret < 0) {
+ if (ret == -EOVERFLOW)
+ ret = -EMLINK;
+ goto out;
+ } else {
+ /* fresh item: the ref starts at the beginning of the item */
+ ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_ref);
+ btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+ btrfs_set_inode_ref_index(path->nodes[0], ref, index);
+ ptr = (unsigned long)(ref + 1);
+ }
+ /* the name is stored directly behind the ref header */
+ write_extent_buffer(path->nodes[0], name, ptr, name_len);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+
+out:
+ btrfs_free_path(path);
+
+ /* fall back to an extended ref when the filesystem supports them */
+ if (ret == -EMLINK) {
+ if (btrfs_fs_incompat(root->fs_info,
+ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF))
+ ret = btrfs_insert_inode_extref(trans, root, name,
+ name_len,
+ inode_objectid,
+ ref_objectid, index);
+ }
+ return ret;
+}
+
+/*
+ * Search @root for the key @location and leave the result in @path.
+ *
+ * @mod < 0 searches with an insert length of -1, and any non-zero @mod
+ * enables COW for the search.
+ *
+ * Special case: an inexact hit for a ROOT_ITEM key with offset (u64)-1 steps
+ * back one slot when the previous item has the same objectid and type, and
+ * is then reported as found.
+ *
+ * Returns the result of btrfs_search_slot(), or 0 in the special case.
+ */
+int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+		       *root, struct btrfs_path *path,
+		       struct btrfs_key *location, int mod)
+{
+	struct btrfs_key prev_key;
+	int ret;
+
+	ret = btrfs_search_slot(trans, root, location, path,
+				mod < 0 ? -1 : 0, mod != 0);
+	if (ret <= 0)
+		return ret;
+
+	if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY ||
+	    location->offset != (u64)-1 || path->slots[0] == 0)
+		return ret;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &prev_key, path->slots[0] - 1);
+	if (prev_key.objectid != location->objectid ||
+	    btrfs_key_type(&prev_key) != btrfs_key_type(location))
+		return ret;
+
+	path->slots[0]--;
+	return 0;
+}
+
+/*
+ * Insert @inode_item into @root under the key
+ * (@objectid, BTRFS_INODE_ITEM_KEY, 0).  Returns the result of
+ * btrfs_insert_item().
+ */
+int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+		       *root, u64 objectid, struct btrfs_inode_item
+		       *inode_item)
+{
+	struct btrfs_key inode_key;
+
+	inode_key.objectid = objectid;
+	inode_key.type = BTRFS_INODE_ITEM_KEY;
+	inode_key.offset = 0;
+
+	return btrfs_insert_item(trans, root, &inode_key, inode_item,
+				 sizeof(*inode_item));
+}
+
+/*
+ * Look up the INODE_REF item of inode @ino under parent @parent_ino and
+ * search it for @name.  COW is enabled for the search when @ins_len is
+ * non-zero.  @index is not used.
+ *
+ * Returns the matching ref, NULL when the item or the name does not exist,
+ * or ERR_PTR() with the search error.
+ */
+struct btrfs_inode_ref *btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
+		struct btrfs_root *root, struct btrfs_path *path,
+		const char *name, int namelen, u64 ino, u64 parent_ino,
+		u64 index, int ins_len)
+{
+	struct btrfs_inode_ref *found = NULL;
+	struct btrfs_key ref_key;
+	int ret;
+
+	ref_key.objectid = ino;
+	ref_key.type = BTRFS_INODE_REF_KEY;
+	ref_key.offset = parent_ino;
+
+	ret = btrfs_search_slot(trans, root, &ref_key, path, ins_len,
+				ins_len ? 1 : 0);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	/* only an exact key match has refs worth scanning */
+	if (ret == 0)
+		find_name_in_backref(path, name, namelen, &found);
+
+	return found;
+}
+
+/*
+ * Key offset used for extended inode refs: btrfs_crc32c() over the
+ * @namelen bytes of @name, seeded with @parent_ino.
+ * NOTE(review): parent_ino is a u64; presumably only its low 32 bits act as
+ * the seed - confirm against the btrfs_crc32c() prototype.
+ */
+static inline u64 btrfs_extref_hash(u64 parent_ino, const char *name,
+ int namelen)
+{
+ return (u64)btrfs_crc32c(parent_ino, name, namelen);
+}
+
+/*
+ * Scan the packed extended inode refs of the item @path points at for one
+ * with parent @parent_ino and the name @name of length @namelen.
+ *
+ * Returns 1 on a match, storing the ref in *@extref_ret when @extref_ret is
+ * not NULL; returns 0 otherwise.
+ */
+static int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
+		u64 parent_ino, const char *name, int namelen,
+		struct btrfs_inode_extref **extref_ret)
+{
+	struct extent_buffer *node = path->nodes[0];
+	int slot = path->slots[0];
+	u32 item_size = btrfs_item_size_nr(node, slot);
+	unsigned long item_ptr = btrfs_item_ptr_offset(node, slot);
+	struct btrfs_inode_extref *extref;
+	int ref_name_len;
+	u32 pos;
+
+	/*
+	 * Search all extended backrefs in this item. We're only looking
+	 * through any collisions so most of the time this is just going to
+	 * compare against one buffer. If all is well, we'll return success and
+	 * the inode ref object.
+	 */
+	for (pos = 0; pos < item_size;
+	     pos += ref_name_len + sizeof(*extref)) {
+		extref = (struct btrfs_inode_extref *) (item_ptr + pos);
+		ref_name_len = btrfs_inode_extref_name_len(node, extref);
+
+		if (ref_name_len != namelen)
+			continue;
+		if (btrfs_inode_extref_parent(node, extref) != parent_ino)
+			continue;
+		if (memcmp_extent_buffer(node, name,
+					 (unsigned long)(&extref->name),
+					 namelen) != 0)
+			continue;
+
+		if (extref_ret)
+			*extref_ret = extref;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the extended inode ref of inode @ino for @name under parent
+ * @parent_ino.  COW is enabled for the search when @ins_len is non-zero.
+ * @index is not used.
+ *
+ * Returns the matching extref, NULL when the item or the name does not
+ * exist, or ERR_PTR() with the search error.
+ */
+struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_trans_handle
+		*trans, struct btrfs_path *path, struct btrfs_root *root,
+		u64 ino, u64 parent_ino, u64 index, const char *name,
+		int namelen, int ins_len)
+{
+	struct btrfs_inode_extref *extref;
+	struct btrfs_key extref_key;
+	int ret;
+
+	extref_key.objectid = ino;
+	extref_key.type = BTRFS_INODE_EXTREF_KEY;
+	extref_key.offset = btrfs_extref_hash(parent_ino, name, namelen);
+
+	ret = btrfs_search_slot(trans, root, &extref_key, path, ins_len,
+				ins_len ? 1 : 0);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	if (ret == 0 &&
+	    btrfs_find_name_in_ext_backref(path, parent_ino, name,
+					   namelen, &extref))
+		return extref;
+
+	return NULL;
+}
+
+/*
+ * Remove the extended inode ref for @name under parent @ref_objectid from
+ * inode @inode_objectid.  When @index is not NULL the directory index
+ * stored in the ref is written to *@index.
+ *
+ * Returns 0 on success, -ENOENT when the item or the name is not found,
+ * -ENOMEM when no path can be allocated, or another negative error from the
+ * search or deletion.
+ */
+int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid,
+ u64 *index)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_inode_extref *extref;
+ struct extent_buffer *leaf;
+ int ret;
+ int del_len = name_len + sizeof(*extref);
+ unsigned long ptr;
+ unsigned long item_start;
+ u32 item_size;
+
+ key.objectid = inode_objectid;
+ key.type = BTRFS_INODE_EXTREF_KEY;
+ key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Sanity check - did we find the right item for this name? Several
+ * names can share one item when their hashes collide, so the name has
+ * to be looked up inside it. A miss is reported as -ENOENT.
+ */
+ if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
+ name, name_len, &extref)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (index)
+ *index = btrfs_inode_extref_index(leaf, extref);
+
+ if (del_len == item_size) {
+ /*
+ * Common case only one ref in the item, remove the whole item.
+ */
+ ret = btrfs_del_item(trans, root, path);
+ goto out;
+ }
+
+ ptr = (unsigned long)extref;
+ item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+
+ /* close the gap: move everything behind the ref down over it */
+ memmove_extent_buffer(leaf, ptr, ptr + del_len,
+ item_size - (ptr + del_len - item_start));
+
+ btrfs_truncate_item(trans, root, path, item_size - del_len, 1);
+
+out:
+ btrfs_free_path(path);
+
+ return ret;
+}
+
+/*
+ * btrfs_insert_inode_extref() - Inserts an extended inode ref into a tree.
+ *
+ * The ref for @name under parent @ref_objectid is stored in the
+ * INODE_EXTREF item of inode @inode_objectid whose key offset is the extref
+ * hash of parent and name.  If that item already exists, the new ref is
+ * appended to it, unless the same parent/name pair is already recorded, in
+ * which case -EEXIST is returned.
+ *
+ * The caller must have checked against BTRFS_LINK_MAX already.
+ *
+ * Returns 0 on success or a negative error.
+ */
+int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      const char *name, int name_len,
+			      u64 inode_objectid, u64 ref_objectid, u64 index)
+{
+	struct btrfs_inode_extref *extref;
+	int ret;
+	int ins_len = name_len + sizeof(*extref);
+	unsigned long ptr;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_item *item;
+
+	key.objectid = inode_objectid;
+	key.type = BTRFS_INODE_EXTREF_KEY;
+	key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+				      ins_len);
+	if (ret == -EEXIST) {
+		/* same parent and name already recorded: keep -EEXIST */
+		if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+						   name, name_len, NULL))
+			goto out;
+
+		/*
+		 * Do not continue if the item could not be grown: the code
+		 * below writes into its last ins_len bytes and would clobber
+		 * an existing ref.
+		 */
+		ret = btrfs_extend_item(trans, root, path, ins_len);
+		if (ret)
+			goto out;
+	}
+
+	if (ret < 0)
+		goto out;
+
+	/* the new ref occupies the last ins_len bytes of the item */
+	leaf = path->nodes[0];
+	item = btrfs_item_nr(path->slots[0]);
+	ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
+	ptr += btrfs_item_size(leaf, item) - ins_len;
+	extref = (struct btrfs_inode_extref *)ptr;
+
+	btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len);
+	btrfs_set_inode_extref_index(path->nodes[0], extref, index);
+	btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid);
+
+	ptr = (unsigned long)&extref->name;
+	write_extent_buffer(path->nodes[0], name, ptr, name_len);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+out:
+	btrfs_free_path(path);
+
+	return ret;
+}
+
+/*
+ * Remove the back reference for @name under parent @parent_ino from inode
+ * @ino.  When @index is not NULL the directory index stored in the ref is
+ * written to *@index.
+ *
+ * The regular INODE_REF item is tried first.  If the item or the name is
+ * missing and the filesystem has the EXTENDED_IREF incompat feature, the
+ * extended ref is removed instead.
+ *
+ * Returns 0 on success, -ENOENT when no matching ref exists, -ENOMEM when no
+ * path can be allocated, or another negative error.
+ */
+int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const char *name, int name_len,
+ u64 ino, u64 parent_ino, u64 *index)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_inode_ref *ref;
+ struct extent_buffer *leaf;
+ unsigned long ptr;
+ unsigned long item_start;
+ u32 item_size;
+ u32 sub_item_len;
+ int ret;
+ int search_ext_refs = 0;
+ int del_len = name_len + sizeof(*ref);
+
+ key.objectid = ino;
+ key.offset = parent_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0) {
+ ret = -ENOENT;
+ search_ext_refs = 1;
+ goto out;
+ } else if (ret < 0) {
+ goto out;
+ }
+ if (!find_name_in_backref(path, name, name_len, &ref)) {
+ ret = -ENOENT;
+ search_ext_refs = 1;
+ goto out;
+ }
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+
+ if (index)
+ *index = btrfs_inode_ref_index(leaf, ref);
+
+ /* the ref is the only one in the item: drop the whole item */
+ if (del_len == item_size) {
+ ret = btrfs_del_item(trans, root, path);
+ goto out;
+ }
+ ptr = (unsigned long)ref;
+ /* same value as del_len */
+ sub_item_len = name_len + sizeof(*ref);
+ item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ /* close the gap: move everything behind the ref down over it */
+ memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
+ item_size - (ptr + sub_item_len - item_start));
+ btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+out:
+ btrfs_free_path(path);
+
+ if (search_ext_refs &&
+ btrfs_fs_incompat(root->fs_info,
+ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)) {
+ /*
+ * No refs were found, or we could not find the name in our ref
+ * array. Find and remove the extended inode ref then.
+ */
+ return btrfs_del_inode_extref(trans, root, name, name_len,
+ ino, parent_ino, index);
+ }
+
+ return ret;
+}
diff --git a/inode-map.c b/inode-map.c
new file mode 100644
index 00000000..346952b7
--- /dev/null
+++ b/inode-map.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+
+/*
+ * Walk the btree of allocated inodes and find a hole.
+ *
+ * The search starts at the larger of root->last_inode_alloc and
+ * BTRFS_FIRST_FREE_OBJECTID.  'dirid' is accepted for interface
+ * compatibility but is not used to pick the starting point.
+ *
+ * On success the free objectid is stored in '*objectid' and remembered in
+ * root->last_inode_alloc, and 0 is returned.  On failure a negative errno is
+ * returned (-ENOMEM if the path cannot be allocated, otherwise the error of
+ * the tree search) and '*objectid' is left untouched.
+ */
+int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     u64 dirid, u64 *objectid)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_key search_key;
+	struct extent_buffer *l;
+	u64 search_start;
+	u64 last_ino = 0;
+	int start_found = 0;
+	int slot;
+	int ret;
+
+	/* Report allocation failure to the caller instead of aborting */
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	search_start = root->last_inode_alloc;
+	search_start = max((unsigned long long)search_start,
+			   BTRFS_FIRST_FREE_OBJECTID);
+	search_key.objectid = search_start;
+	search_key.offset = 0;
+	search_key.type = 0;
+
+	btrfs_init_path(path);
+	ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto error;
+
+	/* Step back one slot so the item preceding the search key is seen */
+	if (path->slots[0] > 0)
+		path->slots[0]--;
+
+	while (1) {
+		l = path->nodes[0];
+		slot = path->slots[0];
+		if (slot >= btrfs_header_nritems(l)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret == 0)
+				continue;
+			if (ret < 0)
+				goto error;
+			/* Ran off the end of the tree without finding a gap */
+			if (!start_found) {
+				*objectid = search_start;
+				goto found;
+			}
+			*objectid = last_ino > search_start ?
+				last_ino : search_start;
+			goto found;
+		}
+		btrfs_item_key_to_cpu(l, &key, slot);
+		if (key.objectid >= search_start && start_found) {
+			if (last_ino < search_start)
+				last_ino = search_start;
+			/* Gap between the previous objectid and this one */
+			if (key.objectid > last_ino) {
+				*objectid = last_ino;
+				goto found;
+			}
+		}
+		start_found = 1;
+		last_ino = key.objectid + 1;
+		path->slots[0]++;
+	}
+	/* FIXME: -ENOSPC is never reported */
+found:
+	root->last_inode_alloc = *objectid;
+	btrfs_free_path(path);
+	BUG_ON(*objectid < search_start);
+	return 0;
+error:
+	btrfs_free_path(path);
+	return ret;
+}
diff --git a/inode.c b/inode.c
new file mode 100644
index 00000000..be03a522
--- /dev/null
+++ b/inode.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright (C) 2014 Fujitsu. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+/*
+ * Unlike inode.c in the kernel, which can use most of the kernel
+ * infrastructure such as inodes and dentries, in user-land we can only use
+ * the inode number and operate directly on extent buffers. This may cause
+ * extra searching, but should not be a huge problem since progs is less
+ * performance sensitive.
+ */
+#include <sys/stat.h>
+
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "time.h"
+
+/*
+ * Find a free dir index in directory 'dir_ino' for a later btrfs_add_link().
+ *
+ * The tree is searched for the last DIR_INDEX key of the directory and the
+ * result is that key's offset plus one.  If the item in front of the search
+ * position is not a DIR_INDEX of 'dir_ino', the result is 2.
+ *
+ * The result is stored in '*ret_ino' (despite its name it receives an
+ * index) only when 0 is returned.  A NULL 'ret_ino' makes the call a no-op
+ * that returns 0.  Returns -ENOMEM, -EIO (no previous leaf) or the error of
+ * the tree search on failure.
+ */
+static int btrfs_find_free_dir_index(struct btrfs_root *root, u64 dir_ino,
+				     u64 *ret_ino)
+{
+	struct btrfs_key search_key;
+	struct btrfs_key last_key;
+	struct btrfs_path *path;
+	u64 next_index = 2;
+	int ret;
+
+	if (!ret_ino)
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* Largest possible DIR_INDEX key of this directory */
+	search_key.objectid = dir_ino;
+	search_key.type = BTRFS_DIR_INDEX_KEY;
+	search_key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	ret = 0;
+
+	/* Move to the item just before the search position */
+	if (path->slots[0] > 0) {
+		path->slots[0]--;
+	} else {
+		ret = btrfs_prev_leaf(root, path);
+		/*
+		 * This shouldn't happen since there must be a leaf
+		 * containing the DIR_ITEM.
+		 * Can only happen when the previous leaf is corrupted.
+		 */
+		if (ret > 0)
+			ret = -EIO;
+		if (ret < 0)
+			goto out;
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
+	if (last_key.objectid == dir_ino &&
+	    last_key.type == BTRFS_DIR_INDEX_KEY)
+		next_index = last_key.offset + 1;
+out:
+	btrfs_free_path(path);
+	if (ret == 0)
+		*ret_ino = next_index;
+	return ret;
+}
+
+/*
+ * Check for dir_item/dir_index conflicts before inserting 'name' with dir
+ * index 'index' into directory 'dir'.
+ *
+ * Returns:
+ *   0        - 'dir' exists, is a directory and neither the name nor the
+ *              index is in use
+ *   -ENOMEM  - the path could not be allocated
+ *   -ENOENT  - no INODE_ITEM exists for 'dir'
+ *   -ENOTDIR - the inode 'dir' is not a directory
+ *   -EEXIST  - a DIR_ITEM with this name or a DIR_INDEX with this index
+ *              already exists
+ *   other negative errno from the tree lookups
+ */
+int check_dir_conflict(struct btrfs_root *root, char *name, int namelen,
+		       u64 dir, u64 index)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_dir_item *dir_item;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* Given dir exists? */
+	key.objectid = dir;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/*
+	 * Is it a dir?  The file type is an enumerated field, not a bit flag:
+	 * testing 'mode & S_IFDIR' also accepts block devices and sockets,
+	 * whose type values contain the S_IFDIR bit.  S_ISDIR() compares the
+	 * whole type field.
+	 */
+	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_inode_item);
+	if (!S_ISDIR(btrfs_inode_mode(path->nodes[0], inode_item))) {
+		ret = -ENOTDIR;
+		goto out;
+	}
+	btrfs_release_path(path);
+
+	/* Name conflicting? */
+	dir_item = btrfs_lookup_dir_item(NULL, root, path, dir, name,
+					 namelen, 0);
+	if (IS_ERR(dir_item)) {
+		ret = PTR_ERR(dir_item);
+		goto out;
+	}
+	if (dir_item) {
+		ret = -EEXIST;
+		goto out;
+	}
+	btrfs_release_path(path);
+
+	/*
+	 * Index conflicting?  A -ENOENT from the index lookup means "not
+	 * found", which is the good case here.
+	 */
+	dir_item = btrfs_lookup_dir_index(NULL, root, path, dir, name,
+					  namelen, index, 0);
+	if (IS_ERR(dir_item) && PTR_ERR(dir_item) == -ENOENT)
+		dir_item = NULL;
+	if (IS_ERR(dir_item)) {
+		ret = PTR_ERR(dir_item);
+		goto out;
+	}
+	if (dir_item) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Add the dir_item/dir_index for 'name' in directory 'parent_ino', pointing
+ * at inode 'ino'.  If 'add_backref' is true, also insert a backref from
+ * 'ino' to the parent dir and increase the nlink of 'ino' (the kernel
+ * version does not do this).
+ *
+ * 'index' is optional.  If it points to a non-zero value, that dir index is
+ * used; otherwise a free index is looked up.  On success the index that was
+ * used is written back through 'index' when it is not NULL.
+ *
+ * The size of the parent inode is increased by twice 'namelen'.
+ *
+ * Currently only supports adding a link from an inode to another inode.
+ *
+ * Returns 0 on success or a negative errno.  A missing INODE_ITEM for 'ino'
+ * or 'parent_ino' is reported as -ENOENT.
+ */
+int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   u64 ino, u64 parent_ino, char *name, int namelen,
+		   u8 type, u64 *index, int add_backref)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_inode_item *inode_item;
+	u32 nlink;
+	u64 inode_size;
+	u64 ret_index = 0;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	if (index && *index) {
+		ret_index = *index;
+	} else {
+		ret = btrfs_find_free_dir_index(root, parent_ino, &ret_index);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = check_dir_conflict(root, name, namelen, parent_ino, ret_index);
+	if (ret < 0)
+		goto out;
+
+	/* Add inode ref */
+	if (add_backref) {
+		ret = btrfs_insert_inode_ref(trans, root, name, namelen,
+					     ino, parent_ino, ret_index);
+		if (ret < 0)
+			goto out;
+
+		/* Update nlinks for the inode */
+		key.objectid = ino;
+		key.type = BTRFS_INODE_ITEM_KEY;
+		key.offset = 0;
+		ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+		if (ret) {
+			if (ret > 0)
+				ret = -ENOENT;
+			goto out;
+		}
+		inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+					    struct btrfs_inode_item);
+		nlink = btrfs_inode_nlink(path->nodes[0], inode_item);
+		nlink++;
+		btrfs_set_inode_nlink(path->nodes[0], inode_item, nlink);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+		btrfs_release_path(path);
+	}
+
+	/* Add dir_item and dir_index */
+	key.objectid = ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_insert_dir_item(trans, root, name, namelen, parent_ino,
+				    &key, type, ret_index);
+	if (ret < 0)
+		goto out;
+
+	/* Update inode size of the parent inode */
+	key.objectid = parent_ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+	if (ret) {
+		/*
+		 * A positive value means "key not found".  Never hand it to
+		 * the caller: it is not a negative errno and would pass a
+		 * 'ret < 0' check as if the link had been added completely.
+		 */
+		if (ret > 0)
+			ret = -ENOENT;
+		goto out;
+	}
+	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_inode_item);
+	inode_size = btrfs_inode_size(path->nodes[0], inode_item);
+	inode_size += namelen * 2;
+	btrfs_set_inode_size(path->nodes[0], inode_item, inode_size);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+	btrfs_release_path(path);
+
+out:
+	btrfs_free_path(path);
+	if (ret == 0 && index)
+		*index = ret_index;
+	return ret;
+}
+
+/*
+ * Insert an empty (zero sized) ORPHAN_ITEM for inode 'ino' into 'root',
+ * using the caller supplied 'path'.
+ *
+ * Returns the result of btrfs_insert_empty_item().
+ */
+int btrfs_add_orphan_item(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct btrfs_path *path,
+			  u64 ino)
+{
+	/* Orphan items live under the orphan objectid, keyed by inode number */
+	struct btrfs_key orphan_key = {
+		.objectid = BTRFS_ORPHAN_OBJECTID,
+		.type = BTRFS_ORPHAN_ITEM_KEY,
+		.offset = ino,
+	};
+
+	return btrfs_insert_empty_item(trans, root, path, &orphan_key, 0);
+}
+
+/*
+ * Subtract 'namelen' from the size stored in the INODE_ITEM of directory
+ * 'dir_ino'.  The size is left alone if it is smaller than 'namelen'.
+ *
+ * 'path' must be released on entry and is released again on success.
+ * Returns 0 on success, -ENOENT if the INODE_ITEM does not exist, or the
+ * negative errno of the tree search.
+ */
+static int shrink_dir_inode_size(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path, u64 dir_ino,
+				 int namelen)
+{
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_key key;
+	u64 inode_size;
+	int ret;
+
+	key.objectid = dir_ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+	/* Positive means "not found"; callers only expect negative errnos */
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		return ret;
+
+	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_inode_item);
+	inode_size = btrfs_inode_size(path->nodes[0], inode_item);
+	if (inode_size >= namelen)
+		inode_size -= namelen;
+	btrfs_set_inode_size(path->nodes[0], inode_item, inode_size);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+	btrfs_release_path(path);
+	return 0;
+}
+
+/*
+ * Unlink an inode, which will remove its backref and corresponding dir_index/
+ * dir_item if any of them exists.
+ *
+ * Each of the three items (inode ref, dir_item, dir_index) is looked up
+ * first and only the ones that exist are removed.  The nlink of 'ino' is
+ * decreased only when its inode ref is removed, and the size of the parent
+ * inode is decreased by 'namelen' for each of dir_index and dir_item that is
+ * removed.
+ *
+ * If an inode's nlink is reduced to 0 and 'add_orphan' is true, it will be
+ * added to the orphan items and waits to be deleted by the next kernel
+ * mount.
+ *
+ * Returns 0 on success, -ENOENT if none of the three items exists or an
+ * expected item is missing, or another negative errno.
+ */
+int btrfs_unlink(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		 u64 ino, u64 parent_ino, u64 index, const char *name,
+		 int namelen, int add_orphan)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_inode_ref *inode_ref;
+	struct btrfs_dir_item *dir_item;
+	u32 nlinks;
+	int del_inode_ref = 0;
+	int del_dir_item = 0;
+	int del_dir_index = 0;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* check the ref and backref exists */
+	inode_ref = btrfs_lookup_inode_ref(trans, root, path, name, namelen,
+					   ino, parent_ino, index, 0);
+	if (IS_ERR(inode_ref)) {
+		ret = PTR_ERR(inode_ref);
+		goto out;
+	}
+	if (inode_ref)
+		del_inode_ref = 1;
+	btrfs_release_path(path);
+
+	dir_item = btrfs_lookup_dir_item(NULL, root, path, parent_ino,
+					 name, namelen, 0);
+	if (IS_ERR(dir_item)) {
+		ret = PTR_ERR(dir_item);
+		goto out;
+	}
+	if (dir_item)
+		del_dir_item = 1;
+	btrfs_release_path(path);
+
+	dir_item = btrfs_lookup_dir_index(NULL, root, path, parent_ino,
+					  name, namelen, index, 0);
+	/*
+	 * Since lookup_dir_index() will return -ENOENT when not found,
+	 * we need to do extra check.
+	 */
+	if (IS_ERR(dir_item) && PTR_ERR(dir_item) == -ENOENT)
+		dir_item = NULL;
+	if (IS_ERR(dir_item)) {
+		ret = PTR_ERR(dir_item);
+		goto out;
+	}
+	if (dir_item)
+		del_dir_index = 1;
+	btrfs_release_path(path);
+
+	if (!del_inode_ref && !del_dir_item && !del_dir_index) {
+		/* All not found, shouldn't happen */
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if (del_inode_ref) {
+		/* Only decrease nlink when deleting inode_ref */
+		key.objectid = ino;
+		key.type = BTRFS_INODE_ITEM_KEY;
+		key.offset = 0;
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret) {
+			if (ret > 0)
+				ret = -ENOENT;
+			goto out;
+		}
+		inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+					    struct btrfs_inode_item);
+		nlinks = btrfs_inode_nlink(path->nodes[0], inode_item);
+		if (nlinks > 0)
+			nlinks--;
+		btrfs_set_inode_nlink(path->nodes[0], inode_item, nlinks);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+		btrfs_release_path(path);
+
+		/* For nlinks == 0, add it to orphan list if needed */
+		if (nlinks == 0 && add_orphan) {
+			ret = btrfs_add_orphan_item(trans, root, path, ino);
+			if (ret < 0)
+				goto out;
+			btrfs_mark_buffer_dirty(path->nodes[0]);
+			btrfs_release_path(path);
+		}
+
+		/* May overwrite 'index' with the index stored in the ref */
+		ret = btrfs_del_inode_ref(trans, root, name, namelen, ino,
+					  parent_ino, &index);
+		if (ret < 0)
+			goto out;
+	}
+
+	if (del_dir_index) {
+		dir_item = btrfs_lookup_dir_index(trans, root, path,
+						  parent_ino, name, namelen,
+						  index, -1);
+		if (IS_ERR(dir_item)) {
+			ret = PTR_ERR(dir_item);
+			goto out;
+		}
+		if (!dir_item) {
+			ret = -ENOENT;
+			goto out;
+		}
+		ret = btrfs_delete_one_dir_name(trans, root, path, dir_item);
+		if (ret)
+			goto out;
+		btrfs_release_path(path);
+
+		/* Update inode size of the parent inode */
+		ret = shrink_dir_inode_size(trans, root, path, parent_ino,
+					    namelen);
+		if (ret)
+			goto out;
+	}
+
+	if (del_dir_item) {
+		dir_item = btrfs_lookup_dir_item(trans, root, path, parent_ino,
+						 name, namelen, -1);
+		if (IS_ERR(dir_item)) {
+			ret = PTR_ERR(dir_item);
+			goto out;
+		}
+		if (!dir_item) {
+			ret = -ENOENT;
+			goto out;
+		}
+		ret = btrfs_delete_one_dir_name(trans, root, path, dir_item);
+		if (ret < 0)
+			goto out;
+		btrfs_release_path(path);
+
+		/* Update inode size of the parent inode */
+		ret = shrink_dir_inode_size(trans, root, path, parent_ino,
+					    namelen);
+		if (ret)
+			goto out;
+	}
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Initialize the stack copy of an inode item.
+ *
+ * The generation comes from the running transaction, 'mode' and 'nlink' are
+ * taken from the arguments, uid/gid are root/root (0/0), the size is 0 and
+ * atime/mtime/ctime are all set to the current time with a zero nanosecond
+ * part.
+ */
+static void fill_inode_item(struct btrfs_trans_handle *trans,
+			    struct btrfs_inode_item *inode_item,
+			    u32 mode, u32 nlink)
+{
+	time_t stamp = time(NULL);
+
+	/* Identity and ownership */
+	btrfs_set_stack_inode_generation(inode_item, trans->transid);
+	btrfs_set_stack_inode_mode(inode_item, mode);
+	btrfs_set_stack_inode_nlink(inode_item, nlink);
+	btrfs_set_stack_inode_uid(inode_item, 0);
+	btrfs_set_stack_inode_gid(inode_item, 0);
+	btrfs_set_stack_inode_size(inode_item, 0);
+
+	/* All three timestamps share the same second, nanoseconds are 0 */
+	btrfs_set_stack_timespec_sec(&inode_item->atime, stamp);
+	btrfs_set_stack_timespec_sec(&inode_item->mtime, stamp);
+	btrfs_set_stack_timespec_sec(&inode_item->ctime, stamp);
+	btrfs_set_stack_timespec_nsec(&inode_item->atime, 0);
+	btrfs_set_stack_timespec_nsec(&inode_item->mtime, 0);
+	btrfs_set_stack_timespec_nsec(&inode_item->ctime, 0);
+}
+
+/*
+ * Create the INODE_ITEM for inode number 'ino' with the given 'mode' and an
+ * nlink of 0.
+ *
+ * Unlike the kernel btrfs_new_inode(), no backref is created here; it is
+ * added by btrfs_add_link().
+ *
+ * Returns the result of btrfs_insert_inode().
+ */
+int btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		    u64 ino, u32 mode)
+{
+	struct btrfs_inode_item new_item = {0};
+
+	fill_inode_item(trans, &new_item, mode, 0);
+
+	return btrfs_insert_inode(trans, root, ino, &new_item);
+}
+
+/*
+ * Make a dir named 'name' with 'mode' under the parent inode 'parent_ino'.
+ * The owner will be root/root.
+ *
+ * 'ino' is optional.  If it points to a non-zero value, that inode number is
+ * used for the new dir, otherwise a free objectid is searched for.  On
+ * success the inode number of the dir is written back through 'ino'.
+ *
+ * If 'name' already exists in the parent, nothing is created: 0 is returned
+ * (and the existing inode number reported) when the entry is a dir, -EEXIST
+ * when it is anything else.
+ */
+int btrfs_mkdir(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		char *name, int namelen, u64 parent_ino, u64 *ino, int mode)
+{
+	struct btrfs_dir_item *existing;
+	struct btrfs_path *path;
+	u64 new_ino = 0;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	if (ino && *ino)
+		new_ino = *ino;
+
+	existing = btrfs_lookup_dir_item(NULL, root, path, parent_ino,
+					 name, namelen, 0);
+	if (IS_ERR(existing)) {
+		ret = PTR_ERR(existing);
+		goto out;
+	}
+
+	if (existing) {
+		struct btrfs_key location;
+
+		/*
+		 * Already have conflicting name, check if it is a dir.
+		 * Either way, no need to continue.
+		 */
+		btrfs_dir_item_key_to_cpu(path->nodes[0], existing, &location);
+		new_ino = location.objectid;
+		if (btrfs_dir_type(path->nodes[0], existing) != BTRFS_FT_DIR)
+			ret = -EEXIST;
+		goto out;
+	}
+
+	if (!new_ino) {
+		/*
+		 * This is *UNSAFE* if some leaf is corrupted,
+		 * only used as a fallback method. Caller should either
+		 * ensure the fs is OK or pass ino with unused inode number.
+		 */
+		ret = btrfs_find_free_objectid(NULL, root, parent_ino,
+					       &new_ino);
+		if (ret)
+			goto out;
+	}
+
+	ret = btrfs_new_inode(trans, root, new_ino, mode | S_IFDIR);
+	if (ret)
+		goto out;
+
+	ret = btrfs_add_link(trans, root, new_ino, parent_ino, name, namelen,
+			     BTRFS_FT_DIR, NULL, 1);
+out:
+	btrfs_free_path(path);
+	if (ret == 0 && ino)
+		*ino = new_ino;
+	return ret;
+}
diff --git a/interval_tree_generic.h b/interval_tree_generic.h
new file mode 100644
index 00000000..e26c7322
--- /dev/null
+++ b/interval_tree_generic.h
@@ -0,0 +1,193 @@
+/*
+ Interval Trees
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ include/linux/interval_tree_generic.h
+*/
+
+#include <stdbool.h>
+
+#include "rbtree_augmented.h"
+
+/*
+ * Template for implementing interval trees
+ *
+ * ITSTRUCT: struct type of the interval tree nodes
+ * ITRB: name of struct rb_node field within ITSTRUCT
+ * ITTYPE: type of the interval endpoints
+ * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree
+ * ITSTART(n): start endpoint of ITSTRUCT node n
+ * ITLAST(n): last endpoint of ITSTRUCT node n
+ * ITSTATIC: 'static' or empty
+ * ITPREFIX: prefix to use for the inline tree definitions
+ *
+ * Intervals are closed: a node covers [ITSTART(n); ITLAST(n)]. The tree is
+ * ordered by ITSTART, and each node's ITSUBTREE field caches the largest
+ * ITLAST found in the subtree rooted at that node.
+ *
+ * Expanding the macro defines:
+ *   ITPREFIX_compute_subtree_last() - static inline, recomputes ITSUBTREE
+ *                                     for one node from its two children
+ *   ITPREFIX_augment                - callbacks declared through
+ *                                     RB_DECLARE_CALLBACKS()
+ *   ITPREFIX_insert(node, root)     - link node into the tree
+ *   ITPREFIX_remove(node, root)     - erase node from the tree
+ *   ITPREFIX_subtree_search()       - static helper: leftmost node of a
+ *                                     subtree intersecting [start;last]
+ *   ITPREFIX_iter_first(root, start, last)
+ *                                   - first node intersecting [start;last],
+ *                                     or NULL
+ *   ITPREFIX_iter_next(node, start, last)
+ *                                   - next intersecting node after 'node',
+ *                                     or NULL
+ *
+ * Note - before using this, please consider if non-generic version
+ * (interval_tree.h) would work for you...
+ */
+
+#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \
+ ITSTART, ITLAST, ITSTATIC, ITPREFIX) \
+ \
+/* Callbacks for augmented rbtree insert and remove */ \
+ \
+static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node) \
+{ \
+ ITTYPE max = ITLAST(node), subtree_last; \
+ if (node->ITRB.rb_left) { \
+ subtree_last = rb_entry(node->ITRB.rb_left, \
+ ITSTRUCT, ITRB)->ITSUBTREE; \
+ if (max < subtree_last) \
+ max = subtree_last; \
+ } \
+ if (node->ITRB.rb_right) { \
+ subtree_last = rb_entry(node->ITRB.rb_right, \
+ ITSTRUCT, ITRB)->ITSUBTREE; \
+ if (max < subtree_last) \
+ max = subtree_last; \
+ } \
+ return max; \
+} \
+ \
+RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \
+ ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last) \
+ \
+/* Insert / remove interval nodes from the tree */ \
+ \
+ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \
+{ \
+ struct rb_node **link = &root->rb_node, *rb_parent = NULL; \
+ ITTYPE start = ITSTART(node), last = ITLAST(node); \
+ ITSTRUCT *parent; \
+ \
+ while (*link) { \
+ rb_parent = *link; \
+ parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \
+ if (parent->ITSUBTREE < last) \
+ parent->ITSUBTREE = last; \
+ if (start < ITSTART(parent)) \
+ link = &parent->ITRB.rb_left; \
+ else \
+ link = &parent->ITRB.rb_right; \
+ } \
+ \
+ node->ITSUBTREE = last; \
+ rb_link_node(&node->ITRB, rb_parent, link); \
+ rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \
+} \
+ \
+ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \
+{ \
+ rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \
+} \
+ \
+/* \
+ * Iterate over intervals intersecting [start;last] \
+ * \
+ * Note that a node's interval intersects [start;last] iff: \
+ * Cond1: ITSTART(node) <= last \
+ * and \
+ * Cond2: start <= ITLAST(node) \
+ */ \
+ \
+static ITSTRUCT * \
+ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
+{ \
+ while (true) { \
+ /* \
+ * Loop invariant: start <= node->ITSUBTREE \
+ * (Cond2 is satisfied by one of the subtree nodes) \
+ */ \
+ if (node->ITRB.rb_left) { \
+ ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \
+ ITSTRUCT, ITRB); \
+ if (start <= left->ITSUBTREE) { \
+ /* \
+ * Some nodes in left subtree satisfy Cond2. \
+ * Iterate to find the leftmost such node N. \
+ * If it also satisfies Cond1, that's the \
+ * match we are looking for. Otherwise, there \
+ * is no matching interval as nodes to the \
+ * right of N can't satisfy Cond1 either. \
+ */ \
+ node = left; \
+ continue; \
+ } \
+ } \
+ if (ITSTART(node) <= last) { /* Cond1 */ \
+ if (start <= ITLAST(node)) /* Cond2 */ \
+ return node; /* node is leftmost match */ \
+ if (node->ITRB.rb_right) { \
+ node = rb_entry(node->ITRB.rb_right, \
+ ITSTRUCT, ITRB); \
+ if (start <= node->ITSUBTREE) \
+ continue; \
+ } \
+ } \
+ return NULL; /* No match */ \
+ } \
+} \
+ \
+ITSTATIC ITSTRUCT * \
+ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \
+{ \
+ ITSTRUCT *node; \
+ \
+ if (!root->rb_node) \
+ return NULL; \
+ node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \
+ if (node->ITSUBTREE < start) \
+ return NULL; \
+ return ITPREFIX ## _subtree_search(node, start, last); \
+} \
+ \
+ITSTATIC ITSTRUCT * \
+ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
+{ \
+ struct rb_node *rb = node->ITRB.rb_right, *prev; \
+ \
+ while (true) { \
+ /* \
+ * Loop invariants: \
+ * Cond1: ITSTART(node) <= last \
+ * rb == node->ITRB.rb_right \
+ * \
+ * First, search right subtree if suitable \
+ */ \
+ if (rb) { \
+ ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \
+ if (start <= right->ITSUBTREE) \
+ return ITPREFIX ## _subtree_search(right, \
+ start, last); \
+ } \
+ \
+ /* Move up the tree until we come from a node's left child */ \
+ do { \
+ rb = rb_parent(&node->ITRB); \
+ if (!rb) \
+ return NULL; \
+ prev = &node->ITRB; \
+ node = rb_entry(rb, ITSTRUCT, ITRB); \
+ rb = node->ITRB.rb_right; \
+ } while (prev == rb); \
+ \
+ /* Check if the node intersects [start;last] */ \
+ if (last < ITSTART(node)) /* !Cond1 */ \
+ return NULL; \
+ else if (start <= ITLAST(node)) /* Cond2 */ \
+ return node; \
+ } \
+}
diff --git a/ioctl-test.c b/ioctl-test.c
new file mode 100644
index 00000000..54fc0135
--- /dev/null
+++ b/ioctl-test.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "kerncompat.h"
+#include "ioctl.h"
+
+/*
+ * Table of btrfs ioctl request numbers, as expanded from the BTRFS_IOC_*
+ * macros. The list is terminated by a 0 entry, which main() uses as its
+ * end marker.
+ */
+static unsigned long ioctls[] = {
+ BTRFS_IOC_SNAP_CREATE,
+ BTRFS_IOC_DEFRAG,
+ BTRFS_IOC_RESIZE,
+ BTRFS_IOC_SCAN_DEV,
+ BTRFS_IOC_TRANS_START,
+ BTRFS_IOC_TRANS_END,
+ BTRFS_IOC_SYNC,
+ BTRFS_IOC_CLONE,
+ BTRFS_IOC_ADD_DEV,
+ BTRFS_IOC_RM_DEV,
+ BTRFS_IOC_BALANCE,
+ BTRFS_IOC_SUBVOL_CREATE,
+ BTRFS_IOC_SNAP_DESTROY,
+ BTRFS_IOC_DEFRAG_RANGE,
+ BTRFS_IOC_TREE_SEARCH,
+ BTRFS_IOC_INO_LOOKUP,
+ BTRFS_IOC_DEFAULT_SUBVOL,
+ BTRFS_IOC_SPACE_INFO,
+ BTRFS_IOC_SNAP_CREATE_V2,
+ 0 };
+
+/*
+ * Print every entry of the ioctls[] table as an unsigned decimal number, one
+ * per line, stopping at the terminating 0 entry.  The command line arguments
+ * are not used.
+ */
+int main(int ac, char **av)
+{
+	unsigned long *cur;
+
+	for (cur = ioctls; *cur; cur++)
+		printf("%lu\n" ,*cur);
+	return 0;
+}
+
diff --git a/ioctl.h b/ioctl.h
new file mode 100644
index 00000000..771da231
--- /dev/null
+++ b/ioctl.h
@@ -0,0 +1,716 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_IOCTL_H__
+#define __BTRFS_IOCTL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <asm/types.h>
+#include <linux/ioctl.h>
+
+#ifndef __user
+#define __user
+#endif
+
+#define BTRFS_IOCTL_MAGIC 0x94
+#define BTRFS_VOL_NAME_MAX 255
+
+/*
+ * Argument structure carrying a file descriptor and a NUL-terminated name.
+ *
+ * The whole structure should be 4k: BTRFS_PATH_NAME_MAX is picked so that
+ * 8 bytes of fd plus the (BTRFS_PATH_NAME_MAX + 1) byte name buffer add up
+ * to 4096 bytes.
+ */
+#define BTRFS_PATH_NAME_MAX 4087
+struct btrfs_ioctl_vol_args {
+ __s64 fd;
+ char name[BTRFS_PATH_NAME_MAX + 1];
+};
+
+/*
+ * Size used for the device path buffers in the dev replace and dev info
+ * structures below.
+ */
+#define BTRFS_DEVICE_PATH_NAME_MAX 1024
+
+/*
+ * Single-bit flag values.
+ * NOTE(review): presumably these go into btrfs_ioctl_vol_args_v2.flags --
+ * confirm against the callers.
+ */
+#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
+#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
+#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
+/* Byte lengths of the fsid[] and uuid[] arrays used in this header */
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_UUID_SIZE 16
+
+/*
+ * NOTE(review): presumably a bit for btrfs_qgroup_inherit.flags -- confirm.
+ */
+#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
+
+/*
+ * Limits of one qgroup: a flags word followed by four 64-bit values
+ * (max/rsv for referenced and exclusive). 40 bytes in total.
+ * Embedded in btrfs_qgroup_inherit and btrfs_ioctl_qgroup_limit_args.
+ */
+struct btrfs_qgroup_limit {
+ __u64 flags;
+ __u64 max_referenced;
+ __u64 max_exclusive;
+ __u64 rsv_referenced;
+ __u64 rsv_exclusive;
+};
+
+/*
+ * Variable-length qgroup inheritance description: a fixed header with
+ * counters and an embedded limit, followed by the trailing qgroups[] array
+ * (declared with zero length, so sizeof() covers the header only).
+ * NOTE(review): the number of trailing entries is presumably derived from
+ * the num_* counters -- confirm against the code that builds this.
+ */
+struct btrfs_qgroup_inherit {
+ __u64 flags;
+ __u64 num_qgroups;
+ __u64 num_ref_copies;
+ __u64 num_excl_copies;
+ struct btrfs_qgroup_limit lim;
+ __u64 qgroups[0];
+};
+
+/* Pairs a qgroup id with the limits (struct btrfs_qgroup_limit) for it. */
+struct btrfs_ioctl_qgroup_limit_args {
+ __u64 qgroupid;
+ struct btrfs_qgroup_limit lim;
+};
+
+/*
+ * Second version of the volume argument structure: adds transid, flags and
+ * an anonymous union in front of the name.
+ *
+ * The union reserves 4 * 8 = 32 bytes; its first 8 bytes are 'size',
+ * followed by the user space pointer 'qgroup_inherit'.
+ *
+ * BTRFS_SUBVOL_NAME_MAX is picked so that the structure is 4096 bytes:
+ * 3 * 8 (fd, transid, flags) + 32 (union) + (4039 + 1) (name).
+ */
+#define BTRFS_SUBVOL_NAME_MAX 4039
+struct btrfs_ioctl_vol_args_v2 {
+ __s64 fd;
+ __u64 transid;
+ __u64 flags;
+ union {
+ struct {
+ __u64 size;
+ struct btrfs_qgroup_inherit __user *qgroup_inherit;
+ };
+ __u64 unused[4];
+ };
+ char name[BTRFS_SUBVOL_NAME_MAX + 1];
+};
+
+/*
+ * structure to report errors and progress to userspace, either as a
+ * result of a finished scrub, a canceled scrub or a progress inquiry
+ *
+ * Consists of 15 __u64 counters, i.e. 120 bytes.
+ */
+struct btrfs_scrub_progress {
+ __u64 data_extents_scrubbed; /* # of data extents scrubbed */
+ __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */
+ __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */
+ __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */
+ __u64 read_errors; /* # of read errors encountered (EIO) */
+ __u64 csum_errors; /* # of failed csum checks */
+ __u64 verify_errors; /* # of occurrences, where the metadata
+ * of a tree block did not match the
+ * expected values, like generation or
+ * logical */
+ __u64 no_csum; /* # of 4k data block for which no csum
+ * is present, probably the result of
+ * data written with nodatasum */
+ __u64 csum_discards; /* # of csum for which no data was found
+ * in the extent tree. */
+ __u64 super_errors; /* # of bad super blocks encountered */
+ __u64 malloc_errors; /* # of internal kmalloc errors. These
+ * will likely cause an incomplete
+ * scrub */
+ __u64 uncorrectable_errors; /* # of errors where either no intact
+ * copy was found or the writeback
+ * failed */
+ __u64 corrected_errors; /* # of errors corrected */
+ __u64 last_physical; /* last physical address scrubbed. In
+ * case a scrub was aborted, this can
+ * be used to restart the scrub */
+ __u64 unverified_errors; /* # of occurrences where a read for a
+ * full (64k) bio failed, but the re-
+ * check succeeded for each 4k piece.
+ * Intermittent error. */
+};
+
+/*
+ * Scrub request and result. The four input fields take 32 bytes, which is
+ * the '32' in the size of the padding array: 'unused' fills the structure
+ * up to 1024 bytes after the embedded progress report.
+ *
+ * NOTE(review): BTRFS_SCRUB_READONLY is presumably a value for 'flags' --
+ * confirm against the callers.
+ */
+#define BTRFS_SCRUB_READONLY 1
+struct btrfs_ioctl_scrub_args {
+ __u64 devid; /* in */
+ __u64 start; /* in */
+ __u64 end; /* in */
+ __u64 flags; /* in */
+ struct btrfs_scrub_progress progress; /* out */
+ /* pad to 1k */
+ __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
+};
+
+/*
+ * Parameters for starting a device replace.
+ *
+ * The two CONT_READING_FROM_SRCDEV_MODE values are the valid settings of
+ * 'cont_reading_from_srcdev_mode'. The source device is named either by
+ * 'srcdevid' or, if that is 0, by 'srcdev_name'. Both name buffers hold
+ * BTRFS_DEVICE_PATH_NAME_MAX + 1 bytes.
+ */
+#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0
+#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1
+struct btrfs_ioctl_dev_replace_start_params {
+ __u64 srcdevid; /* in, if 0, use srcdev_name instead */
+ __u64 cont_reading_from_srcdev_mode; /* in, see #define
+ * above */
+ __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
+ __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
+};
+
+/*
+ * Status report of a device replace. All fields are outputs; the STATE_*
+ * values are the possible contents of 'replace_state', and progress is
+ * reported in thousandths ('progress_1000').
+ */
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4
+struct btrfs_ioctl_dev_replace_status_params {
+ __u64 replace_state; /* out, see #define above */
+ __u64 progress_1000; /* out, 0 <= x <= 1000 */
+ __u64 time_started; /* out, seconds since 1-Jan-1970 */
+ __u64 time_stopped; /* out, seconds since 1-Jan-1970 */
+ __u64 num_write_errors; /* out */
+ __u64 num_uncorrectable_read_errors; /* out */
+};
+
+/*
+ * Top-level device replace argument: a command, a result, and a union that
+ * overlays the start parameters and the status report.
+ *
+ * NOTE(review): presumably the CMD_* values go into 'cmd' and the RESULT_*
+ * values come back in 'result' -- confirm. RESULT_NO_RESULT is defined as
+ * -1 while 'result' is an unsigned __u64, so comparing the two relies on the
+ * usual conversion of -1 to the all-ones value.
+ */
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT -1
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS 3
+struct btrfs_ioctl_dev_replace_args {
+ __u64 cmd; /* in */
+ __u64 result; /* out */
+
+ union {
+ struct btrfs_ioctl_dev_replace_start_params start;
+ struct btrfs_ioctl_dev_replace_status_params status;
+ }; /* in/out */
+
+ __u64 spare[64];
+};
+
+/*
+ * Information about one device. 'devid' and 'uuid' are both input and
+ * output; the byte counters and the path are outputs.
+ *
+ * Sized to 4096 bytes: 8 + 16 + 8 + 8 bytes of fields, 379 * 8 bytes of
+ * padding and a BTRFS_DEVICE_PATH_NAME_MAX (1024) byte path buffer.
+ */
+struct btrfs_ioctl_dev_info_args {
+ __u64 devid; /* in/out */
+ __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
+ __u64 bytes_used; /* out */
+ __u64 total_bytes; /* out */
+ __u64 unused[379]; /* pad to 4k */
+ __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
+};
+
+/*
+ * Filesystem-wide information; every named field is an output.
+ *
+ * Sized to 1024 bytes: 48 bytes of fields (including the 32-bit
+ * 'reserved32' filler) followed by 122 * 8 bytes of reserved space.
+ */
+struct btrfs_ioctl_fs_info_args {
+ __u64 max_id; /* out */
+ __u64 num_devices; /* out */
+ __u8 fsid[BTRFS_FSID_SIZE]; /* out */
+ __u32 nodesize; /* out */
+ __u32 sectorsize; /* out */
+ __u32 clone_alignment; /* out */
+ __u32 reserved32;
+ __u64 reserved[122]; /* pad to 1k */
+};
+
+/*
+ * Three 64-bit feature flag words: compat, compat_ro and incompat.
+ */
+struct btrfs_ioctl_feature_flags {
+ __u64 compat_flags;
+ __u64 compat_ro_flags;
+ __u64 incompat_flags;
+};
+
+/* balance control ioctl modes */
+#define BTRFS_BALANCE_CTL_PAUSE 1
+#define BTRFS_BALANCE_CTL_CANCEL 2
+#define BTRFS_BALANCE_CTL_RESUME 3
+
+/*
+ * this is packed, because it should be exactly the same as its disk
+ * byte order counterpart (struct btrfs_disk_balance_args)
+ *
+ * With the packed attribute the structure is 136 bytes. The 'usage' and
+ * 'limit' filters are each an 8-byte union: either one 64-bit value or a
+ * min/max pair of 32-bit values.
+ */
+struct btrfs_balance_args {
+ __u64 profiles;
+
+ /*
+ * usage filter
+ * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
+ * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
+ */
+ union {
+ __u64 usage;
+ struct {
+ __u32 usage_min;
+ __u32 usage_max;
+ };
+ };
+
+ __u64 devid;
+ __u64 pstart;
+ __u64 pend;
+ __u64 vstart;
+ __u64 vend;
+
+ __u64 target;
+
+ __u64 flags;
+
+ /*
+ * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
+ * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extended version can use
+ * minimum and maximum
+ */
+ union {
+ __u64 limit; /* limit number of processed chunks */
+ struct {
+ __u32 limit_min;
+ __u32 limit_max;
+ };
+ };
+ __u32 stripes_min;
+ __u32 stripes_max;
+ __u64 unused[6];
+} __attribute__ ((__packed__));
+
+/*
+ * report balance progress to userspace
+ *
+ * Three 64-bit chunk counters, 24 bytes in total.
+ */
+struct btrfs_balance_progress {
+ __u64 expected; /* estimated # of chunks that will be
+ * relocated to fulfill the request */
+ __u64 considered; /* # of chunks we have considered so far */
+ __u64 completed; /* # of chunks relocated so far */
+};
+
+/*
+ * Single-bit state values.
+ * NOTE(review): presumably reported in btrfs_ioctl_balance_args.state --
+ * confirm against the callers.
+ */
+#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
+#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
+#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
+
+/*
+ * Balance request: one set of filter arguments each for data, metadata and
+ * system, plus the progress report.
+ *
+ * Sized to 1024 bytes: 16 (flags, state) + 3 * 136 (balance args) +
+ * 24 (progress) + 72 * 8 (padding).
+ */
+struct btrfs_ioctl_balance_args {
+ __u64 flags; /* in/out */
+ __u64 state; /* out */
+
+ struct btrfs_balance_args data; /* in/out */
+ struct btrfs_balance_args meta; /* in/out */
+ struct btrfs_balance_args sys; /* in/out */
+
+ struct btrfs_balance_progress stat; /* out */
+
+ __u64 unused[72]; /* pad to 1k */
+};
+
+/*
+ * Inode lookup argument: a tree id, an objectid and a name buffer.
+ * BTRFS_INO_LOOKUP_PATH_MAX makes the structure 4096 bytes
+ * (2 * 8 + 4080).
+ */
+#define BTRFS_INO_LOOKUP_PATH_MAX 4080
+struct btrfs_ioctl_ino_lookup_args {
+ __u64 treeid;
+ __u64 objectid;
+ char name[BTRFS_INO_LOOKUP_PATH_MAX];
+};
+
+/*
+ * Search parameters for the tree search structures below: the tree to
+ * search and inclusive min/max bounds for objectid, offset, transid and
+ * type. 104 bytes in total, including the explicit alignment and spare
+ * fields.
+ */
+struct btrfs_ioctl_search_key {
+ /* which root are we searching. 0 is the tree of tree roots */
+ __u64 tree_id;
+
+ /* keys returned will be >= min and <= max */
+ __u64 min_objectid;
+ __u64 max_objectid;
+
+ /* keys returned will be >= min and <= max */
+ __u64 min_offset;
+ __u64 max_offset;
+
+ /* max and min transids to search for */
+ __u64 min_transid;
+ __u64 max_transid;
+
+ /* keys returned will be >= min and <= max */
+ __u32 min_type;
+ __u32 max_type;
+
+ /*
+ * how many items did userland ask for, and how many are we
+ * returning
+ */
+ __u32 nr_items;
+
+ /* align to 64 bits */
+ __u32 unused;
+
+ /* some extra for later */
+ __u64 unused1;
+ __u64 unused2;
+ __u64 unused3;
+ __u64 unused4;
+};
+
+struct btrfs_ioctl_search_header {
+ __u64 transid;
+ __u64 objectid;
+ __u64 offset;
+ __u32 type;
+ __u32 len;
+} __attribute__((may_alias));
+
+/*
+ * Size of the result buffer: whatever is left of 4096 bytes after the
+ * search key (104 bytes as declared above, leaving 3992).
+ */
+#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
+/*
+ * Argument block named by BTRFS_IOC_TREE_SEARCH; key plus buf is exactly
+ * 4096 bytes.
+ *
+ * the buf is an array of search headers where
+ * each header is followed by the actual item
+ * the type field is expanded to 32 bits for alignment
+ */
+struct btrfs_ioctl_search_args {
+ struct btrfs_ioctl_search_key key;
+ char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
+};
+
+/*
+ * Extended version of TREE_SEARCH ioctl that can return more than 4k of bytes.
+ * The allocated size of the buffer is set in buf_size.
+ */
+struct btrfs_ioctl_search_args_v2 {
+ struct btrfs_ioctl_search_key key; /* in/out - search parameters */
+ __u64 buf_size; /* in - size of buffer
+ * out - on EOVERFLOW: needed size
+ * to store item */
+ __u64 buf[0]; /* out - found items */
+};
+
+/* With a @src_length of zero, the range from @src_offset->EOF is cloned! */
+struct btrfs_ioctl_clone_range_args {
+ __s64 src_fd;
+ __u64 src_offset, src_length;
+ __u64 dest_offset;
+};
+
+/* flags for the defrag range ioctl */
+#define BTRFS_DEFRAG_RANGE_COMPRESS 1
+#define BTRFS_DEFRAG_RANGE_START_IO 2
+
+#define BTRFS_SAME_DATA_DIFFERS 1
+/* For extent-same ioctl */
+struct btrfs_ioctl_same_extent_info {
+ __s64 fd; /* in - destination file */
+ __u64 logical_offset; /* in - start of extent in destination */
+ __u64 bytes_deduped; /* out - total # of bytes we were able
+ * to dedupe from this file */
+ /* status of this dedupe operation:
+ * 0 if dedup succeeds
+ * < 0 for error
+ * == BTRFS_SAME_DATA_DIFFERS if data differs
+ */
+ __s32 status; /* out - see above description */
+ __u32 reserved;
+};
+
+struct btrfs_ioctl_same_args {
+ __u64 logical_offset; /* in - start of extent in source */
+ __u64 length; /* in - length of extent */
+ __u16 dest_count; /* in - total elements in info array */
+ __u16 reserved1;
+ __u32 reserved2;
+ struct btrfs_ioctl_same_extent_info info[0];
+};
+
+struct btrfs_ioctl_defrag_range_args {
+ /* start of the defrag operation */
+ __u64 start;
+
+ /* number of bytes to defrag, use (u64)-1 to say all */
+ __u64 len;
+
+ /*
+ * flags for the operation, which can include turning
+ * on compression for this one defrag
+ */
+ __u64 flags;
+
+ /*
+ * any extent bigger than this will be considered
+ * already defragged. Use 0 to take the kernel default
+ * Use 1 to say every single extent must be rewritten
+ */
+ __u32 extent_thresh;
+
+ /*
+ * which compression method to use if turning on compression
+ * for this defrag operation. If unspecified, zlib will
+ * be used
+ */
+ __u32 compress_type;
+
+ /* spare for later */
+ __u32 unused[4];
+};
+
+struct btrfs_ioctl_space_info {
+ __u64 flags;
+ __u64 total_bytes;
+ __u64 used_bytes;
+};
+
+struct btrfs_ioctl_space_args {
+ __u64 space_slots;
+ __u64 total_spaces;
+ struct btrfs_ioctl_space_info spaces[0];
+};
+
+struct btrfs_data_container {
+ __u32 bytes_left; /* out -- bytes not needed to deliver output */
+ __u32 bytes_missing; /* out -- additional bytes needed for result */
+ __u32 elem_cnt; /* out */
+ __u32 elem_missed; /* out */
+ __u64 val[0]; /* out */
+};
+
+struct btrfs_ioctl_ino_path_args {
+ __u64 inum; /* in */
+ __u64 size; /* in */
+ __u64 reserved[4];
+ /* struct btrfs_data_container *fspath; out */
+ __u64 fspath; /* out */
+};
+
+struct btrfs_ioctl_logical_ino_args {
+ __u64 logical; /* in */
+ __u64 size; /* in */
+ __u64 reserved[4];
+ /* struct btrfs_data_container *inodes; out */
+ __u64 inodes;
+};
+
+enum btrfs_dev_stat_values {
+ /* disk I/O failure stats */
+ BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
+ BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
+ BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
+
+ /* stats for indirect indications for I/O failures */
+ BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
+ * contents is illegal: this is an
+ * indication that the block was damaged
+ * during read or write, or written to
+ * wrong location or read from wrong
+ * location */
+ BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
+ * been written */
+
+ BTRFS_DEV_STAT_VALUES_MAX
+};
+
+/* Reset statistics after reading; needs SYS_ADMIN capability */
+#define BTRFS_DEV_STATS_RESET (1ULL << 0)
+
+/* Argument block named by BTRFS_IOC_GET_DEV_STATS. */
+struct btrfs_ioctl_get_dev_stats {
+ __u64 devid; /* in */
+ __u64 nr_items; /* in/out */
+ __u64 flags; /* in/out */
+
+ /* out values: */
+ __u64 values[BTRFS_DEV_STAT_VALUES_MAX];
+
+ /*
+ * Reserved space. The "128 - 2" calculation accounts for only two of
+ * the three leading fields, so the struct is 129 * 8 = 1032 bytes,
+ * not the 1024 it was presumably meant to be. Do not adjust the
+ * array size: the struct size is part of the ioctl request encoding.
+ */
+ __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* reserved */
+};
+
+/* BTRFS_IOC_SNAP_CREATE is no longer used by the btrfs command */
+#define BTRFS_QUOTA_CTL_ENABLE 1
+#define BTRFS_QUOTA_CTL_DISABLE 2
+/* 3 was formerly reserved for BTRFS_QUOTA_CTL_RESCAN */
+struct btrfs_ioctl_quota_ctl_args {
+ __u64 cmd;
+ __u64 status;
+};
+
+struct btrfs_ioctl_quota_rescan_args {
+ __u64 flags;
+ __u64 progress;
+ __u64 reserved[6];
+};
+
+struct btrfs_ioctl_qgroup_assign_args {
+ __u64 assign;
+ __u64 src;
+ __u64 dst;
+};
+
+struct btrfs_ioctl_qgroup_create_args {
+ __u64 create;
+ __u64 qgroupid;
+};
+/*
+ * Timestamp embedded in struct btrfs_ioctl_received_subvol_args.
+ * Not packed: the members take 12 bytes, and the compiler may append tail
+ * padding (presumably 4 bytes where __u64 is 8-byte aligned; TODO confirm
+ * for each target ABI).
+ */
+struct btrfs_ioctl_timespec {
+ __u64 sec;
+ __u32 nsec;
+};
+
+struct btrfs_ioctl_received_subvol_args {
+ char uuid[BTRFS_UUID_SIZE]; /* in */
+ __u64 stransid; /* in */
+ __u64 rtransid; /* out */
+ struct btrfs_ioctl_timespec stime; /* in */
+ struct btrfs_ioctl_timespec rtime; /* out */
+ __u64 flags; /* in */
+ __u64 reserved[16]; /* in */
+};
+
+/*
+ * Caller doesn't want file data in the send stream, even if the
+ * search of clone sources doesn't find an extent. UPDATE_EXTENT
+ * commands will be sent instead of WRITE commands.
+ */
+#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1
+
+/*
+ * Do not add the leading stream header. Used when multiple snapshots
+ * are sent back to back.
+ */
+#define BTRFS_SEND_FLAG_OMIT_STREAM_HEADER 0x2
+
+/*
+ * Omit the command at the end of the stream that indicated the end
+ * of the stream. This option is used when multiple snapshots are
+ * sent back to back.
+ */
+#define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4
+
+#define BTRFS_SEND_FLAG_MASK \
+ (BTRFS_SEND_FLAG_NO_FILE_DATA | \
+ BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
+ BTRFS_SEND_FLAG_OMIT_END_CMD)
+
+struct btrfs_ioctl_send_args {
+ __s64 send_fd; /* in */
+ __u64 clone_sources_count; /* in */
+ __u64 __user *clone_sources; /* in */
+ __u64 parent_root; /* in */
+ __u64 flags; /* in */
+ __u64 reserved[4]; /* in */
+};
+
+/*
+ * Error codes as returned by the kernel
+ *
+ * Enumerators are numbered implicitly from 0; 'notused' takes the value 0
+ * so the first real code is 1. btrfs_err_str() maps the codes to messages.
+ */
+enum btrfs_err_code {
+ notused,
+ BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_TGT_REPLACE,
+ BTRFS_ERROR_DEV_MISSING_NOT_FOUND,
+ BTRFS_ERROR_DEV_ONLY_WRITABLE,
+ BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
+};
+
+/*
+ * Translate a kernel error code into a human readable message.
+ *
+ * Returns a pointer to a string literal for the codes handled below and
+ * NULL for every other value (including 'notused'). The returned string
+ * must not be modified or freed.
+ */
+static inline char *btrfs_err_str(enum btrfs_err_code err_code)
+{
+	char *msg = NULL;
+
+	switch (err_code) {
+	case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET:
+		msg = "unable to go below two devices on raid1";
+		break;
+	case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET:
+		msg = "unable to go below four devices on raid10";
+		break;
+	case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET:
+		msg = "unable to go below two devices on raid5";
+		break;
+	case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET:
+		msg = "unable to go below three devices on raid6";
+		break;
+	case BTRFS_ERROR_DEV_TGT_REPLACE:
+		msg = "unable to remove the dev_replace target dev";
+		break;
+	case BTRFS_ERROR_DEV_MISSING_NOT_FOUND:
+		msg = "no missing devices found to remove";
+		break;
+	case BTRFS_ERROR_DEV_ONLY_WRITABLE:
+		msg = "unable to remove the only writeable device";
+		break;
+	case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS:
+		msg = "add/delete/balance/replace/resize operation "
+			"in progress";
+		break;
+	default:
+		/* unknown or unused code: msg stays NULL */
+		break;
+	}
+
+	return msg;
+}
+
+#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
+ struct btrfs_ioctl_vol_args)
+/* trans start and trans end are dangerous, and only for
+ * use by applications that know how to avoid the
+ * resulting deadlocks
+ */
+#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
+#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
+#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
+
+#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
+#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
+ struct btrfs_ioctl_vol_args)
+
+#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
+ struct btrfs_ioctl_clone_range_args)
+
+#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
+ struct btrfs_ioctl_defrag_range_args)
+/*
+ * TREE_SEARCH and TREE_SEARCH_V2 intentionally share command number 17.
+ * They name different argument types, and the _IOWR() encoding includes
+ * the argument size, so the resulting request values differ.
+ */
+#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+ struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+ struct btrfs_ioctl_search_args_v2)
+#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
+ struct btrfs_ioctl_ino_lookup_args)
+#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
+#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
+ struct btrfs_ioctl_space_args)
+/*
+ * START_SYNC uses command number 24, out of sequence and shared with
+ * SUBVOL_CREATE_V2 below; the two differ in direction (_IOR vs _IOW) and
+ * argument type.
+ * NOTE(review): this looks like a typo but appears to match the kernel's
+ * definition; confirm against the kernel UAPI header before renumbering.
+ */
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+ struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
+ struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
+#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
+ struct btrfs_ioctl_scrub_args)
+#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
+#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
+ struct btrfs_ioctl_scrub_args)
+#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
+ struct btrfs_ioctl_dev_info_args)
+#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
+ struct btrfs_ioctl_fs_info_args)
+#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
+ struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
+#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
+ struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
+ struct btrfs_ioctl_ino_path_args)
+/*
+ * Declared with struct btrfs_ioctl_ino_path_args even though a dedicated
+ * struct btrfs_ioctl_logical_ino_args exists above. Both consist of seven
+ * __u64 members, so the encoded size is the same either way.
+ * NOTE(review): presumably kept for compatibility; confirm before changing.
+ */
+#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
+ struct btrfs_ioctl_ino_path_args)
+#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
+ struct btrfs_ioctl_received_subvol_args)
+#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
+#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
+ struct btrfs_ioctl_quota_ctl_args)
+#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
+ struct btrfs_ioctl_qgroup_assign_args)
+#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
+ struct btrfs_ioctl_qgroup_create_args)
+#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
+ struct btrfs_ioctl_qgroup_limit_args)
+#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
+ struct btrfs_ioctl_quota_rescan_args)
+#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
+ struct btrfs_ioctl_quota_rescan_args)
+#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46)
+#define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
+ char[BTRFS_LABEL_SIZE])
+#define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
+ char[BTRFS_LABEL_SIZE])
+#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
+ struct btrfs_ioctl_get_dev_stats)
+#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
+ struct btrfs_ioctl_dev_replace_args)
+#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
+ struct btrfs_ioctl_same_args)
+/*
+ * The three feature ioctls share command number 57. They are told apart
+ * by direction (_IOR vs _IOW) and by argument size: one, two or three
+ * struct btrfs_ioctl_feature_flags elements.
+ */
+#define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
+ struct btrfs_ioctl_feature_flags)
+#define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \
+ struct btrfs_ioctl_feature_flags[2])
+#define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
+ struct btrfs_ioctl_feature_flags[3])
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/kerncompat.h b/kerncompat.h
new file mode 100644
index 00000000..0f207b7f
--- /dev/null
+++ b/kerncompat.h
@@ -0,0 +1,374 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __KERNCOMPAT_H__
+#define __KERNCOMPAT_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <assert.h>
+#include <stddef.h>
+#include <linux/types.h>
+#include <stdint.h>
+
+#include <features.h>
+
+#ifndef __GLIBC__
+#ifndef BTRFS_DISABLE_BACKTRACE
+#define BTRFS_DISABLE_BACKTRACE
+#endif
+#define __always_inline __inline __attribute__ ((__always_inline__))
+#endif
+
+#ifndef BTRFS_DISABLE_BACKTRACE
+#include <execinfo.h>
+#endif
+
+/*
+ * Convert between pointers and 64-bit integers by way of uintptr_t.
+ * The argument is parenthesized so that an expression argument such as
+ * ptr_to_u64(base + 1) is converted as a whole, not just its first
+ * operand.
+ */
+#define ptr_to_u64(x) ((u64)(uintptr_t)(x))
+#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
+
+#ifndef READ
+#define READ 0
+#define WRITE 1
+#define READA 2
+#endif
+
+#define gfp_t int
+#define get_cpu_var(p) (p)
+#define __get_cpu_var(p) (p)
+#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
+#define __GFP_BITS_SHIFT 20
+#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1))
+#define GFP_KERNEL 0
+#define GFP_NOFS 0
+#define __read_mostly
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#ifndef ULONG_MAX
+#define ULONG_MAX (~0UL)
+#endif
+
+#ifndef BTRFS_DISABLE_BACKTRACE
+#define MAX_BACKTRACE 16
+/*
+ * Write a backtrace of the calling thread to file descriptor 2.
+ * At most MAX_BACKTRACE frames are collected.
+ */
+static inline void print_trace(void)
+{
+	void *frames[MAX_BACKTRACE];
+	size_t depth = backtrace(frames, MAX_BACKTRACE);
+
+	backtrace_symbols_fd(frames, depth, 2);
+}
+
+/*
+ * Back end of BUG(), BUG_ON() and ASSERT().
+ *
+ * @assertion: text of the failed condition, or NULL when there is none
+ * @filename:  source file of the caller
+ * @func:      function name of the caller
+ * @line:      source line of the caller
+ * @val:       condition result; non-zero means the check passed
+ *
+ * Returns immediately when @val is non-zero. Otherwise prints a diagnostic
+ * and a backtrace to stderr and terminates the process with exit(1).
+ */
+static inline void assert_trace(const char *assertion, const char *filename,
+				const char *func, unsigned line, int val)
+{
+	if (val)
+		return;
+	/* @line is unsigned, so it is printed with %u rather than %d */
+	if (assertion)
+		fprintf(stderr, "%s:%u: %s: Assertion `%s` failed.\n",
+			filename, line, func, assertion);
+	else
+		fprintf(stderr, "%s:%u: %s: Assertion failed.\n", filename,
+			line, func);
+	print_trace();
+	exit(1);
+}
+
+#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 0)
+#else
+#define BUG() assert(0)
+#endif
+
+#ifdef __CHECKER__
+#define __force __attribute__((force))
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __force
+#define __bitwise__
+#endif
+
+#ifndef __CHECKER__
+/*
+ * Since we're using primitive definitions from kernel-space, we need to
+ * define __KERNEL__ so that system header files know which definitions
+ * to use.
+ */
+#define __KERNEL__
+#include <asm/types.h>
+typedef __u32 u32;
+typedef __u64 u64;
+typedef __u16 u16;
+typedef __u8 u8;
+typedef __s64 s64;
+typedef __s32 s32;
+
+/*
+ * Continuing to define __KERNEL__ breaks other parts of the code, so
+ * we can just undefine it now that we have the correct headers...
+ */
+#undef __KERNEL__
+#else
+typedef unsigned int u32;
+typedef unsigned int __u32;
+typedef unsigned long long u64;
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef long long s64;
+typedef int s32;
+#endif
+
+
+struct vma_shared { int prio_tree_node; };
+struct vm_area_struct {
+ unsigned long vm_pgoff;
+ unsigned long vm_start;
+ unsigned long vm_end;
+ struct vma_shared shared;
+};
+
+struct page {
+ unsigned long index;
+};
+
+struct mutex {
+ unsigned long lock;
+};
+
+#define mutex_init(m) \
+do { \
+ (m)->lock = 1; \
+} while (0)
+
+/*
+ * Userspace stand-in for the kernel mutex: the "lock" is a plain counter
+ * that mutex_init() sets to 1. Taking it just decrements the counter;
+ * nothing blocks.
+ */
+static inline void mutex_lock(struct mutex *m)
+{
+	m->lock -= 1;
+}
+
+/* Undo one mutex_lock() by incrementing the counter again. */
+static inline void mutex_unlock(struct mutex *m)
+{
+	m->lock += 1;
+}
+
+/*
+ * Returns 1 when the counter differs from its initial value of 1, and 0
+ * otherwise.
+ */
+static inline int mutex_is_locked(struct mutex *m)
+{
+	if (m->lock == 1)
+		return 0;
+	return 1;
+}
+
+#define cond_resched() do { } while (0)
+#define preempt_enable() do { } while (0)
+#define preempt_disable() do { } while (0)
+
+#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+#ifndef __attribute_const__
+#define __attribute_const__ __attribute__((__const__))
+#endif
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: index of the bit to set
+ * @addr: start of the bitmap; bits are numbered from the lowest bit of
+ *        the first unsigned long
+ *
+ * This is a plain, non-atomic read-modify-write of the word holding the
+ * bit. The volatile qualifier of @addr is cast away for the update.
+ */
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long *word = (unsigned long *)addr + BITOP_WORD(nr);
+
+	*word = *word | BITOP_MASK(nr);
+}
+
+/**
+ * __clear_bit - Clear a bit in memory
+ * @nr: index of the bit to clear
+ * @addr: start of the bitmap
+ *
+ * Non-atomic counterpart of __set_bit().
+ */
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long *word = (unsigned long *)addr + BITOP_WORD(nr);
+
+	*word = *word & ~BITOP_MASK(nr);
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: index of the bit to test
+ * @addr: start of the bitmap
+ *
+ * Returns 1 when the bit is set and 0 when it is clear.
+ */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+	const volatile unsigned long *word = addr + BITOP_WORD(nr);
+	int shift = nr & (BITS_PER_LONG - 1);
+
+	return (*word >> shift) & 1UL;
+}
+
+/*
+ * error pointer
+ */
+#define MAX_ERRNO 4095
+#define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void *ERR_PTR(long error)
+{
+ return (void *) error;
+}
+
+static inline long PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+/*
+ * Returns non-zero when @ptr, viewed as an unsigned long, lies in the top
+ * MAX_ERRNO values of the address range, i.e. when it encodes an error.
+ */
+static inline long IS_ERR(const void *ptr)
+{
+	unsigned long raw = (unsigned long)ptr;
+
+	return IS_ERR_VALUE(raw);
+}
+
+/*
+ * max/min macro
+ */
+#define min(x,y) ({ \
+ typeof(x) _x = (x); \
+ typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x < _y ? _x : _y; })
+
+#define max(x,y) ({ \
+ typeof(x) _x = (x); \
+ typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x > _y ? _x : _y; })
+
+#define min_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#define max_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
+/*
+ * printk
+ */
+#define printk(fmt, args...) fprintf(stderr, fmt, ##args)
+#define KERN_CRIT ""
+#define KERN_ERR ""
+
+/*
+ * kmalloc/kfree
+ */
+#define kmalloc(x, y) malloc(x)
+#define kzalloc(x, y) calloc(1, x)
+#define kstrdup(x, y) strdup(x)
+#define kfree(x) free(x)
+#define vmalloc(x) malloc(x)
+#define vfree(x) free(x)
+
+#ifndef BTRFS_DISABLE_BACKTRACE
+#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, !(c))
+#else
+#define BUG_ON(c) assert(!(c))
+#endif
+
+#define WARN_ON(c) BUG_ON(c)
+
+/*
+ * ASSERT(c) - stop the program when condition @c is false.
+ *
+ * assert_trace() takes the condition as an int, so it is normalized with
+ * !! first. Passing (c) directly would truncate wider values: a non-zero
+ * 64-bit expression whose low 32 bits are zero would be reported as a
+ * failed assertion.
+ */
+#ifndef BTRFS_DISABLE_BACKTRACE
+#define ASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, !!(c))
+#else
+#define ASSERT(c) assert(c)
+#endif
+
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+#ifdef __CHECKER__
+#define __bitwise __bitwise__
+#else
+#define __bitwise
+#endif
+
+/* Alignment check */
+#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+
+/*
+ * Returns 1 when exactly one bit of @n is set, 0 otherwise.
+ * Zero is not a power of two.
+ */
+static inline int is_power_of_2(unsigned long n)
+{
+	if (n == 0)
+		return 0;
+	/* clearing the lowest set bit leaves nothing behind for 2^k */
+	return (n & (n - 1)) == 0;
+}
+
+typedef u16 __bitwise __le16;
+typedef u16 __bitwise __be16;
+typedef u32 __bitwise __le32;
+typedef u32 __bitwise __be32;
+typedef u64 __bitwise __le64;
+typedef u64 __bitwise __be64;
+
+/* Macros to generate set/get funcs for the struct fields
+ * assume there is a lefoo_to_cpu for every type, so let's make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define cpu_to_le64(x) ((__force __le64)(u64)(bswap_64(x)))
+#define le64_to_cpu(x) ((__force u64)(__le64)(bswap_64(x)))
+#define cpu_to_le32(x) ((__force __le32)(u32)(bswap_32(x)))
+#define le32_to_cpu(x) ((__force u32)(__le32)(bswap_32(x)))
+#define cpu_to_le16(x) ((__force __le16)(u16)(bswap_16(x)))
+#define le16_to_cpu(x) ((__force u16)(__le16)(bswap_16(x)))
+#else
+#define cpu_to_le64(x) ((__force __le64)(u64)(x))
+#define le64_to_cpu(x) ((__force u64)(__le64)(x))
+#define cpu_to_le32(x) ((__force __le32)(u32)(x))
+#define le32_to_cpu(x) ((__force u32)(__le32)(x))
+#define cpu_to_le16(x) ((__force __le16)(u16)(x))
+#define le16_to_cpu(x) ((__force u16)(__le16)(x))
+#endif
+
+struct __una_u16 { __le16 x; } __attribute__((__packed__));
+struct __una_u32 { __le32 x; } __attribute__((__packed__));
+struct __una_u64 { __le64 x; } __attribute__((__packed__));
+
+#define get_unaligned_le8(p) (*((u8 *)(p)))
+#define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val))
+#define get_unaligned_le16(p) le16_to_cpu(((const struct __una_u16 *)(p))->x)
+#define put_unaligned_le16(val,p) (((struct __una_u16 *)(p))->x = cpu_to_le16(val))
+#define get_unaligned_le32(p) le32_to_cpu(((const struct __una_u32 *)(p))->x)
+#define put_unaligned_le32(val,p) (((struct __una_u32 *)(p))->x = cpu_to_le32(val))
+#define get_unaligned_le64(p) le64_to_cpu(((const struct __una_u64 *)(p))->x)
+#define put_unaligned_le64(val,p) (((struct __una_u64 *)(p))->x = cpu_to_le64(val))
+
+#ifndef true
+#define true 1
+#define false 0
+#endif
+
+#ifndef noinline
+#define noinline
+#endif
+
+#endif
diff --git a/library-test.c b/library-test.c
new file mode 100644
index 00000000..142188a7
--- /dev/null
+++ b/library-test.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2014 SUSE. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "kerncompat.h"
+#include "version.h"
+#include "send-stream.h"
+
+/*
+ * Reduced code snippet from snapper.git/snapper/Btrfs.cc
+ *
+ * Every callback is named explicitly and set to NULL. Because designated
+ * initializers are used, the file stops compiling if one of these members
+ * is missing from struct btrfs_send_ops, so do not collapse the list.
+ */
+struct btrfs_send_ops send_ops = {
+ .subvol = NULL,
+ .snapshot = NULL,
+ .mkfile = NULL,
+ .mkdir = NULL,
+ .mknod = NULL,
+ .mkfifo = NULL,
+ .mksock = NULL,
+ .symlink = NULL,
+ .rename = NULL,
+ .link = NULL,
+ .unlink = NULL,
+ .rmdir = NULL,
+ .write = NULL,
+ .clone = NULL,
+ .set_xattr = NULL,
+ .remove_xattr = NULL,
+ .truncate = NULL,
+ .chmod = NULL,
+ .chown = NULL,
+ .utimes = NULL,
+ .update_extent = NULL,
+};
+
+/*
+ * Link test only, not intended to be executed.
+ *
+ * References btrfs_read_and_process_send_stream() with the argument list
+ * selected by BTRFS_LIB_VERSION: four arguments below version 101, five
+ * from 101 on. The descriptor passed is deliberately -1.
+ *
+ * Returns whatever the library call returns.
+ */
+static int test_send_stream_api(void)
+{
+	int ret;
+	int fd = -1;
+
+#if BTRFS_LIB_VERSION < 101
+	ret = btrfs_read_and_process_send_stream(fd, &send_ops, NULL, 0);
+#else
+	ret = btrfs_read_and_process_send_stream(fd, &send_ops, NULL, 0, 1);
+#endif
+	return ret;
+}
+
+/*
+ * Entry point: calls the link test so the library symbol is referenced.
+ * The result of the call is ignored and 0 is always returned.
+ */
+int main(void)
+{
+	test_send_stream_api();
+
+	return 0;
+}
diff --git a/list.h b/list.h
new file mode 100644
index 00000000..db7a58c7
--- /dev/null
+++ b/list.h
@@ -0,0 +1,486 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+#define LIST_POISON1 ((struct list_head *) 0x00100100)
+#define LIST_POISON2 ((struct list_head *) 0x00200200)
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+/* Make @list an empty list: both links point back at @list itself. */
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->prev = list;
+	list->next = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void __list_add(struct list_head *xnew,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	/* forward links: prev -> xnew -> next */
+	xnew->next = next;
+	prev->next = xnew;
+
+	/* backward links: prev <- xnew <- next */
+	next->prev = xnew;
+	xnew->prev = prev;
+}
+#else
+extern void __list_add(struct list_head *xnew,
+ struct list_head *prev,
+ struct list_head *next);
+#endif
+
+/**
+ * list_add - add a new entry
+ * @xnew: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void list_add(struct list_head *xnew, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	/* link xnew between the head and the current first entry */
+	__list_add(xnew, head, first);
+}
+#else
+extern void list_add(struct list_head *xnew, struct list_head *head);
+#endif
+
+
+/**
+ * list_add_tail - add a new entry at the end of a list
+ * @xnew: new entry to be added
+ * @head: list head to add it before
+ *
+ * The entry is linked between the current last entry and @head, which
+ * makes this the natural insert operation for queues.
+ */
+static inline void list_add_tail(struct list_head *xnew, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	__list_add(xnew, last, head);
+}
+
+/*
+ * Unlink whatever sits between @prev and @next by making the two point
+ * at each other.
+ *
+ * Internal helper for callers that already hold the neighbouring entries.
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	prev->next = next;
+	next->prev = prev;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+
+	/* poison the links so that stale use of the entry is recognizable */
+	entry->prev = LIST_POISON2;
+	entry->next = LIST_POISON1;
+}
+#else
+extern void list_del(struct list_head *entry);
+#endif
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @xnew : the new element to insert
+ *
+ * @xnew takes over the neighbours of @old. The fields of @old are not
+ * written unless @old is its own neighbour.
+ * Note: if 'old' was empty, it will be overwritten.
+ *
+ * The four statements are order-dependent: old->prev is read only after
+ * xnew->next->prev has been written. When @old is an empty list head
+ * (old->next == old), that write lands in old->prev, and @xnew ends up
+ * linked to itself, i.e. as an empty list. Do not reorder.
+ */
+static inline void list_replace(struct list_head *old,
+ struct list_head *xnew)
+{
+ xnew->next = old->next;
+ xnew->next->prev = xnew;
+ xnew->prev = old->prev;
+ xnew->prev->next = xnew;
+}
+
+/*
+ * Replace @old by @xnew as list_replace() does, then turn @old into an
+ * empty list.
+ */
+static inline void list_replace_init(struct list_head *old,
+				     struct list_head *xnew)
+{
+	list_replace(old, xnew);
+
+	/* same as INIT_LIST_HEAD(old) */
+	old->next = old;
+	old->prev = old;
+}
+/**
+ * list_del_init - unlink an entry and reinitialize it
+ * @entry: the element to delete from the list.
+ *
+ * Unlike list_del(), the entry is left as an empty list rather than
+ * poisoned.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+	INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - unlink an entry and insert it right after another head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+	struct list_head *before = list->prev;
+	struct list_head *after = list->next;
+
+	/* unlink from the current position */
+	after->prev = before;
+	before->next = after;
+
+	list_add(list, head);
+}
+
+/**
+ * list_move_tail - unlink an entry and insert it right before another head
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+	struct list_head *before = list->prev;
+	struct list_head *after = list->next;
+
+	/* unlink from the current position */
+	after->prev = before;
+	before->next = after;
+
+	list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ *
+ * Returns non-zero when the entry following @list is @head.
+ */
+static inline int list_is_last(const struct list_head *list,
+			       const struct list_head *head)
+{
+	const struct list_head *following = list->next;
+
+	return following == head;
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ *
+ * Returns non-zero when @head's next link points back at @head.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	const struct list_head *first = head->next;
+
+	return first == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * Checks both links of @head: the list counts as empty only when next and
+ * prev both point back at @head, so a list that another CPU is in the
+ * middle of modifying is not reported as empty.
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	if (first != head)
+		return 0;
+	return first == head->prev;
+}
+
+/*
+ * Link every entry of @list (but not @list itself) between @prev and
+ * @next. @list is read, never written; callers make sure it is not empty.
+ */
+static inline void __list_splice(const struct list_head *list,
+				 struct list_head *prev,
+				 struct list_head *next)
+{
+	struct list_head *chain_head = list->next;
+	struct list_head *chain_tail = list->prev;
+
+	/* attach the front of the chain to prev */
+	chain_head->prev = prev;
+	prev->next = chain_head;
+
+	/* attach the back of the chain to next */
+	chain_tail->next = next;
+	next->prev = chain_tail;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The entries of @list are inserted right after @head. Nothing happens
+ * when @list is empty. @list itself is not reinitialised.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head, head->next);
+}
+
+/**
+ * list_splice_tail - join two lists, each list being a queue
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The entries of @list are inserted right before @head. Nothing happens
+ * when @list is empty. @list itself is not reinitialised.
+ */
+static inline void list_splice_tail(struct list_head *list,
+				    struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head->prev, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The entries of @list are inserted right after @head and @list is then
+ * reinitialised with INIT_LIST_HEAD().  An empty @list is left untouched.
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head, head->next);
+	INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_splice_tail_init - join two lists and reinitialise the emptied list
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * Each of the lists is a queue: the entries of @list are inserted right
+ * before @head.  @list is then reinitialised with INIT_LIST_HEAD(); an
+ * empty @list is left untouched.
+ */
+static inline void list_splice_tail_init(struct list_head *list,
+					 struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head->prev, head);
+	INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Thin wrapper: expands directly to container_of(@ptr, @type, @member).
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty: on an empty list
+ * @ptr->next is the head itself, so the result would not point at a
+ * real element.
+ */
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+/**
+ * list_next_entry - get the next element from a list
+ * @ptr: pointer to the current element, i.e. to the struct that embeds
+ * the list_head (not a bare list head); it is expanded more than once.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note, that next is expected to be not null. The result is computed
+ * from @ptr->member.next without checking for the list head, so after
+ * the last element it is the pseudo-entry wrapping the head.
+ */
+#define list_next_entry(ptr, member) \
+ list_entry((ptr)->member.next, typeof(*ptr), member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ *
+ * The step expression reads @pos->next after the loop body has run, so
+ * @pos must still be a valid, linked entry at that point; use
+ * list_for_each_safe() when the body may remove @pos.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); \
+ pos = pos->next)
+
+/**
+ * __list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ *
+ * In this header the macro expands to exactly the same loop as
+ * list_for_each(): neither variant does any prefetching, so the two
+ * are interchangeable here.
+ */
+#define __list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ *
+ * Walks the prev links starting at @head->prev. The step expression
+ * reads @pos->prev after the loop body has run, so @pos must still be
+ * a valid, linked entry at that point.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev; pos != (head); \
+ pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ *
+ * @n always holds the successor of @pos, fetched before the loop body
+ * runs, so the body may unlink @pos without breaking the walk.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * The loop ends when &@pos->member is @head, so after a complete walk
+ * @pos is the pseudo-entry wrapping @head and not a real element.
+ * The step reads @pos->member.next after the body has run; use
+ * list_for_each_entry_safe() when the body may remove @pos.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Starts at @head->prev and follows the prev links until &@pos->member
+ * is @head again.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue
+ * @pos: the type * to use as a start point
+ * @head: the head of the list
+ * @member: the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue.
+ * Evaluates to @pos when it is non-NULL, otherwise to the pseudo-entry
+ * wrapping @head, whose member.next is the first element of the list.
+ * Relies on the GNU "a ?: b" conditional with omitted middle operand.
+ */
+#define list_prepare_entry(pos, head, member) \
+ ((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position: the entry @pos points at on entry is itself
+ * not visited, the first one visited is its successor.
+ */
+#define list_for_each_entry_continue(pos, head, member) \
+ for (pos = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position:
+ * there is no init expression, so the entry @pos points at on entry is
+ * the first one visited.
+ */
+#define list_for_each_entry_from(pos, head, member) \
+ for (; &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * @n is loaded with the successor of @pos before the loop body runs and
+ * the step continues from @n, so the body may unlink @pos.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue - continue list iteration safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry. The entry @pos points at on
+ * entry is not visited; the walk starts with its successor.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) \
+ for (pos = list_entry(pos->member.next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from - iterate over list from current point safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry. The entry @pos points at on entry is the
+ * first one visited; only @n is initialised by the macro.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) \
+ for (n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry. @n holds the predecessor of @pos, fetched before the
+ * loop body runs.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member), \
+ n = list_entry(pos->member.prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+#endif
diff --git a/list_sort.c b/list_sort.c
new file mode 100644
index 00000000..f526b407
--- /dev/null
+++ b/list_sort.c
@@ -0,0 +1,144 @@
+/*
+ * Taken from the Linux kernel's lib/list_sort.c, with unneeded code removed
+ * and adapted for btrfs-progs.
+ */
+
+#include "kerncompat.h"
+#include "list_sort.h"
+#include "list.h"
+
+/*
+ * Number of levels of pending sorted sublists that list_sort() keeps;
+ * its part[] array has MAX_LIST_LENGTH_BITS + 1 slots.
+ */
+#define MAX_LIST_LENGTH_BITS 20
+
+/*
+ * Merge the two sorted chains @a and @b into a single sorted chain.
+ *
+ * Input and output use the intermediate format that makes chaining of
+ * merge() calls cheap: NULL-terminated, no reserved or sentinel head
+ * node, and the "prev" links are not maintained.
+ *
+ * @priv is passed through to @cmp untouched.  Returns the first element
+ * of the merged chain (NULL when both inputs are NULL).
+ */
+static struct list_head *merge(void *priv,
+				int (*cmp)(void *priv, struct list_head *a,
+					struct list_head *b),
+				struct list_head *a, struct list_head *b)
+{
+	struct list_head *merged = NULL;
+	struct list_head **link = &merged;	/* slot the next pick goes into */
+
+	while (a && b) {
+		/* ties go to 'a' -- that is what keeps the sort stable */
+		if ((*cmp)(priv, a, b) <= 0) {
+			*link = a;
+			link = &a->next;
+			a = a->next;
+		} else {
+			*link = b;
+			link = &b->next;
+			b = b->next;
+		}
+	}
+	/* one side is exhausted; hang the remainder (if any) on the end */
+	*link = a ? a : b;
+	return merged;
+}
+
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure. This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ *
+ * @priv: opaque pointer handed through to @cmp
+ * @cmp: comparison callback; a result <= 0 picks the element from @a
+ * @head: list head the merged chain is hung from and closed back onto
+ * @a, @b: NULL-terminated sorted chains. The do/while loop below
+ * dereferences tail->next unconditionally, so at least one of
+ * them has to be non-NULL.
+ */
+static void merge_and_restore_back_links(void *priv,
+ int (*cmp)(void *priv, struct list_head *a,
+ struct list_head *b),
+ struct list_head *head,
+ struct list_head *a, struct list_head *b)
+{
+ struct list_head *tail = head;
+
+ /* Regular merge, but every appended element gets its prev link fixed. */
+ while (a && b) {
+ /* if equal, take 'a' -- important for sort stability */
+ if ((*cmp)(priv, a, b) <= 0) {
+ tail->next = a;
+ a->prev = tail;
+ a = a->next;
+ } else {
+ tail->next = b;
+ b->prev = tail;
+ b = b->next;
+ }
+ tail = tail->next;
+ }
+ /* Attach whichever chain still has elements left. */
+ tail->next = a ? : b;
+
+ /* Walk the remainder only to rebuild its prev links. */
+ do {
+ /*
+ * In worst cases this loop may run many iterations.
+ * Continue callbacks to the client even though no
+ * element comparison is needed, so the client's cmp()
+ * routine can invoke cond_resched() periodically.
+ */
+ (*cmp)(priv, tail->next, tail->next);
+
+ tail->next->prev = tail;
+ tail = tail->next;
+ } while (tail->next);
+
+ /* Close the ring: the last element and @head point at each other. */
+ tail->next = head;
+ head->prev = tail;
+}
+
+/**
+ * list_sort - sort a list
+ * @priv: private data, opaque to list_sort(), passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function implements "merge sort", which has O(nlog(n))
+ * complexity.
+ *
+ * The comparison function @cmp must return a negative value if @a
+ * should sort before @b, and a positive value if @a should sort after
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0.
+ *
+ * The sort is done in place by relinking the entries; an empty list is
+ * returned untouched. @cmp is also called with both arguments pointing
+ * at the same element while the prev links are being restored (see
+ * merge_and_restore_back_links()).
+ */
+void list_sort(void *priv, struct list_head *head,
+ int (*cmp)(void *priv, struct list_head *a,
+ struct list_head *b))
+{
+ struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
+ -- last slot is a sentinel */
+ int lev; /* index into part[] */
+ int max_lev = 0;
+ struct list_head *list;
+
+ if (list_empty(head))
+ return;
+
+ memset(part, 0, sizeof(part));
+
+ /* Cut the ring open: turn the list into a NULL-terminated chain. */
+ head->prev->next = NULL;
+ list = head->next;
+
+ while (list) {
+ /* Detach the next element as a one-element sorted chain. */
+ struct list_head *cur = list;
+ list = list->next;
+ cur->next = NULL;
+
+ /*
+ * Carry like a binary counter: merge with every occupied
+ * level on the way up and park the result in the first
+ * free slot.
+ */
+ for (lev = 0; part[lev]; lev++) {
+ cur = merge(priv, cmp, part[lev], cur);
+ part[lev] = NULL;
+ }
+ if (lev > max_lev) {
+ /*
+ * Never use the sentinel slot: warn (on stdout) and
+ * keep piling onto the highest real level instead.
+ */
+ if (lev >= ARRAY_SIZE(part)-1) {
+ printf("list_sort: list passed to"
+ " list_sort() too long for"
+ " efficiency\n");
+ lev--;
+ }
+ max_lev = lev;
+ }
+ part[lev] = cur;
+ }
+
+ /* 'list' is NULL here; fold all levels below max_lev into it. */
+ for (lev = 0; lev < max_lev; lev++)
+ if (part[lev])
+ list = merge(priv, cmp, part[lev], list);
+
+ /* Final merge also rebuilds the prev links and re-closes the ring. */
+ merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
+}
diff --git a/list_sort.h b/list_sort.h
new file mode 100644
index 00000000..987cd5c4
--- /dev/null
+++ b/list_sort.h
@@ -0,0 +1,14 @@
+/*
+ * taken from linux kernel include/list_sort.h
+ */
+#ifndef _LINUX_LIST_SORT_H
+#define _LINUX_LIST_SORT_H
+
+#include "kerncompat.h"
+
+struct list_head;
+
+/*
+ * Sort the list anchored at @head in place. @cmp receives @priv plus two
+ * entries and returns a negative value, zero or a positive value when the
+ * first entry sorts before, equal to or after the second one.
+ */
+void list_sort(void *priv, struct list_head *head,
+ int (*cmp)(void *priv, struct list_head *a,
+ struct list_head *b));
+#endif
diff --git a/mkfs.c b/mkfs.c
new file mode 100644
index 00000000..ea584042
--- /dev/null
+++ b/mkfs.c
@@ -0,0 +1,1844 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "kerncompat.h"
+#include "androidcompat.h"
+
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include "ioctl.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* #include <sys/dir.h> included via androidcompat.h */
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <uuid/uuid.h>
+#include <ctype.h>
+#include <sys/xattr.h>
+#include <limits.h>
+#include <linux/limits.h>
+#include <blkid/blkid.h>
+#include <ftw.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "transaction.h"
+#include "utils.h"
+#include "list_sort.h"
+
+/*
+ * Directory index used by add_directory_items() for the next dir item and
+ * inode ref it inserts; incremented after every entry.
+ */
+static u64 index_cnt = 2;
+/* Verbosity flag; presumably cleared by -q/--quiet -- TODO confirm. */
+static int verbose = 1;
+
+/*
+ * One directory queued by traverse_directory().
+ */
+struct directory_name_entry {
+ char *dir_name; /* directory name as given (the source dir for the first entry) */
+ char *path; /* path of the directory; realpath() result for the source dir */
+ ino_t inum; /* inode number assigned to this directory in the new filesystem */
+ struct list_head list; /* link in the queue of directories to process */
+};
+
+/*
+ * Running totals of the chunk sizes handed out during mkfs, one counter per
+ * block group type. The create_*_block_groups() and create_one_raid_group()
+ * helpers add the size of every chunk they create.
+ */
+struct mkfs_allocation {
+ u64 data; /* BTRFS_BLOCK_GROUP_DATA chunks */
+ u64 metadata; /* BTRFS_BLOCK_GROUP_METADATA chunks */
+ u64 mixed; /* combined METADATA|DATA chunks */
+ u64 system; /* BTRFS_BLOCK_GROUP_SYSTEM chunks */
+};
+
+/*
+ * Create the initial SYSTEM block group and the first metadata chunk and
+ * block group, inside a transaction of their own.
+ *
+ * @root:       root used to start and commit the transaction
+ * @mixed:      non-zero to create one combined METADATA|DATA chunk instead
+ *              of a metadata-only one
+ * @allocation: running totals; ->system and ->metadata (or ->mixed) grow by
+ *              the sizes allocated here
+ *
+ * Returns 0 on success, -ENOSPC when no chunk could be allocated (the
+ * transaction is then left uncommitted and system_allocs stays set), or the
+ * result of btrfs_commit_transaction().  Every other failure trips BUG_ON().
+ */
+static int create_metadata_block_groups(struct btrfs_root *root, int mixed,
+				struct mkfs_allocation *allocation)
+{
+	struct btrfs_trans_handle *trans;
+	u64 bytes_used;
+	u64 chunk_start = 0;
+	u64 chunk_size = 0;
+	u64 flags = BTRFS_BLOCK_GROUP_METADATA;
+	int ret;
+
+	if (mixed)
+		flags |= BTRFS_BLOCK_GROUP_DATA;
+
+	trans = btrfs_start_transaction(root, 1);
+	bytes_used = btrfs_super_bytes_used(root->fs_info->super_copy);
+
+	root->fs_info->system_allocs = 1;
+	ret = btrfs_make_block_group(trans, root, bytes_used,
+				     BTRFS_BLOCK_GROUP_SYSTEM,
+				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+				     0, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+	BUG_ON(ret);
+	allocation->system += BTRFS_MKFS_SYSTEM_GROUP_SIZE;
+
+	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+				&chunk_start, &chunk_size, flags);
+	if (ret == -ENOSPC) {
+		fprintf(stderr, "no space to alloc %s chunk\n",
+			mixed ? "data/metadata" : "metadata");
+		goto err;
+	}
+	BUG_ON(ret);
+
+	ret = btrfs_make_block_group(trans, root, 0, flags,
+				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+				     chunk_start, chunk_size);
+	BUG_ON(ret);
+	if (mixed)
+		allocation->mixed += chunk_size;
+	else
+		allocation->metadata += chunk_size;
+
+	root->fs_info->system_allocs = 0;
+	/* report a failed commit instead of silently dropping its result */
+	ret = btrfs_commit_transaction(trans, root);
+
+err:
+	return ret;
+}
+
+/*
+ * Allocate the first data chunk and create its block group.
+ *
+ * Nothing is done for a @mixed filesystem.  The chunk size is added to
+ * @allocation->data.  Returns 0 on success or -ENOSPC when no chunk could be
+ * allocated; every other failure trips BUG_ON().
+ */
+static int create_data_block_groups(struct btrfs_trans_handle *trans,
+		struct btrfs_root *root, int mixed,
+		struct mkfs_allocation *allocation)
+{
+	u64 chunk_start = 0;
+	u64 chunk_size = 0;
+	int ret;
+
+	if (mixed)
+		return 0;
+
+	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+				&chunk_start, &chunk_size,
+				BTRFS_BLOCK_GROUP_DATA);
+	if (ret == -ENOSPC) {
+		fprintf(stderr, "no space to alloc data chunk\n");
+		return ret;
+	}
+	BUG_ON(ret);
+
+	ret = btrfs_make_block_group(trans, root, 0,
+				     BTRFS_BLOCK_GROUP_DATA,
+				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+				     chunk_start, chunk_size);
+	allocation->data += chunk_size;
+	BUG_ON(ret);
+
+	return ret;
+}
+
+/*
+ * Create the root directories of the tree root and of @root, then add the
+ * "default" dir item and inode ref to the tree root.  The dir item points at
+ * the fs root key with its offset set to (u64)-1.
+ *
+ * @allocation is not used.  Returns 0 on success or the first non-zero
+ * result of the helpers.
+ */
+static int make_root_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		struct mkfs_allocation *allocation)
+{
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
+	struct btrfs_key location;
+	int ret;
+
+	ret = btrfs_make_root_dir(trans, tree_root,
+				  BTRFS_ROOT_TREE_DIR_OBJECTID);
+	if (ret)
+		return ret;
+
+	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
+	if (ret)
+		return ret;
+
+	memcpy(&location, &root->fs_info->fs_root->root_key, sizeof(location));
+	location.offset = (u64)-1;
+	ret = btrfs_insert_dir_item(trans, tree_root,
+			"default", 7,
+			btrfs_super_root_dir(root->fs_info->super_copy),
+			&location, BTRFS_FT_DIR, 0);
+	if (ret)
+		return ret;
+
+	return btrfs_insert_inode_ref(trans, tree_root,
+				      "default", 7, location.objectid,
+				      BTRFS_ROOT_TREE_DIR_OBJECTID, 0);
+}
+
+/*
+ * COW the root node of @root again unless the generation recorded in its
+ * root item already equals the id of the running transaction.
+ */
+static void __recow_root(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root)
+{
+	struct extent_buffer *cowed;
+	int ret;
+
+	if (trans->transid == btrfs_root_generation(&root->root_item))
+		return;
+
+	extent_buffer_get(root->node);
+	ret = __btrfs_cow_block(trans, root, root->node,
+				NULL, 0, &cowed, 0, 0);
+	BUG_ON(ret);
+	free_extent_buffer(cowed);
+}
+
+/*
+ * Run __recow_root() on the fs, tree, extent, chunk, dev and csum roots of
+ * the filesystem @root belongs to, in that order.
+ */
+static void recow_roots(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_root *roots[] = {
+		info->fs_root,
+		info->tree_root,
+		info->extent_root,
+		info->chunk_root,
+		info->dev_root,
+		info->csum_root,
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(roots) / sizeof(roots[0]); i++)
+		__recow_root(trans, roots[i]);
+}
+
+/*
+ * Allocate one chunk of block group @type, create its block group and add
+ * the chunk size to the matching counter in @allocation.
+ *
+ * Exits the program when the allocation fails with -ENOSPC; every other
+ * failure, or a @type that is not data, metadata, system or mixed, trips
+ * BUG_ON().  Returns 0.
+ */
+static int create_one_raid_group(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 type,
+			      struct mkfs_allocation *allocation)
+
+{
+	u64 chunk_start;
+	u64 chunk_size;
+	u64 group = type & BTRFS_BLOCK_GROUP_TYPE_MASK;
+	int ret;
+
+	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+				&chunk_start, &chunk_size, type);
+	if (ret == -ENOSPC) {
+		fprintf(stderr, "not enough free space\n");
+		exit(1);
+	}
+	BUG_ON(ret);
+
+	ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0,
+				     type, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+				     chunk_start, chunk_size);
+
+	/* account the chunk under the type bits of @type */
+	if (group == BTRFS_BLOCK_GROUP_DATA) {
+		allocation->data += chunk_size;
+	} else if (group == BTRFS_BLOCK_GROUP_METADATA) {
+		allocation->metadata += chunk_size;
+	} else if (group == BTRFS_BLOCK_GROUP_SYSTEM) {
+		allocation->system += chunk_size;
+	} else if (group ==
+		   (BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA)) {
+		allocation->mixed += chunk_size;
+	} else {
+		BUG_ON(1);
+	}
+
+	BUG_ON(ret);
+	return ret;
+}
+
+/*
+ * Create the block groups for the requested profiles.
+ *
+ * With a non-zero @metadata_profile a system group and a metadata group
+ * (metadata+data when @mixed) are created in that profile.  With a non-zero
+ * @data_profile on a non-mixed filesystem a data group is created as well.
+ * Afterwards all roots are re-COWed through recow_roots().
+ *
+ * Always returns 0; failures trip BUG_ON().
+ */
+static int create_raid_groups(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 data_profile,
+			      u64 metadata_profile, int mixed,
+			      struct mkfs_allocation *allocation)
+{
+	int ret;
+
+	if (metadata_profile) {
+		u64 meta_flags = mixed ?
+			(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA) :
+			BTRFS_BLOCK_GROUP_METADATA;
+
+		ret = create_one_raid_group(trans, root,
+					    BTRFS_BLOCK_GROUP_SYSTEM |
+					    metadata_profile, allocation);
+		BUG_ON(ret);
+
+		ret = create_one_raid_group(trans, root,
+					    meta_flags | metadata_profile,
+					    allocation);
+		BUG_ON(ret);
+	}
+
+	if (data_profile && !mixed) {
+		ret = create_one_raid_group(trans, root,
+					    BTRFS_BLOCK_GROUP_DATA |
+					    data_profile, allocation);
+		BUG_ON(ret);
+	}
+
+	recow_roots(trans, root);
+	return 0;
+}
+
+/*
+ * Create the data relocation tree: copy the root node of @root under
+ * BTRFS_DATA_RELOC_TREE_OBJECTID and insert a root item for the copy into
+ * the tree root.
+ *
+ * Always returns 0; failures trip BUG_ON().
+ */
+static int create_data_reloc_tree(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root)
+{
+	const u64 objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
+	struct btrfs_root_item root_item;
+	struct btrfs_key location;
+	struct extent_buffer *copy;
+	int ret;
+
+	location.objectid = objectid;
+	location.type = BTRFS_ROOT_ITEM_KEY;
+	location.offset = 0;
+
+	ret = btrfs_copy_root(trans, root, root->node, &copy, objectid);
+	BUG_ON(ret);
+
+	/* start from the root item of @root and point it at the copy */
+	memcpy(&root_item, &root->root_item, sizeof(root_item));
+	btrfs_set_root_bytenr(&root_item, copy->start);
+	btrfs_set_root_level(&root_item, btrfs_header_level(copy));
+	btrfs_set_root_generation(&root_item, trans->transid);
+	free_extent_buffer(copy);
+
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
+				&location, &root_item);
+	BUG_ON(ret);
+	return 0;
+}
+
+/*
+ * Print the mkfs.btrfs usage text to stderr and exit with status @ret.
+ * Does not return.
+ */
+static void print_usage(int ret)
+{
+	static const char * const usage_lines[] = {
+		"usage: mkfs.btrfs [options] dev [ dev ... ]\n",
+		"options:\n",
+		"\t-A|--alloc-start START the offset to start the FS\n",
+		"\t-b|--byte-count SIZE total number of bytes in the FS\n",
+		"\t-d|--data PROFILE data profile, raid0, raid1, raid5, raid6, raid10, dup or single\n",
+		"\t-f|--force force overwrite of existing filesystem\n",
+		"\t-l|--leafsize SIZE deprecated, alias for nodesize\n",
+		"\t-L|--label LABEL set a label\n",
+		"\t-m|--metadata PROFILE metadata profile, values like data profile\n",
+		"\t-M|--mixed mix metadata and data together\n",
+		"\t-n|--nodesize SIZE size of btree nodes\n",
+		"\t-s|--sectorsize SIZE min block allocation (may not mountable by current kernel)\n",
+		"\t-r|--rootdir DIR the source directory\n",
+		"\t-K|--nodiscard do not perform whole device TRIM\n",
+		"\t-O|--features LIST comma separated list of filesystem features, use '-O list-all' to list features\n",
+		"\t-U|--uuid UUID specify the filesystem UUID\n",
+		"\t-q|--quiet no messages except errors\n",
+		"\t-V|--version print the mkfs.btrfs version and exit\n",
+	};
+	unsigned int i;
+
+	/* none of the lines contains a conversion, so fputs() prints them as-is */
+	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
+		fputs(usage_lines[i], stderr);
+	exit(ret);
+}
+
+/* Print the program name and PACKAGE_STRING to stderr, then exit(0). */
+static void print_version(void) __attribute__((noreturn));
+static void print_version(void)
+{
+	const char *package = PACKAGE_STRING;
+
+	fprintf(stderr, "mkfs.btrfs, part of %s\n", package);
+	exit(0);
+}
+
+/*
+ * Translate a profile name given on the command line into its block group
+ * flag.
+ *
+ * All names are matched case-insensitively; "raid0" used to be the only one
+ * compared with strcmp(), so "RAID0" was rejected while "RAID1" worked.
+ *
+ * Returns the BTRFS_BLOCK_GROUP_* flag, or 0 for "single".  An unknown name
+ * prints a message to stderr and exits with status 1.
+ */
+static u64 parse_profile(char *s)
+{
+	if (strcasecmp(s, "raid0") == 0)
+		return BTRFS_BLOCK_GROUP_RAID0;
+	if (strcasecmp(s, "raid1") == 0)
+		return BTRFS_BLOCK_GROUP_RAID1;
+	if (strcasecmp(s, "raid5") == 0)
+		return BTRFS_BLOCK_GROUP_RAID5;
+	if (strcasecmp(s, "raid6") == 0)
+		return BTRFS_BLOCK_GROUP_RAID6;
+	if (strcasecmp(s, "raid10") == 0)
+		return BTRFS_BLOCK_GROUP_RAID10;
+	if (strcasecmp(s, "dup") == 0)
+		return BTRFS_BLOCK_GROUP_DUP;
+	if (strcasecmp(s, "single") == 0)
+		return 0;
+
+	fprintf(stderr, "Unknown profile %s\n", s);
+	exit(1);
+	/* not reached */
+	return 0;
+}
+
+/*
+ * Validate a label given on the command line and return a heap copy of it.
+ *
+ * A label of BTRFS_LABEL_SIZE bytes or more is rejected.  Both a too long
+ * label and a failed allocation print a message to stderr and exit with
+ * status 1, so the function never returns NULL.  The caller owns the
+ * returned string.
+ */
+static char *parse_label(char *input)
+{
+	size_t len = strlen(input);
+	char *label;
+
+	if (len >= BTRFS_LABEL_SIZE) {
+		fprintf(stderr, "Label %s is too long (max %d)\n", input,
+			BTRFS_LABEL_SIZE - 1);
+		exit(1);
+	}
+
+	label = strdup(input);
+	if (!label) {
+		fprintf(stderr, "not enough memory to store the label\n");
+		exit(1);
+	}
+	return label;
+}
+
+/*
+ * Link inode @objectid into directory @parent_inum under @name: insert the
+ * dir item and the inode ref, both at the current global index_cnt.
+ *
+ * The file type comes from @st->st_mode; anything that is not a directory,
+ * regular file or symlink keeps type 0.  The index that was used is stored
+ * in *@dir_index_cnt and index_cnt is advanced, unless the dir item could
+ * not be inserted.
+ *
+ * Returns 0 on success or the error of the failing insert.
+ */
+static int add_directory_items(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root, u64 objectid,
+			       ino_t parent_inum, const char *name,
+			       struct stat *st, int *dir_index_cnt)
+{
+	struct btrfs_key location;
+	int name_len = strlen(name);
+	u8 filetype = 0;
+	int ret;
+
+	location.objectid = objectid;
+	location.offset = 0;
+	btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
+
+	if (S_ISDIR(st->st_mode))
+		filetype = BTRFS_FT_DIR;
+	else if (S_ISREG(st->st_mode))
+		filetype = BTRFS_FT_REG_FILE;
+	else if (S_ISLNK(st->st_mode))
+		filetype = BTRFS_FT_SYMLINK;
+
+	ret = btrfs_insert_dir_item(trans, root, name, name_len,
+				    parent_inum, &location,
+				    filetype, index_cnt);
+	if (ret)
+		return ret;
+
+	ret = btrfs_insert_inode_ref(trans, root, name, name_len,
+				     objectid, parent_inum, index_cnt);
+	*dir_index_cnt = index_cnt++;
+
+	return ret;
+}
+
+/*
+ * Fill the on-disk inode item @dst from the stat data in @src.
+ *
+ * - directories get size 0 and nlink 1
+ * - regular files get nbytes == st_size when the data fits inline,
+ *   otherwise st_size rounded up to a multiple of the sector size
+ * - symlinks get nbytes == st_size + 1
+ *
+ * Times are copied with second resolution; otime, rdev, flags and the block
+ * group are set to 0.  Always returns 0.
+ */
+static int fill_inode_item(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct btrfs_inode_item *dst, struct stat *src)
+{
+	u64 sectorsize = root->sectorsize;
+	u64 size = src->st_size;
+	u64 nlink = src->st_nlink;
+	u64 nbytes = 0;
+
+	if (S_ISDIR(src->st_mode)) {
+		size = 0;
+		nlink = 1;
+	} else if (S_ISREG(src->st_mode)) {
+		if (src->st_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
+			nbytes = src->st_size;
+		} else {
+			/* whole sectors, rounding the tail up */
+			nbytes = src->st_size / sectorsize;
+			if (src->st_size % sectorsize)
+				nbytes += 1;
+			nbytes *= sectorsize;
+		}
+	} else if (S_ISLNK(src->st_mode)) {
+		nbytes = src->st_size + 1;
+	}
+
+	/*
+	 * btrfs_inode_item has some reserved fields
+	 * and represents on-disk inode entry, so
+	 * zero everything to prevent information leak
+	 */
+	memset(dst, 0, sizeof (*dst));
+
+	btrfs_set_stack_inode_generation(dst, trans->transid);
+	btrfs_set_stack_inode_size(dst, size);
+	btrfs_set_stack_inode_nbytes(dst, nbytes);
+	btrfs_set_stack_inode_block_group(dst, 0);
+	btrfs_set_stack_inode_nlink(dst, nlink);
+	btrfs_set_stack_inode_uid(dst, src->st_uid);
+	btrfs_set_stack_inode_gid(dst, src->st_gid);
+	btrfs_set_stack_inode_mode(dst, src->st_mode);
+	btrfs_set_stack_inode_rdev(dst, 0);
+	btrfs_set_stack_inode_flags(dst, 0);
+
+	btrfs_set_stack_timespec_sec(&dst->atime, src->st_atime);
+	btrfs_set_stack_timespec_nsec(&dst->atime, 0);
+	btrfs_set_stack_timespec_sec(&dst->ctime, src->st_ctime);
+	btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
+	btrfs_set_stack_timespec_sec(&dst->mtime, src->st_mtime);
+	btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
+	btrfs_set_stack_timespec_sec(&dst->otime, 0);
+	btrfs_set_stack_timespec_nsec(&dst->otime, 0);
+
+	return 0;
+}
+
+/*
+ * scandir() filter: keep every directory entry except "." and "..".
+ *
+ * The names are compared with strcmp().  d_reclen is the length of the
+ * whole directory record, not of the name, so it is not a meaningful bound
+ * for a string comparison.
+ *
+ * Returns 0 to drop the entry, 1 to keep it.
+ */
+static int directory_select(const struct direct *entry)
+{
+	if (strcmp(entry->d_name, ".") == 0 ||
+	    strcmp(entry->d_name, "..") == 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Release a name list as produced by scandir(): the @count entries and then
+ * the array itself.  A negative @count (scandir() failure) is a no-op and
+ * @files is not touched.
+ */
+static void free_namelist(struct direct **files, int count)
+{
+	int idx = 0;
+
+	if (count < 0)
+		return;
+
+	while (idx < count) {
+		free(files[idx]);
+		idx++;
+	}
+	free(files);
+}
+
+/*
+ * Compute the inode size to record for directory @dirname: twice the sum of
+ * the name lengths of all its entries except "." and "..".
+ *
+ * Returns 0 when the directory cannot be scanned.
+ */
+static u64 calculate_dir_inode_size(char *dirname)
+{
+	struct direct **files;
+	u64 name_bytes = 0;
+	int count;
+	int i;
+
+	count = scandir(dirname, &files, directory_select, NULL);
+
+	/* a negative count (scandir failure) simply skips the loop */
+	for (i = 0; i < count; i++)
+		name_bytes += strlen(files[i]->d_name);
+
+	free_namelist(files, count);
+
+	return name_bytes * 2;
+}
+
+/*
+ * Build the inode item for the file described by @st and insert it into
+ * @root as inode @self_objectid.
+ *
+ * For a directory the size is replaced by the value computed from its
+ * entries, see calculate_dir_inode_size(); @name is the path scanned for
+ * that.  The item is copied to *@inode_ret even when the insert fails.
+ * @parent_inum and @dir_index_cnt are not used.
+ *
+ * Returns the result of btrfs_insert_inode().
+ */
+static int add_inode_items(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct stat *st, char *name,
+			   u64 self_objectid, ino_t parent_inum,
+			   int dir_index_cnt, struct btrfs_inode_item *inode_ret)
+{
+	struct btrfs_inode_item btrfs_inode;
+	int ret;
+
+	fill_inode_item(trans, root, &btrfs_inode, st);
+
+	if (S_ISDIR(st->st_mode))
+		btrfs_set_stack_inode_size(&btrfs_inode,
+					   calculate_dir_inode_size(name));
+
+	ret = btrfs_insert_inode(trans, root, self_objectid, &btrfs_inode);
+
+	*inode_ret = btrfs_inode;
+	return ret;
+}
+
+/*
+ * Copy all extended attributes of @file_name to inode @objectid.
+ *
+ * llistxattr() returns the attribute names as consecutive NUL-terminated
+ * strings and its return value is the number of bytes used.  The names are
+ * walked up to exactly that length; the bytes behind it are uninitialised
+ * and must not be parsed.  Values are read with lgetxattr() so that, like
+ * the listing, a symlink itself is examined and not its target.
+ *
+ * Returns 0 on success, when the file has no attributes, or when the source
+ * filesystem does not support them (ENOTSUP).  Otherwise returns the
+ * negative result of the failing call; the first failure ends the copy.
+ */
+static int add_xattr_item(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, u64 objectid,
+			  const char *file_name)
+{
+	int ret;
+	int cur_name_len;
+	char xattr_list[XATTR_LIST_MAX];
+	char cur_value[XATTR_SIZE_MAX];
+	char *cur_name;
+	char *xattr_list_end;
+
+	ret = llistxattr(file_name, xattr_list, XATTR_LIST_MAX);
+	if (ret < 0) {
+		if(errno == ENOTSUP)
+			return 0;
+		fprintf(stderr, "get a list of xattr failed for %s\n",
+			file_name);
+		return ret;
+	}
+	if (ret == 0)
+		return ret;
+
+	xattr_list_end = xattr_list + ret;
+	for (cur_name = xattr_list; cur_name < xattr_list_end;
+	     cur_name += cur_name_len + 1) {
+		cur_name_len = strlen(cur_name);
+
+		ret = lgetxattr(file_name, cur_name, cur_value,
+				XATTR_SIZE_MAX);
+		if (ret < 0) {
+			if(errno == ENOTSUP)
+				return 0;
+			fprintf(stderr, "get a xattr value failed for %s attr %s\n",
+				file_name, cur_name);
+			return ret;
+		}
+
+		/* 'ret' is the length of the value just read */
+		ret = btrfs_insert_xattr_item(trans, root, cur_name,
+					      cur_name_len, cur_value,
+					      ret, objectid);
+		if (ret) {
+			fprintf(stderr, "insert a xattr item failed for %s\n",
+				file_name);
+			/* do not let a later attribute hide this error */
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Store the target of symlink @path_name as an inline extent of inode
+ * @objectid.  The target is stored including its terminating NUL.
+ *
+ * Returns the result of btrfs_insert_inline_extent(), or -1 when the link
+ * cannot be read, is empty, or does not fit into PATH_MAX - 1 bytes.  An
+ * empty result used to be reported as a failure but returned as 0.
+ */
+static int add_symbolic_link(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     u64 objectid, const char *path_name)
+{
+	char buf[PATH_MAX];
+	ssize_t len;
+
+	len = readlink(path_name, buf, sizeof(buf));
+	if (len <= 0) {
+		fprintf(stderr, "readlink failed for %s\n", path_name);
+		return -1;
+	}
+	/* a completely filled buffer means the target may be truncated */
+	if ((size_t)len >= sizeof(buf)) {
+		fprintf(stderr, "symlink too long for %s\n", path_name);
+		return -1;
+	}
+
+	buf[len] = '\0'; /* readlink does not do it for us */
+	return btrfs_insert_inline_extent(trans, root, objectid, 0,
+					  buf, len + 1);
+}
+
+/*
+ * Copy the contents of regular file @path_name into inode @objectid.
+ *
+ * An empty file needs no extents.  A file that fits into
+ * BTRFS_MAX_INLINE_DATA_SIZE() is stored as one inline extent.  Anything
+ * larger is copied sector by sector into extents of at most 1MB, with a
+ * checksum per sector; the last sector is zero padded.
+ *
+ * @btrfs_inode is handed to btrfs_record_file_extent() for every extent.
+ * @parent_inum and @out_fd are not used.
+ *
+ * Returns 0 on success, -1 when the file cannot be opened, -errno when
+ * reading it fails, -ENOMEM, or the error of the failing btrfs helper.
+ */
+static int add_file_items(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  struct btrfs_inode_item *btrfs_inode, u64 objectid,
+			  ino_t parent_inum, struct stat *st,
+			  const char *path_name, int out_fd)
+{
+	int ret = -1;
+	ssize_t ret_read;
+	u64 bytes_read = 0;
+	struct btrfs_key key;
+	u64 blocks;
+	u32 sectorsize = root->sectorsize;
+	u64 first_block = 0;
+	u64 file_pos = 0;
+	u64 cur_bytes;
+	u64 total_bytes;
+	struct extent_buffer *eb = NULL;
+	int fd;
+
+	if (st->st_size == 0)
+		return 0;
+
+	fd = open(path_name, O_RDONLY);
+	if (fd == -1) {
+		fprintf(stderr, "%s open failed\n", path_name);
+		return ret;
+	}
+
+	if (st->st_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
+		char *buffer = malloc(st->st_size);
+
+		if (!buffer) {
+			ret = -ENOMEM;
+			goto end;
+		}
+
+		ret_read = pread64(fd, buffer, st->st_size, bytes_read);
+		if (ret_read == -1) {
+			ret = -errno;
+			fprintf(stderr, "%s read failed\n", path_name);
+			free(buffer);
+			goto end;
+		}
+
+		ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
+						 buffer, st->st_size);
+		free(buffer);
+		goto end;
+	}
+
+	/*
+	 * round up our st_size to the FS blocksize; the count is kept in
+	 * 64 bits so that it cannot be truncated for very large files
+	 */
+	blocks = st->st_size / sectorsize;
+	if (st->st_size % sectorsize)
+		blocks += 1;
+	total_bytes = blocks * sectorsize;
+
+	/*
+	 * do our IO in extent buffers so it can work
+	 * against any raid type
+	 */
+	eb = calloc(1, sizeof(*eb) + sectorsize);
+	if (!eb) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	/* total_bytes is at least one sector here, so run at least once */
+	do {
+		/*
+		 * keep our extent size at 1MB max, this makes it easier to
+		 * work inside the tiny block groups created during mkfs
+		 */
+		cur_bytes = min(total_bytes, 1024ULL * 1024);
+		ret = btrfs_reserve_extent(trans, root, cur_bytes, 0, 0,
+					   (u64)-1, &key, 1);
+		if (ret)
+			goto end;
+
+		first_block = key.objectid;
+		bytes_read = 0;
+
+		while (bytes_read < cur_bytes) {
+
+			memset(eb->data, 0, sectorsize);
+
+			ret_read = pread64(fd, eb->data, sectorsize,
+					   file_pos + bytes_read);
+			if (ret_read == -1) {
+				/*
+				 * 'ret' still holds 0 from the calls above;
+				 * without this the failure would be returned
+				 * as success
+				 */
+				ret = -errno;
+				fprintf(stderr, "%s read failed\n", path_name);
+				goto end;
+			}
+
+			eb->start = first_block + bytes_read;
+			eb->len = sectorsize;
+
+			/*
+			 * we're doing the csum before we record the extent,
+			 * but that's ok
+			 */
+			ret = btrfs_csum_file_block(trans,
+					root->fs_info->csum_root,
+					first_block + bytes_read + sectorsize,
+					first_block + bytes_read,
+					eb->data, sectorsize);
+			if (ret)
+				goto end;
+
+			ret = write_and_map_eb(trans, root, eb);
+			if (ret) {
+				fprintf(stderr, "output file write failed\n");
+				goto end;
+			}
+
+			bytes_read += sectorsize;
+		}
+
+		if (bytes_read) {
+			ret = btrfs_record_file_extent(trans, root, objectid,
+						       btrfs_inode, file_pos,
+						       first_block, cur_bytes);
+			if (ret)
+				goto end;
+		}
+
+		file_pos += cur_bytes;
+		total_bytes -= cur_bytes;
+	} while (total_bytes);
+
+end:
+	free(eb);
+	close(fd);
+	return ret;
+}
+
+/*
+ * Join @dir and @name into a newly allocated path.
+ *
+ * One '/' is inserted between the two parts unless @dir already ends in
+ * one.  An empty @dir yields a copy of @name; the old code read
+ * dir[strlen(dir) - 1], which is out of bounds for an empty string.
+ *
+ * Returns the path, to be released with free(), or NULL when out of memory.
+ */
+static char *make_path(char *dir, char *name)
+{
+	size_t dir_len = strlen(dir);
+	size_t name_len = strlen(name);
+	int need_sep = dir_len > 0 && dir[dir_len - 1] != '/';
+	char *path;
+
+	path = malloc(dir_len + need_sep + name_len + 1);
+	if (!path)
+		return NULL;
+
+	memcpy(path, dir, dir_len);
+	if (need_sep)
+		path[dir_len++] = '/';
+	/* name_len + 1 copies the terminating NUL as well */
+	memcpy(path + dir_len, name, name_len + 1);
+	return path;
+}
+
+/*
+ * Copy the directory tree rooted at @dir_name into @root.
+ *
+ * Directories are processed in FIFO order through the queue anchored at
+ * @dir_head: the source directory is queued first and every subdirectory
+ * found is appended.  Each entry gets its directory items, inode item and
+ * xattrs; regular files additionally get their data via add_file_items()
+ * and symlinks their target via add_symbolic_link().  The size of the root
+ * directory inode is set from calculate_dir_inode_size().
+ *
+ * Side effect: chdir() is called for every directory processed, so the
+ * working directory of the process changes.
+ *
+ * Returns 0 on success and 1 on any failure.  On failure, entries still
+ * queued on @dir_head are left there for the caller to free.
+ */
+static int traverse_directory(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, char *dir_name,
+			      struct directory_name_entry *dir_head, int out_fd)
+{
+	int ret = 0;
+
+	struct btrfs_inode_item cur_inode;
+	struct btrfs_inode_item *inode_item;
+	int count, i, dir_index_cnt;
+	struct direct **files;
+	struct stat st;
+	struct directory_name_entry *dir_entry, *parent_dir_entry;
+	struct direct *cur_file;
+	ino_t parent_inum, cur_inum;
+	ino_t highest_inum = 0;
+	char *parent_dir_name;
+	char real_path[PATH_MAX];
+	struct btrfs_path path;
+	struct extent_buffer *leaf;
+	struct btrfs_key root_dir_key;
+	u64 root_dir_inode_size = 0;
+
+	/* Add list for source directory */
+	dir_entry = malloc(sizeof(struct directory_name_entry));
+	if (!dir_entry)
+		return -ENOMEM;
+	dir_entry->dir_name = dir_name;
+	/* on success the root entry's path aliases the local real_path buffer */
+	dir_entry->path = realpath(dir_name, real_path);
+	if (!dir_entry->path) {
+		fprintf(stderr, "get directory real path error\n");
+		ret = -1;
+		goto fail_no_dir;
+	}
+
+	parent_inum = highest_inum + BTRFS_FIRST_FREE_OBJECTID;
+	dir_entry->inum = parent_inum;
+	list_add_tail(&dir_entry->list, &dir_head->list);
+
+	btrfs_init_path(&path);
+
+	root_dir_key.objectid = btrfs_root_dirid(&root->root_item);
+	root_dir_key.offset = 0;
+	btrfs_set_key_type(&root_dir_key, BTRFS_INODE_ITEM_KEY);
+	ret = btrfs_lookup_inode(trans, root, &path, &root_dir_key, 1);
+	if (ret) {
+		fprintf(stderr, "root dir lookup error\n");
+		/*
+		 * Unlink before freeing: the caller frees whatever is still
+		 * queued, which would otherwise free this entry twice.
+		 */
+		list_del(&dir_entry->list);
+		goto fail_no_dir;
+	}
+
+	leaf = path.nodes[0];
+	inode_item = btrfs_item_ptr(leaf, path.slots[0],
+				    struct btrfs_inode_item);
+
+	root_dir_inode_size = calculate_dir_inode_size(dir_name);
+	btrfs_set_inode_size(leaf, inode_item, root_dir_inode_size);
+	btrfs_mark_buffer_dirty(leaf);
+
+	btrfs_release_path(&path);
+
+	do {
+		parent_dir_entry = list_entry(dir_head->list.next,
+					      struct directory_name_entry,
+					      list);
+		list_del(&parent_dir_entry->list);
+
+		parent_inum = parent_dir_entry->inum;
+		parent_dir_name = parent_dir_entry->dir_name;
+		if (chdir(parent_dir_entry->path)) {
+			fprintf(stderr, "chdir error for %s\n",
+				parent_dir_name);
+			ret = -1;
+			goto fail_no_files;
+		}
+
+		count = scandir(parent_dir_entry->path, &files,
+				directory_select, NULL);
+		if (count == -1)
+		{
+			fprintf(stderr, "scandir for %s failed: %s\n",
+				parent_dir_name, strerror (errno));
+			ret = -1;
+			/* files was never set, so there is no name list to free */
+			goto fail_no_files;
+		}
+
+		for (i = 0; i < count; i++) {
+			cur_file = files[i];
+
+			if (lstat(cur_file->d_name, &st) == -1) {
+				fprintf(stderr, "lstat failed for file %s\n",
+					cur_file->d_name);
+				ret = -1;
+				goto fail;
+			}
+
+			cur_inum = st.st_ino;
+			ret = add_directory_items(trans, root,
+						  cur_inum, parent_inum,
+						  cur_file->d_name,
+						  &st, &dir_index_cnt);
+			if (ret) {
+				fprintf(stderr, "add_directory_items failed\n");
+				goto fail;
+			}
+
+			ret = add_inode_items(trans, root, &st,
+					      cur_file->d_name, cur_inum,
+					      parent_inum, dir_index_cnt,
+					      &cur_inode);
+			if (ret == -EEXIST) {
+				BUG_ON(st.st_nlink <= 1);
+				/*
+				 * An already-added hard link is not an error;
+				 * do not let -EEXIST leak into the return value.
+				 */
+				ret = 0;
+				continue;
+			}
+			if (ret) {
+				fprintf(stderr, "add_inode_items failed\n");
+				goto fail;
+			}
+
+			ret = add_xattr_item(trans, root,
+					     cur_inum, cur_file->d_name);
+			if (ret) {
+				fprintf(stderr, "add_xattr_item failed\n");
+				if(ret != -ENOTSUP)
+					goto fail;
+				/* missing xattr support is tolerated, so clear it */
+				ret = 0;
+			}
+
+			if (S_ISDIR(st.st_mode)) {
+				dir_entry = malloc(sizeof(struct directory_name_entry));
+				if (!dir_entry) {
+					ret = -ENOMEM;
+					goto fail;
+				}
+				dir_entry->path = make_path(parent_dir_entry->path,
+							    cur_file->d_name);
+				if (!dir_entry->path) {
+					free(dir_entry);
+					ret = -ENOMEM;
+					goto fail;
+				}
+				/*
+				 * d_name is released together with the name list
+				 * below, so name the entry by its own path, which
+				 * lives as long as the entry does.
+				 */
+				dir_entry->dir_name = dir_entry->path;
+				dir_entry->inum = cur_inum;
+				list_add_tail(&dir_entry->list, &dir_head->list);
+			} else if (S_ISREG(st.st_mode)) {
+				ret = add_file_items(trans, root, &cur_inode,
+						     cur_inum, parent_inum, &st,
+						     cur_file->d_name, out_fd);
+				if (ret) {
+					fprintf(stderr, "add_file_items failed\n");
+					goto fail;
+				}
+			} else if (S_ISLNK(st.st_mode)) {
+				ret = add_symbolic_link(trans, root,
+							cur_inum, cur_file->d_name);
+				if (ret) {
+					fprintf(stderr, "add_symbolic_link failed\n");
+					goto fail;
+				}
+			}
+		}
+
+		free_namelist(files, count);
+		/* only subdirectory paths are heap-allocated (by make_path) */
+		if (parent_dir_entry->path != real_path)
+			free(parent_dir_entry->path);
+		free(parent_dir_entry);
+
+		/* index_cnt is declared outside this function */
+		index_cnt = 2;
+
+	} while (!list_empty(&dir_head->list));
+
+out:
+	return !!ret;
+fail:
+	free_namelist(files, count);
+fail_no_files:
+	if (parent_dir_entry->path != real_path)
+		free(parent_dir_entry->path);
+	free(parent_dir_entry);
+	goto out;
+fail_no_dir:
+	free(dir_entry);
+	goto out;
+}
+
+/*
+ * Open @output_name for reading and writing, creating it when it does not
+ * exist with permission bits rw-rw-r--.  Returns whatever open() returns.
+ */
+static int open_target(char *output_name)
+{
+	const mode_t create_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP |
+				   S_IROTH;
+
+	return open(output_name, O_CREAT | O_RDWR, create_mode);
+}
+
+/*
+ * Allocate @num_of_meta_chunks metadata chunks followed by a single data
+ * chunk of @size_of_data bytes (raised to 8MB when smaller).
+ *
+ * Every chunk gets a block group, is added to the matching counter in
+ * @allocation and has its range marked dirty in the free space cache.
+ * Allocation or block group failures trip BUG_ON().  Returns the status of
+ * the last block group creation, which BUG_ON() has already checked.
+ */
+static int create_chunks(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root, u64 num_of_meta_chunks,
+			 u64 size_of_data,
+			 struct mkfs_allocation *allocation)
+{
+	const u64 min_data_bytes = 8 * 1024 * 1024;
+	const u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA;
+	const u64 data_flags = BTRFS_BLOCK_GROUP_DATA;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	u64 start;
+	u64 len;
+	u64 remaining;
+	int ret;
+
+	for (remaining = num_of_meta_chunks; remaining > 0; remaining--) {
+		ret = btrfs_alloc_chunk(trans, fs_info->extent_root,
+					&start, &len, meta_flags);
+		BUG_ON(ret);
+		ret = btrfs_make_block_group(trans, fs_info->extent_root, 0,
+					     meta_flags,
+					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+					     start, len);
+		allocation->metadata += len;
+		BUG_ON(ret);
+		set_extent_dirty(&fs_info->free_space_cache,
+				 start, start + len - 1, 0);
+	}
+
+	/* the data chunk is never smaller than the 8MB minimum */
+	if (size_of_data < min_data_bytes)
+		size_of_data = min_data_bytes;
+
+	ret = btrfs_alloc_data_chunk(trans, fs_info->extent_root,
+				     &start, size_of_data, data_flags);
+	BUG_ON(ret);
+	ret = btrfs_make_block_group(trans, fs_info->extent_root, 0,
+				     data_flags,
+				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+				     start, size_of_data);
+	allocation->data += size_of_data;
+	BUG_ON(ret);
+	set_extent_dirty(&fs_info->free_space_cache,
+			 start, start + size_of_data - 1, 0);
+	return ret;
+}
+
+/*
+ * Populate the filesystem behind @root with the contents of @source_dir.
+ *
+ * The whole copy runs in one transaction: traverse_directory() does the
+ * work and the transaction is committed when it succeeds.  On a traversal
+ * failure the directory entries still queued are freed here.
+ *
+ * Returns 0 on success and -1 on failure, after printing a message.
+ */
+static int make_image(char *source_dir, struct btrfs_root *root, int out_fd)
+{
+	int ret;
+	struct btrfs_trans_handle *trans;
+
+	struct stat root_st;
+
+	struct directory_name_entry dir_head;
+
+	struct directory_name_entry *dir_entry = NULL;
+
+	ret = lstat(source_dir, &root_st);
+	if (ret) {
+		fprintf(stderr, "unable to lstat the %s\n", source_dir);
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&dir_head.list);
+
+	trans = btrfs_start_transaction(root, 1);
+	/* same check main() applies to its own transactions */
+	if (!trans) {
+		fprintf(stderr, "failed to start transaction\n");
+		goto out;
+	}
+	ret = traverse_directory(trans, root, source_dir, &dir_head, out_fd);
+	if (ret) {
+		fprintf(stderr, "unable to traverse_directory\n");
+		goto fail;
+	}
+	btrfs_commit_transaction(trans, root);
+
+	if (verbose)
+		printf("Making image is completed.\n");
+	return 0;
+fail:
+	/* drop the directories that were queued but never processed */
+	while (!list_empty(&dir_head.list)) {
+		dir_entry = list_entry(dir_head.list.next,
+				       struct directory_name_entry, list);
+		list_del(&dir_entry->list);
+		free(dir_entry);
+	}
+out:
+	fprintf(stderr, "Making image is aborted.\n");
+	return -1;
+}
+
+/*
+ * State and callback for the ftw() walk that estimates how much space a
+ * source directory needs.
+ *
+ * Only entries reported as FTW_F or FTW_D are counted, each rounded up to
+ * fs_block_size; every other entry type is ignored.  The result is a rough,
+ * best-effort estimate and does not guarantee that populating btrfs from
+ * the tree cannot still run out of space.
+ */
+static u64 global_total_size;
+static u64 fs_block_size;
+static int ftw_add_entry_size(const char *fpath, const struct stat *st,
+			      int type)
+{
+	switch (type) {
+	case FTW_F:
+	case FTW_D:
+		global_total_size += round_up(st->st_size, fs_block_size);
+		break;
+	default:
+		break;
+	}
+
+	/* always 0, so the walk is never stopped by this callback */
+	return 0;
+}
+
+/*
+ * Estimate the image size needed to hold the tree under @dir_name.
+ *
+ * The tree is walked with ftw() and its entry sizes, rounded up to
+ * @sectorsize, are summed.  Data is sized in whole 8MB chunks; metadata is
+ * estimated at half the data size, also in 8MB chunks, minus the one 8MB
+ * metadata chunk that is treated as already allocated.
+ *
+ * @num_of_meta_chunks_ret receives the number of extra metadata chunks and
+ * @size_of_data_ret the data size in bytes.  Returns the total size: a fixed
+ * 20MB plus the data and extra metadata chunks.  Exits the process when the
+ * walk fails.
+ */
+static u64 size_sourcedir(char *dir_name, u64 sectorsize,
+			  u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret)
+{
+	const u64 chunk_bytes = 8 * 1024 * 1024;		/* 8MB */
+	const u64 preallocated_meta_bytes = 8 * 1024 * 1024;	/* 8MB */
+	const u64 base_bytes = 20 * 1024 * 1024;		/* 20MB */
+	const u64 preallocated_meta_chunks =
+		preallocated_meta_bytes / chunk_bytes;
+	u64 tree_bytes;
+	u64 data_chunks;
+	u64 meta_chunks;
+
+	global_total_size = 0;
+	fs_block_size = sectorsize;
+	if (ftw(dir_name, ftw_add_entry_size, 10) < 0) {
+		fprintf(stderr, "ftw subdir walk of '%s' failed: %s\n",
+			dir_name, strerror(errno));
+		exit(1);
+	}
+	tree_bytes = global_total_size;
+
+	data_chunks = (tree_bytes + chunk_bytes - 1) / chunk_bytes;
+
+	/* half of the data size, rounded up to whole chunks */
+	meta_chunks = (tree_bytes / 2 + chunk_bytes - 1) / chunk_bytes;
+	if (meta_chunks > preallocated_meta_chunks)
+		meta_chunks -= preallocated_meta_chunks;
+	else
+		meta_chunks = 0;
+
+	*num_of_meta_chunks_ret = meta_chunks;
+	*size_of_data_ret = data_chunks * chunk_bytes;
+	return base_bytes + (data_chunks * chunk_bytes) +
+		(meta_chunks * chunk_bytes);
+}
+
+/*
+ * Write zeroes over the first @size bytes of @out_fd.
+ *
+ * The offset is tracked as a u64 so that sizes beyond what an int count of
+ * 4096-byte blocks can express are zeroed in full, and a final partial
+ * block is written as well.  Returns 0 on success, or -EIO as soon as a
+ * write fails or comes up short.
+ */
+static int zero_output_file(int out_fd, u64 size)
+{
+	char buf[4096];
+	u64 location = 0;
+
+	memset(buf, 0, sizeof(buf));
+	while (location < size) {
+		size_t chunk = sizeof(buf);
+		ssize_t written;
+
+		/* the last block may be shorter than the buffer */
+		if (size - location < chunk)
+			chunk = size - location;
+
+		written = pwrite64(out_fd, buf, chunk, location);
+		if (written != (ssize_t)chunk)
+			return -EIO;
+		location += chunk;
+	}
+	return 0;
+}
+
+/*
+ * Report whether @file sits on a non-rotational disk.
+ *
+ * The whole-disk name behind @file is resolved through libblkid and the
+ * first byte of /sys/block/<disk>/queue/rotational is read.  Returns 1 when
+ * that byte is '0', and 0 otherwise, including when any lookup step fails.
+ */
+static int is_ssd(const char *file)
+{
+	blkid_probe probe;
+	char wholedisk[PATH_MAX];
+	char sysfs_path[PATH_MAX];
+	dev_t devno;
+	int fd;
+	char rotational;
+	ssize_t nread;
+	int ret;
+
+	probe = blkid_new_probe_from_filename(file);
+	if (!probe)
+		return 0;
+
+	/* Device number of this disk (possibly a partition) */
+	devno = blkid_probe_get_devno(probe);
+	if (!devno) {
+		blkid_free_probe(probe);
+		return 0;
+	}
+
+	/* Get whole disk name (not full path) for this devno */
+	ret = blkid_devno_to_wholedisk(devno,
+			wholedisk, sizeof(wholedisk), NULL);
+	if (ret) {
+		blkid_free_probe(probe);
+		return 0;
+	}
+
+	snprintf(sysfs_path, PATH_MAX, "/sys/block/%s/queue/rotational",
+		 wholedisk);
+
+	blkid_free_probe(probe);
+
+	fd = open(sysfs_path, O_RDONLY);
+	if (fd < 0) {
+		return 0;
+	}
+
+	/*
+	 * Keep the result signed: comparing read()'s -1 against an unsigned
+	 * sizeof would hide the error.
+	 */
+	nread = read(fd, &rotational, sizeof(rotational));
+	close(fd);
+	if (nread != 1)
+		return 0;
+
+	/*
+	 * A lone char is not a NUL-terminated string, so compare the digit
+	 * directly instead of handing its address to atoi().
+	 */
+	return rotational == '0';
+}
+
+/*
+ * list_sort() comparator ordering btrfs devices by ascending devid.
+ *
+ * Returns -1, 0 or 1.  The ids are compared rather than subtracted because
+ * a 64-bit difference does not fit the int return value and could come out
+ * with the wrong sign.  @priv is unused.
+ */
+static int _cmp_device_by_id(void *priv, struct list_head *a,
+			     struct list_head *b)
+{
+	u64 id_a = list_entry(a, struct btrfs_device, dev_list)->devid;
+	u64 id_b = list_entry(b, struct btrfs_device, dev_list)->devid;
+
+	if (id_a < id_b)
+		return -1;
+	return id_a > id_b;
+}
+
+/*
+ * Print the number of devices in the filesystem behind @root, then one line
+ * per device with its id, size and path.
+ *
+ * Side effect: the filesystem's device list is sorted in place by devid
+ * before it is printed.
+ */
+static void list_all_devices(struct btrfs_root *root)
+{
+	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_device *device;
+	int number_of_devices = 0;
+
+	fs_devices = root->fs_info->fs_devices;
+
+	list_for_each_entry(device, &fs_devices->devices, dev_list)
+		number_of_devices++;
+
+	list_sort(NULL, &fs_devices->devices, _cmp_device_by_id);
+
+	printf("Number of devices: %d\n", number_of_devices);
+	printf("Devices:\n");
+	printf(" ID SIZE PATH\n");
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		/* cast as main() does so the argument matches %llu */
+		printf(" %3llu %10s %s\n",
+			(unsigned long long)device->devid,
+			pretty_size(device->total_bytes),
+			device->name);
+	}
+
+	printf("\n");
+}
+
+/*
+ * Decide whether block group item @bgi in @node is a temporary one.
+ *
+ * A block group counts as temporary when both hold:
+ * 1) it is empty (its used byte count is zero), and
+ * 2) its profile bits differ from the profile requested for its type:
+ *    @data_profile for data and mixed data+metadata groups,
+ *    @meta_profile for metadata groups, @sys_profile for system groups.
+ *
+ * Sizes are deliberately not compared.  When the requested profile matches
+ * the group's own profile, the group is never reported as temporary.
+ *
+ * Returns 1 for a temporary block group and 0 otherwise, including for
+ * groups whose type is none of the above.
+ */
+static int is_temp_block_group(struct extent_buffer *node,
+			       struct btrfs_block_group_item *bgi,
+			       u64 data_profile, u64 meta_profile,
+			       u64 sys_profile)
+{
+	u64 flags = btrfs_disk_block_group_flags(node, bgi);
+	u64 type_bits = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+	u64 profile_bits = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+	u64 wanted;
+
+	/* a block group holding data is never temporary */
+	if (btrfs_disk_block_group_used(node, bgi) != 0)
+		return 0;
+
+	/* pick the profile this type of block group is expected to have */
+	switch (type_bits) {
+	case BTRFS_BLOCK_GROUP_DATA:
+	case BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA:
+		wanted = data_profile;
+		break;
+	case BTRFS_BLOCK_GROUP_METADATA:
+		wanted = meta_profile;
+		break;
+	case BTRFS_BLOCK_GROUP_SYSTEM:
+		wanted = sys_profile;
+		break;
+	default:
+		return 0;
+	}
+
+	wanted &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
+	return profile_bits != wanted;
+}
+
+/*
+ * Advance @path to the next block group item in @root.
+ *
+ * The item @path currently points at is always stepped over, even when it
+ * is a block group item itself.  Returns 0 with @path on the next block
+ * group item, or the non-zero result of btrfs_next_item() once that call
+ * stops the walk.
+ */
+static int next_block_group(struct btrfs_root *root,
+			    struct btrfs_path *path)
+{
+	struct btrfs_key cur;
+	int ret;
+
+	do {
+		ret = btrfs_next_item(root, path);
+		if (ret)
+			break;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &cur, path->slots[0]);
+	} while (cur.type != BTRFS_BLOCK_GROUP_ITEM_KEY);
+
+	return ret;
+}
+
+/*
+ * Free every block group that is_temp_block_group() reports as temporary.
+ *
+ * The block group items of the extent root are visited in key order inside
+ * a single transaction, which is committed before returning if it was
+ * started.  @data_profile, @meta_profile and @sys_profile are passed through
+ * to is_temp_block_group(); @alloc is not used.
+ *
+ * Returns 0 when the walk reaches the end, -ENOMEM when no path could be
+ * allocated, or the negative value returned by a failing search, iteration
+ * or free call.
+ */
+static int cleanup_temp_chunks(struct btrfs_fs_info *fs_info,
+ struct mkfs_allocation *alloc,
+ u64 data_profile, u64 meta_profile,
+ u64 sys_profile)
+{
+ struct btrfs_trans_handle *trans = NULL;
+ struct btrfs_block_group_item *bgi;
+ struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_path *path;
+ int ret = 0;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* NOTE(review): the result is not checked here, unlike in main() */
+ trans = btrfs_start_transaction(root, 1);
+
+ /* start the walk at the lowest possible block group key */
+ key.objectid = 0;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = 0;
+
+ while (1) {
+ /*
+ * as the rest of the loop may modify the tree, we need to
+ * start a new search each time.
+ */
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * NOTE(review): the slot is read without checking that it lies
+ * within the leaf - confirm btrfs_search_slot() guarantees that.
+ */
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+ path->slots[0]);
+ if (found_key.objectid < key.objectid)
+ goto out;
+ if (found_key.type != BTRFS_BLOCK_GROUP_ITEM_KEY) {
+ ret = next_block_group(root, path);
+ if (ret < 0)
+ goto out;
+ /* a positive result is reported as success: stop the walk */
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+ path->slots[0]);
+ }
+
+ bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_block_group_item);
+ if (is_temp_block_group(path->nodes[0], bgi,
+ data_profile, meta_profile,
+ sys_profile)) {
+ ret = btrfs_free_block_group(trans, fs_info,
+ found_key.objectid, found_key.offset);
+ if (ret < 0)
+ goto out;
+ }
+ btrfs_release_path(path);
+ /* continue the search at objectid + offset of the item just handled */
+ key.objectid = found_key.objectid + found_key.offset;
+ }
+out:
+ if (trans)
+ btrfs_commit_transaction(trans, root);
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * mkfs.btrfs entry point.
+ *
+ * Steps, in order:
+ *  1. parse the command line options;
+ *  2. validate the UUID and every device argument;
+ *  3. pick default data/metadata profiles from the device count, the SSD
+ *     check on the first device and the mixed flag;
+ *  4. prepare the first device, or with -r size and zero the image file;
+ *  5. write the initial filesystem with make_btrfs() and open it;
+ *  6. create the initial block groups and the root directory;
+ *  7. add the remaining devices and create the raid groups (without -r);
+ *  8. with -r, create chunks and copy the source directory into the image;
+ *  9. remove temporary chunks and, unless quiet, print a summary.
+ *
+ * Setup errors terminate the process through exit(1).  Returns 0 on
+ * success and 1 when removing the temporary chunks failed.
+ */
+int main(int ac, char **av)
+{
+	char *file;
+	struct btrfs_root *root;
+	struct btrfs_trans_handle *trans;
+	char *label = NULL;
+	u64 block_count = 0;
+	u64 dev_block_count = 0;
+	u64 blocks[7];
+	u64 alloc_start = 0;
+	u64 metadata_profile = 0;
+	u64 data_profile = 0;
+	u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
+			BTRFS_MKFS_DEFAULT_NODE_SIZE);
+	u32 sectorsize = 4096;
+	u32 stripesize = 4096;
+	int zero_end = 1;
+	int fd;
+	int ret;
+	int status = 0;
+	int i;
+	int mixed = 0;
+	int nodesize_forced = 0;
+	int data_profile_opt = 0;
+	int metadata_profile_opt = 0;
+	int discard = 1;
+	int ssd = 0;
+	int force_overwrite = 0;
+	char *source_dir = NULL;
+	int source_dir_set = 0;
+	u64 num_of_meta_chunks = 0;
+	u64 size_of_data = 0;
+	u64 source_dir_size = 0;
+	int dev_cnt = 0;
+	int saved_optind;
+	char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
+	u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
+	struct mkfs_allocation allocation = { 0 };
+	struct btrfs_mkfs_config mkfs_cfg;
+
+	while(1) {
+		int c;
+		static const struct option long_options[] = {
+			{ "alloc-start", required_argument, NULL, 'A'},
+			{ "byte-count", required_argument, NULL, 'b' },
+			{ "force", no_argument, NULL, 'f' },
+			{ "leafsize", required_argument, NULL, 'l' },
+			{ "label", required_argument, NULL, 'L'},
+			{ "metadata", required_argument, NULL, 'm' },
+			{ "mixed", no_argument, NULL, 'M' },
+			{ "nodesize", required_argument, NULL, 'n' },
+			{ "sectorsize", required_argument, NULL, 's' },
+			{ "data", required_argument, NULL, 'd' },
+			{ "version", no_argument, NULL, 'V' },
+			{ "rootdir", required_argument, NULL, 'r' },
+			{ "nodiscard", no_argument, NULL, 'K' },
+			{ "features", required_argument, NULL, 'O' },
+			{ "uuid", required_argument, NULL, 'U' },
+			{ "quiet", 0, NULL, 'q' },
+			{ "help", no_argument, NULL, GETOPT_VAL_HELP },
+			{ NULL, 0, NULL, 0}
+		};
+
+		c = getopt_long(ac, av, "A:b:fl:n:s:m:d:L:O:r:U:VMKq",
+				long_options, NULL);
+		if (c < 0)
+			break;
+		switch(c) {
+			case 'A':
+				alloc_start = parse_size(optarg);
+				break;
+			case 'f':
+				force_overwrite = 1;
+				break;
+			case 'd':
+				data_profile = parse_profile(optarg);
+				data_profile_opt = 1;
+				break;
+			case 'l':
+				fprintf(stderr,
+			"WARNING: --leafsize is deprecated, use --nodesize\n");
+				/* fallthrough: --leafsize is an alias of --nodesize */
+			case 'n':
+				nodesize = parse_size(optarg);
+				nodesize_forced = 1;
+				break;
+			case 'L':
+				label = parse_label(optarg);
+				break;
+			case 'm':
+				metadata_profile = parse_profile(optarg);
+				metadata_profile_opt = 1;
+				break;
+			case 'M':
+				mixed = 1;
+				break;
+			case 'O': {
+				char *orig = strdup(optarg);
+				char *tmp = orig;
+
+				tmp = btrfs_parse_fs_features(tmp, &features);
+				if (tmp) {
+					fprintf(stderr,
+						"Unrecognized filesystem feature '%s'\n",
+							tmp);
+					free(orig);
+					exit(1);
+				}
+				free(orig);
+				if (features & BTRFS_FEATURE_LIST_ALL) {
+					btrfs_list_all_fs_features(0);
+					exit(0);
+				}
+				break;
+				}
+			case 's':
+				sectorsize = parse_size(optarg);
+				break;
+			case 'b':
+				block_count = parse_size(optarg);
+				zero_end = 0;
+				break;
+			case 'V':
+				print_version();
+				break;
+			case 'r':
+				source_dir = optarg;
+				source_dir_set = 1;
+				break;
+			case 'U':
+				/*
+				 * fs_uuid starts zero-filled and the last byte
+				 * is never written, so it stays terminated.
+				 */
+				strncpy(fs_uuid, optarg,
+					BTRFS_UUID_UNPARSED_SIZE - 1);
+				break;
+			case 'K':
+				discard = 0;
+				break;
+			case 'q':
+				verbose = 0;
+				break;
+			case GETOPT_VAL_HELP:
+			default:
+				print_usage(c != GETOPT_VAL_HELP);
+		}
+	}
+
+	if (verbose) {
+		printf("%s\n", PACKAGE_STRING);
+		printf("See %s for more information.\n\n", PACKAGE_URL);
+	}
+
+	sectorsize = max(sectorsize, (u32)sysconf(_SC_PAGESIZE));
+	saved_optind = optind;
+	dev_cnt = ac - optind;
+	if (dev_cnt == 0)
+		print_usage(1);
+
+	if (source_dir_set && dev_cnt > 1) {
+		fprintf(stderr,
+			"The -r option is limited to a single device\n");
+		exit(1);
+	}
+
+	if (*fs_uuid) {
+		uuid_t dummy_uuid;
+
+		if (uuid_parse(fs_uuid, dummy_uuid) != 0) {
+			fprintf(stderr, "could not parse UUID: %s\n", fs_uuid);
+			exit(1);
+		}
+		if (!test_uuid_unique(fs_uuid)) {
+			fprintf(stderr, "non-unique UUID: %s\n", fs_uuid);
+			exit(1);
+		}
+	}
+
+	/* first pass over the devices: checks only, nothing is written */
+	while (dev_cnt-- > 0) {
+		file = av[optind++];
+		if (is_block_device(file) == 1)
+			if (test_dev_for_mkfs(file, force_overwrite))
+				exit(1);
+	}
+
+	/* rewind so the devices can be walked again below */
+	optind = saved_optind;
+	dev_cnt = ac - optind;
+
+	file = av[optind++];
+	ssd = is_ssd(file);
+
+	/*
+	 * Set default profiles according to number of added devices.
+	 * For mixed groups defaults are single/single.
+	 */
+	if (!mixed) {
+		if (!metadata_profile_opt) {
+			if (dev_cnt == 1 && ssd && verbose)
+				printf("Detected a SSD, turning off metadata "
+				"duplication. Mkfs with -m dup if you want to "
+				"force metadata duplication.\n");
+
+			metadata_profile = (dev_cnt > 1) ?
+					BTRFS_BLOCK_GROUP_RAID1 : (ssd) ?
+					0: BTRFS_BLOCK_GROUP_DUP;
+		}
+		if (!data_profile_opt) {
+			data_profile = (dev_cnt > 1) ?
+				BTRFS_BLOCK_GROUP_RAID0 : 0; /* raid0 or single */
+		}
+	} else {
+		u32 best_nodesize = max_t(u32, sysconf(_SC_PAGESIZE), sectorsize);
+
+		if (metadata_profile_opt || data_profile_opt) {
+			if (metadata_profile != data_profile) {
+				fprintf(stderr,
+	"ERROR: With mixed block groups data and metadata profiles must be the same\n");
+				exit(1);
+			}
+		}
+
+		if (!nodesize_forced)
+			nodesize = best_nodesize;
+	}
+
+	/*
+	 * FS features that can be set by other means than -O
+	 * just set the bit here
+	 */
+	if (mixed)
+		features |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
+
+	if ((data_profile | metadata_profile) &
+	    (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
+		features |= BTRFS_FEATURE_INCOMPAT_RAID56;
+	}
+
+	if (btrfs_check_nodesize(nodesize, sectorsize,
+				 features))
+		exit(1);
+
+	/* Check device/block_count after the nodesize is determined */
+	if (block_count && block_count < btrfs_min_dev_size(nodesize)) {
+		fprintf(stderr,
+			"Size '%llu' is too small to make a usable filesystem\n",
+			block_count);
+		fprintf(stderr,
+			"Minimum size for btrfs filesystem is %llu\n",
+			btrfs_min_dev_size(nodesize));
+		exit(1);
+	}
+	for (i = saved_optind; i < saved_optind + dev_cnt; i++) {
+		char *path;
+
+		path = av[i];
+		ret = test_minimum_size(path, nodesize);
+		if (ret < 0) {
+			fprintf(stderr, "Failed to check size for '%s': %s\n",
+				path, strerror(-ret));
+			exit (1);
+		}
+		if (ret > 0) {
+			fprintf(stderr,
+				"'%s' is too small to make a usable filesystem\n",
+				path);
+			fprintf(stderr,
+				"Minimum size for each btrfs device is %llu.\n",
+				btrfs_min_dev_size(nodesize));
+			exit(1);
+		}
+	}
+	ret = test_num_disk_vs_raid(metadata_profile, data_profile,
+			dev_cnt, mixed, ssd);
+	if (ret)
+		exit(1);
+
+	/* the first device is handled here; dev_cnt now counts the rest */
+	dev_cnt--;
+
+	if (!source_dir_set) {
+		/*
+		 * open without O_EXCL so that the problem should not
+		 * occur by the following processing.
+		 * (btrfs_register_one_device() fails if O_EXCL is on)
+		 */
+		fd = open(file, O_RDWR);
+		if (fd < 0) {
+			fprintf(stderr, "unable to open %s: %s\n", file,
+				strerror(errno));
+			exit(1);
+		}
+		ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count,
+					   block_count, discard);
+		if (ret) {
+			close(fd);
+			exit(1);
+		}
+		if (block_count && block_count > dev_block_count) {
+			fprintf(stderr, "%s is smaller than requested size\n", file);
+			close(fd);
+			exit(1);
+		}
+	} else {
+		fd = open_target(file);
+		if (fd < 0) {
+			fprintf(stderr, "unable to open the %s\n", file);
+			exit(1);
+		}
+
+		source_dir_size = size_sourcedir(source_dir, sectorsize,
+					     &num_of_meta_chunks, &size_of_data);
+		if(block_count < source_dir_size)
+			block_count = source_dir_size;
+		ret = zero_output_file(fd, block_count);
+		if (ret) {
+			fprintf(stderr, "unable to zero the output file\n");
+			exit(1);
+		}
+		/* our "device" is the new image file */
+		dev_block_count = block_count;
+	}
+
+	/* To create the first block group and chunk 0 in make_btrfs */
+	if (dev_block_count < BTRFS_MKFS_SYSTEM_GROUP_SIZE) {
+		fprintf(stderr, "device is too small to make filesystem\n");
+		exit(1);
+	}
+
+	blocks[0] = BTRFS_SUPER_INFO_OFFSET;
+	for (i = 1; i < 7; i++) {
+		blocks[i] = BTRFS_SUPER_INFO_OFFSET + 1024 * 1024 +
+			nodesize * i;
+	}
+
+	if (group_profile_max_safe_loss(metadata_profile) <
+		group_profile_max_safe_loss(data_profile)){
+		fprintf(stderr,
+			"WARNING: metadata has lower redundancy than data!\n\n");
+	}
+
+	mkfs_cfg.label = label;
+	mkfs_cfg.fs_uuid = fs_uuid;
+	memcpy(mkfs_cfg.blocks, blocks, sizeof(blocks));
+	mkfs_cfg.num_bytes = dev_block_count;
+	mkfs_cfg.nodesize = nodesize;
+	mkfs_cfg.sectorsize = sectorsize;
+	mkfs_cfg.stripesize = stripesize;
+	mkfs_cfg.features = features;
+
+	ret = make_btrfs(fd, &mkfs_cfg);
+	if (ret) {
+		fprintf(stderr, "error during mkfs: %s\n", strerror(-ret));
+		exit(1);
+	}
+
+	root = open_ctree(file, 0, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		close(fd);
+		exit(1);
+	}
+	root->fs_info->alloc_start = alloc_start;
+
+	ret = create_metadata_block_groups(root, mixed, &allocation);
+	if (ret) {
+		fprintf(stderr, "failed to create default block groups\n");
+		exit(1);
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		fprintf(stderr, "failed to start transaction\n");
+		exit(1);
+	}
+
+	ret = create_data_block_groups(trans, root, mixed, &allocation);
+	if (ret) {
+		fprintf(stderr, "failed to create default data block groups\n");
+		exit(1);
+	}
+
+	ret = make_root_dir(trans, root, &allocation);
+	if (ret) {
+		fprintf(stderr, "failed to setup the root directory\n");
+		exit(1);
+	}
+
+	btrfs_commit_transaction(trans, root);
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		fprintf(stderr, "failed to start transaction\n");
+		exit(1);
+	}
+
+	if (is_block_device(file) == 1)
+		btrfs_register_one_device(file);
+
+	if (dev_cnt == 0)
+		goto raid_groups;
+
+	/* second pass: add every remaining device to the filesystem */
+	while (dev_cnt-- > 0) {
+		file = av[optind++];
+
+		/*
+		 * open without O_EXCL so that the problem should not
+		 * occur by the following processing.
+		 * (btrfs_register_one_device() fails if O_EXCL is on)
+		 */
+		fd = open(file, O_RDWR);
+		if (fd < 0) {
+			fprintf(stderr, "unable to open %s: %s\n", file,
+				strerror(errno));
+			exit(1);
+		}
+		ret = btrfs_device_already_in_root(root, fd,
+						   BTRFS_SUPER_INFO_OFFSET);
+		if (ret) {
+			fprintf(stderr, "skipping duplicate device %s in FS\n",
+				file);
+			close(fd);
+			continue;
+		}
+		ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count,
+					   block_count, discard);
+		if (ret) {
+			close(fd);
+			exit(1);
+		}
+
+		ret = btrfs_add_to_fsid(trans, root, fd, file, dev_block_count,
+					sectorsize, sectorsize, sectorsize);
+		BUG_ON(ret);
+		if (verbose >= 2) {
+			struct btrfs_device *device;
+
+			device = container_of(root->fs_info->fs_devices->devices.next,
+					struct btrfs_device, dev_list);
+			printf("adding device %s id %llu\n", file,
+				(unsigned long long)device->devid);
+		}
+
+		if (is_block_device(file) == 1)
+			btrfs_register_one_device(file);
+	}
+
+raid_groups:
+	if (!source_dir_set) {
+		ret = create_raid_groups(trans, root, data_profile,
+				 metadata_profile, mixed, &allocation);
+		BUG_ON(ret);
+	}
+
+	ret = create_data_reloc_tree(trans, root);
+	BUG_ON(ret);
+
+	btrfs_commit_transaction(trans, root);
+
+	if (source_dir_set) {
+		trans = btrfs_start_transaction(root, 1);
+		if (!trans) {
+			fprintf(stderr, "failed to start transaction\n");
+			exit(1);
+		}
+		ret = create_chunks(trans, root,
+				    num_of_meta_chunks, size_of_data,
+				    &allocation);
+		BUG_ON(ret);
+		btrfs_commit_transaction(trans, root);
+
+		ret = make_image(source_dir, root, fd);
+		BUG_ON(ret);
+	}
+	ret = cleanup_temp_chunks(root->fs_info, &allocation, data_profile,
+				  metadata_profile, metadata_profile);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to cleanup temporary chunks\n");
+		/* remember the failure: ret is reused by close_ctree() below */
+		status = 1;
+		goto out;
+	}
+
+	if (verbose) {
+		char features_buf[64];
+
+		/* a NULL pointer must not be handed to %s when -L was not given */
+		printf("Label: %s\n", label ? label : "(null)");
+		printf("UUID: %s\n", fs_uuid);
+		printf("Node size: %u\n", nodesize);
+		printf("Sector size: %u\n", sectorsize);
+		printf("Filesystem size: %s\n",
+			pretty_size(btrfs_super_total_bytes(root->fs_info->super_copy)));
+		printf("Block group profiles:\n");
+		if (allocation.data)
+			printf(" Data: %-8s %16s\n",
+				btrfs_group_profile_str(data_profile),
+				pretty_size(allocation.data));
+		if (allocation.metadata)
+			printf(" Metadata: %-8s %16s\n",
+				btrfs_group_profile_str(metadata_profile),
+				pretty_size(allocation.metadata));
+		if (allocation.mixed)
+			printf(" Data+Metadata: %-8s %16s\n",
+				btrfs_group_profile_str(data_profile),
+				pretty_size(allocation.mixed));
+		printf(" System: %-8s %16s\n",
+			btrfs_group_profile_str(metadata_profile),
+			pretty_size(allocation.system));
+		printf("SSD detected: %s\n", ssd ? "yes" : "no");
+		btrfs_parse_features_to_string(features_buf, features);
+		printf("Incompat features: %s", features_buf);
+		printf("\n");
+
+		list_all_devices(root);
+	}
+
+out:
+	ret = close_ctree(root);
+	BUG_ON(ret);
+	btrfs_close_all_devices();
+	free(label);
+	return status;
+}
diff --git a/print-tree.c b/print-tree.c
new file mode 100644
index 00000000..6704ff68
--- /dev/null
+++ b/print-tree.c
@@ -0,0 +1,1137 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <uuid/uuid.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "utils.h"
+
+
+/*
+ * Print a short tag naming the file type recorded in directory item @di of
+ * @eb.  A type without a known tag is printed as a plain number.
+ */
+static void print_dir_item_type(struct extent_buffer *eb,
+				struct btrfs_dir_item *di)
+{
+	const char *tag = NULL;
+	u8 ft = btrfs_dir_type(eb, di);
+
+	if (ft == BTRFS_FT_REG_FILE)
+		tag = "FILE";
+	else if (ft == BTRFS_FT_DIR)
+		tag = "DIR";
+	else if (ft == BTRFS_FT_CHRDEV)
+		tag = "CHRDEV";
+	else if (ft == BTRFS_FT_BLKDEV)
+		tag = "BLKDEV";
+	else if (ft == BTRFS_FT_FIFO)
+		tag = "FIFO";
+	else if (ft == BTRFS_FT_SOCK)
+		tag = "SOCK";
+	else if (ft == BTRFS_FT_SYMLINK)
+		tag = "SYMLINK";
+	else if (ft == BTRFS_FT_XATTR)
+		tag = "XATTR";
+
+	if (tag)
+		printf("%s", tag);
+	else
+		printf("%u", ft);
+}
+
+/*
+ * Dump every directory entry packed into @item, starting at @di: the key it
+ * points at, its type, its name and, when present, its data payload.
+ * Names and payloads longer than the local buffer are shown truncated.
+ * Always returns 0.
+ */
+static int print_dir_item(struct extent_buffer *eb, struct btrfs_item *item,
+			  struct btrfs_dir_item *di)
+{
+	char namebuf[BTRFS_NAME_LEN];
+	struct btrfs_disk_key location;
+	u32 total = btrfs_item_size(eb, item);
+	u32 cur;
+	u32 entry_len;
+
+	for (cur = 0; cur < total; cur += entry_len) {
+		u32 name_len;
+		u32 data_len;
+		u32 shown;
+
+		btrfs_dir_item_key(eb, di, &location);
+		printf("\t\tlocation ");
+		btrfs_print_key(&location);
+		printf(" type ");
+		print_dir_item_type(eb, di);
+		printf("\n");
+
+		name_len = btrfs_dir_name_len(eb, di);
+		data_len = btrfs_dir_data_len(eb, di);
+
+		/* the name sits right behind the fixed-size header */
+		shown = name_len;
+		if (shown > sizeof(namebuf))
+			shown = sizeof(namebuf);
+		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), shown);
+		printf("\t\tnamelen %u datalen %u name: %.*s\n",
+		       name_len, data_len, shown, namebuf);
+
+		if (data_len) {
+			/* the payload follows the name */
+			shown = data_len;
+			if (shown > sizeof(namebuf))
+				shown = sizeof(namebuf);
+			read_extent_buffer(eb, namebuf,
+				(unsigned long)(di + 1) + name_len, shown);
+			printf("\t\tdata %.*s\n", shown, namebuf);
+		}
+
+		entry_len = sizeof(*di) + name_len + data_len;
+		di = (struct btrfs_dir_item *)((char *)di + entry_len);
+	}
+	return 0;
+}
+
+/*
+ * Dump every extended inode reference packed into @item, starting at
+ * @extref: directory index, parent objectid and name (truncated to the
+ * local buffer when longer).  Always returns 0.
+ */
+static int print_inode_extref_item(struct extent_buffer *eb,
+				   struct btrfs_item *item,
+				   struct btrfs_inode_extref *extref)
+{
+	char namebuf[BTRFS_NAME_LEN];
+	u32 total = btrfs_item_size(eb, item);
+	u32 cur;
+	u32 entry_len;
+
+	for (cur = 0; cur < total; cur += entry_len) {
+		u64 index = btrfs_inode_extref_index(eb, extref);
+		u32 name_len = btrfs_inode_extref_name_len(eb, extref);
+		u64 parent_objid = btrfs_inode_extref_parent(eb, extref);
+		u32 shown = name_len;
+
+		if (shown > sizeof(namebuf))
+			shown = sizeof(namebuf);
+
+		read_extent_buffer(eb, namebuf,
+				   (unsigned long)(extref->name), shown);
+
+		printf("\t\tinode extref index %llu parent %llu namelen %u "
+		       "name: %.*s\n",
+		       (unsigned long long)index,
+		       (unsigned long long)parent_objid,
+		       name_len, shown, namebuf);
+
+		entry_len = sizeof(*extref) + name_len;
+		extref = (struct btrfs_inode_extref *)((char *)extref + entry_len);
+	}
+	return 0;
+}
+
+/*
+ * Dump every inode reference packed into @item, starting at @ref: directory
+ * index and name (truncated to the local buffer when longer).
+ * Always returns 0.
+ */
+static int print_inode_ref_item(struct extent_buffer *eb, struct btrfs_item *item,
+				struct btrfs_inode_ref *ref)
+{
+	char namebuf[BTRFS_NAME_LEN];
+	u32 total = btrfs_item_size(eb, item);
+	u32 cur;
+	u32 entry_len;
+
+	for (cur = 0; cur < total; cur += entry_len) {
+		u32 name_len = btrfs_inode_ref_name_len(eb, ref);
+		u64 index = btrfs_inode_ref_index(eb, ref);
+		u32 shown = name_len;
+
+		if (shown > sizeof(namebuf))
+			shown = sizeof(namebuf);
+
+		/* the name is stored right behind the fixed-size header */
+		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), shown);
+		printf("\t\tinode ref index %llu namelen %u name: %.*s\n",
+		       (unsigned long long)index, name_len, shown, namebuf);
+
+		entry_len = sizeof(*ref) + name_len;
+		ref = (struct btrfs_inode_ref *)((char *)ref + entry_len);
+	}
+	return 0;
+}
+
+/*
+ * Render the type and profile bits of block group @flags into @ret as a
+ * '|'-separated list, e.g. "DATA|RAID1".
+ *
+ * @ret must have room for the longest combination,
+ * "DATA|METADATA|SYSTEM|RAID10" (27 characters plus the terminating NUL).
+ * The buffer is reset first, so it does not have to be zeroed by the caller.
+ */
+static void bg_flags_to_str(u64 flags, char *ret)
+{
+	int empty = 1;
+
+	ret[0] = '\0';
+
+	if (flags & BTRFS_BLOCK_GROUP_DATA) {
+		empty = 0;
+		strcpy(ret, "DATA");
+	}
+	if (flags & BTRFS_BLOCK_GROUP_METADATA) {
+		if (!empty)
+			strcat(ret, "|");
+		strcat(ret, "METADATA");
+		/* a later type must be separated from this one */
+		empty = 0;
+	}
+	if (flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+		if (!empty)
+			strcat(ret, "|");
+		strcat(ret, "SYSTEM");
+		empty = 0;
+	}
+
+	/* at most one profile is reported; none is appended for other values */
+	switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	case BTRFS_BLOCK_GROUP_RAID0:
+		strcat(ret, "|RAID0");
+		break;
+	case BTRFS_BLOCK_GROUP_RAID1:
+		strcat(ret, "|RAID1");
+		break;
+	case BTRFS_BLOCK_GROUP_DUP:
+		strcat(ret, "|DUP");
+		break;
+	case BTRFS_BLOCK_GROUP_RAID10:
+		strcat(ret, "|RAID10");
+		break;
+	case BTRFS_BLOCK_GROUP_RAID5:
+		strcat(ret, "|RAID5");
+		break;
+	case BTRFS_BLOCK_GROUP_RAID6:
+		strcat(ret, "|RAID6");
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Render qgroup status @flags into @ret: "ON" or "OFF", followed by
+ * "|SCANNING" and/or "|INCONSISTENT" when those bits are set.
+ * @ret must hold at least 26 bytes ("OFF|SCANNING|INCONSISTENT" plus NUL).
+ */
+static void qgroup_flags_to_str(u64 flags, char *ret)
+{
+	const char *state = "OFF";
+
+	if (flags & BTRFS_QGROUP_STATUS_FLAG_ON)
+		state = "ON";
+	strcpy(ret, state);
+
+	if (flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+		strcat(ret, "|SCANNING");
+	if (flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)
+		strcat(ret, "|INCONSISTENT");
+}
+
+/*
+ * Print chunk item @chunk of @eb: length, owner, stripe length, type flags
+ * and, for each stripe, its device id, offset and device UUID.
+ */
+void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
+{
+	char type_str[32] = {0};
+	int num_stripes = btrfs_chunk_num_stripes(eb, chunk);
+	int stripe;
+
+	bg_flags_to_str(btrfs_chunk_type(eb, chunk), type_str);
+	printf("\t\tchunk length %llu owner %llu stripe_len %llu\n",
+	       (unsigned long long)btrfs_chunk_length(eb, chunk),
+	       (unsigned long long)btrfs_chunk_owner(eb, chunk),
+	       (unsigned long long)btrfs_chunk_stripe_len(eb, chunk));
+	printf("\t\ttype %s num_stripes %d\n",
+	       type_str, num_stripes);
+
+	for (stripe = 0; stripe < num_stripes; stripe++) {
+		unsigned char raw_uuid[BTRFS_UUID_SIZE];
+		char uuid_text[BTRFS_UUID_UNPARSED_SIZE];
+
+		read_extent_buffer(eb, raw_uuid,
+			(unsigned long)btrfs_stripe_dev_uuid_nr(chunk, stripe),
+			BTRFS_UUID_SIZE);
+		uuid_unparse(raw_uuid, uuid_text);
+
+		printf("\t\t\tstripe %d devid %llu offset %llu\n", stripe,
+		       (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, stripe),
+		       (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, stripe));
+		printf("\t\t\tdev uuid: %s\n", uuid_text);
+	}
+}
+
+/*
+ * Print device item @dev_item of @eb: device id, total bytes, bytes used
+ * and the device UUID in textual form.
+ */
+static void print_dev_item(struct extent_buffer *eb,
+			   struct btrfs_dev_item *dev_item)
+{
+	char disk_uuid_c[BTRFS_UUID_UNPARSED_SIZE];
+	u8 disk_uuid[BTRFS_UUID_SIZE];
+
+	read_extent_buffer(eb, disk_uuid,
+			   (unsigned long)btrfs_device_uuid(dev_item),
+			   BTRFS_UUID_SIZE);
+	uuid_unparse(disk_uuid, disk_uuid_c);
+	/* "%llu" throughout: the 'L' modifier is only defined for long double */
+	printf("\t\tdev item devid %llu "
+	       "total_bytes %llu bytes used %llu\n"
+	       "\t\tdev uuid %s\n",
+	       (unsigned long long)btrfs_device_id(eb, dev_item),
+	       (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
+	       (unsigned long long)btrfs_device_bytes_used(eb, dev_item),
+	       disk_uuid_c);
+}
+
+/*
+ * Print the filesystem UUID and the chunk tree UUID read from the header
+ * of tree block @eb.
+ */
+static void print_uuids(struct extent_buffer *eb)
+{
+	char fsid_text[BTRFS_UUID_UNPARSED_SIZE];
+	char chunk_text[BTRFS_UUID_UNPARSED_SIZE];
+	u8 raw[BTRFS_UUID_SIZE];
+
+	/* filesystem id first */
+	read_extent_buffer(eb, raw, btrfs_header_fsid(),
+			   BTRFS_FSID_SIZE);
+	fsid_text[BTRFS_UUID_UNPARSED_SIZE - 1] = '\0';
+	uuid_unparse(raw, fsid_text);
+
+	/* then the chunk tree uuid, reusing the same raw buffer */
+	read_extent_buffer(eb, raw,
+			   btrfs_header_chunk_tree_uuid(eb),
+			   BTRFS_UUID_SIZE);
+	chunk_text[BTRFS_UUID_UNPARSED_SIZE - 1] = '\0';
+	uuid_unparse(raw, chunk_text);
+
+	printf("fs uuid %s\nchunk uuid %s\n", fsid_text, chunk_text);
+}
+
+/*
+ * Print file extent item @fi (slot @slot, item @item) of @eb.  Inline and
+ * preallocated extents get their own layouts; every other extent type is
+ * printed as a regular extent including its compression type.
+ */
+static void print_file_extent_item(struct extent_buffer *eb,
+				   struct btrfs_item *item,
+				   int slot,
+				   struct btrfs_file_extent_item *fi)
+{
+	switch (btrfs_file_extent_type(eb, fi)) {
+	case BTRFS_FILE_EXTENT_INLINE:
+		printf("\t\tinline extent data size %u "
+		       "ram %u compress %d\n",
+		       btrfs_file_extent_inline_item_len(eb, item),
+		       btrfs_file_extent_inline_len(eb, slot, fi),
+		       btrfs_file_extent_compression(eb, fi));
+		break;
+	case BTRFS_FILE_EXTENT_PREALLOC:
+		printf("\t\tprealloc data disk byte %llu nr %llu\n",
+		       (unsigned long long)btrfs_file_extent_disk_bytenr(eb, fi),
+		       (unsigned long long)btrfs_file_extent_disk_num_bytes(eb, fi));
+		printf("\t\tprealloc data offset %llu nr %llu\n",
+		       (unsigned long long)btrfs_file_extent_offset(eb, fi),
+		       (unsigned long long)btrfs_file_extent_num_bytes(eb, fi));
+		break;
+	default:
+		printf("\t\textent data disk byte %llu nr %llu\n",
+		       (unsigned long long)btrfs_file_extent_disk_bytenr(eb, fi),
+		       (unsigned long long)btrfs_file_extent_disk_num_bytes(eb, fi));
+		printf("\t\textent data offset %llu nr %llu ram %llu\n",
+		       (unsigned long long)btrfs_file_extent_offset(eb, fi),
+		       (unsigned long long)btrfs_file_extent_num_bytes(eb, fi),
+		       (unsigned long long)btrfs_file_extent_ram_bytes(eb, fi));
+		printf("\t\textent compression %d\n",
+		       btrfs_file_extent_compression(eb, fi));
+		break;
+	}
+}
+
+/*
+ * Render extent item @flags into @ret as a '|'-separated list built from
+ * "DATA", "TREE_BLOCK" and "FULL_BACKREF".
+ *
+ * @ret must have room for the longest combination,
+ * "DATA|TREE_BLOCK|FULL_BACKREF" (28 characters plus the terminating NUL).
+ * The buffer is reset first, so it does not have to be zeroed by the caller.
+ */
+static void extent_flags_to_str(u64 flags, char *ret)
+{
+	int empty = 1;
+
+	ret[0] = '\0';
+
+	if (flags & BTRFS_EXTENT_FLAG_DATA) {
+		empty = 0;
+		strcpy(ret, "DATA");
+	}
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+		if (!empty)
+			strcat(ret, "|");
+		strcat(ret, "TREE_BLOCK");
+		empty = 0;
+	}
+	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+		/* only separate when something was printed before */
+		if (!empty)
+			strcat(ret, "|");
+		strcat(ret, "FULL_BACKREF");
+	}
+}
+
+/*
+ * Print the extent item stored in slot @slot of leaf @eb: reference count,
+ * generation and flags, followed by each inline back reference found in the
+ * remainder of the item.
+ *
+ * When @metadata is non-zero the tree block level is printed from the offset
+ * of the item's key and the inline references are taken to start directly
+ * after the extent item.  When it is zero and the TREE_BLOCK flag is set, a
+ * btrfs_tree_block_info is read between the extent item and the references.
+ */
+void print_extent_item(struct extent_buffer *eb, int slot, int metadata)
+{
+ struct btrfs_extent_item *ei;
+ struct btrfs_extent_inline_ref *iref;
+ struct btrfs_extent_data_ref *dref;
+ struct btrfs_shared_data_ref *sref;
+ struct btrfs_disk_key key;
+ unsigned long end;
+ unsigned long ptr;
+ int type;
+ u32 item_size = btrfs_item_size_nr(eb, slot);
+ u64 flags;
+ u64 offset;
+ char flags_str[32] = {0};
+
+ /* an item smaller than the current struct is handled as the v0 layout */
+ if (item_size < sizeof(*ei)) {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+ struct btrfs_extent_item_v0 *ei0;
+ BUG_ON(item_size != sizeof(*ei0));
+ ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
+ printf("\t\textent refs %u\n",
+ btrfs_extent_refs_v0(eb, ei0));
+ return;
+#else
+ BUG();
+#endif
+ }
+
+ ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
+ flags = btrfs_extent_flags(eb, ei);
+ extent_flags_to_str(flags, flags_str);
+
+ printf("\t\textent refs %llu gen %llu flags %s\n",
+ (unsigned long long)btrfs_extent_refs(eb, ei),
+ (unsigned long long)btrfs_extent_generation(eb, ei),
+ flags_str);
+
+ /* locate the first inline reference; its position depends on the layout */
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !metadata) {
+ struct btrfs_tree_block_info *info;
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ btrfs_tree_block_key(eb, info, &key);
+ printf("\t\ttree block ");
+ btrfs_print_key(&key);
+ printf(" level %d\n", btrfs_tree_block_level(eb, info));
+ iref = (struct btrfs_extent_inline_ref *)(info + 1);
+ } else if (metadata) {
+ struct btrfs_key tmp;
+
+ /* here the level is read from the key offset */
+ btrfs_item_key_to_cpu(eb, &tmp, slot);
+ printf("\t\ttree block skinny level %d\n", (int)tmp.offset);
+ iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ } else{
+ iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ }
+
+ /* walk the inline references until the end of the item */
+ ptr = (unsigned long)iref;
+ end = (unsigned long)ei + item_size;
+ while (ptr < end) {
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ type = btrfs_extent_inline_ref_type(eb, iref);
+ offset = btrfs_extent_inline_ref_offset(eb, iref);
+ switch (type) {
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ printf("\t\ttree block backref root %llu\n",
+ (unsigned long long)offset);
+ break;
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ printf("\t\tshared block backref parent %llu\n",
+ (unsigned long long)offset);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY:
+ /* the data ref starts at the offset field of the inline ref */
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ printf("\t\textent data backref root %llu "
+ "objectid %llu offset %llu count %u\n",
+ (unsigned long long)btrfs_extent_data_ref_root(eb, dref),
+ (unsigned long long)btrfs_extent_data_ref_objectid(eb, dref),
+ (unsigned long long)btrfs_extent_data_ref_offset(eb, dref),
+ btrfs_extent_data_ref_count(eb, dref));
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY:
+ /* the shared data ref is located right after the inline ref */
+ sref = (struct btrfs_shared_data_ref *)(iref + 1);
+ printf("\t\tshared data backref parent %llu count %u\n",
+ (unsigned long long)offset,
+ btrfs_shared_data_ref_count(eb, sref));
+ break;
+ default:
+ /* an unrecognized reference type ends the walk */
+ return;
+ }
+ ptr += btrfs_extent_inline_ref_size(type);
+ }
+ /* warn when the last reference ran past the end of the item */
+ WARN_ON(ptr > end);
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+/*
+ * Print the v0-format extent back reference stored in slot @slot of @eb:
+ * root, generation, owner objectid and reference count.
+ */
+static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
+{
+	struct btrfs_extent_ref_v0 *backref =
+		btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
+	unsigned long long ref_root = btrfs_ref_root_v0(eb, backref);
+	unsigned long long ref_gen = btrfs_ref_generation_v0(eb, backref);
+	unsigned long long ref_owner = btrfs_ref_objectid_v0(eb, backref);
+	unsigned long ref_count = btrfs_ref_count_v0(eb, backref);
+
+	printf("\t\textent back ref root %llu gen %llu "
+	       "owner %llu num_refs %lu\n",
+	       ref_root, ref_gen, ref_owner, ref_count);
+}
+#endif
+
+/*
+ * Print the root ref or root backref stored in slot @slot of @leaf.
+ *
+ * @tag is the word printed after "root" ("ref" or "backref" for the callers
+ * in this file).  The name length comes from the item itself, so it is
+ * clamped to the local buffer before copying; longer names are shown
+ * truncated, like the other name printers in this file do.
+ */
+static void print_root_ref(struct extent_buffer *leaf, int slot, char *tag)
+{
+	struct btrfs_root_ref *ref;
+	char namebuf[BTRFS_NAME_LEN];
+	int namelen;
+
+	ref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+	namelen = btrfs_root_ref_name_len(leaf, ref);
+	/* never copy more than namebuf can hold */
+	if (namelen > (int)sizeof(namebuf))
+		namelen = sizeof(namebuf);
+	read_extent_buffer(leaf, namebuf, (unsigned long)(ref + 1), namelen);
+	printf("\t\troot %s key dirid %llu sequence %llu name %.*s\n", tag,
+	       (unsigned long long)btrfs_root_ref_dirid(leaf, ref),
+	       (unsigned long long)btrfs_root_ref_sequence(leaf, ref),
+	       namelen, namebuf);
+}
+
+/* Return how many of the first @len bytes of @buf are equal to @b. */
+static int count_bytes(void *buf, int len, char b)
+{
+	const char *bytes = buf;
+	int matches = 0;
+	int pos = 0;
+
+	while (pos < len) {
+		if (bytes[pos] == b)
+			matches++;
+		pos++;
+	}
+	return matches;
+}
+
+/*
+ * Print the root item stored in slot @slot of @leaf.
+ *
+ * The item is copied into a zeroed local btrfs_root_item, so an item shorter
+ * than the struct leaves the trailing fields zero.  The copy is clamped to
+ * the size of the local struct so an oversized item cannot overflow it.
+ *
+ * The uuid and transid fields are only printed when generation_v2 equals
+ * generation.  Drop progress is printed when the root has no references.
+ */
+static void print_root(struct extent_buffer *leaf, int slot)
+{
+	struct btrfs_root_item *ri;
+	struct btrfs_root_item root_item;
+	u32 len;
+	char uuid_str[BTRFS_UUID_UNPARSED_SIZE];
+
+	ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item);
+	len = btrfs_item_size_nr(leaf, slot);
+	if (len > sizeof(root_item))
+		len = sizeof(root_item);
+
+	memset(&root_item, 0, sizeof(root_item));
+	read_extent_buffer(leaf, &root_item, (unsigned long)ri, len);
+
+	printf("\t\troot data bytenr %llu level %d dirid %llu refs %u gen %llu lastsnap %llu\n",
+	       (unsigned long long)btrfs_root_bytenr(&root_item),
+	       btrfs_root_level(&root_item),
+	       (unsigned long long)btrfs_root_dirid(&root_item),
+	       btrfs_root_refs(&root_item),
+	       (unsigned long long)btrfs_root_generation(&root_item),
+	       (unsigned long long)btrfs_root_last_snapshot(&root_item));
+
+	/* NOTE(review): presumably a match marks the v2 fields as valid - confirm */
+	if (root_item.generation == root_item.generation_v2) {
+		uuid_unparse(root_item.uuid, uuid_str);
+		printf("\t\tuuid %s\n", uuid_str);
+		/* all-zero parent/received uuids are not printed */
+		if (count_bytes(root_item.parent_uuid, BTRFS_UUID_SIZE, 0) != BTRFS_UUID_SIZE) {
+			uuid_unparse(root_item.parent_uuid, uuid_str);
+			printf("\t\tparent_uuid %s\n", uuid_str);
+		}
+		if (count_bytes(root_item.received_uuid, BTRFS_UUID_SIZE, 0) != BTRFS_UUID_SIZE) {
+			uuid_unparse(root_item.received_uuid, uuid_str);
+			printf("\t\treceived_uuid %s\n", uuid_str);
+		}
+		if (root_item.ctransid) {
+			printf("\t\tctransid %llu otransid %llu stransid %llu rtransid %llu\n",
+			       (unsigned long long)btrfs_root_ctransid(&root_item),
+			       (unsigned long long)btrfs_root_otransid(&root_item),
+			       (unsigned long long)btrfs_root_stransid(&root_item),
+			       (unsigned long long)btrfs_root_rtransid(&root_item));
+		}
+	}
+	if (btrfs_root_refs(&root_item) == 0) {
+		printf("\t\tdrop ");
+		btrfs_print_key(&root_item.drop_progress);
+		printf(" level %d\n", root_item.drop_level);
+	}
+}
+
+/*
+ * Print the free space header stored in slot @slot of @leaf: the key it
+ * points at, then its generation, entry count and bitmap count.
+ */
+static void print_free_space_header(struct extent_buffer *leaf, int slot)
+{
+	struct btrfs_disk_key location;
+	struct btrfs_free_space_header *hdr =
+		btrfs_item_ptr(leaf, slot, struct btrfs_free_space_header);
+
+	btrfs_free_space_key(leaf, hdr, &location);
+
+	printf("\t\tlocation ");
+	btrfs_print_key(&location);
+	printf("\n");
+
+	printf("\t\tcache generation %llu entries %llu bitmaps %llu\n",
+	       (unsigned long long)btrfs_free_space_generation(leaf, hdr),
+	       (unsigned long long)btrfs_free_space_entries(leaf, hdr),
+	       (unsigned long long)btrfs_free_space_bitmaps(leaf, hdr));
+}
+
+/*
+ * Print the symbolic name of key type @type.
+ *
+ * A zero type under the free space objectid is printed as "UNTYPED"; a type
+ * without a known name is printed as "UNKNOWN.<number>".
+ */
+static void print_key_type(u64 objectid, u8 type)
+{
+	static const struct {
+		int type;
+		const char *name;
+	} key_names[] = {
+		{ BTRFS_INODE_ITEM_KEY,			"INODE_ITEM" },
+		{ BTRFS_INODE_REF_KEY,			"INODE_REF" },
+		{ BTRFS_INODE_EXTREF_KEY,		"INODE_EXTREF" },
+		{ BTRFS_DIR_ITEM_KEY,			"DIR_ITEM" },
+		{ BTRFS_DIR_INDEX_KEY,			"DIR_INDEX" },
+		{ BTRFS_DIR_LOG_ITEM_KEY,		"DIR_LOG_ITEM" },
+		{ BTRFS_DIR_LOG_INDEX_KEY,		"DIR_LOG_INDEX" },
+		{ BTRFS_XATTR_ITEM_KEY,			"XATTR_ITEM" },
+		{ BTRFS_ORPHAN_ITEM_KEY,		"ORPHAN_ITEM" },
+		{ BTRFS_ROOT_ITEM_KEY,			"ROOT_ITEM" },
+		{ BTRFS_ROOT_REF_KEY,			"ROOT_REF" },
+		{ BTRFS_ROOT_BACKREF_KEY,		"ROOT_BACKREF" },
+		{ BTRFS_EXTENT_ITEM_KEY,		"EXTENT_ITEM" },
+		{ BTRFS_METADATA_ITEM_KEY,		"METADATA_ITEM" },
+		{ BTRFS_TREE_BLOCK_REF_KEY,		"TREE_BLOCK_REF" },
+		{ BTRFS_SHARED_BLOCK_REF_KEY,		"SHARED_BLOCK_REF" },
+		{ BTRFS_EXTENT_DATA_REF_KEY,		"EXTENT_DATA_REF" },
+		{ BTRFS_SHARED_DATA_REF_KEY,		"SHARED_DATA_REF" },
+		{ BTRFS_EXTENT_REF_V0_KEY,		"EXTENT_REF_V0" },
+		{ BTRFS_CSUM_ITEM_KEY,			"CSUM_ITEM" },
+		{ BTRFS_EXTENT_CSUM_KEY,		"EXTENT_CSUM" },
+		{ BTRFS_EXTENT_DATA_KEY,		"EXTENT_DATA" },
+		{ BTRFS_BLOCK_GROUP_ITEM_KEY,		"BLOCK_GROUP_ITEM" },
+		{ BTRFS_FREE_SPACE_INFO_KEY,		"FREE_SPACE_INFO" },
+		{ BTRFS_FREE_SPACE_EXTENT_KEY,		"FREE_SPACE_EXTENT" },
+		{ BTRFS_FREE_SPACE_BITMAP_KEY,		"FREE_SPACE_BITMAP" },
+		{ BTRFS_CHUNK_ITEM_KEY,			"CHUNK_ITEM" },
+		{ BTRFS_DEV_ITEM_KEY,			"DEV_ITEM" },
+		{ BTRFS_DEV_EXTENT_KEY,			"DEV_EXTENT" },
+		{ BTRFS_BALANCE_ITEM_KEY,		"BALANCE_ITEM" },
+		{ BTRFS_DEV_REPLACE_KEY,		"DEV_REPLACE" },
+		{ BTRFS_STRING_ITEM_KEY,		"STRING_ITEM" },
+		{ BTRFS_QGROUP_STATUS_KEY,		"QGROUP_STATUS" },
+		{ BTRFS_QGROUP_RELATION_KEY,		"QGROUP_RELATION" },
+		{ BTRFS_QGROUP_INFO_KEY,		"QGROUP_INFO" },
+		{ BTRFS_QGROUP_LIMIT_KEY,		"QGROUP_LIMIT" },
+		{ BTRFS_DEV_STATS_KEY,			"DEV_STATS" },
+		{ BTRFS_UUID_KEY_SUBVOL,		"UUID_KEY_SUBVOL" },
+		{ BTRFS_UUID_KEY_RECEIVED_SUBVOL,	"UUID_KEY_RECEIVED_SUBVOL" },
+	};
+	size_t idx;
+
+	if (type == 0 && objectid == BTRFS_FREE_SPACE_OBJECTID) {
+		printf("UNTYPED");
+		return;
+	}
+
+	for (idx = 0; idx < sizeof(key_names) / sizeof(key_names[0]); idx++) {
+		if (key_names[idx].type == type) {
+			printf("%s", key_names[idx].name);
+			return;
+		}
+	}
+	printf("UNKNOWN.%d", type);
+}
+
+/*
+ * Print @objectid in the form best suited to key type @type.
+ *
+ * Device extent keys print the plain number, qgroup relation keys print
+ * "level/subvolid" and uuid keys print 16 hex digits.  For every other type
+ * a well-known objectid is printed by name and anything else as a number.
+ */
+static void print_objectid(u64 objectid, u8 type)
+{
+	static const struct {
+		u64 objectid;
+		const char *name;
+	} well_known[] = {
+		{ BTRFS_EXTENT_TREE_OBJECTID,		"EXTENT_TREE" },
+		{ BTRFS_CHUNK_TREE_OBJECTID,		"CHUNK_TREE" },
+		{ BTRFS_DEV_TREE_OBJECTID,		"DEV_TREE" },
+		{ BTRFS_FS_TREE_OBJECTID,		"FS_TREE" },
+		{ BTRFS_ROOT_TREE_DIR_OBJECTID,		"ROOT_TREE_DIR" },
+		{ BTRFS_CSUM_TREE_OBJECTID,		"CSUM_TREE" },
+		{ BTRFS_BALANCE_OBJECTID,		"BALANCE" },
+		{ BTRFS_ORPHAN_OBJECTID,		"ORPHAN" },
+		{ BTRFS_TREE_LOG_OBJECTID,		"TREE_LOG" },
+		{ BTRFS_TREE_LOG_FIXUP_OBJECTID,	"LOG_FIXUP" },
+		{ BTRFS_TREE_RELOC_OBJECTID,		"TREE_RELOC" },
+		{ BTRFS_DATA_RELOC_TREE_OBJECTID,	"DATA_RELOC_TREE" },
+		{ BTRFS_EXTENT_CSUM_OBJECTID,		"EXTENT_CSUM" },
+		{ BTRFS_FREE_SPACE_OBJECTID,		"FREE_SPACE" },
+		{ BTRFS_FREE_INO_OBJECTID,		"FREE_INO" },
+		{ BTRFS_QUOTA_TREE_OBJECTID,		"QUOTA_TREE" },
+		{ BTRFS_UUID_TREE_OBJECTID,		"UUID_TREE" },
+		{ BTRFS_FREE_SPACE_TREE_OBJECTID,	"FREE_SPACE_TREE" },
+		{ BTRFS_MULTIPLE_OBJECTIDS,		"MULTIPLE" },
+		{ (u64)-1,				"-1" },
+	};
+	size_t idx;
+
+	/* key types whose objectid is never a well-known tree id */
+	if (type == BTRFS_DEV_EXTENT_KEY) {
+		printf("%llu", (unsigned long long)objectid); /* device id */
+		return;
+	}
+	if (type == BTRFS_QGROUP_RELATION_KEY) {
+		printf("%llu/%llu", btrfs_qgroup_level(objectid),
+		       btrfs_qgroup_subvid(objectid));
+		return;
+	}
+	if (type == BTRFS_UUID_KEY_SUBVOL ||
+	    type == BTRFS_UUID_KEY_RECEIVED_SUBVOL) {
+		printf("0x%016llx", (unsigned long long)objectid);
+		return;
+	}
+
+	/* the two objectids whose name depends on the key type */
+	if (objectid == BTRFS_ROOT_TREE_OBJECTID) {
+		if (type == BTRFS_DEV_ITEM_KEY)
+			printf("DEV_ITEMS");
+		else
+			printf("ROOT_TREE");
+		return;
+	}
+	if (objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
+	    type == BTRFS_CHUNK_ITEM_KEY) {
+		printf("FIRST_CHUNK_TREE");
+		return;
+	}
+
+	for (idx = 0; idx < sizeof(well_known) / sizeof(well_known[0]); idx++) {
+		if (well_known[idx].objectid == objectid) {
+			printf("%s", well_known[idx].name);
+			return;
+		}
+	}
+	printf("%llu", (unsigned long long)objectid);
+}
+
+/*
+ * Print @disk_key as "key (<objectid> <type> <offset>)".
+ *
+ * The offset is shown as "level/subvolid" for qgroup relation, info and
+ * limit keys, as 16 hex digits for uuid keys, and otherwise as a decimal
+ * number with (u64)-1 written as "-1".
+ */
+void btrfs_print_key(struct btrfs_disk_key *disk_key)
+{
+	u64 objectid = btrfs_disk_key_objectid(disk_key);
+	u8 type = btrfs_disk_key_type(disk_key);
+	u64 offset = btrfs_disk_key_offset(disk_key);
+
+	printf("key (");
+	print_objectid(objectid, type);
+	printf(" ");
+	print_key_type(objectid, type);
+
+	if (type == BTRFS_QGROUP_RELATION_KEY ||
+	    type == BTRFS_QGROUP_INFO_KEY ||
+	    type == BTRFS_QGROUP_LIMIT_KEY) {
+		printf(" %llu/%llu)", btrfs_qgroup_level(offset),
+		       btrfs_qgroup_subvid(offset));
+	} else if (type == BTRFS_UUID_KEY_SUBVOL ||
+		   type == BTRFS_UUID_KEY_RECEIVED_SUBVOL) {
+		printf(" 0x%016llx)", (unsigned long long)offset);
+	} else if (offset == (u64)-1) {
+		printf(" -1)");
+	} else {
+		printf(" %llu)", (unsigned long long)offset);
+	}
+}
+
+/*
+ * Print the subvolume ids stored in a uuid item: @item_size bytes of
+ * little-endian 64-bit values starting at @offset inside @l.  An item whose
+ * size is not a multiple of 8 is reported and not printed.
+ */
+static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
+			    u32 item_size)
+{
+	u32 remaining;
+
+	if (item_size & (sizeof(u64) - 1)) {
+		printf("btrfs: uuid item with illegal size %lu!\n",
+		       (unsigned long)item_size);
+		return;
+	}
+
+	for (remaining = item_size; remaining;
+	     remaining -= sizeof(u64), offset += sizeof(u64)) {
+		__le64 raw_id;
+
+		read_extent_buffer(l, &raw_id, offset, sizeof(u64));
+		printf("\t\tsubvol_id %llu\n",
+		       (unsigned long long)le64_to_cpu(raw_id));
+	}
+}
+
+/*
+ * Dump leaf @l: a summary line (bytenr, item count, free space, generation,
+ * owner), the header uuids, and then every item with its key, offset and
+ * size followed by a type-specific rendering of the item body.
+ *
+ * @root is only used to compute the free space of the leaf.
+ * Key types without a case below get no body output.
+ */
+void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
+{
+	int i;
+	char *str;
+	struct btrfs_item *item;
+	struct btrfs_dir_item *di;
+	struct btrfs_inode_item *ii;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_block_group_item *bi;
+	struct btrfs_extent_data_ref *dref;
+	struct btrfs_shared_data_ref *sref;
+	struct btrfs_inode_ref *iref;
+	struct btrfs_inode_extref *iref2;
+	struct btrfs_dev_extent *dev_extent;
+	struct btrfs_disk_key disk_key;
+	struct btrfs_block_group_item bg_item;
+	struct btrfs_free_space_info *free_info;
+	struct btrfs_dir_log_item *dlog;
+	struct btrfs_qgroup_info_item *qg_info;
+	struct btrfs_qgroup_limit_item *qg_limit;
+	struct btrfs_qgroup_status_item *qg_status;
+	u32 nr = btrfs_header_nritems(l);
+	u64 objectid;
+	u32 type;
+	char flags_str[32];
+
+	printf("leaf %llu items %d free space %d generation %llu owner %llu\n",
+		(unsigned long long)btrfs_header_bytenr(l), nr,
+		btrfs_leaf_free_space(root, l),
+		(unsigned long long)btrfs_header_generation(l),
+		(unsigned long long)btrfs_header_owner(l));
+	print_uuids(l);
+	fflush(stdout);
+	for (i = 0 ; i < nr ; i++) {
+		item = btrfs_item_nr(i);
+		btrfs_item_key(l, &disk_key, i);
+		objectid = btrfs_disk_key_objectid(&disk_key);
+		type = btrfs_disk_key_type(&disk_key);
+		printf("\titem %d ", i);
+		btrfs_print_key(&disk_key);
+		printf(" itemoff %d itemsize %d\n",
+			btrfs_item_offset(l, item),
+			btrfs_item_size(l, item));
+
+		/* free space headers are stored under key type 0 */
+		if (type == 0 && objectid == BTRFS_FREE_SPACE_OBJECTID)
+			print_free_space_header(l, i);
+
+		switch (type) {
+		case BTRFS_INODE_ITEM_KEY:
+			ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
+			printf("\t\tinode generation %llu transid %llu size %llu nbytes %llu\n"
+			       "\t\tblock group %llu mode %o links %u uid %u gid %u\n"
+			       "\t\trdev %llu flags 0x%llx\n",
+			       (unsigned long long)btrfs_inode_generation(l, ii),
+			       (unsigned long long)btrfs_inode_transid(l, ii),
+			       (unsigned long long)btrfs_inode_size(l, ii),
+			       (unsigned long long)btrfs_inode_nbytes(l, ii),
+			       (unsigned long long)btrfs_inode_block_group(l,ii),
+			       btrfs_inode_mode(l, ii),
+			       btrfs_inode_nlink(l, ii),
+			       btrfs_inode_uid(l, ii),
+			       btrfs_inode_gid(l, ii),
+			       (unsigned long long)btrfs_inode_rdev(l,ii),
+			       (unsigned long long)btrfs_inode_flags(l,ii));
+			break;
+		case BTRFS_INODE_REF_KEY:
+			iref = btrfs_item_ptr(l, i, struct btrfs_inode_ref);
+			print_inode_ref_item(l, item, iref);
+			break;
+		case BTRFS_INODE_EXTREF_KEY:
+			iref2 = btrfs_item_ptr(l, i, struct btrfs_inode_extref);
+			print_inode_extref_item(l, item, iref2);
+			break;
+		case BTRFS_DIR_ITEM_KEY:
+		case BTRFS_DIR_INDEX_KEY:
+		case BTRFS_XATTR_ITEM_KEY:
+			di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
+			print_dir_item(l, item, di);
+			break;
+		case BTRFS_DIR_LOG_INDEX_KEY:
+		case BTRFS_DIR_LOG_ITEM_KEY:
+			dlog = btrfs_item_ptr(l, i, struct btrfs_dir_log_item);
+			/* "%llu": the 'L' modifier is only defined for long double */
+			printf("\t\tdir log end %llu\n",
+			       (unsigned long long)btrfs_dir_log_end(l, dlog));
+			break;
+		case BTRFS_ORPHAN_ITEM_KEY:
+			printf("\t\torphan item\n");
+			break;
+		case BTRFS_ROOT_ITEM_KEY:
+			print_root(l, i);
+			break;
+		case BTRFS_ROOT_REF_KEY:
+			print_root_ref(l, i, "ref");
+			break;
+		case BTRFS_ROOT_BACKREF_KEY:
+			print_root_ref(l, i, "backref");
+			break;
+		case BTRFS_EXTENT_ITEM_KEY:
+			print_extent_item(l, i, 0);
+			break;
+		case BTRFS_METADATA_ITEM_KEY:
+			print_extent_item(l, i, 1);
+			break;
+		case BTRFS_TREE_BLOCK_REF_KEY:
+			printf("\t\ttree block backref\n");
+			break;
+		case BTRFS_SHARED_BLOCK_REF_KEY:
+			printf("\t\tshared block backref\n");
+			break;
+		case BTRFS_EXTENT_DATA_REF_KEY:
+			dref = btrfs_item_ptr(l, i, struct btrfs_extent_data_ref);
+			printf("\t\textent data backref root %llu "
+			       "objectid %llu offset %llu count %u\n",
+			       (unsigned long long)btrfs_extent_data_ref_root(l, dref),
+			       (unsigned long long)btrfs_extent_data_ref_objectid(l, dref),
+			       (unsigned long long)btrfs_extent_data_ref_offset(l, dref),
+			       btrfs_extent_data_ref_count(l, dref));
+			break;
+		case BTRFS_SHARED_DATA_REF_KEY:
+			sref = btrfs_item_ptr(l, i, struct btrfs_shared_data_ref);
+			printf("\t\tshared data backref count %u\n",
+			       btrfs_shared_data_ref_count(l, sref));
+			break;
+		case BTRFS_EXTENT_REF_V0_KEY:
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			print_extent_ref_v0(l, i);
+#else
+			BUG();
+#endif
+			break;
+		case BTRFS_CSUM_ITEM_KEY:
+			printf("\t\tcsum item\n");
+			break;
+		case BTRFS_EXTENT_CSUM_KEY:
+			printf("\t\textent csum item\n");
+			break;
+		case BTRFS_EXTENT_DATA_KEY:
+			fi = btrfs_item_ptr(l, i,
+					    struct btrfs_file_extent_item);
+			print_file_extent_item(l, item, i, fi);
+			break;
+		case BTRFS_BLOCK_GROUP_ITEM_KEY:
+			bi = btrfs_item_ptr(l, i,
+					    struct btrfs_block_group_item);
+			/* work on a local copy of the on-disk item */
+			read_extent_buffer(l, &bg_item, (unsigned long)bi,
+					   sizeof(bg_item));
+			memset(flags_str, 0, sizeof(flags_str));
+			bg_flags_to_str(btrfs_block_group_flags(&bg_item),
+					flags_str);
+			printf("\t\tblock group used %llu chunk_objectid %llu flags %s\n",
+			       (unsigned long long)btrfs_block_group_used(&bg_item),
+			       (unsigned long long)btrfs_block_group_chunk_objectid(&bg_item),
+			       flags_str);
+			break;
+		case BTRFS_FREE_SPACE_INFO_KEY:
+			free_info = btrfs_item_ptr(l, i, struct btrfs_free_space_info);
+			printf("\t\tfree space info extent count %u flags %u\n",
+			       (unsigned)btrfs_free_space_extent_count(l, free_info),
+			       (unsigned)btrfs_free_space_flags(l, free_info));
+			break;
+		case BTRFS_FREE_SPACE_EXTENT_KEY:
+			printf("\t\tfree space extent\n");
+			break;
+		case BTRFS_FREE_SPACE_BITMAP_KEY:
+			printf("\t\tfree space bitmap\n");
+			break;
+		case BTRFS_CHUNK_ITEM_KEY:
+			print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk));
+			break;
+		case BTRFS_DEV_ITEM_KEY:
+			print_dev_item(l, btrfs_item_ptr(l, i,
+					struct btrfs_dev_item));
+			break;
+		case BTRFS_DEV_EXTENT_KEY:
+			dev_extent = btrfs_item_ptr(l, i,
+						    struct btrfs_dev_extent);
+			printf("\t\tdev extent chunk_tree %llu\n"
+			       "\t\tchunk objectid %llu chunk offset %llu "
+			       "length %llu\n",
+			       (unsigned long long)
+			       btrfs_dev_extent_chunk_tree(l, dev_extent),
+			       (unsigned long long)
+			       btrfs_dev_extent_chunk_objectid(l, dev_extent),
+			       (unsigned long long)
+			       btrfs_dev_extent_chunk_offset(l, dev_extent),
+			       (unsigned long long)
+			       btrfs_dev_extent_length(l, dev_extent));
+			break;
+		case BTRFS_QGROUP_STATUS_KEY:
+			qg_status = btrfs_item_ptr(l, i,
+					struct btrfs_qgroup_status_item);
+			memset(flags_str, 0, sizeof(flags_str));
+			qgroup_flags_to_str(btrfs_qgroup_status_flags(l, qg_status),
+					flags_str);
+			/* scan stays signed ("%lld"); the cast now matches it */
+			printf("\t\tversion %llu generation %llu flags %s "
+			       "scan %lld\n",
+			       (unsigned long long)
+			       btrfs_qgroup_status_version(l, qg_status),
+			       (unsigned long long)
+			       btrfs_qgroup_status_generation(l, qg_status),
+			       flags_str,
+			       (long long)
+			       btrfs_qgroup_status_scan(l, qg_status));
+			break;
+		case BTRFS_QGROUP_RELATION_KEY:
+			break;
+		case BTRFS_QGROUP_INFO_KEY:
+			qg_info = btrfs_item_ptr(l, i,
+						 struct btrfs_qgroup_info_item);
+			printf("\t\tgeneration %llu\n"
+			       "\t\treferenced %llu referenced compressed %llu\n"
+			       "\t\texclusive %llu exclusive compressed %llu\n",
+			       (unsigned long long)
+			       btrfs_qgroup_info_generation(l, qg_info),
+			       (unsigned long long)
+			       btrfs_qgroup_info_referenced(l, qg_info),
+			       (unsigned long long)
+			       btrfs_qgroup_info_referenced_compressed(l,
+								       qg_info),
+			       (unsigned long long)
+			       btrfs_qgroup_info_exclusive(l, qg_info),
+			       (unsigned long long)
+			       btrfs_qgroup_info_exclusive_compressed(l,
+								      qg_info));
+			break;
+		case BTRFS_QGROUP_LIMIT_KEY:
+			qg_limit = btrfs_item_ptr(l, i,
+					 struct btrfs_qgroup_limit_item);
+			printf("\t\tflags %llx\n"
+			       "\t\tmax referenced %lld max exclusive %lld\n"
+			       "\t\trsv referenced %lld rsv exclusive %lld\n",
+			       (unsigned long long)
+			       btrfs_qgroup_limit_flags(l, qg_limit),
+			       (long long)
+			       btrfs_qgroup_limit_max_referenced(l, qg_limit),
+			       (long long)
+			       btrfs_qgroup_limit_max_exclusive(l, qg_limit),
+			       (long long)
+			       btrfs_qgroup_limit_rsv_referenced(l, qg_limit),
+			       (long long)
+			       btrfs_qgroup_limit_rsv_exclusive(l, qg_limit));
+			break;
+		case BTRFS_UUID_KEY_SUBVOL:
+		case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
+			print_uuid_item(l, btrfs_item_ptr_offset(l, i),
+					btrfs_item_size_nr(l, i));
+			break;
+		case BTRFS_STRING_ITEM_KEY:
+			/* dirty, but it's simple */
+			str = l->data + btrfs_item_ptr_offset(l, i);
+			/* the "%.*s" precision must be an int */
+			printf("\t\titem data %.*s\n",
+			       (int)btrfs_item_size(l, item), str);
+			break;
+		case BTRFS_DEV_STATS_KEY:
+			printf("\t\tdevice stats\n");
+			break;
+		};
+		fflush(stdout);
+	}
+}
+
+/*
+ * Dump tree block @eb.  A leaf is handed to btrfs_print_leaf().  For a node
+ * the header summary and every key/block pointer are printed; when @follow
+ * is non-zero each child block is then read and dumped recursively.
+ *
+ * A child that cannot be read is reported on stderr and skipped.  A child
+ * whose level is not exactly one below its parent triggers BUG().
+ * A NULL @eb is ignored.
+ */
+void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *eb, int follow)
+{
+	struct btrfs_disk_key disk_key;
+	struct btrfs_key key;
+	u32 nr;
+	u32 size;
+	int level;
+	int i;
+
+	if (!eb)
+		return;
+
+	nr = btrfs_header_nritems(eb);
+	if (btrfs_is_leaf(eb)) {
+		btrfs_print_leaf(root, eb);
+		return;
+	}
+
+	level = btrfs_header_level(eb);
+	printf("node %llu level %d items %d free %u generation %llu owner %llu\n",
+	       (unsigned long long)eb->start,
+	       level, nr,
+	       (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr,
+	       (unsigned long long)btrfs_header_generation(eb),
+	       (unsigned long long)btrfs_header_owner(eb));
+	print_uuids(eb);
+	fflush(stdout);
+
+	/* size of the blocks one level below this node */
+	size = btrfs_level_size(root, level - 1);
+
+	/* first pass: list every key and block pointer of this node */
+	for (i = 0; i < nr; i++) {
+		u64 blocknr = btrfs_node_blockptr(eb, i);
+
+		btrfs_node_key(eb, &disk_key, i);
+		btrfs_disk_key_to_cpu(&key, &disk_key);
+		printf("\t");
+		btrfs_print_key(&disk_key);
+		printf(" block %llu (%llu) gen %llu\n",
+		       (unsigned long long)blocknr,
+		       (unsigned long long)blocknr / size,
+		       (unsigned long long)btrfs_node_ptr_generation(eb, i));
+		fflush(stdout);
+	}
+
+	if (!follow)
+		return;
+
+	/* second pass: descend into every child */
+	for (i = 0; i < nr; i++) {
+		struct extent_buffer *next;
+
+		next = read_tree_block(root,
+				       btrfs_node_blockptr(eb, i),
+				       size,
+				       btrfs_node_ptr_generation(eb, i));
+		if (!extent_buffer_uptodate(next)) {
+			fprintf(stderr, "failed to read %llu in tree %llu\n",
+				(unsigned long long)btrfs_node_blockptr(eb, i),
+				(unsigned long long)btrfs_header_owner(eb));
+			continue;
+		}
+		if (btrfs_is_leaf(next) && level != 1)
+			BUG();
+		if (btrfs_header_level(next) != level - 1)
+			BUG();
+		btrfs_print_tree(root, next, 1);
+		free_extent_buffer(next);
+	}
+}
diff --git a/print-tree.h b/print-tree.h
new file mode 100644
index 00000000..f0153c18
--- /dev/null
+++ b/print-tree.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __PRINT_TREE_H__
+#define __PRINT_TREE_H__
+
+/* Print leaf block @l (only the tail of the definition is visible in this chunk). */
+void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
+/*
+ * Print tree block @t: leaves go to btrfs_print_leaf(), interior nodes get a
+ * header plus one line per key pointer.  A non-zero @follow also reads and
+ * prints every block below @t.
+ */
+void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t, int follow);
+/* Presumably prints one on-disk key without a trailing newline - TODO confirm. */
+void btrfs_print_key(struct btrfs_disk_key *disk_key);
+/* NOTE(review): definitions not visible here; by name these print a chunk item and an extent item. */
+void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk);
+void print_extent_item(struct extent_buffer *eb, int slot, int metadata);
+#endif
diff --git a/props.c b/props.c
new file mode 100644
index 00000000..5b749324
--- /dev/null
+++ b/props.c
@@ -0,0 +1,198 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "ctree.h"
+#include "commands.h"
+#include "utils.h"
+#include "props.h"
+
+/* Prefix prepended to a property name to form its extended-attribute name. */
+#define XATTR_BTRFS_PREFIX "btrfs."
+/* Length of the prefix without its terminating NUL. */
+#define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1)
+
+/*
+ * Defined as synonyms in attr/xattr.h
+ *
+ * Fall back to ENODATA when the headers in use do not provide ENOATTR.
+ */
+#ifndef ENOATTR
+#define ENOATTR ENODATA
+#endif
+
+/*
+ * Handler for the "ro" property: query or change the read-only flag of the
+ * subvolume at @object.
+ *
+ * With @value == NULL the current state is printed as "ro=true" or
+ * "ro=false".  Otherwise @value must be exactly "true" or "false" and the
+ * flag is set or cleared accordingly.  @type and @name are not used.
+ *
+ * Returns -errno when open() or an ioctl fails, -EINVAL for any other
+ * @value, 0 after a query, and the result of the set ioctl otherwise.
+ */
+static int prop_read_only(enum prop_object_type type,
+			  const char *object,
+			  const char *name,
+			  const char *value)
+{
+	u64 subvol_flags = 0;
+	int err;
+	int fd;
+
+	fd = open(object, O_RDONLY);
+	if (fd < 0) {
+		err = -errno;
+		fprintf(stderr, "ERROR: open %s failed. %s\n",
+			object, strerror(-err));
+		return err;
+	}
+
+	err = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &subvol_flags);
+	if (err < 0) {
+		err = -errno;
+		fprintf(stderr, "ERROR: failed to get flags for %s. %s\n",
+			object, strerror(-err));
+		goto close_fd;
+	}
+
+	/* Pure query: report the current state and stop. */
+	if (value == NULL) {
+		fputs((subvol_flags & BTRFS_SUBVOL_RDONLY) ?
+		      "ro=true\n" : "ro=false\n", stdout);
+		err = 0;
+		goto close_fd;
+	}
+
+	if (strcmp(value, "true") == 0) {
+		subvol_flags |= BTRFS_SUBVOL_RDONLY;
+	} else if (strcmp(value, "false") == 0) {
+		subvol_flags &= ~BTRFS_SUBVOL_RDONLY;
+	} else {
+		err = -EINVAL;
+		fprintf(stderr, "ERROR: invalid value for property.\n");
+		goto close_fd;
+	}
+
+	err = ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &subvol_flags);
+	if (err < 0) {
+		err = -errno;
+		fprintf(stderr, "ERROR: failed to set flags for %s. %s\n",
+			object, strerror(-err));
+	}
+
+close_fd:
+	close(fd);
+	return err;
+}
+
+/*
+ * Handler for the "label" property of @object.
+ *
+ * A non-NULL @value is passed to set_label(); otherwise the label is fetched
+ * with get_label() and, on success, printed as "label=<label>".  @type and
+ * @name are not used.  Returns whatever set_label()/get_label() returned.
+ */
+static int prop_label(enum prop_object_type type,
+		      const char *object,
+		      const char *name,
+		      const char *value)
+{
+	char label[BTRFS_LABEL_SIZE];
+	int ret;
+
+	if (value)
+		return set_label((char *) object, (char *) value);
+
+	ret = get_label((char *) object, label);
+	if (ret == 0)
+		fprintf(stdout, "label=%s\n", label);
+	return ret;
+}
+
+/*
+ * Handler for the "compression" property of the file or directory @object.
+ *
+ * The property is stored in the extended attribute XATTR_BTRFS_PREFIX + @name.
+ * A non-NULL @value is written with fsetxattr(); otherwise the attribute is
+ * read and printed as "compression=<value>".  @type is not used.
+ *
+ * Returns 0 on success, and also when the xattr call fails with ENOATTR (in
+ * which case nothing is printed); -errno for other open/xattr failures and
+ * -ENOMEM when a buffer cannot be allocated.
+ */
+static int prop_compression(enum prop_object_type type,
+			    const char *object,
+			    const char *name,
+			    const char *value)
+{
+	int ret;
+	ssize_t sret;
+	int fd = -1;
+	DIR *dirstream = NULL;
+	char *buf = NULL;
+	char *xattr_name = NULL;
+	int open_flags = value ? O_RDWR : O_RDONLY;
+	size_t name_len = strlen(name);
+
+	fd = open_file_or_dir3(object, &dirstream, open_flags);
+	if (fd == -1) {
+		ret = -errno;
+		fprintf(stderr, "ERROR: open %s failed. %s\n",
+			object, strerror(-ret));
+		goto out;
+	}
+
+	xattr_name = malloc(XATTR_BTRFS_PREFIX_LEN + name_len + 1);
+	if (!xattr_name) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	memcpy(xattr_name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
+	memcpy(xattr_name + XATTR_BTRFS_PREFIX_LEN, name, name_len);
+	xattr_name[XATTR_BTRFS_PREFIX_LEN + name_len] = '\0';
+
+	/* When reading, the first call only probes the size of the value. */
+	if (value)
+		sret = fsetxattr(fd, xattr_name, value, strlen(value), 0);
+	else
+		sret = fgetxattr(fd, xattr_name, NULL, 0);
+	if (sret < 0) {
+		ret = -errno;
+		if (ret != -ENOATTR)
+			fprintf(stderr,
+				"ERROR: failed to %s compression for %s. %s\n",
+				value ? "set" : "get", object, strerror(-ret));
+		else
+			ret = 0;
+		goto out;
+	}
+	if (!value) {
+		size_t len = sret;
+
+		/*
+		 * Allocate one spare byte: an empty value must not turn into
+		 * malloc(0), which is allowed to return NULL and would then be
+		 * misreported as -ENOMEM.  Passing the non-zero size also
+		 * keeps the second call from being treated as a size probe.
+		 */
+		buf = malloc(len + 1);
+		if (!buf) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		sret = fgetxattr(fd, xattr_name, buf, len + 1);
+		if (sret < 0) {
+			ret = -errno;
+			fprintf(stderr,
+				"ERROR: failed to get compression for %s. %s\n",
+				object, strerror(-ret));
+			goto out;
+		}
+		/* Print exactly the bytes the second call stored in buf. */
+		fprintf(stdout, "compression=%.*s\n", (int)sret, buf);
+	}
+
+	ret = 0;
+out:
+	free(xattr_name);
+	free(buf);
+	if (fd >= 0)
+		close_file_or_dir(fd, dirstream);
+
+	return ret;
+}
+
+/*
+ * Table of supported properties, terminated by an entry whose name is NULL.
+ * Each entry gives the property name, a description, the read_only flag, the
+ * object types it applies to, and the handler.
+ */
+const struct prop_handler prop_handlers[] = {
+	{
+		.name = "ro",
+		.desc = "Set/get read-only flag of subvolume.",
+		.read_only = 0,
+		.types = prop_object_subvol,
+		.handler = prop_read_only,
+	},
+	{
+		.name = "label",
+		.desc = "Set/get label of device.",
+		.read_only = 0,
+		.types = prop_object_dev | prop_object_root,
+		.handler = prop_label,
+	},
+	{
+		.name = "compression",
+		.desc = "Set/get compression for a file or directory",
+		.read_only = 0,
+		.types = prop_object_inode,
+		.handler = prop_compression,
+	},
+	{
+		.name = NULL,
+		.desc = NULL,
+		.read_only = 0,
+		.types = 0,
+		.handler = NULL,
+	}
+};
diff --git a/props.h b/props.h
new file mode 100644
index 00000000..a43cb253
--- /dev/null
+++ b/props.h
@@ -0,0 +1,43 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_PROPS_H__
+#define __BTRFS_PROPS_H__
+
+/*
+ * Kinds of object a property can apply to.  The named kinds are single bits
+ * so that several can be OR-ed together (see the "types" member of
+ * struct prop_handler).
+ */
+enum prop_object_type {
+ prop_object_dev = (1 << 0),
+ prop_object_root = (1 << 1),
+ prop_object_subvol = (1 << 2),
+ prop_object_inode = (1 << 3),
+ /* Has no initializer, so it is prop_object_inode + 1 (9), not a further bit. */
+ __prop_object_max,
+};
+
+/*
+ * Signature of a property handler.
+ *
+ * @type:   kind of object being operated on
+ * @object: path of the object
+ * @name:   property name
+ * @value:  new value; the handlers in props.c treat NULL as "get and print"
+ *
+ * The handlers in props.c return 0 on success and a negative errno value on
+ * failure.
+ */
+typedef int (*prop_handler_t)(enum prop_object_type type,
+ const char *object,
+ const char *name,
+ const char *value);
+
+/* One entry of the property table. */
+struct prop_handler {
+ /* Property name; NULL marks the end of the table. */
+ const char *name;
+ /* Human-readable description. */
+ const char *desc;
+ /* NOTE(review): presumably non-zero when the property cannot be set - confirm with the caller. */
+ int read_only;
+ /* OR-ed enum prop_object_type bits the property applies to. */
+ int types;
+ /* Function that gets or sets the property. */
+ prop_handler_t handler;
+};
+
+/* Property table defined in props.c; its last entry has a NULL name. */
+extern const struct prop_handler prop_handlers[];
+
+#endif
diff --git a/qgroup-verify.c b/qgroup-verify.c
new file mode 100644
index 00000000..7e3afda8
--- /dev/null
+++ b/qgroup-verify.c
@@ -0,0 +1,1179 @@
+/*
+ * Copyright (C) 2014 SUSE. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Mark Fasheh <mfasheh@suse.de>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <uuid/uuid.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "utils.h"
+#include "ulist.h"
+#include "rbtree-utils.h"
+
+#include "qgroup-verify.h"
+
+/* Uncomment to compile the print helpers guarded by QGROUP_VERIFY_DEBUG. */
+/*#define QGROUP_VERIFY_DEBUG*/
+/* Number of extent/metadata items seen by scan_extents(). */
+static unsigned long tot_extents_scanned = 0;
+
+/* Forward declaration: account_all_refs() calls this before its definition. */
+static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive);
+
+/* Byte counters of one qgroup, either as read from disk or as recomputed here. */
+struct qgroup_info {
+ u64 referenced;
+ u64 referenced_compressed;
+ u64 exclusive;
+ u64 exclusive_compressed;
+};
+
+/* Per-qgroup record kept in the "counts" rbtree, keyed by qgroupid. */
+struct qgroup_count {
+ /* Taken from the offset of the qgroup info item's key. */
+ u64 qgroupid;
+ /* Set to 1 by load_quota_info() when the matching fs root could be read. */
+ int subvol_exists;
+
+ /* Copy of the on-disk key of the qgroup info item. */
+ struct btrfs_disk_key key;
+ /* Counters as stored in the on-disk qgroup info item. */
+ struct qgroup_info diskinfo;
+
+ /* Counters accumulated by add_bytes() during verification. */
+ struct qgroup_info info;
+
+ struct rb_node rb_node;
+};
+
+/* All known qgroups: rbtree of struct qgroup_count plus the number of entries. */
+static struct counts_tree {
+ struct rb_root root;
+ unsigned int num_groups;
+} counts = { .root = RB_ROOT };
+
+/* All extent refs (struct ref), ordered as described above compare_ref(). */
+static struct rb_root by_bytenr = RB_ROOT;
+
+/*
+ * List of interior tree blocks. We walk this list after loading the
+ * extent tree to resolve implied refs. For each interior node we'll
+ * place a shared ref in the ref tree against each child object. This
+ * allows the shared ref resolving code to do the actual work later of
+ * finding roots to account against.
+ *
+ * An implied ref is when a tree block has refs on it that may not
+ * exist in any of its child nodes. Even though the refs might not
+ * exist further down the tree, the fact that our interior node has a
+ * ref means we need to account anything below it to all its roots.
+ */
+static struct ulist *tree_blocks = NULL; /* unode->val = bytenr, ->aux
+ * = tree_block pointer */
+/* Payload stored for each entry of tree_blocks (see alloc_tree_block()). */
+struct tree_block {
+ /* Tree level of the block; scan_extents() only records blocks with level > 0. */
+ int level;
+ /* Size of the block in bytes. */
+ u64 num_bytes;
+};
+
+/*
+ * One reference to the extent at bytenr.  alloc_ref() never allows parent and
+ * root to be non-zero at the same time: a ref either names the owning root
+ * directly or names the parent tree block it is shared through.
+ */
+struct ref {
+ u64 bytenr;
+ u64 num_bytes;
+ /* bytenr of the referencing tree block for shared refs, otherwise 0 */
+ u64 parent;
+ /* id of the referencing root for direct refs, otherwise 0 */
+ u64 root;
+
+ /* Linkage in the by_bytenr rbtree. */
+ struct rb_node bytenr_node;
+};
+
+#ifdef QGROUP_VERIFY_DEBUG
+/*
+ * Debug helper: print the fields of @ref on one line.  The u64 fields are
+ * cast to unsigned long long so that they match the %llu conversions.
+ */
+static void print_ref(struct ref *ref)
+{
+	printf("bytenr: %llu\t\tnum_bytes: %llu\t\t parent: %llu\t\t"
+	       "root: %llu\n",
+	       (unsigned long long)ref->bytenr,
+	       (unsigned long long)ref->num_bytes,
+	       (unsigned long long)ref->parent,
+	       (unsigned long long)ref->root);
+}
+
+/*
+ * Debug helper: print every ref in the by_bytenr tree in tree order, then a
+ * summary line with the number of extents scanned and refs printed.
+ */
+static void print_all_refs(void)
+{
+	unsigned long nr_refs = 0;
+	struct rb_node *cur;
+
+	for (cur = rb_first(&by_bytenr); cur; cur = rb_next(cur)) {
+		print_ref(rb_entry(cur, struct ref, bytenr_node));
+		nr_refs++;
+	}
+
+	printf("%lu extents scanned with %lu refs in total.\n",
+	       tot_extents_scanned, nr_refs);
+}
+#endif
+
+/*
+ * Ordering used by the by_bytenr rbtree.
+ *
+ * Refs sort in ascending order by bytenr, then parent, then root.  Full refs
+ * have parent == 0 and therefore come before the shared refs of the same
+ * bytenr.
+ *
+ * Returns -1, 0 or 1 when (bytenr, parent, root) sorts before, equal to or
+ * after @orig.
+ */
+static int compare_ref(struct ref *orig, u64 bytenr, u64 root, u64 parent)
+{
+	if (bytenr != orig->bytenr)
+		return bytenr < orig->bytenr ? -1 : 1;
+
+	if (parent != orig->parent)
+		return parent < orig->parent ? -1 : 1;
+
+	if (root != orig->root)
+		return root < orig->root ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Link @ref into the by_bytenr tree.
+ *
+ * Returns @ref after inserting it, or the entry already in the tree when one
+ * with the same (bytenr, parent, root) exists; in that case @ref is left
+ * unlinked.
+ */
+static struct ref *insert_ref(struct ref *ref)
+{
+	struct rb_node **link = &by_bytenr.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		struct ref *existing;
+		int cmp;
+
+		parent = *link;
+		existing = rb_entry(parent, struct ref, bytenr_node);
+
+		cmp = compare_ref(existing, ref->bytenr, ref->root,
+				  ref->parent);
+		if (cmp == 0)
+			return existing;
+		link = (cmp < 0) ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&ref->bytenr_node, parent, link);
+	rb_insert_color(&ref->bytenr_node, &by_bytenr);
+	return ref;
+}
+
+/*
+ * Partial lookup: return the first (leftmost) ref whose bytenr matches, or
+ * NULL when there is none.  Callers walk forward from the result with
+ * rb_next() to see the remaining refs of that bytenr.
+ *
+ * Because of the tree ordering the leftmost refs are the full refs, which
+ * the root-resolving code relies on.
+ */
+static struct ref *find_ref_bytenr(u64 bytenr)
+{
+	struct rb_node *cur = by_bytenr.rb_node;
+	struct rb_node *left;
+	struct ref *found = NULL;
+
+	/* Plain binary search for any ref with this bytenr. */
+	while (cur) {
+		struct ref *candidate = rb_entry(cur, struct ref, bytenr_node);
+
+		if (bytenr < candidate->bytenr) {
+			cur = cur->rb_left;
+		} else if (bytenr > candidate->bytenr) {
+			cur = cur->rb_right;
+		} else {
+			found = candidate;
+			break;
+		}
+	}
+	if (!found)
+		return NULL;
+
+	/* Step backwards for as long as the predecessor has the same bytenr. */
+	for (left = rb_prev(&found->bytenr_node); left; left = rb_prev(left)) {
+		struct ref *before = rb_entry(left, struct ref, bytenr_node);
+
+		if (before->bytenr != found->bytenr)
+			break;
+		found = before;
+	}
+	return found;
+}
+
+/*
+ * Exact lookup: return the ref matching (bytenr, parent, root), or NULL when
+ * the tree holds no such ref.
+ */
+static struct ref *find_ref(u64 bytenr, u64 root, u64 parent)
+{
+	struct rb_node *cur = by_bytenr.rb_node;
+
+	while (cur != NULL) {
+		struct ref *candidate = rb_entry(cur, struct ref, bytenr_node);
+		int cmp = compare_ref(candidate, bytenr, root, parent);
+
+		if (cmp == 0)
+			return candidate;
+		cur = (cmp < 0) ? cur->rb_left : cur->rb_right;
+	}
+	return NULL;
+}
+
+/*
+ * Return the ref for (bytenr, root, parent), creating and inserting it when
+ * it does not exist yet.  @num_bytes is only stored on a newly created ref.
+ * At most one of @parent and @root may be non-zero.
+ *
+ * Returns NULL when a new ref cannot be allocated.
+ */
+static struct ref *alloc_ref(u64 bytenr, u64 root, u64 parent, u64 num_bytes)
+{
+	struct ref *ref = find_ref(bytenr, root, parent);
+
+	BUG_ON(parent && root);
+
+	if (ref != NULL)
+		return ref;
+
+	ref = calloc(1, sizeof(*ref));
+	if (ref == NULL)
+		return NULL;
+
+	ref->bytenr = bytenr;
+	ref->root = root;
+	ref->parent = parent;
+	ref->num_bytes = num_bytes;
+	insert_ref(ref);
+
+	return ref;
+}
+
+/* Release the struct ref that embeds @node. */
+static void free_ref_node(struct rb_node *node)
+{
+	free(rb_entry(node, struct ref, bytenr_node));
+}
+
+/* Provides free_ref_tree(), which qgroup_verify_all() uses to empty by_bytenr. */
+FREE_RB_BASED_TREE(ref, free_ref_node);
+
+/*
+ * Resolves all the possible roots for the ref at parent.
+ *
+ * Every ref recorded for bytenr == @parent is visited: a ref that names a
+ * root directly adds that root id to @roots, any other ref is resolved by
+ * recursing on its own parent.  At least one ref for @parent must exist.
+ *
+ * NOTE(review): the return value of ulist_add() is ignored here.
+ */
+static void find_parent_roots(struct ulist *roots, u64 parent)
+{
+	struct ref *ref;
+	struct rb_node *node;
+	struct rb_node *prev_node;
+
+	/*
+	 * Search the rbtree for the first ref with bytenr == parent.
+	 * Walk forward so long as bytenr == parent, adding resolved root ids.
+	 * For each unresolved root, we recurse
+	 */
+	ref = find_ref_bytenr(parent);
+	/* Validate the lookup before anything is derived from ref. */
+	BUG_ON(ref == NULL);
+	BUG_ON(ref->bytenr != parent);
+	node = &ref->bytenr_node;
+
+	/*
+	 * Random sanity check, are we actually getting the
+	 * leftmost node?
+	 */
+	prev_node = rb_prev(node);
+	if (prev_node) {
+		struct ref *prev = rb_entry(prev_node, struct ref, bytenr_node);
+
+		BUG_ON(prev->bytenr == parent);
+	}
+
+	do {
+		if (ref->root)
+			ulist_add(roots, ref->root, 0, 0);
+		else
+			find_parent_roots(roots, ref->parent);
+
+		node = rb_next(node);
+		if (node)
+			ref = rb_entry(node, struct ref, bytenr_node);
+	} while (node && ref->bytenr == parent);
+}
+
+static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
+			      struct ulist *roots);
+/*
+ * Account each ref. Walk the refs, for each set of refs in a
+ * given bytenr:
+ *
+ * - add the roots for direct refs to the ref roots ulist
+ *
+ * - resolve all possible roots for shared refs, insert each
+ *   of those into ref_roots ulist (this is a recursive process)
+ *
+ * - Walk ref_roots ulist, adding extent bytes to each qgroup count that
+ *   corresponds to a found root.
+ *
+ * @do_qgroups:    when non-zero, add the extent's bytes to the qgroup of
+ *                 every fs-tree root found, via add_bytes()
+ * @search_subvol: when non-zero, print every extent whose roots include this
+ *                 subvolume id, via print_subvol_info()
+ *
+ * An extent counts as exclusive when exactly one root references it.
+ *
+ * NOTE(review): the return value of ulist_add() is ignored here.
+ */
+static void account_all_refs(int do_qgroups, u64 search_subvol)
+{
+	int exclusive;
+	struct ref *ref;
+	struct rb_node *node;
+	u64 bytenr, num_bytes;
+	struct ulist *roots = ulist_alloc(0);
+	struct ulist_iterator uiter;
+	struct ulist_node *unode;
+
+	/*
+	 * This function cannot report failure to its callers, so stop
+	 * explicitly instead of handing a NULL list to ulist_reinit().
+	 */
+	BUG_ON(roots == NULL);
+
+	node = rb_first(&by_bytenr);
+	while (node) {
+		ulist_reinit(roots);
+
+		ref = rb_entry(node, struct ref, bytenr_node);
+		/*
+		 * Walk forward through the list of refs for this
+		 * bytenr, adding roots to our ulist. If it's a full
+		 * ref, then we have the easy case. Otherwise we need
+		 * to search for roots.
+		 */
+		bytenr = ref->bytenr;
+		num_bytes = ref->num_bytes;
+		do {
+			BUG_ON(ref->bytenr != bytenr);
+			BUG_ON(ref->num_bytes != num_bytes);
+			if (ref->root)
+				ulist_add(roots, ref->root, 0, 0);
+			else
+				find_parent_roots(roots, ref->parent);
+
+			/*
+			 * When we leave this inner loop, node is set
+			 * to next in our tree and will be turned into
+			 * a ref object up top
+			 */
+			node = rb_next(node);
+			if (node)
+				ref = rb_entry(node, struct ref, bytenr_node);
+		} while (node && ref->bytenr == bytenr);
+
+		/*
+		 * Now that we have all roots, we can properly account
+		 * this extent against the corresponding qgroups.
+		 */
+		if (roots->nnodes == 1)
+			exclusive = 1;
+		else
+			exclusive = 0;
+
+		if (search_subvol)
+			print_subvol_info(search_subvol, bytenr, num_bytes,
+					  roots);
+
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(roots, &uiter))) {
+			BUG_ON(unode->val == 0ULL);
+			/* We only want to account fs trees */
+			if (is_fstree(unode->val) && do_qgroups)
+				add_bytes(unode->val, num_bytes, exclusive);
+		}
+	}
+
+	ulist_free(roots);
+}
+
+/*
+ * Follow the first ref of @bytenr, and then of each successive parent, until
+ * a ref naming a root is reached; return that root id.  Every block on the
+ * way must have at least one ref recorded.
+ */
+static u64 resolve_one_root(u64 bytenr)
+{
+	u64 cur = bytenr;
+
+	for (;;) {
+		struct ref *first = find_ref_bytenr(cur);
+
+		BUG_ON(first == NULL);
+
+		if (first->root)
+			return first->root;
+		cur = first->parent;
+	}
+}
+
+/* Return the struct tree_block pointer stored in the aux field of @unode. */
+static inline struct tree_block *unode_tree_block(struct ulist_node *unode)
+{
+	struct tree_block *block = u64_to_ptr(unode->aux);
+
+	return block;
+}
+/* Return the bytenr stored in the val field of @unode. */
+static inline u64 unode_bytenr(struct ulist_node *unode)
+{
+	u64 bytenr = unode->val;
+
+	return bytenr;
+}
+
+/*
+ * Record an interior tree block in the tree_blocks list: @bytenr becomes the
+ * list value, a new struct tree_block holding @num_bytes and @level the aux
+ * pointer.
+ *
+ * Returns 0 on success, or -ENOMEM when the allocation fails or ulist_add()
+ * reports an error.
+ */
+static int alloc_tree_block(u64 bytenr, u64 num_bytes, int level)
+{
+	struct tree_block *block;
+
+	block = calloc(1, sizeof(*block));
+	if (block == NULL)
+		return -ENOMEM;
+
+	block->num_bytes = num_bytes;
+	block->level = level;
+
+	if (ulist_add(tree_blocks, bytenr, ptr_to_u64(block), 0) < 0) {
+		free(block);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Free every struct tree_block referenced from tree_blocks, then the list
+ * itself, and reset tree_blocks to NULL.  Does nothing when there is no list.
+ */
+static void free_tree_blocks(void)
+{
+	struct ulist_iterator uiter;
+	struct ulist_node *unode;
+
+	if (tree_blocks == NULL)
+		return;
+
+	ULIST_ITER_INIT(&uiter);
+	for (unode = ulist_next(tree_blocks, &uiter); unode != NULL;
+	     unode = ulist_next(tree_blocks, &uiter))
+		free(unode_tree_block(unode));
+
+	ulist_free(tree_blocks);
+	tree_blocks = NULL;
+}
+
+#ifdef QGROUP_VERIFY_DEBUG
+/*
+ * Debug helper: print @bytenr and the level of @block, followed by every ref
+ * recorded for that bytenr and an empty line.  A block without any recorded
+ * ref prints only the header and the empty line.
+ */
+static void print_tree_block(u64 bytenr, struct tree_block *block)
+{
+	struct ref *ref;
+	struct rb_node *node;
+
+	printf("tree block: %llu\t\tlevel: %d\n", (unsigned long long)bytenr,
+	       block->level);
+
+	ref = find_ref_bytenr(bytenr);
+	/* find_ref_bytenr() returns NULL when nothing is recorded. */
+	if (ref == NULL) {
+		printf("\n");
+		return;
+	}
+
+	node = &ref->bytenr_node;
+	do {
+		print_ref(ref);
+		node = rb_next(node);
+		if (node)
+			ref = rb_entry(node, struct ref, bytenr_node);
+	} while (node && ref->bytenr == bytenr);
+
+	printf("\n");
+}
+
+/*
+ * Debug helper: print a heading and then every entry of tree_blocks via
+ * print_tree_block().  Prints nothing when the list does not exist.
+ */
+static void print_all_tree_blocks(void)
+{
+	struct ulist_iterator uiter;
+	struct ulist_node *unode;
+
+	if (tree_blocks == NULL)
+		return;
+
+	printf("Listing all found interior tree nodes:\n");
+
+	ULIST_ITER_INIT(&uiter);
+	for (unode = ulist_next(tree_blocks, &uiter); unode != NULL;
+	     unode = ulist_next(tree_blocks, &uiter))
+		print_tree_block(unode_bytenr(unode), unode_tree_block(unode));
+}
+#endif
+
+/*
+ * Add a shared ref with parent @ref_parent for every data extent referenced
+ * by the file extent items of leaf @eb.  Inline extents and extents with a
+ * disk bytenr of 0 are skipped.
+ *
+ * Returns 0 on success or ENOMEM (positive) when a ref cannot be allocated.
+ */
+static int add_refs_for_leaf_items(struct extent_buffer *eb, u64 ref_parent)
+{
+	int slot;
+	int nritems = btrfs_header_nritems(eb);
+
+	for (slot = 0; slot < nritems; slot++) {
+		struct btrfs_file_extent_item *fi;
+		struct btrfs_disk_key disk_key;
+		struct btrfs_key key;
+		u64 disk_bytenr;
+		u64 disk_num_bytes;
+
+		btrfs_item_key(eb, &disk_key, slot);
+		btrfs_disk_key_to_cpu(&key, &disk_key);
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+		/* filter out: inline, disk_bytenr == 0, compressed?
+		 * not if we can avoid it */
+		if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
+			continue;
+
+		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+		if (disk_bytenr == 0)
+			continue;
+
+		disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+		if (!alloc_ref(disk_bytenr, 0, ref_parent, disk_num_bytes))
+			return ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the subtree rooted at the tree block @bytenr (of size @num_bytes) and
+ * record a shared ref with parent @ref_parent for every block and every data
+ * extent below it.  The starting block itself, where @bytenr == @ref_parent,
+ * gets no ref.  @info is not used directly.
+ *
+ * Returns 0 on success, -EIO when a block cannot be read, ENOMEM (positive)
+ * when a ref cannot be allocated.  The walk stops at the first failure so
+ * that the error is not overwritten by a later sibling.
+ */
+static int travel_tree(struct btrfs_fs_info *info, struct btrfs_root *root,
+		       u64 bytenr, u64 num_bytes, u64 ref_parent)
+{
+	int ret = 0;
+	int nr, i;
+	struct extent_buffer *eb;
+	u64 new_bytenr;
+	u64 new_num_bytes;
+
+	eb = read_tree_block(root, bytenr, num_bytes, 0);
+	if (!extent_buffer_uptodate(eb))
+		return -EIO;
+
+	/* Don't add a ref for our starting tree block to itself */
+	if (bytenr != ref_parent) {
+		if (alloc_ref(bytenr, 0, ref_parent, num_bytes) == NULL) {
+			/* Leave through "out" so that eb is released. */
+			ret = ENOMEM;
+			goto out;
+		}
+	}
+
+	if (btrfs_is_leaf(eb)) {
+		ret = add_refs_for_leaf_items(eb, ref_parent);
+		goto out;
+	}
+
+	/*
+	 * Interior nodes are tuples of (key, bytenr) where key is the
+	 * leftmost key in the tree block pointed to by bytenr. We
+	 * don't have to care about key here, just follow the bytenr
+	 * pointer.
+	 */
+	nr = btrfs_header_nritems(eb);
+	for (i = 0; i < nr; i++) {
+		new_bytenr = btrfs_node_blockptr(eb, i);
+		new_num_bytes = btrfs_level_size(root,
+						 btrfs_header_level(eb) - 1);
+
+		ret = travel_tree(info, root, new_bytenr, new_num_bytes,
+				  ref_parent);
+		if (ret)
+			break;
+	}
+
+out:
+	free_extent_buffer(eb);
+	return ret;
+}
+
+/*
+ * Add the implied refs for the interior tree block at @bytenr: resolve one
+ * root that owns the block, read that root and walk the subtree below the
+ * block with travel_tree().
+ *
+ * Returns 0 on success, ENOENT (positive) when the root cannot be read, or
+ * the error from travel_tree().
+ */
+static int add_refs_for_implied(struct btrfs_fs_info *info, u64 bytenr,
+				struct tree_block *block)
+{
+	u64 owner = resolve_one_root(bytenr);
+	struct btrfs_root *root;
+	struct btrfs_key root_key;
+
+	root_key.objectid = owner;
+	root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root_key.offset = (u64)-1;
+
+	/*
+	 * XXX: Don't free the root object as we don't know whether it
+	 * came off our fs_info struct or not.
+	 */
+	root = btrfs_read_fs_root(info, &root_key);
+	if (!root || IS_ERR(root))
+		return ENOENT;
+
+	return travel_tree(info, root, bytenr, block->num_bytes, bytenr);
+}
+
+/*
+ * Place shared refs in the ref tree for each child of an interior tree node.
+ *
+ * Calls add_refs_for_implied() for every entry of tree_blocks and stops at
+ * the first failure.  Returns 0 on success or that failure's error code.
+ */
+static int map_implied_refs(struct btrfs_fs_info *info)
+{
+	struct ulist_iterator uiter;
+	struct ulist_node *unode;
+
+	ULIST_ITER_INIT(&uiter);
+	for (unode = ulist_next(tree_blocks, &uiter); unode != NULL;
+	     unode = ulist_next(tree_blocks, &uiter)) {
+		int ret = add_refs_for_implied(info, unode_bytenr(unode),
+					       unode_tree_block(unode));
+
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Insert @qc into the counts tree, keyed by qgroupid.
+ *
+ * Returns 0 after inserting (and bumping counts.num_groups), or EEXIST
+ * (positive) when an entry with the same qgroupid is already present; the
+ * tree is left unchanged in that case.
+ */
+static int insert_count(struct qgroup_count *qc)
+{
+	struct rb_node **link = &counts.root.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		struct qgroup_count *existing;
+
+		parent = *link;
+		existing = rb_entry(parent, struct qgroup_count, rb_node);
+
+		if (qc->qgroupid == existing->qgroupid)
+			return EEXIST;
+		if (qc->qgroupid < existing->qgroupid)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	counts.num_groups++;
+	rb_link_node(&qc->rb_node, parent, link);
+	rb_insert_color(&qc->rb_node, &counts.root);
+	return 0;
+}
+
+/* Look up the qgroup record for @qgroupid; returns NULL when there is none. */
+static struct qgroup_count *find_count(u64 qgroupid)
+{
+	struct rb_node *cur = counts.root.rb_node;
+
+	while (cur != NULL) {
+		struct qgroup_count *candidate =
+			rb_entry(cur, struct qgroup_count, rb_node);
+
+		if (qgroupid == candidate->qgroupid)
+			return candidate;
+		if (qgroupid < candidate->qgroupid)
+			cur = cur->rb_left;
+		else
+			cur = cur->rb_right;
+	}
+	return NULL;
+}
+
+/*
+ * Create the record for one on-disk qgroup info item and insert it into the
+ * counts tree.  The qgroup id is the offset of @key; the on-disk counters
+ * are read from @disk inside @leaf, the computed counters start at zero.
+ *
+ * Returns the new record, or NULL when the allocation fails or a record with
+ * the same qgroup id already exists.
+ */
+static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
+					struct extent_buffer *leaf,
+					struct btrfs_qgroup_info_item *disk)
+{
+	struct qgroup_count *qc = calloc(1, sizeof(*qc));
+	struct qgroup_info *ondisk;
+
+	if (qc == NULL)
+		return NULL;
+
+	qc->qgroupid = btrfs_disk_key_offset(key);
+	qc->key = *key;
+
+	ondisk = &qc->diskinfo;
+	ondisk->referenced = btrfs_qgroup_info_referenced(leaf, disk);
+	ondisk->referenced_compressed =
+		btrfs_qgroup_info_referenced_compressed(leaf, disk);
+	ondisk->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
+	ondisk->exclusive_compressed =
+		btrfs_qgroup_info_exclusive_compressed(leaf, disk);
+
+	if (insert_count(qc) != 0) {
+		free(qc);
+		return NULL;
+	}
+	return qc;
+}
+
+/*
+ * Add @num_bytes to the computed counters of the qgroup @root_objectid.
+ * The referenced counters always grow; the exclusive counters grow only when
+ * @exclusive is non-zero.  Nothing happens when no such qgroup is known.
+ */
+static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive)
+{
+	struct qgroup_count *count = find_count(root_objectid);
+	struct qgroup_info *computed;
+
+	BUG_ON(num_bytes < 4096); /* Random sanity check. */
+
+	if (count == NULL)
+		return;
+
+	computed = &count->info;
+
+	/*
+	 * count of compressed bytes is unimplemented, so we do the
+	 * same as kernel.
+	 */
+	computed->referenced += num_bytes;
+	computed->referenced_compressed += num_bytes;
+
+	if (!exclusive)
+		return;
+
+	computed->exclusive += num_bytes;
+	computed->exclusive_compressed += num_bytes;
+}
+
+/*
+ * Walk the quota tree and create a qgroup_count record for every qgroup
+ * info item.  A record is marked subvol_exists when the root with the same
+ * id can be read.
+ *
+ * Returns 0 on success, a negative error from the tree search or leaf walk,
+ * or ENOMEM (positive) when a record cannot be created.  The search path is
+ * released on every exit.
+ */
+static int load_quota_info(struct btrfs_fs_info *info)
+{
+	int ret;
+	struct btrfs_root *root = info->quota_root;
+	struct btrfs_root *tmproot;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_key root_key;
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *leaf;
+	struct btrfs_qgroup_info_item *item;
+	struct qgroup_count *count;
+	int i, nr;
+
+	btrfs_init_path(&path);
+
+	key.offset = 0;
+	key.objectid = 0;
+	key.type = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: Couldn't search slot: %d\n", ret);
+		goto out;
+	}
+
+	while (1) {
+		leaf = path.nodes[0];
+
+		nr = btrfs_header_nritems(leaf);
+		for(i = 0; i < nr; i++) {
+			btrfs_item_key(leaf, &disk_key, i);
+			btrfs_disk_key_to_cpu(&key, &disk_key);
+
+			if (key.type == BTRFS_QGROUP_RELATION_KEY)
+				printf("Ignoring qgroup relation key %llu\n",
+				       (unsigned long long)key.objectid);
+
+			/*
+			 * Ignore: BTRFS_QGROUP_STATUS_KEY,
+			 * BTRFS_QGROUP_LIMIT_KEY, BTRFS_QGROUP_RELATION_KEY
+			 */
+			if (key.type != BTRFS_QGROUP_INFO_KEY)
+				continue;
+
+			item = btrfs_item_ptr(leaf, i,
+					      struct btrfs_qgroup_info_item);
+
+			count = alloc_count(&disk_key, leaf, item);
+			if (!count) {
+				ret = ENOMEM;
+				fprintf(stderr, "ERROR: out of memory\n");
+				goto out;
+			}
+
+			root_key.objectid = key.offset;
+			root_key.type = BTRFS_ROOT_ITEM_KEY;
+			root_key.offset = (u64)-1;
+			tmproot = btrfs_read_fs_root_no_cache(info, &root_key);
+			if (tmproot && !IS_ERR(tmproot)) {
+				count->subvol_exists = 1;
+				/*
+				 * NOTE(review): plain free() may not release
+				 * everything the root holds - confirm.
+				 */
+				free(tmproot);
+			}
+		}
+
+		ret = btrfs_next_leaf(root, &path);
+		if (ret != 0) {
+			/* Negative means failure, positive means no more leaves. */
+			if (ret < 0) {
+				fprintf(stderr,
+					"ERROR: Next leaf failed: %d\n", ret);
+				goto out;
+			}
+			break;
+		}
+	}
+
+	ret = 0;
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Record every ref stored inline in the extent item at @slot of @ei_leaf.
+ * @bytenr and @num_bytes describe the extent; @meta_item is non-zero when
+ * the item has a BTRFS_METADATA_ITEM_KEY key.  @info is not used.
+ *
+ * Returns 0 on success, 1 when an inline ref of unknown type is found, or
+ * ENOMEM (positive) when a ref cannot be allocated.
+ */
+static int add_inline_refs(struct btrfs_fs_info *info,
+			   struct extent_buffer *ei_leaf, int slot,
+			   u64 bytenr, u64 num_bytes, int meta_item)
+{
+	u32 item_size = btrfs_item_size_nr(ei_leaf, slot);
+	struct btrfs_extent_item *ei;
+	unsigned long cur;
+	unsigned long limit;
+	u64 flags;
+
+	ei = btrfs_item_ptr(ei_leaf, slot, struct btrfs_extent_item);
+	flags = btrfs_extent_flags(ei_leaf, ei);
+
+	/*
+	 * The inline refs start right after the extent item, except for a
+	 * tree block described by a non-metadata key, where a
+	 * btrfs_tree_block_info sits in between.
+	 */
+	if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) && !meta_item) {
+		struct btrfs_tree_block_info *tbinfo =
+			(struct btrfs_tree_block_info *)(ei + 1);
+
+		cur = (unsigned long)(tbinfo + 1);
+	} else {
+		cur = (unsigned long)(ei + 1);
+	}
+	limit = (unsigned long)ei + item_size;
+
+	while (cur < limit) {
+		struct btrfs_extent_inline_ref *iref =
+			(struct btrfs_extent_inline_ref *)cur;
+		struct btrfs_extent_data_ref *dref;
+		u64 root_obj = 0;
+		u64 parent = 0;
+		u64 offset = btrfs_extent_inline_ref_offset(ei_leaf, iref);
+		int type = btrfs_extent_inline_ref_type(ei_leaf, iref);
+
+		if (type == BTRFS_TREE_BLOCK_REF_KEY) {
+			root_obj = offset;
+		} else if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			root_obj = btrfs_extent_data_ref_root(ei_leaf, dref);
+		} else if (type == BTRFS_SHARED_DATA_REF_KEY ||
+			   type == BTRFS_SHARED_BLOCK_REF_KEY) {
+			parent = offset;
+		} else {
+			return 1;
+		}
+
+		if (alloc_ref(bytenr, root_obj, parent, num_bytes) == NULL)
+			return ENOMEM;
+
+		cur += btrfs_extent_inline_ref_size(type);
+	}
+
+	return 0;
+}
+
+/*
+ * Record the ref described by the stand-alone ref item (@key, @slot in
+ * @leaf) for the extent (@bytenr, @num_bytes).  @info is not used.
+ *
+ * Returns 0 on success, 1 for a key type that is not a known ref type, or
+ * ENOMEM (positive) when the ref cannot be allocated.
+ */
+static int add_keyed_ref(struct btrfs_fs_info *info,
+			 struct btrfs_key *key,
+			 struct extent_buffer *leaf, int slot,
+			 u64 bytenr, u64 num_bytes)
+{
+	struct btrfs_extent_data_ref *dref;
+	u64 owner_root = 0;
+	u64 shared_parent = 0;
+
+	if (key->type == BTRFS_TREE_BLOCK_REF_KEY) {
+		owner_root = key->offset;
+	} else if (key->type == BTRFS_EXTENT_DATA_REF_KEY) {
+		dref = btrfs_item_ptr(leaf, slot, struct btrfs_extent_data_ref);
+		owner_root = btrfs_extent_data_ref_root(leaf, dref);
+	} else if (key->type == BTRFS_SHARED_DATA_REF_KEY ||
+		   key->type == BTRFS_SHARED_BLOCK_REF_KEY) {
+		shared_parent = key->offset;
+	} else {
+		return 1;
+	}
+
+	if (!alloc_ref(bytenr, owner_root, shared_parent, num_bytes))
+		return ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Return the tree level recorded for the extent item at @slot of @ei_leaf.
+ *
+ * A return value of 0 means leaf or not metadata.  Callers need not tell the
+ * two apart: they only care about intermediate blocks, which always have
+ * level > 0.
+ */
+static int get_tree_block_level(struct btrfs_key *key,
+				struct extent_buffer *ei_leaf,
+				int slot)
+{
+	struct btrfs_extent_item *ei;
+	struct btrfs_tree_block_info *tbinfo;
+	u64 flags;
+
+	ei = btrfs_item_ptr(ei_leaf, slot, struct btrfs_extent_item);
+	flags = btrfs_extent_flags(ei_leaf, ei);
+
+	/* skinny metadata */
+	if (key->type == BTRFS_METADATA_ITEM_KEY)
+		return (int)key->offset;
+
+	if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
+		return 0;
+
+	/* The level sits in the tree block info after the extent item. */
+	tbinfo = (struct btrfs_tree_block_info *)(ei + 1);
+	return btrfs_tree_block_level(ei_leaf, tbinfo);
+}
+
+/*
+ * Walk the extent tree, allocating a ref item for every ref and
+ * storing it in the bytenr tree.
+ *
+ * Only items with @start <= objectid <= @end are considered.  Every extent
+ * that describes a tree block with level > 0 is also recorded in
+ * tree_blocks.
+ *
+ * Returns 0 on success, a negative error from the tree search or leaf walk,
+ * 1 for an unknown ref type, or ENOMEM (positive).  The search path is
+ * released on every exit.
+ */
+static int scan_extents(struct btrfs_fs_info *info,
+			u64 start, u64 end)
+{
+	int ret, i, nr, level;
+	struct btrfs_root *root = info->extent_root;
+	struct btrfs_key key;
+	struct btrfs_path path;
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *leaf;
+	u64 bytenr = 0, num_bytes = 0;
+
+	btrfs_init_path(&path);
+
+	key.objectid = start;
+	key.type = 0;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: Couldn't search slot: %d\n", ret);
+		goto out;
+	}
+	path.reada = 1;
+
+	while (1) {
+		leaf = path.nodes[0];
+
+		nr = btrfs_header_nritems(leaf);
+		for(i = 0; i < nr; i++) {
+			btrfs_item_key(leaf, &disk_key, i);
+			btrfs_disk_key_to_cpu(&key, &disk_key);
+
+			if (key.objectid < start)
+				continue;
+
+			if (key.objectid > end)
+				goto done;
+
+			if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+			    key.type == BTRFS_METADATA_ITEM_KEY) {
+				int meta = 0;
+
+				tot_extents_scanned++;
+
+				bytenr = key.objectid;
+				num_bytes = key.offset;
+				/*
+				 * For metadata items the key offset is the
+				 * level (see get_tree_block_level()), so the
+				 * size comes from the leaf size instead.
+				 */
+				if (key.type == BTRFS_METADATA_ITEM_KEY) {
+					num_bytes = info->extent_root->leafsize;
+					meta = 1;
+				}
+
+				ret = add_inline_refs(info, leaf, i, bytenr,
+						      num_bytes, meta);
+				if (ret)
+					goto out;
+
+				level = get_tree_block_level(&key, leaf, i);
+				if (level) {
+					if (alloc_tree_block(bytenr, num_bytes,
+							     level)) {
+						/* Leave through "out" to release the path. */
+						ret = ENOMEM;
+						goto out;
+					}
+				}
+
+				continue;
+			}
+
+			if (key.type > BTRFS_SHARED_DATA_REF_KEY)
+				continue;
+			if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
+				continue;
+
+			/*
+			 * Keyed refs should come after their extent
+			 * item in the tree. As a result, the value of
+			 * bytenr and num_bytes should be unchanged
+			 * from the above block that catches the
+			 * original extent item.
+			 */
+			BUG_ON(key.objectid != bytenr);
+
+			ret = add_keyed_ref(info, &key, leaf, i, bytenr,
+					    num_bytes);
+			if (ret)
+				goto out;
+		}
+
+		ret = btrfs_next_leaf(root, &path);
+		if (ret != 0) {
+			if (ret < 0) {
+				fprintf(stderr,
+					"ERROR: Next leaf failed: %d\n", ret);
+				goto out;
+			}
+			break;
+		}
+	}
+done:
+	ret = 0;
+out:
+	btrfs_release_path(&path);
+
+	return ret;
+}
+
+/*
+ * Print one report line: @prefix, then @type with @bytes, then @type with
+ * "compressed" and @bytes_compressed.
+ */
+static void print_fields(u64 bytes, u64 bytes_compressed, char *prefix,
+			 char *type)
+{
+	unsigned long long plain = (unsigned long long)bytes;
+	unsigned long long compressed = (unsigned long long)bytes_compressed;
+
+	printf("%s\t\t%s %llu %s compressed %llu\n",
+	       prefix, type, plain, type, compressed);
+}
+
+/* Same line layout as print_fields(), for signed values such as differences. */
+static void print_fields_signed(long long bytes,
+				long long bytes_compressed,
+				char *prefix, char *type)
+{
+	fprintf(stdout, "%s\t\t%s %lld %s compressed %lld\n",
+		prefix, type, bytes, type, bytes_compressed);
+}
+
+/*
+ * Print the computed and on-disk counters of one qgroup, plus their
+ * difference where they disagree.
+ *
+ * The group is printed when @verbose is non-zero, or when its referenced or
+ * exclusive counter differs from disk and its subvolume exists.  The
+ * "diff:" lines print the same value in the plain and compressed columns.
+ */
+static void print_qgroup_difference(struct qgroup_count *count, int verbose)
+{
+	struct qgroup_info *ours = &count->info;
+	struct qgroup_info *ondisk = &count->diskinfo;
+	long long excl_diff = ours->exclusive - ondisk->exclusive;
+	long long ref_diff = ours->referenced - ondisk->referenced;
+	int is_different = excl_diff || ref_diff;
+
+	if (!verbose && !(is_different && count->subvol_exists))
+		return;
+
+	printf("Counts for qgroup id: %llu %s\n",
+	       (unsigned long long)count->qgroupid,
+	       is_different ? "are different" : "");
+
+	print_fields(ours->referenced, ours->referenced_compressed,
+		     "our:", "referenced");
+	print_fields(ondisk->referenced, ondisk->referenced_compressed,
+		     "disk:", "referenced");
+	if (ref_diff)
+		print_fields_signed(ref_diff, ref_diff,
+				    "diff:", "referenced");
+
+	print_fields(ours->exclusive, ours->exclusive_compressed,
+		     "our:", "exclusive");
+	print_fields(ondisk->exclusive, ondisk->exclusive_compressed,
+		     "disk:", "exclusive");
+	if (excl_diff)
+		print_fields_signed(excl_diff, excl_diff,
+				    "diff:", "exclusive");
+}
+
+/*
+ * Report every qgroup in the counts tree in tree order.  @all is passed on
+ * as the verbose flag of print_qgroup_difference().
+ */
+void print_qgroup_report(int all)
+{
+	struct rb_node *cur;
+
+	for (cur = rb_first(&counts.root); cur; cur = rb_next(cur))
+		print_qgroup_difference(
+			rb_entry(cur, struct qgroup_count, rb_node), all);
+}
+
+/*
+ * Recompute the qgroup counters of the filesystem @info: load the on-disk
+ * qgroups, collect all extent refs, add the implied refs of interior tree
+ * blocks and account every extent against the qgroups.
+ *
+ * Returns 0 on success and also when quotas are not enabled; otherwise the
+ * non-zero error of the step that failed.  The qgroup records are kept for
+ * print_qgroup_report(); the ref tree and tree block list are freed.
+ */
+int qgroup_verify_all(struct btrfs_fs_info *info)
+{
+	int err;
+
+	if (!info->quota_enabled)
+		return 0;
+
+	tree_blocks = ulist_alloc(0);
+	if (tree_blocks == NULL) {
+		fprintf(stderr,
+			"ERROR: Out of memory while allocating ulist.\n");
+		return ENOMEM;
+	}
+
+	err = load_quota_info(info);
+	if (err != 0) {
+		fprintf(stderr, "ERROR: Loading qgroups from disk: %d\n", err);
+		goto cleanup;
+	}
+
+	/*
+	 * Put all extent refs into our rbtree
+	 */
+	err = scan_extents(info, 0, ~0ULL);
+	if (err != 0) {
+		fprintf(stderr, "ERROR: while scanning extent tree: %d\n", err);
+		goto cleanup;
+	}
+
+	err = map_implied_refs(info);
+	if (err != 0) {
+		fprintf(stderr, "ERROR: while mapping refs: %d\n", err);
+		goto cleanup;
+	}
+
+	account_all_refs(1, 0);
+
+cleanup:
+	/*
+	 * Don't free the qgroup count records as they will be walked
+	 * later via the print function.
+	 */
+	free_tree_blocks();
+	free_ref_tree(&by_bytenr);
+	return err;
+}
+
+/*
+ * Print one table row for an extent: @bytenr, @num_bytes, the number of
+ * roots and then every root id in @roots.  The u64 values are cast to
+ * unsigned long long so that they match the %llu conversions.
+ */
+static void __print_subvol_info(u64 bytenr, u64 num_bytes, struct ulist *roots)
+{
+	int n = roots->nnodes;
+	struct ulist_iterator uiter;
+	struct ulist_node *unode;
+
+	printf("%llu\t%llu\t%d\t", (unsigned long long)bytenr,
+	       (unsigned long long)num_bytes, n);
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(roots, &uiter))) {
+		printf("%llu ", (unsigned long long)unode->val);
+	}
+	printf("\n");
+}
+
+/*
+ * Print the row for extent (@bytenr, @num_bytes) when @subvolid is one of
+ * the ids in @roots; otherwise print nothing.  No id in @roots may be 0.
+ */
+static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
+			      struct ulist *roots)
+{
+	struct ulist_iterator uiter;
+	struct ulist_node *unode;
+
+	ULIST_ITER_INIT(&uiter);
+	for (unode = ulist_next(roots, &uiter); unode != NULL;
+	     unode = ulist_next(roots, &uiter)) {
+		BUG_ON(unode->val == 0ULL);
+		if (unode->val != subvolid)
+			continue;
+
+		__print_subvol_info(bytenr, num_bytes, roots);
+		return;
+	}
+}
+
+/*
+ * Print a table of extents together with the roots referencing them.  The
+ * refs are collected as in qgroup_verify_all(), but no qgroup accounting is
+ * done; a row is printed only for extents referenced by @subvol, and only
+ * when @subvol is non-zero.
+ *
+ * Returns 0 on success or the non-zero error of the step that failed.
+ */
+int print_extent_state(struct btrfs_fs_info *info, u64 subvol)
+{
+	int err;
+
+	tree_blocks = ulist_alloc(0);
+	if (tree_blocks == NULL) {
+		fprintf(stderr,
+			"ERROR: Out of memory while allocating ulist.\n");
+		return ENOMEM;
+	}
+
+	/*
+	 * Put all extent refs into our rbtree
+	 */
+	err = scan_extents(info, 0, ~0ULL);
+	if (err != 0) {
+		fprintf(stderr, "ERROR: while scanning extent tree: %d\n", err);
+		goto cleanup;
+	}
+
+	err = map_implied_refs(info);
+	if (err != 0) {
+		fprintf(stderr, "ERROR: while mapping refs: %d\n", err);
+		goto cleanup;
+	}
+
+	printf("Offset\t\tLen\tRoot Refs\tRoots\n");
+	account_all_refs(0, subvol);
+
+cleanup:
+	free_tree_blocks();
+	free_ref_tree(&by_bytenr);
+	return err;
+}
+
diff --git a/qgroup-verify.h b/qgroup-verify.h
new file mode 100644
index 00000000..7d91c197
--- /dev/null
+++ b/qgroup-verify.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2014 SUSE. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_QGROUP_VERIFY_H__
+#define __BTRFS_QGROUP_VERIFY_H__
+
+#include "kerncompat.h"
+#include "ctree.h"
+
+int qgroup_verify_all(struct btrfs_fs_info *info);
+void print_qgroup_report(int all);
+
+int print_extent_state(struct btrfs_fs_info *info, u64 subvol);
+
+#endif
diff --git a/qgroup.c b/qgroup.c
new file mode 100644
index 00000000..a672ac04
--- /dev/null
+++ b/qgroup.c
@@ -0,0 +1,1390 @@
+/*
+ * Copyright (C) 2012 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "qgroup.h"
+#include <sys/ioctl.h>
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+#include <errno.h>
+
+/* Number of filter slots added each time a filter set is allocated or grown */
+#define BTRFS_QGROUP_NFILTERS_INCREASE (2 * BTRFS_QGROUP_FILTER_MAX)
+/* Number of comparer slots added each time a comparer set is allocated or grown */
+#define BTRFS_QGROUP_NCOMPS_INCREASE (2 * BTRFS_QGROUP_COMP_MAX)
+
+/* Wrapper around the root of an rbtree whose nodes are struct btrfs_qgroup */
+struct qgroup_lookup {
+ struct rb_root root;
+};
+
+/*
+ * In-memory record of one qgroup, assembled from the info, limit and
+ * relation items found in the quota tree.
+ */
+struct btrfs_qgroup {
+ struct rb_node rb_node; /* link in the lookup tree keyed by qgroupid */
+ struct rb_node sort_node; /* link in the tree ordered by the comparer set */
+ /*
+ * all_parent_node links the qgroup into the temporary tree that
+ * filter_by_all_parent() builds while walking up the parent chain
+ */
+ struct rb_node all_parent_node;
+ u64 qgroupid;
+
+ /*
+ * values taken from the qgroup info item
+ */
+ u64 generation;
+ u64 rfer; /*referenced*/
+ u64 rfer_cmpr; /*referenced compressed*/
+ u64 excl; /*exclusive*/
+ u64 excl_cmpr; /*exclusive compressed*/
+
+ /*
+ * values taken from the qgroup limit item
+ */
+ u64 flags; /*which limits are set*/
+ u64 max_rfer;
+ u64 max_excl;
+ u64 rsv_rfer;
+ u64 rsv_excl;
+
+ /*qgroups this group is member of*/
+ struct list_head qgroups;
+ /*qgroups that are members of this group*/
+ struct list_head members;
+};
+
+/*
+ * Glue structure representing one parent/member relation between two
+ * qgroups. The same record is linked into both ends of the relation.
+ */
+struct btrfs_qgroup_list {
+ struct list_head next_qgroup; /* entry in member->qgroups */
+ struct list_head next_member; /* entry in qgroup->members */
+ struct btrfs_qgroup *qgroup; /* the parent side */
+ struct btrfs_qgroup *member; /* the child side */
+};
+
+/*
+ * Table of output columns, in the order of enum btrfs_qgroup_column_enum.
+ * Only qgroupid, rfer and excl have need_print set by default; the others
+ * are switched on through btrfs_qgroup_setup_print_column().
+ * max_len is the initial column width; __update_columns_max_len() widens it
+ * when an entry needs more room. The last entry is a NULL-named terminator.
+ */
+static struct {
+ char *name; /* text printed as the table heading */
+ char *column_name;
+ int need_print; /* non-zero: column is part of the output */
+ unsigned unit_mode; /* passed to pretty_size_mode() for size columns */
+ int max_len; /* current column width in characters */
+} btrfs_qgroup_columns[] = {
+ {
+ .name = "qgroupid",
+ .column_name = "Qgroupid",
+ .need_print = 1,
+ .unit_mode = 0,
+ .max_len = 8,
+ },
+ {
+ .name = "rfer",
+ .column_name = "Rfer",
+ .need_print = 1,
+ .unit_mode = UNITS_DEFAULT,
+ .max_len = 12,
+ },
+ {
+ .name = "excl",
+ .column_name = "Excl",
+ .need_print = 1,
+ .unit_mode = UNITS_DEFAULT,
+ .max_len = 12,
+ },
+ { .name = "max_rfer",
+ .column_name = "Max_rfer",
+ .need_print = 0,
+ .unit_mode = UNITS_DEFAULT,
+ .max_len = 12,
+ },
+ {
+ .name = "max_excl",
+ .column_name = "Max_excl",
+ .need_print = 0,
+ .unit_mode = UNITS_DEFAULT,
+ .max_len = 12,
+ },
+ {
+ .name = "parent",
+ .column_name = "Parent",
+ .need_print = 0,
+ .unit_mode = 0,
+ .max_len = 7,
+ },
+ {
+ .name = "child",
+ .column_name = "Child",
+ .need_print = 0,
+ .unit_mode = 0,
+ .max_len = 5,
+ },
+ {
+ .name = NULL,
+ .column_name = NULL,
+ .need_print = 0,
+ .unit_mode = 0,
+ },
+};
+
+static btrfs_qgroup_filter_func all_filter_funcs[];
+static btrfs_qgroup_comp_func all_comp_funcs[];
+
+/*
+ * Enable printing of @column, or of every column when @column is
+ * BTRFS_QGROUP_ALL. Values outside [0, BTRFS_QGROUP_ALL] hit BUG_ON.
+ */
+void btrfs_qgroup_setup_print_column(enum btrfs_qgroup_column_enum column)
+{
+	int idx;
+
+	BUG_ON(column < 0 || column > BTRFS_QGROUP_ALL);
+
+	if (column >= BTRFS_QGROUP_ALL) {
+		for (idx = 0; idx < BTRFS_QGROUP_ALL; idx++)
+			btrfs_qgroup_columns[idx].need_print = 1;
+	} else {
+		btrfs_qgroup_columns[column].need_print = 1;
+	}
+}
+
+/* Apply @unit_mode to the four size columns (rfer, excl, max_rfer, max_excl) */
+void btrfs_qgroup_setup_units(unsigned unit_mode)
+{
+	static const enum btrfs_qgroup_column_enum size_columns[] = {
+		BTRFS_QGROUP_RFER,
+		BTRFS_QGROUP_EXCL,
+		BTRFS_QGROUP_MAX_RFER,
+		BTRFS_QGROUP_MAX_EXCL,
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(size_columns) / sizeof(size_columns[0]); i++)
+		btrfs_qgroup_columns[size_columns[i]].unit_mode = unit_mode;
+}
+
+/*
+ * Print the comma-separated "level/subvid" list of the qgroups that
+ * @qgroup is a member of, or "---" when there are none.
+ * Returns the accumulated printf() results (characters written).
+ */
+static int print_parent_column(struct btrfs_qgroup *qgroup)
+{
+	struct list_head *parents = &qgroup->qgroups;
+	struct btrfs_qgroup_list *rel = NULL;
+	int written = 0;
+
+	if (list_empty(parents))
+		return printf("---");
+
+	list_for_each_entry(rel, parents, next_qgroup) {
+		written += printf("%llu/%llu",
+				  btrfs_qgroup_level(rel->qgroup->qgroupid),
+				  btrfs_qgroup_subvid(rel->qgroup->qgroupid));
+		if (list_is_last(&rel->next_qgroup, parents))
+			continue;
+		written += printf(",");
+	}
+
+	return written;
+}
+
+/*
+ * Print the comma-separated "level/subvid" list of the qgroups that are
+ * members of @qgroup, or "---" when there are none.
+ * Returns the accumulated printf() results (characters written).
+ */
+static int print_child_column(struct btrfs_qgroup *qgroup)
+{
+	struct list_head *children = &qgroup->members;
+	struct btrfs_qgroup_list *rel = NULL;
+	int written = 0;
+
+	if (list_empty(children))
+		return printf("---");
+
+	list_for_each_entry(rel, children, next_member) {
+		written += printf("%llu/%llu",
+				  btrfs_qgroup_level(rel->member->qgroupid),
+				  btrfs_qgroup_subvid(rel->member->qgroupid));
+		if (list_is_last(&rel->next_member, children))
+			continue;
+		written += printf(",");
+	}
+
+	return written;
+}
+
+/*
+ * Pad a table cell with spaces up to the current width of its column.
+ *
+ * @column: column whose max_len is the target width
+ * @len:    number of characters already printed in this cell
+ *
+ * A cell that already fills or exceeds the column gets no padding; a
+ * negative difference is treated as zero instead of being counted down
+ * through the whole int range.
+ */
+static void print_qgroup_column_add_blank(enum btrfs_qgroup_column_enum column,
+					  int len)
+{
+	int pad = btrfs_qgroup_columns[column].max_len - len;
+
+	if (pad > 0)
+		printf("%*s", pad, "");
+}
+
+/*
+ * Print one cell of the qgroup table.
+ *
+ * The qgroupid, parent and child cells are left-aligned and padded
+ * afterwards; the size cells are right-aligned to the column width. A limit
+ * whose flag bit is not set is shown as "none".
+ */
+static void print_qgroup_column(struct btrfs_qgroup *qgroup,
+				enum btrfs_qgroup_column_enum column)
+{
+	const char *text;
+	int unit_mode;
+	int width;
+	int printed;
+
+	BUG_ON(column >= BTRFS_QGROUP_ALL || column < 0);
+	unit_mode = btrfs_qgroup_columns[column].unit_mode;
+	width = btrfs_qgroup_columns[column].max_len;
+
+	switch (column) {
+	case BTRFS_QGROUP_QGROUPID:
+		printed = printf("%llu/%llu",
+				 btrfs_qgroup_level(qgroup->qgroupid),
+				 btrfs_qgroup_subvid(qgroup->qgroupid));
+		print_qgroup_column_add_blank(BTRFS_QGROUP_QGROUPID, printed);
+		return;
+	case BTRFS_QGROUP_PARENT:
+		printed = print_parent_column(qgroup);
+		print_qgroup_column_add_blank(BTRFS_QGROUP_PARENT, printed);
+		return;
+	case BTRFS_QGROUP_CHILD:
+		printed = print_child_column(qgroup);
+		print_qgroup_column_add_blank(BTRFS_QGROUP_CHILD, printed);
+		return;
+	case BTRFS_QGROUP_RFER:
+		text = pretty_size_mode(qgroup->rfer, unit_mode);
+		break;
+	case BTRFS_QGROUP_EXCL:
+		text = pretty_size_mode(qgroup->excl, unit_mode);
+		break;
+	case BTRFS_QGROUP_MAX_RFER:
+		if (qgroup->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
+			text = pretty_size_mode(qgroup->max_rfer, unit_mode);
+		else
+			text = "none";
+		break;
+	case BTRFS_QGROUP_MAX_EXCL:
+		if (qgroup->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+			text = pretty_size_mode(qgroup->max_excl, unit_mode);
+		else
+			text = "none";
+		break;
+	default:
+		return;
+	}
+
+	/* size columns are right-aligned */
+	printf("%*s", width, text);
+}
+
+/*
+ * Print one table row for @qgroup: every enabled column followed by a
+ * single space (no trailing space after the child column), then a newline.
+ */
+static void print_single_qgroup_table(struct btrfs_qgroup *qgroup)
+{
+	int col;
+
+	for (col = 0; col < BTRFS_QGROUP_ALL; col++) {
+		if (btrfs_qgroup_columns[col].need_print) {
+			print_qgroup_column(qgroup, col);
+			if (col != BTRFS_QGROUP_CHILD)
+				printf(" ");
+		}
+	}
+	printf("\n");
+}
+
+/*
+ * Print the two heading rows: the column names, then a row of dashes as
+ * long as each name. The qgroupid, parent and child columns are
+ * left-aligned, all other columns right-aligned, each padded to max_len.
+ */
+static void print_table_head(void)
+{
+	int col;
+	int n;
+	int width;
+	int name_len;
+	int left_aligned;
+
+	/* first row: the names */
+	for (col = 0; col < BTRFS_QGROUP_ALL; col++) {
+		width = btrfs_qgroup_columns[col].max_len;
+		if (!btrfs_qgroup_columns[col].need_print)
+			continue;
+		left_aligned = col == BTRFS_QGROUP_QGROUPID ||
+			       col == BTRFS_QGROUP_PARENT ||
+			       col == BTRFS_QGROUP_CHILD;
+		if (left_aligned)
+			printf("%-*s", width, btrfs_qgroup_columns[col].name);
+		else
+			printf("%*s", width, btrfs_qgroup_columns[col].name);
+		printf(" ");
+	}
+	printf("\n");
+
+	/* second row: dashes under each name, padded on the same side */
+	for (col = 0; col < BTRFS_QGROUP_ALL; col++) {
+		width = btrfs_qgroup_columns[col].max_len;
+		if (!btrfs_qgroup_columns[col].need_print)
+			continue;
+		name_len = strlen(btrfs_qgroup_columns[col].name);
+		left_aligned = col == BTRFS_QGROUP_QGROUPID ||
+			       col == BTRFS_QGROUP_PARENT ||
+			       col == BTRFS_QGROUP_CHILD;
+
+		if (!left_aligned)
+			for (n = width - name_len; n--; )
+				printf(" ");
+		for (n = name_len; n--; )
+			printf("-");
+		if (left_aligned)
+			for (n = width - name_len; n--; )
+				printf(" ");
+		printf(" ");
+	}
+	printf("\n");
+}
+
+/* Reset @tree to an empty rbtree (no nodes are freed) */
+static void qgroup_lookup_init(struct qgroup_lookup *tree)
+{
+ tree->root.rb_node = NULL;
+}
+
+/*
+ * Order two qgroups by qgroupid. Returns -1, 0 or 1 for ascending order;
+ * the sign is flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_qgroupid(struct btrfs_qgroup *entry1,
+				    struct btrfs_qgroup *entry2,
+				    int is_descending)
+{
+	u64 lhs = entry1->qgroupid;
+	u64 rhs = entry2->qgroupid;
+	int ret = (lhs > rhs) - (lhs < rhs);
+
+	return is_descending ? -ret : ret;
+}
+
+/*
+ * Order two qgroups by referenced size. Returns -1, 0 or 1 for ascending
+ * order; the sign is flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_rfer(struct btrfs_qgroup *entry1,
+				struct btrfs_qgroup *entry2,
+				int is_descending)
+{
+	u64 lhs = entry1->rfer;
+	u64 rhs = entry2->rfer;
+	int ret = (lhs > rhs) - (lhs < rhs);
+
+	return is_descending ? -ret : ret;
+}
+
+/*
+ * Order two qgroups by exclusive size. Returns -1, 0 or 1 for ascending
+ * order; the sign is flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_excl(struct btrfs_qgroup *entry1,
+				struct btrfs_qgroup *entry2,
+				int is_descending)
+{
+	u64 lhs = entry1->excl;
+	u64 rhs = entry2->excl;
+	int ret = (lhs > rhs) - (lhs < rhs);
+
+	return is_descending ? -ret : ret;
+}
+
+/*
+ * Order two qgroups by their max_rfer limit. Returns -1, 0 or 1 for
+ * ascending order; the sign is flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_max_rfer(struct btrfs_qgroup *entry1,
+				    struct btrfs_qgroup *entry2,
+				    int is_descending)
+{
+	u64 lhs = entry1->max_rfer;
+	u64 rhs = entry2->max_rfer;
+	int ret = (lhs > rhs) - (lhs < rhs);
+
+	return is_descending ? -ret : ret;
+}
+
+/*
+ * Order two qgroups by their max_excl limit. Returns -1, 0 or 1 for
+ * ascending order; the sign is flipped when @is_descending is non-zero.
+ */
+static int comp_entry_with_max_excl(struct btrfs_qgroup *entry1,
+				    struct btrfs_qgroup *entry2,
+				    int is_descending)
+{
+	u64 lhs = entry1->max_excl;
+	u64 rhs = entry2->max_excl;
+	int ret = (lhs > rhs) - (lhs < rhs);
+
+	return is_descending ? -ret : ret;
+}
+
+/* Comparison callback for each enum btrfs_qgroup_comp_enum value */
+static btrfs_qgroup_comp_func all_comp_funcs[] = {
+ [BTRFS_QGROUP_COMP_QGROUPID] = comp_entry_with_qgroupid,
+ [BTRFS_QGROUP_COMP_RFER] = comp_entry_with_rfer,
+ [BTRFS_QGROUP_COMP_EXCL] = comp_entry_with_excl,
+ [BTRFS_QGROUP_COMP_MAX_RFER] = comp_entry_with_max_rfer,
+ [BTRFS_QGROUP_COMP_MAX_EXCL] = comp_entry_with_max_excl
+};
+
+/*
+ * Sort key names accepted on the command line, indexed by
+ * enum btrfs_qgroup_comp_enum. The NULL entry at BTRFS_QGROUP_COMP_MAX
+ * terminates the table for pointer-walking loops.
+ */
+static char *all_sort_items[] = {
+ [BTRFS_QGROUP_COMP_QGROUPID] = "qgroupid",
+ [BTRFS_QGROUP_COMP_RFER] = "rfer",
+ [BTRFS_QGROUP_COMP_EXCL] = "excl",
+ [BTRFS_QGROUP_COMP_MAX_RFER] = "max_rfer",
+ [BTRFS_QGROUP_COMP_MAX_EXCL] = "max_excl",
+ [BTRFS_QGROUP_COMP_MAX] = NULL,
+};
+
+/*
+ * Translate a sort key name into its enum btrfs_qgroup_comp_enum index.
+ * Returns -1 when @sort_name matches none of all_sort_items[].
+ */
+static int btrfs_qgroup_get_sort_item(char *sort_name)
+{
+	int idx = 0;
+
+	while (idx < BTRFS_QGROUP_COMP_MAX) {
+		if (!strcmp(sort_name, all_sort_items[idx]))
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * Allocate a zeroed comparer set with room for
+ * BTRFS_QGROUP_NCOMPS_INCREASE comparers. Exits the program when the
+ * allocation fails, so the result is never NULL.
+ */
+struct btrfs_qgroup_comparer_set *btrfs_qgroup_alloc_comparer_set(void)
+{
+	struct btrfs_qgroup_comparer_set *set;
+	int bytes = sizeof(*set) +
+		    BTRFS_QGROUP_NCOMPS_INCREASE *
+		    sizeof(struct btrfs_qgroup_comparer);
+
+	set = calloc(1, bytes);
+	if (set == NULL) {
+		fprintf(stderr, "memory allocation failed\n");
+		exit(1);
+	}
+	set->total = BTRFS_QGROUP_NCOMPS_INCREASE;
+
+	return set;
+}
+
+/* Release a comparer set; the set is a single allocation */
+void btrfs_qgroup_free_comparer_set(struct btrfs_qgroup_comparer_set *comp_set)
+{
+ free(comp_set);
+}
+
+/*
+ * Append a comparer to *comp_set, growing the set by
+ * BTRFS_QGROUP_NCOMPS_INCREASE slots when it is full.
+ *
+ * @comp_set:      in/out; updated when the set had to be reallocated
+ * @comparer:      which key to compare on
+ * @is_descending: non-zero to reverse the order for this key
+ *
+ * Always returns 0; exits the program if growing the set fails.
+ */
+int btrfs_qgroup_setup_comparer(struct btrfs_qgroup_comparer_set **comp_set,
+				enum btrfs_qgroup_comp_enum comparer,
+				int is_descending)
+{
+	struct btrfs_qgroup_comparer_set *set = *comp_set;
+	struct btrfs_qgroup_comparer *slot;
+
+	BUG_ON(!set);
+	BUG_ON(comparer >= BTRFS_QGROUP_COMP_MAX);
+	BUG_ON(set->ncomps > set->total);
+
+	if (set->ncomps == set->total) {
+		struct btrfs_qgroup_comparer_set *grown;
+		int nr_slots = set->total + BTRFS_QGROUP_NCOMPS_INCREASE;
+		int bytes = sizeof(*set) +
+			    nr_slots * sizeof(struct btrfs_qgroup_comparer);
+
+		grown = realloc(set, bytes);
+		if (!grown) {
+			fprintf(stderr, "memory allocation failed\n");
+			free(set);
+			exit(1);
+		}
+		set = grown;
+
+		/* the new tail must read as "no comparer installed" */
+		memset(&set->comps[set->total], 0,
+		       BTRFS_QGROUP_NCOMPS_INCREASE *
+		       sizeof(struct btrfs_qgroup_comparer));
+		set->total += BTRFS_QGROUP_NCOMPS_INCREASE;
+		*comp_set = set;
+	}
+
+	slot = &set->comps[set->ncomps];
+	BUG_ON(slot->comp_func);
+
+	slot->comp_func = all_comp_funcs[comparer];
+	slot->is_descending = is_descending;
+	set->ncomps++;
+	return 0;
+}
+
+/*
+ * Compare two qgroups using every comparer in @set, in order; the first
+ * non-zero result wins. If everything compares equal and qgroupid was not
+ * one of the keys (or there is no usable set), fall back to ascending
+ * qgroupid order.
+ */
+static int sort_comp(struct btrfs_qgroup *entry1, struct btrfs_qgroup *entry2,
+		     struct btrfs_qgroup_comparer_set *set)
+{
+	int used_qgroupid = 0;
+	int i;
+
+	if (set) {
+		for (i = 0; i < set->ncomps && set->comps[i].comp_func; i++) {
+			int ret = set->comps[i].comp_func(entry1, entry2,
+						set->comps[i].is_descending);
+
+			if (ret)
+				return ret;
+			if (set->comps[i].comp_func == comp_entry_with_qgroupid)
+				used_qgroupid = 1;
+		}
+	}
+
+	/* qgroupid already compared equal: nothing left to break the tie */
+	if (used_qgroupid)
+		return 0;
+
+	return comp_entry_with_qgroupid(entry1, entry2, 0);
+}
+
+/*
+ * Insert @ins into the lookup tree, keyed by qgroupid.
+ * Returns 0 on success or -EEXIST when a qgroup with the same id is
+ * already in the tree (the tree is left unchanged in that case).
+ */
+static int qgroup_tree_insert(struct qgroup_lookup *root_tree,
+			      struct btrfs_qgroup *ins)
+{
+	struct rb_node **link = &root_tree->root.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct btrfs_qgroup *cur;
+		int cmp;
+
+		parent = *link;
+		cur = rb_entry(parent, struct btrfs_qgroup, rb_node);
+
+		cmp = comp_entry_with_qgroupid(ins, cur, 0);
+		if (cmp == 0)
+			return -EEXIST;
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&ins->rb_node, parent, link);
+	rb_insert_color(&ins->rb_node, &root_tree->root);
+	return 0;
+}
+
+/*
+ * Look up @qgroupid in the lookup tree.
+ * Returns the matching qgroup, or NULL when the id is not present.
+ */
+static struct btrfs_qgroup *qgroup_tree_search(struct qgroup_lookup *root_tree,
+					       u64 qgroupid)
+{
+	struct rb_node *node = root_tree->root.rb_node;
+
+	while (node) {
+		struct btrfs_qgroup *cur;
+
+		cur = rb_entry(node, struct btrfs_qgroup, rb_node);
+		if (qgroupid < cur->qgroupid)
+			node = node->rb_left;
+		else if (qgroupid > cur->qgroupid)
+			node = node->rb_right;
+		else
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Update an existing qgroup in @qgroup_lookup.
+ *
+ * Every non-zero value argument overwrites the matching field; zero means
+ * "leave unchanged". When both @pa and @child are given, a relation record
+ * is allocated and appended to child->qgroups and pa->members.
+ *
+ * Returns 0 on success or -ENOENT when @qgroupid is not in the tree. Exits
+ * the program if the relation record cannot be allocated.
+ */
+static int update_qgroup(struct qgroup_lookup *qgroup_lookup, u64 qgroupid,
+			 u64 generation, u64 rfer, u64 rfer_cmpr, u64 excl,
+			 u64 excl_cmpr, u64 flags, u64 max_rfer, u64 max_excl,
+			 u64 rsv_rfer, u64 rsv_excl, struct btrfs_qgroup *pa,
+			 struct btrfs_qgroup *child)
+{
+	struct btrfs_qgroup *bq;
+	struct btrfs_qgroup_list *list;
+
+	bq = qgroup_tree_search(qgroup_lookup, qgroupid);
+	if (!bq || bq->qgroupid != qgroupid)
+		return -ENOENT;
+
+	if (generation)
+		bq->generation = generation;
+	if (rfer)
+		bq->rfer = rfer;
+	if (rfer_cmpr)
+		bq->rfer_cmpr = rfer_cmpr;
+	if (excl)
+		bq->excl = excl;
+	if (excl_cmpr)
+		bq->excl_cmpr = excl_cmpr;
+	if (flags)
+		bq->flags = flags;
+	if (max_rfer)
+		bq->max_rfer = max_rfer;
+	if (max_excl)
+		bq->max_excl = max_excl;
+	if (rsv_rfer)
+		bq->rsv_rfer = rsv_rfer;
+	/* rsv_excl is part of the limit item like the other four values */
+	if (rsv_excl)
+		bq->rsv_excl = rsv_excl;
+	if (pa && child) {
+		list = malloc(sizeof(*list));
+		if (!list) {
+			fprintf(stderr, "memory allocation failed\n");
+			exit(1);
+		}
+		list->qgroup = pa;
+		list->member = child;
+		list_add_tail(&list->next_qgroup, &child->qgroups);
+		list_add_tail(&list->next_member, &pa->members);
+	}
+	return 0;
+}
+
+/*
+ * Update the qgroup @qgroupid if it already exists, otherwise create it
+ * from the given values and insert it into @qgroup_lookup.
+ *
+ * The value arguments have the same meaning as for update_qgroup(). When
+ * both @parent and @child are given, a relation record linking them is
+ * added to child->qgroups and parent->members.
+ *
+ * Returns 0 on success. Allocation or insertion failure prints a message
+ * to stderr and exits the program.
+ */
+static int add_qgroup(struct qgroup_lookup *qgroup_lookup, u64 qgroupid,
+		      u64 generation, u64 rfer, u64 rfer_cmpr, u64 excl,
+		      u64 excl_cmpr, u64 flags, u64 max_rfer, u64 max_excl,
+		      u64 rsv_rfer, u64 rsv_excl, struct btrfs_qgroup *parent,
+		      struct btrfs_qgroup *child)
+{
+	struct btrfs_qgroup *bq;
+	struct btrfs_qgroup_list *list;
+	int ret;
+
+	ret = update_qgroup(qgroup_lookup, qgroupid, generation, rfer,
+			    rfer_cmpr, excl, excl_cmpr, flags, max_rfer,
+			    max_excl, rsv_rfer, rsv_excl, parent, child);
+	if (!ret)
+		return 0;
+
+	bq = calloc(1, sizeof(*bq));
+	if (!bq) {
+		fprintf(stderr, "memory allocation failed\n");
+		exit(1);
+	}
+
+	/*
+	 * The list heads are initialised unconditionally: the free path
+	 * walks both lists and would follow NULL pointers on a zeroed head.
+	 */
+	INIT_LIST_HEAD(&bq->qgroups);
+	INIT_LIST_HEAD(&bq->members);
+
+	/* bq starts zeroed, so plain assignment equals "set if non-zero" */
+	bq->qgroupid = qgroupid;
+	bq->generation = generation;
+	bq->rfer = rfer;
+	bq->rfer_cmpr = rfer_cmpr;
+	bq->excl = excl;
+	bq->excl_cmpr = excl_cmpr;
+	bq->flags = flags;
+	bq->max_rfer = max_rfer;
+	bq->max_excl = max_excl;
+	bq->rsv_rfer = rsv_rfer;
+	bq->rsv_excl = rsv_excl;
+
+	if (parent && child) {
+		list = malloc(sizeof(*list));
+		if (!list) {
+			fprintf(stderr, "memory allocation failed\n");
+			exit(1);
+		}
+		list->qgroup = parent;
+		list->member = child;
+		list_add_tail(&list->next_qgroup, &child->qgroups);
+		list_add_tail(&list->next_member, &parent->members);
+	}
+
+	ret = qgroup_tree_insert(qgroup_lookup, bq);
+	if (ret) {
+		fprintf(stderr, "failed to insert tree %llu\n",
+			(unsigned long long)bq->qgroupid);
+		exit(1);
+	}
+	return ret;
+}
+
+/*
+ * Free @bq together with every relation record it takes part in. Each
+ * record is unlinked from both lists it sits on before being freed.
+ */
+static void __free_btrfs_qgroup(struct btrfs_qgroup *bq)
+{
+	struct btrfs_qgroup_list *rel;
+
+	/* relations in which bq is the member */
+	while (!list_empty(&bq->qgroups)) {
+		rel = list_entry(bq->qgroups.next, struct btrfs_qgroup_list,
+				 next_qgroup);
+		list_del(&rel->next_member);
+		list_del(&rel->next_qgroup);
+		free(rel);
+	}
+
+	/* relations in which bq is the parent */
+	while (!list_empty(&bq->members)) {
+		rel = list_entry(bq->members.next, struct btrfs_qgroup_list,
+				 next_member);
+		list_del(&rel->next_member);
+		list_del(&rel->next_qgroup);
+		free(rel);
+	}
+
+	free(bq);
+}
+
+/* Remove every qgroup from the lookup tree and free it */
+static void __free_all_qgroups(struct qgroup_lookup *root_tree)
+{
+	struct rb_node *node;
+
+	while ((node = rb_first(&root_tree->root)) != NULL) {
+		struct btrfs_qgroup *bq;
+
+		bq = rb_entry(node, struct btrfs_qgroup, rb_node);
+		rb_erase(node, &root_tree->root);
+		__free_btrfs_qgroup(bq);
+	}
+}
+
+/*
+ * Insert @bq into the temporary tree used by filter_by_all_parent(),
+ * linked through all_parent_node and keyed by qgroupid.
+ * Returns 0 on success or -EEXIST when the id is already in the tree.
+ */
+static int filter_all_parent_insert(struct qgroup_lookup *sort_tree,
+				    struct btrfs_qgroup *bq)
+{
+	struct rb_node **link = &sort_tree->root.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct btrfs_qgroup *cur;
+		int cmp;
+
+		parent = *link;
+		cur = rb_entry(parent, struct btrfs_qgroup, all_parent_node);
+
+		cmp = comp_entry_with_qgroupid(bq, cur, 0);
+		if (cmp == 0)
+			return -EEXIST;
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&bq->all_parent_node, parent, link);
+	rb_insert_color(&bq->all_parent_node, &sort_tree->root);
+	return 0;
+}
+
+/*
+ * Filter callback: @data carries a struct btrfs_qgroup pointer (0 if
+ * none). Returns 1 when @bq has the same qgroupid as that qgroup, else 0.
+ */
+static int filter_by_parent(struct btrfs_qgroup *bq, u64 data)
+{
+	struct btrfs_qgroup *wanted;
+
+	if (!data)
+		return 0;
+
+	wanted = (struct btrfs_qgroup *)(unsigned long)data;
+	return wanted->qgroupid == bq->qgroupid;
+}
+
+/*
+ * Filter callback: @data carries a struct btrfs_qgroup pointer (0 if
+ * none). Returns 1 when @bq is that qgroup or is reachable from it by
+ * following the "member of" relations upwards, else 0.
+ *
+ * A temporary tree holds the qgroups whose parents still have to be
+ * visited; it links entries through all_parent_node.
+ */
+static int filter_by_all_parent(struct btrfs_qgroup *bq, u64 data)
+{
+	struct qgroup_lookup pending;
+	struct btrfs_qgroup_list *rel;
+	struct btrfs_qgroup *cur;
+	struct rb_node *node;
+
+	if (!data)
+		return 0;
+
+	cur = (struct btrfs_qgroup *)(unsigned long)data;
+	if (bq->qgroupid == cur->qgroupid)
+		return 1;
+
+	qgroup_lookup_init(&pending);
+	filter_all_parent_insert(&pending, cur);
+
+	while ((node = rb_first(&pending.root)) != NULL) {
+		cur = rb_entry(node, struct btrfs_qgroup, all_parent_node);
+
+		/* queue every direct parent unless it is the one we want */
+		list_for_each_entry(rel, &cur->qgroups, next_qgroup) {
+			if (rel->qgroup->qgroupid == bq->qgroupid)
+				return 1;
+			filter_all_parent_insert(&pending, rel->qgroup);
+		}
+		rb_erase(node, &pending.root);
+	}
+	return 0;
+}
+
+/* Filter callback for each enum btrfs_qgroup_filter_enum value */
+static btrfs_qgroup_filter_func all_filter_funcs[] = {
+ [BTRFS_QGROUP_FILTER_PARENT] = filter_by_parent,
+ [BTRFS_QGROUP_FILTER_ALL_PARENT] = filter_by_all_parent,
+};
+
+/*
+ * Allocate a zeroed filter set with room for
+ * BTRFS_QGROUP_NFILTERS_INCREASE filters. Exits the program when the
+ * allocation fails, so the result is never NULL.
+ */
+struct btrfs_qgroup_filter_set *btrfs_qgroup_alloc_filter_set(void)
+{
+	struct btrfs_qgroup_filter_set *set;
+	int bytes = sizeof(*set) +
+		    BTRFS_QGROUP_NFILTERS_INCREASE *
+		    sizeof(struct btrfs_qgroup_filter);
+
+	set = calloc(1, bytes);
+	if (set == NULL) {
+		fprintf(stderr, "memory allocation failed\n");
+		exit(1);
+	}
+	set->total = BTRFS_QGROUP_NFILTERS_INCREASE;
+
+	return set;
+}
+
+/* Release a filter set; the set is a single allocation */
+void btrfs_qgroup_free_filter_set(struct btrfs_qgroup_filter_set *filter_set)
+{
+ free(filter_set);
+}
+
+/*
+ * Append a filter to *filter_set, growing the set by
+ * BTRFS_QGROUP_NFILTERS_INCREASE slots when it is full.
+ *
+ * @filter_set: in/out; updated when the set had to be reallocated
+ * @filter:     which filter callback to install
+ * @data:       opaque value handed to the callback
+ *
+ * Always returns 0; exits the program if growing the set fails.
+ */
+int btrfs_qgroup_setup_filter(struct btrfs_qgroup_filter_set **filter_set,
+			      enum btrfs_qgroup_filter_enum filter, u64 data)
+{
+	struct btrfs_qgroup_filter_set *set = *filter_set;
+	struct btrfs_qgroup_filter *slot;
+
+	BUG_ON(!set);
+	BUG_ON(filter >= BTRFS_QGROUP_FILTER_MAX);
+	BUG_ON(set->nfilters > set->total);
+
+	if (set->nfilters == set->total) {
+		struct btrfs_qgroup_filter_set *grown;
+		int nr_slots = set->total + BTRFS_QGROUP_NFILTERS_INCREASE;
+		int bytes = sizeof(*set) +
+			    nr_slots * sizeof(struct btrfs_qgroup_filter);
+
+		grown = realloc(set, bytes);
+		if (!grown) {
+			fprintf(stderr, "memory allocation failed\n");
+			free(set);
+			exit(1);
+		}
+		set = grown;
+
+		/* the new tail must read as "no filter installed" */
+		memset(&set->filters[set->total], 0,
+		       BTRFS_QGROUP_NFILTERS_INCREASE *
+		       sizeof(struct btrfs_qgroup_filter));
+		set->total += BTRFS_QGROUP_NFILTERS_INCREASE;
+		*filter_set = set;
+	}
+
+	slot = &set->filters[set->nfilters];
+	BUG_ON(slot->filter_func);
+
+	slot->filter_func = all_filter_funcs[filter];
+	slot->data = data;
+	set->nfilters++;
+	return 0;
+}
+
+/*
+ * Run @bq through every filter in @set.
+ * Returns 1 when all filters accept it (or there is no filter at all) and
+ * 0 as soon as one filter rejects it.
+ */
+static int filter_qgroup(struct btrfs_qgroup *bq,
+			 struct btrfs_qgroup_filter_set *set)
+{
+	int i;
+
+	if (!set)
+		return 1;
+
+	for (i = 0; i < set->nfilters && set->filters[i].filter_func; i++) {
+		if (!set->filters[i].filter_func(bq, set->filters[i].data))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Resolve the qgroupid stored in each parent/all-parent filter into a
+ * pointer to the matching qgroup in @lookup (0 when the id is unknown),
+ * which is the form filter_by_parent() and filter_by_all_parent() expect.
+ *
+ * A NULL @set is accepted and ignored, matching filter_qgroup(). The
+ * conversion rewrites filter data in place, so it must run only once per
+ * set.
+ */
+static void pre_process_filter_set(struct qgroup_lookup *lookup,
+				   struct btrfs_qgroup_filter_set *set)
+{
+	int i;
+
+	if (!set)
+		return;
+
+	for (i = 0; i < set->nfilters; i++) {
+		struct btrfs_qgroup_filter *filter = &set->filters[i];
+		struct btrfs_qgroup *qgroup_for_filter;
+
+		if (filter->filter_func != filter_by_all_parent &&
+		    filter->filter_func != filter_by_parent)
+			continue;
+
+		qgroup_for_filter = qgroup_tree_search(lookup, filter->data);
+		filter->data = (u64)(unsigned long)qgroup_for_filter;
+	}
+}
+
+/*
+ * Insert @bq into the sort tree, linked through sort_node and ordered by
+ * sort_comp() with @comp_set.
+ * Returns 0 on success or -EEXIST when an entry compares equal.
+ */
+static int sort_tree_insert(struct qgroup_lookup *sort_tree,
+			    struct btrfs_qgroup *bq,
+			    struct btrfs_qgroup_comparer_set *comp_set)
+{
+	struct rb_node **link = &sort_tree->root.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct btrfs_qgroup *cur;
+		int cmp;
+
+		parent = *link;
+		cur = rb_entry(parent, struct btrfs_qgroup, sort_node);
+
+		cmp = sort_comp(bq, cur, comp_set);
+		if (cmp == 0)
+			return -EEXIST;
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&bq->sort_node, parent, link);
+	rb_insert_color(&bq->sort_node, &sort_tree->root);
+	return 0;
+}
+
+/*
+ * Widen @column so the value of @bq fits: measure the text that would be
+ * printed for this qgroup and raise max_len when it is longer.
+ */
+static void __update_columns_max_len(struct btrfs_qgroup *bq,
+				     enum btrfs_qgroup_column_enum column)
+{
+	struct btrfs_qgroup_list *rel = NULL;
+	unsigned unit_mode;
+	char buf[100];
+	int width;
+
+	BUG_ON(column >= BTRFS_QGROUP_ALL || column < 0);
+	unit_mode = btrfs_qgroup_columns[column].unit_mode;
+
+	switch (column) {
+	case BTRFS_QGROUP_QGROUPID:
+		width = sprintf(buf, "%llu/%llu",
+				btrfs_qgroup_level(bq->qgroupid),
+				btrfs_qgroup_subvid(bq->qgroupid));
+		break;
+	case BTRFS_QGROUP_RFER:
+		width = strlen(pretty_size_mode(bq->rfer, unit_mode));
+		break;
+	case BTRFS_QGROUP_EXCL:
+		width = strlen(pretty_size_mode(bq->excl, unit_mode));
+		break;
+	case BTRFS_QGROUP_MAX_RFER:
+		width = strlen(pretty_size_mode(bq->max_rfer, unit_mode));
+		break;
+	case BTRFS_QGROUP_MAX_EXCL:
+		width = strlen(pretty_size_mode(bq->max_excl, unit_mode));
+		break;
+	case BTRFS_QGROUP_PARENT:
+		/* ids joined by single-character separators */
+		width = 0;
+		list_for_each_entry(rel, &bq->qgroups, next_qgroup) {
+			width += sprintf(buf, "%llu/%llu",
+				btrfs_qgroup_level(rel->qgroup->qgroupid),
+				btrfs_qgroup_subvid(rel->qgroup->qgroupid));
+			if (!list_is_last(&rel->next_qgroup, &bq->qgroups))
+				width += 1;
+		}
+		break;
+	case BTRFS_QGROUP_CHILD:
+		width = 0;
+		list_for_each_entry(rel, &bq->members, next_member) {
+			width += sprintf(buf, "%llu/%llu",
+				btrfs_qgroup_level(rel->member->qgroupid),
+				btrfs_qgroup_subvid(rel->member->qgroupid));
+			if (!list_is_last(&rel->next_member, &bq->members))
+				width += 1;
+		}
+		break;
+	default:
+		return;
+	}
+
+	if (btrfs_qgroup_columns[column].max_len < width)
+		btrfs_qgroup_columns[column].max_len = width;
+}
+
+/* Widen every enabled column so that the values of @bq fit */
+static void update_columns_max_len(struct btrfs_qgroup *bq)
+{
+	int col;
+
+	for (col = 0; col < BTRFS_QGROUP_ALL; col++) {
+		if (btrfs_qgroup_columns[col].need_print)
+			__update_columns_max_len(bq, col);
+	}
+}
+
+/*
+ * Build @sort_tree from the qgroups in @all_qgroups that pass
+ * @filter_set, ordered by @comp_set, and widen the output columns for each
+ * accepted entry. The lookup tree is walked from last to first.
+ */
+static void __filter_and_sort_qgroups(struct qgroup_lookup *all_qgroups,
+				 struct qgroup_lookup *sort_tree,
+				 struct btrfs_qgroup_filter_set *filter_set,
+				 struct btrfs_qgroup_comparer_set *comp_set)
+{
+	struct btrfs_qgroup *bq;
+	struct rb_node *node;
+
+	qgroup_lookup_init(sort_tree);
+	pre_process_filter_set(all_qgroups, filter_set);
+
+	for (node = rb_last(&all_qgroups->root); node; node = rb_prev(node)) {
+		bq = rb_entry(node, struct btrfs_qgroup, rb_node);
+
+		if (!filter_qgroup(bq, filter_set))
+			continue;
+
+		sort_tree_insert(sort_tree, bq, comp_set);
+		update_columns_max_len(bq);
+	}
+}
+
+/*
+ * Print at most one warning to stderr for the qgroup status @flags, in
+ * this priority order: quota disabled, rescan running, data inconsistent.
+ */
+static inline void print_status_flag_warning(u64 flags)
+{
+	const char *msg;
+
+	if (!(flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+		msg = "WARNING: Quota disabled, qgroup data may be out of date\n";
+	else if (flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+		msg = "WARNING: Rescan is running, qgroup data may be incorrect\n";
+	else if (flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)
+		msg = "WARNING: Qgroup data inconsistent, rescan recommended\n";
+	else
+		return;
+
+	fputs(msg, stderr);
+}
+
+/*
+ * Read the quota tree through BTRFS_IOC_TREE_SEARCH and fill
+ * @qgroup_lookup with one btrfs_qgroup per info/limit item plus the
+ * relations between them.
+ *
+ * Returns the ioctl result: negative when a search fails (a message is
+ * printed and errno is left as set by ioctl), otherwise the value of the
+ * last ioctl call. Entries already added are not freed on failure.
+ */
+static int __qgroups_search(int fd, struct qgroup_lookup *qgroup_lookup)
+{
+ int ret;
+ struct btrfs_ioctl_search_args args;
+ struct btrfs_ioctl_search_key *sk = &args.key;
+ struct btrfs_ioctl_search_header *sh;
+ unsigned long off = 0;
+ unsigned int i;
+ struct btrfs_qgroup_info_item *info;
+ struct btrfs_qgroup_limit_item *limit;
+ struct btrfs_qgroup *bq;
+ struct btrfs_qgroup *bq1;
+ /* a1..a5: scratch values decoded from the current info or limit item */
+ u64 a1;
+ u64 a2;
+ u64 a3;
+ u64 a4;
+ u64 a5;
+
+ memset(&args, 0, sizeof(args));
+
+ /* search the quota tree for every key type from STATUS to RELATION */
+ sk->tree_id = BTRFS_QUOTA_TREE_OBJECTID;
+ sk->max_type = BTRFS_QGROUP_RELATION_KEY;
+ sk->min_type = BTRFS_QGROUP_STATUS_KEY;
+ sk->max_objectid = (u64)-1;
+ sk->max_offset = (u64)-1;
+ sk->max_transid = (u64)-1;
+ sk->nr_items = 4096;
+
+ qgroup_lookup_init(qgroup_lookup);
+
+ while (1) {
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+ if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: can't perform the search - %s\n",
+ strerror(errno));
+ return ret;
+ }
+ /* the ioctl returns the number of item it found in nr_items */
+ if (sk->nr_items == 0)
+ break;
+
+ off = 0;
+ /*
+ * for each item, pull the key out of the header and then
+ * decode the qgroup item that follows it in the buffer
+ */
+ for (i = 0; i < sk->nr_items; i++) {
+ sh = (struct btrfs_ioctl_search_header *)(args.buf +
+ off);
+ off += sizeof(*sh);
+
+ if (sh->type == BTRFS_QGROUP_STATUS_KEY) {
+ struct btrfs_qgroup_status_item *si;
+ u64 flags;
+
+ si = (struct btrfs_qgroup_status_item *)
+ (args.buf + off);
+ flags = btrfs_stack_qgroup_status_flags(si);
+ print_status_flag_warning(flags);
+ } else if (sh->type == BTRFS_QGROUP_INFO_KEY) {
+ info = (struct btrfs_qgroup_info_item *)
+ (args.buf + off);
+ a1 = btrfs_stack_qgroup_info_generation(info);
+ a2 = btrfs_stack_qgroup_info_referenced(info);
+ a3 =
+ btrfs_stack_qgroup_info_referenced_compressed
+ (info);
+ a4 = btrfs_stack_qgroup_info_exclusive(info);
+ a5 =
+ btrfs_stack_qgroup_info_exclusive_compressed
+ (info);
+ /* the key offset is used as the qgroupid */
+ add_qgroup(qgroup_lookup, sh->offset, a1, a2,
+ a3, a4, a5, 0, 0, 0, 0, 0,
+ NULL, NULL);
+ } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
+ limit = (struct btrfs_qgroup_limit_item *)
+ (args.buf + off);
+
+ a1 = btrfs_stack_qgroup_limit_flags(limit);
+ a2 = btrfs_stack_qgroup_limit_max_referenced
+ (limit);
+ a3 = btrfs_stack_qgroup_limit_max_exclusive
+ (limit);
+ a4 = btrfs_stack_qgroup_limit_rsv_referenced
+ (limit);
+ a5 = btrfs_stack_qgroup_limit_rsv_exclusive
+ (limit);
+ add_qgroup(qgroup_lookup, sh->offset, 0, 0,
+ 0, 0, 0, a1, a2, a3, a4, a5,
+ NULL, NULL);
+ } else if (sh->type == BTRFS_QGROUP_RELATION_KEY) {
+ /*
+ * only items with offset >= objectid are used;
+ * presumably each relation is stored in both
+ * directions - TODO confirm
+ */
+ if (sh->offset < sh->objectid)
+ goto skip;
+ /* bq is passed on as the parent, bq1 as the child */
+ bq = qgroup_tree_search(qgroup_lookup,
+ sh->offset);
+ if (!bq)
+ goto skip;
+ bq1 = qgroup_tree_search(qgroup_lookup,
+ sh->objectid);
+ if (!bq1)
+ goto skip;
+ add_qgroup(qgroup_lookup, sh->offset, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, bq, bq1);
+ } else
+ /* any other key type ends the whole search */
+ goto done;
+skip:
+ /* step over the item payload to the next header */
+ off += sh->len;
+
+ /*
+ * record the mins in sk so we can make sure the
+ * next search doesn't repeat this item
+ */
+ sk->min_type = sh->type;
+ sk->min_offset = sh->offset;
+ sk->min_objectid = sh->objectid;
+ }
+ sk->nr_items = 4096;
+ /*
+ * this iteration is done, step forward one qgroup for the next
+ * ioctl
+ */
+ if (sk->min_offset < (u64)-1)
+ sk->min_offset++;
+ else
+ break;
+ }
+
+done:
+ return ret;
+}
+
+/*
+ * Print the table heading followed by one row per qgroup, walking the
+ * sort tree (entries linked through sort_node) in order.
+ */
+static void print_all_qgroups(struct qgroup_lookup *qgroup_lookup)
+{
+	struct rb_node *node;
+
+	print_table_head();
+
+	for (node = rb_first(&qgroup_lookup->root); node; node = rb_next(node))
+		print_single_qgroup_table(rb_entry(node, struct btrfs_qgroup,
+						   sort_node));
+}
+
+/*
+ * Read all qgroups of the filesystem behind @fd, filter and sort them and
+ * print the resulting table.
+ *
+ * On success the qgroups, @filter_set and @comp_set are freed and the
+ * search result is returned. When the search fails its error is returned
+ * right away and neither set is freed here.
+ */
+int btrfs_show_qgroups(int fd,
+		       struct btrfs_qgroup_filter_set *filter_set,
+		       struct btrfs_qgroup_comparer_set *comp_set)
+{
+	struct qgroup_lookup all_qgroups;
+	struct qgroup_lookup sorted;
+	int ret = __qgroups_search(fd, &all_qgroups);
+
+	if (ret == 0) {
+		__filter_and_sort_qgroups(&all_qgroups, &sorted,
+					  filter_set, comp_set);
+		print_all_qgroups(&sorted);
+
+		__free_all_qgroups(&all_qgroups);
+		btrfs_qgroup_free_filter_set(filter_set);
+		btrfs_qgroup_free_comparer_set(comp_set);
+	}
+	return ret;
+}
+
+/*
+ * Return the id of the tree containing the file behind @fd, as reported by
+ * BTRFS_IOC_INO_LOOKUP for BTRFS_FIRST_FREE_OBJECTID.
+ *
+ * On ioctl failure a message is printed and the negative ioctl result is
+ * returned converted to u64.
+ */
+u64 btrfs_get_path_rootid(int fd)
+{
+	struct btrfs_ioctl_ino_lookup_args args;
+	int ret;
+
+	memset(&args, 0, sizeof(args));
+	args.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	if (ret >= 0)
+		return args.treeid;
+
+	fprintf(stderr,
+		"ERROR: can't perform the search - %s\n",
+		strerror(errno));
+	return ret;
+}
+
+/*
+ * Parse a comma-separated list of sort keys and append one comparer per
+ * key to *comps. A key may be prefixed with '+' (ascending, the default)
+ * or '-' (descending).
+ *
+ * @opt_arg is split in place with strtok(). Returns 0 on success or -1 at
+ * the first key that is not in all_sort_items[]; comparers added for
+ * earlier keys stay in the set.
+ */
+int btrfs_qgroup_parse_sort_string(char *opt_arg,
+				   struct btrfs_qgroup_comparer_set **comps)
+{
+	char *tok;
+
+	for (tok = strtok(opt_arg, ","); tok != NULL; tok = strtok(NULL, ",")) {
+		int descending = 0;
+		int what_to_sort;
+
+		if (*tok == '+') {
+			tok++;
+		} else if (*tok == '-') {
+			descending = 1;
+			tok++;
+		}
+
+		/* rejects unknown names, with or without a sign prefix */
+		what_to_sort = btrfs_qgroup_get_sort_item(tok);
+		if (what_to_sort < 0)
+			return -1;
+		btrfs_qgroup_setup_comparer(comps, what_to_sort, descending);
+	}
+
+	return 0;
+}
+
+/*
+ * Size in bytes of @p including its trailing qgroups[] array: one slot per
+ * qgroup and two slots per ref/excl copy.
+ */
+int qgroup_inherit_size(struct btrfs_qgroup_inherit *p)
+{
+	u64 nr_slots = p->num_qgroups +
+		       2 * p->num_ref_copies +
+		       2 * p->num_excl_copies;
+
+	return sizeof(*p) + sizeof(p->qgroups[0]) * nr_slots;
+}
+
+/*
+ * Replace *inherit with a copy whose qgroups[] array has @n additional
+ * zeroed slots opened at index @pos; entries from @pos onwards move up by
+ * @n. The counters are copied unchanged, the caller fills the new slots
+ * and bumps the matching counter.
+ *
+ * The array holds num_qgroups ids followed by the ref copies and the excl
+ * copies; every copy is a (src, dst) pair and takes two slots, as in
+ * qgroup_inherit_size().
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range @n/@pos, or -ENOMEM.
+ * *inherit is left untouched on failure.
+ */
+static int
+qgroup_inherit_realloc(struct btrfs_qgroup_inherit **inherit, int n, int pos)
+{
+	struct btrfs_qgroup_inherit *out;
+	int nitems = 0;
+
+	if (*inherit) {
+		nitems = (*inherit)->num_qgroups +
+			 2 * (*inherit)->num_ref_copies +
+			 2 * (*inherit)->num_excl_copies;
+	}
+
+	/* a position past the end would make the tail copy length negative */
+	if (n < 0 || pos < 0 || pos > nitems) {
+		fprintf(stderr, "ERROR: bad qgroup inherit position\n");
+		return -EINVAL;
+	}
+
+	out = calloc(sizeof(*out) + sizeof(out->qgroups[0]) * (nitems + n), 1);
+	if (out == NULL) {
+		fprintf(stderr, "ERROR: Not enough memory\n");
+		return -ENOMEM;
+	}
+
+	if (*inherit) {
+		struct btrfs_qgroup_inherit *i = *inherit;
+		int s = sizeof(out->qgroups[0]);
+
+		out->num_qgroups = i->num_qgroups;
+		out->num_ref_copies = i->num_ref_copies;
+		out->num_excl_copies = i->num_excl_copies;
+		memcpy(out->qgroups, i->qgroups, pos * s);
+		memcpy(out->qgroups + pos + n, i->qgroups + pos,
+		       (nitems - pos) * s);
+	}
+	free(*inherit);
+	*inherit = out;
+
+	return 0;
+}
+
+/*
+ * Append the qgroup named by @arg to the id list of *inherit, allocating
+ * or growing the structure as needed.
+ *
+ * Returns 0 on success, -EINVAL when @arg parses to qgroupid 0, or the
+ * error from qgroup_inherit_realloc().
+ */
+int qgroup_inherit_add_group(struct btrfs_qgroup_inherit **inherit, char *arg)
+{
+	u64 qgroupid = parse_qgroupid(arg);
+	int pos = 0;
+	int ret;
+
+	if (!qgroupid) {
+		fprintf(stderr, "ERROR: bad qgroup specification\n");
+		return -EINVAL;
+	}
+
+	/* new ids go right after the existing ones, before any copies */
+	if (*inherit)
+		pos = (*inherit)->num_qgroups;
+
+	ret = qgroup_inherit_realloc(inherit, 1, pos);
+	if (ret)
+		return ret;
+
+	(*inherit)->qgroups[(*inherit)->num_qgroups] = qgroupid;
+	(*inherit)->num_qgroups++;
+
+	return 0;
+}
+
+/*
+ * Add a "src:dst" copy specification to *inherit.
+ *
+ * @arg:  string of the form "<qgroupid>:<qgroupid>"; the ':' is replaced
+ *        by a NUL while parsing and restored afterwards
+ * @type: 0 counts the pair as a ref copy; any other value counts it as an
+ *        excl copy and is also used as a multiplier when computing the
+ *        insert position
+ *
+ * Returns 0 on success, -EINVAL for a malformed specification, or the
+ * error from qgroup_inherit_realloc().
+ */
+int qgroup_inherit_add_copy(struct btrfs_qgroup_inherit **inherit, char *arg,
+			    int type)
+{
+	u64 qgroup_src;
+	u64 qgroup_dst;
+	char *sep;
+	int pos = 0;
+	int ret;
+
+	sep = strchr(arg, ':');
+	if (sep == NULL) {
+		fprintf(stderr, "ERROR: bad copy specification\n");
+		return -EINVAL;
+	}
+
+	*sep = 0;
+	qgroup_src = parse_qgroupid(arg);
+	qgroup_dst = parse_qgroupid(sep + 1);
+	*sep = ':';
+
+	if (!qgroup_src || !qgroup_dst) {
+		fprintf(stderr, "ERROR: bad copy specification\n");
+		return -EINVAL;
+	}
+
+	if (*inherit)
+		pos = (*inherit)->num_qgroups +
+		      (*inherit)->num_ref_copies * 2 * type;
+
+	ret = qgroup_inherit_realloc(inherit, 2, pos);
+	if (ret)
+		return ret;
+
+	(*inherit)->qgroups[pos] = qgroup_src;
+	(*inherit)->qgroups[pos + 1] = qgroup_dst;
+
+	if (type)
+		++(*inherit)->num_excl_copies;
+	else
+		++(*inherit)->num_ref_copies;
+
+	return 0;
+}
diff --git a/qgroup.h b/qgroup.h
new file mode 100644
index 00000000..5886fc33
--- /dev/null
+++ b/qgroup.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2012 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_QGROUP_H__
+#define __BTRFS_QGROUP_H__
+
+#include "kerncompat.h"
+#include "ioctl.h"
+
+struct btrfs_qgroup; /* opaque here: this header only uses pointers to it */
+
+typedef int (*btrfs_qgroup_filter_func)(struct btrfs_qgroup *, u64);
+typedef int (*btrfs_qgroup_comp_func)(struct btrfs_qgroup *,
+ struct btrfs_qgroup *, int);
+
+
+struct btrfs_qgroup_filter {
+ btrfs_qgroup_filter_func filter_func;
+ u64 data; /* presumably the u64 handed to filter_func - confirm in qgroup.c */
+};
+
+struct btrfs_qgroup_comparer {
+ btrfs_qgroup_comp_func comp_func;
+ int is_descending; /* presumably the int handed to comp_func - confirm in qgroup.c */
+};
+
+struct btrfs_qgroup_filter_set {
+ int total;
+ int nfilters;
+ struct btrfs_qgroup_filter filters[0]; /* zero-length trailing array */
+};
+
+struct btrfs_qgroup_comparer_set {
+ int total;
+ int ncomps;
+ struct btrfs_qgroup_comparer comps[0]; /* zero-length trailing array */
+};
+
+enum btrfs_qgroup_column_enum { /* argument type of btrfs_qgroup_setup_print_column() */
+ BTRFS_QGROUP_QGROUPID,
+ BTRFS_QGROUP_RFER,
+ BTRFS_QGROUP_EXCL,
+ BTRFS_QGROUP_MAX_RFER,
+ BTRFS_QGROUP_MAX_EXCL,
+ BTRFS_QGROUP_PARENT,
+ BTRFS_QGROUP_CHILD,
+ BTRFS_QGROUP_ALL, /* last enumerator, value 7 */
+};
+
+enum btrfs_qgroup_comp_enum { /* argument type of btrfs_qgroup_setup_comparer() */
+ BTRFS_QGROUP_COMP_QGROUPID,
+ BTRFS_QGROUP_COMP_RFER,
+ BTRFS_QGROUP_COMP_EXCL,
+ BTRFS_QGROUP_COMP_MAX_RFER,
+ BTRFS_QGROUP_COMP_MAX_EXCL,
+ BTRFS_QGROUP_COMP_MAX /* equals the number of enumerators above (5) */
+};
+
+enum btrfs_qgroup_filter_enum { /* argument type of btrfs_qgroup_setup_filter() */
+ BTRFS_QGROUP_FILTER_PARENT,
+ BTRFS_QGROUP_FILTER_ALL_PARENT,
+ BTRFS_QGROUP_FILTER_MAX, /* equals the number of enumerators above (2) */
+};
+
+int btrfs_qgroup_parse_sort_string(char *opt_arg,
+ struct btrfs_qgroup_comparer_set **comps);
+u64 btrfs_get_path_rootid(int fd);
+int btrfs_show_qgroups(int fd, struct btrfs_qgroup_filter_set *,
+ struct btrfs_qgroup_comparer_set *);
+void btrfs_qgroup_setup_print_column(enum btrfs_qgroup_column_enum column);
+void btrfs_qgroup_setup_units(unsigned unit_mode);
+struct btrfs_qgroup_filter_set *btrfs_qgroup_alloc_filter_set(void);
+void btrfs_qgroup_free_filter_set(struct btrfs_qgroup_filter_set *filter_set);
+int btrfs_qgroup_setup_filter(struct btrfs_qgroup_filter_set **filter_set,
+ enum btrfs_qgroup_filter_enum, u64 data);
+struct btrfs_qgroup_comparer_set *btrfs_qgroup_alloc_comparer_set(void);
+void btrfs_qgroup_free_comparer_set(struct btrfs_qgroup_comparer_set *comp_set);
+int btrfs_qgroup_setup_comparer(struct btrfs_qgroup_comparer_set **comp_set,
+ enum btrfs_qgroup_comp_enum comparer,
+ int is_descending);
+int qgroup_inherit_size(struct btrfs_qgroup_inherit *p);
+int qgroup_inherit_add_group(struct btrfs_qgroup_inherit **inherit, char *arg); /* 0 or negative errno; replaces *inherit on success */
+int qgroup_inherit_add_copy(struct btrfs_qgroup_inherit **inherit, char *arg,
+ int type); /* arg is "src:dst"; type 0 counts as a ref copy, otherwise an excl copy */
+
+#endif
diff --git a/quick-test.c b/quick-test.c
new file mode 100644
index 00000000..ffde85d9
--- /dev/null
+++ b/quick-test.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+
+/* for testing only: yields rand() % max_key; the i argument is currently ignored */
+static int next_key(int i, int max_key) {
+ return rand() % max_key;
+ // return i;
+}
+
+/* Exercise item insertion, lookup and deletion on the tree opened from av[1]. */
+int main(int ac, char **av) {
+	struct btrfs_key ins;
+	struct btrfs_key last = { (u64)-1, 0, 0};
+	char *buf;
+	int i;
+	int num;
+	int ret;
+	int run_size = 300000;
+	int max_key = 100000000;
+	int tree_size = 2;
+	struct btrfs_path path;
+	struct btrfs_root *root;
+	struct btrfs_trans_handle *trans;
+
+	if (ac < 2) {	/* av[1] is handed to open_ctree() below */
+		fprintf(stderr, "usage: quick-test <device>\n");
+		exit(1);
+	}
+	buf = calloc(1, 512);
+	if (!buf) {	/* buf is filled by sprintf() and used as the item payload */
+		fprintf(stderr, "Not enough memory\n");
+		exit(1);
+	}
+
+	radix_tree_init();
+
+	root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	trans = btrfs_start_transaction(root, 1);
+	srand(55);
+	btrfs_set_key_type(&ins, BTRFS_STRING_ITEM_KEY);
+	for (i = 0; i < run_size; i++) {
+		num = next_key(i, max_key);
+		sprintf(buf, "string-%d", num);
+		if (i % 10000 == 0)
+			fprintf(stderr, "insert %d:%d\n", num, i);
+		ins.objectid = num;
+		ins.offset = 0;
+		ret = btrfs_insert_item(trans, root, &ins, buf, 512);
+		if (!ret)
+			tree_size++;
+		if (i == run_size - 5) {	/* commit once, shortly before the end */
+			btrfs_commit_transaction(trans, root);
+			trans = btrfs_start_transaction(root, 1);
+		}
+	}
+	btrfs_commit_transaction(trans, root);
+	close_ctree(root);
+	exit(1);	/* NOTE(review): everything below is unreachable - intended? */
+	root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	printf("starting search\n");
+	srand(55);	/* same seed as the insert phase: next_key() replays the keys */
+	for (i = 0; i < run_size; i++) {
+		num = next_key(i, max_key);
+		ins.objectid = num;
+		btrfs_init_path(&path);
+		if (i % 10000 == 0)
+			fprintf(stderr, "search %d:%d\n", num, i);
+		ret = btrfs_search_slot(NULL, root, &ins, &path, 0, 0);
+		if (ret) {
+			btrfs_print_tree(root, root->node, 1);
+			printf("unable to find %d\n", num);
+			exit(1);
+		}
+		btrfs_release_path(&path);
+	}
+	close_ctree(root);
+
+	root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
+	       btrfs_header_level(root->node),
+	       btrfs_header_nritems(root->node),
+	       (unsigned long)BTRFS_NODEPTRS_PER_BLOCK(root) -
+	       btrfs_header_nritems(root->node));
+	printf("all searches good, deleting some items\n");
+	srand(55);
+	trans = btrfs_start_transaction(root, 1);
+	for (i = 0 ; i < run_size/4; i++) {
+		num = next_key(i, max_key);
+		ins.objectid = num;
+		btrfs_init_path(&path);
+		ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1);
+		if (!ret) {
+			if (i % 10000 == 0)
+				fprintf(stderr, "del %d:%d\n", num, i);
+			ret = btrfs_del_item(trans, root, &path);
+			if (ret != 0)
+				BUG();
+			tree_size--;
+		}
+		btrfs_release_path(&path);
+	}
+	btrfs_commit_transaction(trans, root);
+	close_ctree(root);
+
+	root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	trans = btrfs_start_transaction(root, 1);
+	srand(128);
+	for (i = 0; i < run_size; i++) {
+		num = next_key(i, max_key);
+		sprintf(buf, "string-%d", num);
+		ins.objectid = num;
+		if (i % 10000 == 0)
+			fprintf(stderr, "insert %d:%d\n", num, i);
+		ret = btrfs_insert_item(trans, root, &ins, buf, 512);
+		if (!ret)
+			tree_size++;
+	}
+	btrfs_commit_transaction(trans, root);
+	close_ctree(root);
+
+	root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, OPEN_CTREE_WRITES);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	srand(128);
+	printf("starting search2\n");
+	for (i = 0; i < run_size; i++) {
+		num = next_key(i, max_key);
+		ins.objectid = num;
+		btrfs_init_path(&path);
+		if (i % 10000 == 0)
+			fprintf(stderr, "search %d:%d\n", num, i);
+		ret = btrfs_search_slot(NULL, root, &ins, &path, 0, 0);
+		if (ret) {
+			btrfs_print_tree(root, root->node, 1);
+			printf("unable to find %d\n", num);
+			exit(1);
+		}
+		btrfs_release_path(&path);
+	}
+	printf("starting big long delete run\n");
+	trans = btrfs_start_transaction(root, 1);
+	while(root->node && btrfs_header_nritems(root->node) > 0) {
+		struct extent_buffer *leaf;
+		int slot;
+		ins.objectid = (u64)-1;	/* largest objectid: an exact match is a BUG */
+		btrfs_init_path(&path);
+		ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1);
+		if (ret == 0)
+			BUG();
+
+		leaf = path.nodes[0];
+		slot = path.slots[0];
+		if (slot != btrfs_header_nritems(leaf))	/* must sit one past the last item */
+			BUG();
+		while(path.slots[0] > 0) {	/* delete this leaf's items back to front */
+			path.slots[0] -= 1;
+			slot = path.slots[0];
+			leaf = path.nodes[0];
+
+			btrfs_item_key_to_cpu(leaf, &last, slot);
+			if (tree_size % 10000 == 0)
+				printf("big del %d:%d\n", tree_size, i);
+			ret = btrfs_del_item(trans, root, &path);
+			if (ret != 0) {
+				printf("del_item returned %d\n", ret);
+				BUG();
+			}
+			tree_size--;
+		}
+		btrfs_release_path(&path);
+	}
+	btrfs_commit_transaction(trans, root);
+	printf("tree size is now %d\n", tree_size);
+	printf("root %p commit root %p\n", root->node, root->commit_root);
+	btrfs_print_tree(root, root->node, 1);
+	close_ctree(root);
+	return 0;
+}
diff --git a/radix-tree.c b/radix-tree.c
new file mode 100644
index 00000000..f259ab56
--- /dev/null
+++ b/radix-tree.c
@@ -0,0 +1,849 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "kerncompat.h"
+#include "radix-tree.h"
+#ifdef __KERNEL__
+#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
+#else
+#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
+#endif
+
+#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) /* slots per node */
+#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) /* selects one level's slot number from an index */
+
+#define RADIX_TREE_TAG_LONGS \
+ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) /* longs needed for one bit per slot */
+
+struct radix_tree_node {
+ unsigned int count; /* slots in use; radix_tree_delete() frees the node when it reaches 0 */
+ void *slots[RADIX_TREE_MAP_SIZE];
+ unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; /* per-tag bitmap, one bit per slot */
+};
+
+struct radix_tree_path {
+ struct radix_tree_node *node; /* node visited on the way down; NULL in the sentinel entry */
+ int offset; /* slot taken inside that node */
+};
+
+#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
+#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
+
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; /* filled by radix_tree_init_maxindex() */
+
+/*
+ * Per-cpu pool of preloaded nodes
+ */
+struct radix_tree_preload {
+ int nr; /* number of valid entries in nodes[] */
+ struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
+};
+static struct radix_tree_preload radix_tree_preloads = { 0, };
+
+static int internal_nodes = 0; /* nodes allocated and not yet freed */
+/*
+ * Allocate one zero-filled radix_tree_node and count it in internal_nodes.
+ * @root is not used. Returns NULL when the allocation fails.
+ */
+static struct radix_tree_node *
+radix_tree_node_alloc(struct radix_tree_root *root)
+{
+	struct radix_tree_node *node = calloc(1, sizeof(*node));
+
+	if (!node)
+		return NULL;
+
+	internal_nodes++;
+	return node;
+}
+
+static inline void radix_tree_node_free(struct radix_tree_node *node)
+{
+	/* Give the node back to the heap and drop it from the live count. */
+	free(node);
+	internal_nodes--;
+}
+
+/*
+ * Load up this CPU's radix_tree_node buffer with sufficient objects to
+ * ensure that the addition of a single element in the tree cannot fail. On
+ * success, return zero, with preemption disabled. On error, return -ENOMEM
+ * with preemption not disabled.
+ */
+int radix_tree_preload(gfp_t gfp_mask) /* gfp_mask is not read by this implementation */
+{
+ struct radix_tree_preload *rtp;
+ struct radix_tree_node *node;
+ int ret = -ENOMEM;
+
+ preempt_disable();
+ rtp = &__get_cpu_var(radix_tree_preloads);
+ while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
+ preempt_enable(); /* the allocation itself runs with preemption enabled */
+ node = radix_tree_node_alloc(NULL);
+ if (node == NULL)
+ goto out; /* ret is still -ENOMEM */
+ preempt_disable();
+ rtp = &__get_cpu_var(radix_tree_preloads); /* re-read the pool and re-check its fill level */
+ if (rtp->nr < ARRAY_SIZE(rtp->nodes))
+ rtp->nodes[rtp->nr++] = node;
+ else
+ radix_tree_node_free(node); /* pool is full: the spare node is not needed */
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ __set_bit(offset, node->tags[tag]); /* bit @offset of the bitmap kept for @tag */
+}
+
+static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ __clear_bit(offset, node->tags[tag]); /* affects slot @offset of this node only */
+}
+
+static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ return test_bit(offset, node->tags[tag]); /* callers treat non-zero as "slot @offset carries @tag" */
+}
+
+static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
+{
+ root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); /* root tag bits start at bit __GFP_BITS_SHIFT */
+}
+
+
+static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
+{
+ root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); /* clears the bit for @tag only */
+}
+
+static inline void root_tag_clear_all(struct radix_tree_root *root)
+{
+ root->gfp_mask &= __GFP_BITS_MASK; /* keeps only the bits selected by __GFP_BITS_MASK */
+}
+
+static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
+{
+ return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); /* non-zero (not necessarily 1) when set */
+}
+
+/*
+ * Report whether @tag is set on at least one slot of @node:
+ * 1 if any word of the node's bitmap for @tag is non-zero, else 0.
+ */
+static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
+{
+	const unsigned long *bitmap = node->tags[tag];
+	int word = 0;
+
+	while (word < RADIX_TREE_TAG_LONGS && !bitmap[word])
+		word++;
+	return word < RADIX_TREE_TAG_LONGS;
+}
+
+/*
+ * Return the maximum key which can be stored in a
+ * radix tree with height HEIGHT (read from height_to_maxindex[]).
+ */
+static inline unsigned long radix_tree_maxindex(unsigned int height)
+{
+ return height_to_maxindex[height]; /* no bounds check on @height */
+}
+
+/*
+ * Grow the tree until its height is sufficient to hold key @index.
+ * Returns 0 on success or -ENOMEM if a new root node cannot be allocated.
+ */
+static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
+{
+	unsigned int target = root->height + 1;
+	int t;
+
+	/* Smallest height above the current one whose maxindex covers @index. */
+	while (radix_tree_maxindex(target) < index)
+		target++;
+
+	/* A tree without a root entry owns no nodes: just record the height. */
+	if (!root->rnode) {
+		root->height = target;
+		return 0;
+	}
+
+	/* Stack new roots on top, each holding the old root in slot 0. */
+	while (root->height < target) {
+		struct radix_tree_node *top = radix_tree_node_alloc(root);
+
+		if (!top)
+			return -ENOMEM;
+		top->slots[0] = root->rnode;
+
+		/* Copy every root-level tag onto slot 0 of the new node. */
+		for (t = 0; t < RADIX_TREE_MAX_TAGS; t++) {
+			if (root_tag_get(root, t))
+				tag_set(top, t, 0);
+		}
+
+		top->count = 1;
+		root->rnode = top;
+		root->height++;
+	}
+	return 0;
+}
+
+/**
+ * radix_tree_insert - insert into a radix tree
+ * @root: radix tree root
+ * @index: index key
+ * @item: item to insert
+ *
+ * Insert @item at @index. Returns 0, -EEXIST if occupied, or -ENOMEM.
+ */
+int radix_tree_insert(struct radix_tree_root *root,
+ unsigned long index, void *item)
+{
+ struct radix_tree_node *node = NULL, *slot;
+ unsigned int height, shift;
+ int offset;
+ int error;
+
+ /* Make sure the tree is high enough. */
+ if (index > radix_tree_maxindex(root->height)) {
+ error = radix_tree_extend(root, index);
+ if (error)
+ return error;
+ }
+
+ slot = root->rnode;
+ height = root->height;
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT; /* only used while height > 0 */
+
+ offset = 0; /* uninitialised var warning */
+ while (height > 0) {
+ if (slot == NULL) {
+ /* Have to add a child node. */
+ if (!(slot = radix_tree_node_alloc(root)))
+ return -ENOMEM; /* nodes linked in so far stay in the tree */
+ if (node) {
+ node->slots[offset] = slot;
+ node->count++;
+ } else
+ root->rnode = slot; /* first node of a previously empty tree */
+ }
+
+ /* Go a level down */
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ node = slot;
+ slot = node->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ if (slot != NULL)
+ return -EEXIST; /* the position already holds something */
+
+ if (node) {
+ node->count++;
+ node->slots[offset] = item;
+ BUG_ON(tag_get(node, 0, offset)); /* an empty slot must not carry tags */
+ BUG_ON(tag_get(node, 1, offset));
+ } else {
+ root->rnode = item; /* height 0: the item is stored directly in the root */
+ BUG_ON(root_tag_get(root, 0));
+ BUG_ON(root_tag_get(root, 1));
+ }
+
+ return 0;
+}
+
+static inline void **__lookup_slot(struct radix_tree_root *root,
+ unsigned long index) /* returns the address of the slot for @index, or NULL */
+{
+ unsigned int height, shift;
+ struct radix_tree_node **slot;
+
+ height = root->height;
+
+ if (index > radix_tree_maxindex(height))
+ return NULL; /* the tree is too shallow to contain @index */
+
+ if (height == 0 && root->rnode)
+ return (void *)&root->rnode; /* single item kept in the root itself */
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = &root->rnode;
+
+ while (height > 0) {
+ if (*slot == NULL)
+ return NULL; /* no node on the path to @index */
+
+ slot = (struct radix_tree_node **)
+ ((*slot)->slots +
+ ((index >> shift) & RADIX_TREE_MAP_MASK));
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ return (void **)slot; /* the slot itself may still contain NULL */
+}
+
+/**
+ * radix_tree_lookup_slot - lookup a slot in a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Lookup the slot corresponding to the position @index in the radix tree
+ * @root. This is useful for update-if-exists operations. May return NULL.
+ */
+void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+{
+ return __lookup_slot(root, index); /* public wrapper around the static helper */
+}
+
+/**
+ * radix_tree_lookup - fetch the item stored at an index
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Returns the item at position @index in @root, or NULL if there is none.
+ */
+void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+{
+	void **found = __lookup_slot(root, index);
+
+	/* A missing slot and an empty slot both read as NULL. */
+	return found ? *found : NULL;
+}
+
+/**
+ * radix_tree_tag_set - set a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index
+ *
+ * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ * corresponding to @index in the radix tree, on every node from
+ * the root all the way down to the leaf node.
+ *
+ * Returns the address of the tagged item. Setting a tag on a not-present
+ * item is a bug.
+ */
+void *radix_tree_tag_set(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *slot;
+
+ height = root->height;
+ BUG_ON(index > radix_tree_maxindex(height));
+
+ slot = root->rnode;
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+
+ while (height > 0) {
+ int offset;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ if (!tag_get(slot, tag, offset))
+ tag_set(slot, tag, offset); /* tag the slot leading towards @index at this level */
+ slot = slot->slots[offset];
+ BUG_ON(slot == NULL); /* nothing stored on the path to @index */
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ /* set the root's tag bit */
+ if (slot && !root_tag_get(root, tag))
+ root_tag_set(root, tag);
+
+ return slot; /* after the descent this is the item, not a node */
+}
+
+/**
+ * radix_tree_tag_clear - clear a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index
+ *
+ * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ * corresponding to @index in the radix tree. If
+ * this causes the leaf node to have no tags set then clear the tag in the
+ * next-to-leaf node, etc.
+ *
+ * Returns the address of the tagged item on success, else NULL. ie:
+ * has the same return value and semantics as radix_tree_lookup().
+ */
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+ struct radix_tree_node *slot = NULL;
+ unsigned int height, shift;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ goto out;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ pathp->node = NULL; /* path[0] is a sentinel that ends the walk back up */
+ slot = root->rnode;
+
+ while (height > 0) {
+ int offset;
+
+ if (slot == NULL)
+ goto out;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ pathp[1].offset = offset; /* record the step before taking it */
+ pathp[1].node = slot;
+ slot = slot->slots[offset];
+ pathp++;
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ if (slot == NULL)
+ goto out; /* no item at @index: nothing to clear */
+
+ while (pathp->node) {
+ if (!tag_get(pathp->node, tag, pathp->offset))
+ goto out; /* tag was not set at this level: stop */
+ tag_clear(pathp->node, tag, pathp->offset);
+ if (any_tag_set(pathp->node, tag))
+ goto out; /* another slot of this node still has @tag: parents keep it */
+ pathp--;
+ }
+
+ /* clear the root's tag bit */
+ if (root_tag_get(root, tag))
+ root_tag_clear(root, tag);
+
+out:
+ return slot;
+}
+
+#ifndef __KERNEL__ /* Only the test harness uses this at present */
+/**
+ * radix_tree_tag_get - get a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ * Return values:
+ *
+ * 0: tag not present or not set
+ * 1: tag set
+ */
+int radix_tree_tag_get(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *slot;
+ int saw_unset_tag = 0;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ return 0;
+
+ /* check the root's tag bit */
+ if (!root_tag_get(root, tag))
+ return 0;
+
+ if (height == 0)
+ return 1; /* the root bit alone decides when the root holds the item */
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ for ( ; ; ) {
+ int offset;
+
+ if (slot == NULL)
+ return 0;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+ /*
+ * This is just a debug check. Later, we can bale as soon as
+ * we see an unset tag.
+ */
+ if (!tag_get(slot, tag, offset))
+ saw_unset_tag = 1;
+ if (height == 1) {
+ int ret = tag_get(slot, tag, offset); /* the bottom-level bit is the answer */
+
+ BUG_ON(ret && saw_unset_tag); /* a tagged leaf slot under an untagged ancestor slot */
+ return !!ret;
+ }
+ slot = slot->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+}
+#endif
+
+static unsigned int
+__lookup(struct radix_tree_root *root, void **results, unsigned long index,
+ unsigned int max_items, unsigned long *next_index) /* one scan pass; returns the number of items stored */
+{
+ unsigned int nr_found = 0;
+ unsigned int shift, height;
+ struct radix_tree_node *slot;
+ unsigned long i;
+
+ height = root->height;
+ if (height == 0) {
+ if (root->rnode && index == 0) /* a height-0 tree holds at most one item, at index 0 */
+ results[nr_found++] = root->rnode;
+ goto out;
+ }
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ for ( ; height > 1; height--) {
+
+ for (i = (index >> shift) & RADIX_TREE_MAP_MASK ;
+ i < RADIX_TREE_MAP_SIZE; i++) {
+ if (slot->slots[i] != NULL)
+ break;
+ index &= ~((1UL << shift) - 1); /* empty slot: round @index down to this slot's first key ... */
+ index += 1UL << shift; /* ... then advance to the first key of the next slot */
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ }
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out; /* rest of this node is empty: the caller resumes from the updated index */
+
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = slot->slots[i];
+ }
+
+ /* Bottom level: grab some items */
+ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
+ index++;
+ if (slot->slots[i]) {
+ results[nr_found++] = slot->slots[i];
+ if (nr_found == max_items)
+ goto out;
+ }
+ }
+out:
+ *next_index = index; /* where the following pass should start */
+ return nr_found;
+}
+
+/**
+ * radix_tree_gang_lookup - collect several present items in one call
+ * @root: radix tree root
+ * @results: output array; must have room for @max_items pointers
+ * @first_index: lowest key to consider
+ * @max_items: upper bound on the number of items stored in @results
+ *
+ * Scans the tree in ascending index order starting at @first_index and
+ * copies the items it finds into @results, stopping at @max_items or at
+ * the largest index the current tree height can hold.
+ *
+ * Returns the number of items stored in @results.
+ */
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+			unsigned long first_index, unsigned int max_items)
+{
+	const unsigned long last = radix_tree_maxindex(root->height);
+	unsigned long pos = first_index;
+	unsigned int total = 0;
+
+	/* Each __lookup() pass reports where the following pass resumes. */
+	while (total < max_items && pos <= last) {
+		unsigned long resume;
+
+		total += __lookup(root, results + total, pos,
+				  max_items - total, &resume);
+
+		/* A zero resume index ends the scan (e.g. the index wrapped). */
+		if (resume == 0)
+			break;
+		pos = resume;
+	}
+
+	return total;
+}
+
+/*
+ * FIXME: the two tag_get()s here should use find_next_bit() instead of
+ * open-coding the search.
+ */
+static unsigned int
+__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
+ unsigned int max_items, unsigned long *next_index, unsigned int tag)
+{
+ unsigned int nr_found = 0;
+ unsigned int shift;
+ unsigned int height = root->height;
+ struct radix_tree_node *slot;
+
+ if (height == 0) {
+ if (root->rnode && index == 0) /* a height-0 tree holds at most one item, at index 0 */
+ results[nr_found++] = root->rnode;
+ goto out;
+ }
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ do {
+ unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+ for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
+ if (tag_get(slot, tag, i)) {
+ BUG_ON(slot->slots[i] == NULL); /* a tagged slot must not be empty */
+ break;
+ }
+ index &= ~((1UL << shift) - 1); /* untagged slot: round @index down to its first key ... */
+ index += 1UL << shift; /* ... then advance to the first key of the next slot */
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ }
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out; /* nothing tagged in the rest of this node */
+ height--;
+ if (height == 0) { /* Bottom level: grab some items */
+ unsigned long j = index & RADIX_TREE_MAP_MASK;
+
+ for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
+ index++;
+ if (tag_get(slot, tag, j)) {
+ BUG_ON(slot->slots[j] == NULL);
+ results[nr_found++] = slot->slots[j];
+ if (nr_found == max_items)
+ goto out;
+ }
+ }
+ }
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = slot->slots[i]; /* at the bottom level this value is not used again: the loop ends */
+ } while (height > 0);
+out:
+ *next_index = index; /* where the following pass should start */
+ return nr_found;
+}
+
+/**
+ * radix_tree_gang_lookup_tag - collect several tagged items in one call
+ * @root: radix tree root
+ * @results: output array; must have room for @max_items pointers
+ * @first_index: lowest key to consider
+ * @max_items: upper bound on the number of items stored in @results
+ * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ * Scans the tree in ascending index order starting at @first_index and
+ * copies the items that have @tag set into @results.
+ *
+ * Returns the number of items stored in @results (0 if the root lacks @tag).
+ */
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+			    unsigned long first_index, unsigned int max_items,
+			    unsigned int tag)
+{
+	const unsigned long last = radix_tree_maxindex(root->height);
+	unsigned long pos = first_index;
+	unsigned int total = 0;
+
+	/* The root's bit for @tag is clear: return without scanning. */
+	if (!root_tag_get(root, tag))
+		return 0;
+
+	/* Each __lookup_tag() pass reports where the following pass resumes. */
+	while (total < max_items && pos <= last) {
+		unsigned long resume;
+
+		total += __lookup_tag(root, results + total, pos,
+				      max_items - total, &resume, tag);
+
+		/* A zero resume index ends the scan. */
+		if (resume == 0)
+			break;
+		pos = resume;
+	}
+
+	return total;
+}
+
+/**
+ * radix_tree_shrink - shrink height of a radix tree to minimal
+ * @root: radix tree root
+ */
+static inline void radix_tree_shrink(struct radix_tree_root *root)
+{
+ /* try to shrink tree height */
+ while (root->height > 0 &&
+ root->rnode->count == 1 &&
+ root->rnode->slots[0]) { /* the only entry of the top node lives in slot 0 */
+ struct radix_tree_node *to_free = root->rnode;
+
+ root->rnode = to_free->slots[0]; /* the child (or item) becomes the new root entry */
+ root->height--;
+ /* must only free zeroed nodes into the slab */
+ tag_clear(to_free, 0, 0); /* tags 0 and 1 are cleared explicitly (RADIX_TREE_MAX_TAGS is 2) */
+ tag_clear(to_free, 1, 0);
+ to_free->slots[0] = NULL;
+ to_free->count = 0;
+ radix_tree_node_free(to_free);
+ }
+}
+
+/**
+ * radix_tree_delete - delete an item from a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Remove the item at @index from the radix tree rooted at @root.
+ *
+ * Returns the address of the deleted item, or NULL if it was not present.
+ */
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+{
+ struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+ struct radix_tree_node *slot = NULL;
+ unsigned int height, shift;
+ int tag;
+ int offset;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ goto out;
+
+ slot = root->rnode;
+ if (height == 0 && root->rnode) { /* the single item kept in the root itself */
+ root_tag_clear_all(root);
+ root->rnode = NULL;
+ goto out;
+ }
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ pathp->node = NULL; /* path[0] is a sentinel that ends the walk back up */
+
+ do {
+ if (slot == NULL)
+ goto out;
+
+ pathp++;
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ pathp->offset = offset;
+ pathp->node = slot;
+ slot = slot->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ } while (height > 0);
+
+ if (slot == NULL)
+ goto out; /* nothing stored at @index */
+
+ /*
+ * Clear all tags associated with the just-deleted item
+ */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ if (tag_get(pathp->node, tag, pathp->offset))
+ radix_tree_tag_clear(root, index, tag); /* also updates ancestors and the root bit */
+ }
+
+ /* Now free the nodes we do not need anymore */
+ while (pathp->node) {
+ pathp->node->slots[pathp->offset] = NULL;
+ pathp->node->count--;
+
+ if (pathp->node->count) {
+ if (pathp->node == root->rnode)
+ radix_tree_shrink(root); /* the top node may now have a single entry in slot 0 */
+ goto out;
+ }
+
+ /* Node with zero slots in use so free it */
+ radix_tree_node_free(pathp->node);
+
+ pathp--;
+ }
+ root_tag_clear_all(root); /* every node on the path was freed: the tree is empty */
+ root->height = 0;
+ root->rnode = NULL;
+
+out:
+ return slot;
+}
+
+/**
+ * radix_tree_tagged - test whether any items in the tree are tagged
+ * @root: radix tree root
+ * @tag: tag to test
+ */
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
+{
+ return root_tag_get(root, tag); /* non-zero (not necessarily 1) when the root's bit for @tag is set */
+}
+
+static unsigned long __maxindex(unsigned int height)
+{
+	const unsigned int bits = height * RADIX_TREE_MAP_SHIFT;
+
+	if (bits >= RADIX_TREE_INDEX_BITS)
+		return ~0UL;
+	/* Shift in two steps so that bits == 0 never shifts by the word width. */
+	return (~0UL >> (RADIX_TREE_INDEX_BITS - bits - 1)) >> 1;
+}
+
+static void radix_tree_init_maxindex(void)
+{
+	unsigned int h = ARRAY_SIZE(height_to_maxindex);
+
+	while (h-- > 0)	/* entries are independent, so fill order is free */
+		height_to_maxindex[h] = __maxindex(h);
+}
+
+void radix_tree_init(void)
+{
+ radix_tree_init_maxindex(); /* fills height_to_maxindex[], which radix_tree_maxindex() reads */
+}
diff --git a/radix-tree.h b/radix-tree.h
new file mode 100644
index 00000000..bf96d839
--- /dev/null
+++ b/radix-tree.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _LINUX_RADIX_TREE_H
+#define _LINUX_RADIX_TREE_H
+
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#else
+#include <btrfs/kerncompat.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+#define RADIX_TREE_MAX_TAGS 2
+
+/*
+ * Handle for one radix tree.  Initialise it with RADIX_TREE_INIT(),
+ * RADIX_TREE() or INIT_RADIX_TREE() before use.
+ * root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT
+ */
+struct radix_tree_root {
+ unsigned int height; /* 0 in a freshly initialised root */
+ gfp_t gfp_mask; /* mask given at initialisation */
+ struct radix_tree_node *rnode; /* NULL in a freshly initialised root */
+};
+/* Brace initializer for a struct radix_tree_root: height 0, no root
+ * node, and @mask stored in gfp_mask. */
+#define RADIX_TREE_INIT(mask) { \
+ .height = 0, \
+ .gfp_mask = (mask), \
+ .rnode = NULL, \
+}
+/* Define a struct radix_tree_root variable @name set up by RADIX_TREE_INIT(). */
+#define RADIX_TREE(name, mask) \
+ struct radix_tree_root name = RADIX_TREE_INIT(mask)
+/* Run-time counterpart of RADIX_TREE_INIT() for an existing @root pointer. */
+#define INIT_RADIX_TREE(root, mask) \
+do { \
+ (root)->height = 0; \
+ (root)->gfp_mask = (mask); \
+ (root)->rnode = NULL; \
+} while (0)
+/*
+ * Public radix tree interface.  Items are void pointers stored under an
+ * unsigned long index.  Tag arguments are presumably expected to be below
+ * RADIX_TREE_MAX_TAGS (TODO confirm against radix-tree.c).
+ *
+ * radix_tree_init() fills the height_to_maxindex[] table.
+ * radix_tree_delete() returns the pointer it removed, or NULL when the
+ * walk down to the index runs into an empty slot.
+ * radix_tree_tagged() returns root_tag_get() for the given tag.
+ */
+int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
+void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
+void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items);
+int radix_tree_preload(gfp_t gfp_mask);
+void radix_tree_init(void);
+void *radix_tree_tag_set(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+int radix_tree_tag_get(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items,
+ unsigned int tag);
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+/*
+ * Closes a section opened with radix_tree_preload(); the body is a single
+ * preempt_enable() call.
+ */
+static inline void radix_tree_preload_end(void)
+{
+ preempt_enable();
+}
+
+#endif /* _LINUX_RADIX_TREE_H */
diff --git a/raid6.c b/raid6.c
new file mode 100644
index 00000000..a6ee483f
--- /dev/null
+++ b/raid6.c
@@ -0,0 +1,101 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6int1.c
+ *
+ * 1-way unrolled portable integer math RAID-6 instruction set
+ *
+ * This file was postprocessed using unroll.pl and then ported to userspace
+ */
+#include <stdint.h>
+#include <unistd.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+
+/*
+ * This is the C data type to use
+ */
+
+/*
+ * Change this from BITS_PER_LONG if there is something better...
+ *
+ * unative_t is the word the syndrome loop works on, selected by
+ * BITS_PER_LONG.  NBYTES(x) multiplies x by a constant with 0x01 in
+ * every byte, so a value below 0x100 is replicated into each byte of the
+ * word.  NSIZE is the word size in bytes and NSHIFT is log2(NSIZE).
+ */
+#if BITS_PER_LONG == 64
+# define NBYTES(x) ((x) * 0x0101010101010101UL)
+# define NSIZE 8
+# define NSHIFT 3
+typedef uint64_t unative_t;
+#else
+# define NBYTES(x) ((x) * 0x01010101U)
+# define NSIZE 4
+# define NSHIFT 2
+typedef uint32_t unative_t;
+#endif
+
+/*
+ * These sub-operations are separate inlines since they can sometimes be
+ * specially optimized using architecture-specific hacks.
+ */
+
+/*
+ * SHLBYTE() - shift every byte of @v left by one bit.
+ *
+ * The whole word is shifted once and then masked with 0xfe in each byte,
+ * which clears the bit that leaked in from the neighbouring byte, so
+ * nothing rolls over into the next byte.
+ */
+static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
+{
+	return (v << 1) & NBYTES(0xfe);
+}
+
+/*
+ * MASK() - expand the top bit of every byte of @v to a full byte.
+ *
+ * Bytes whose high bit is set become 0xFF, all other bytes become 0x00.
+ * Only the 0x80 bits are kept; (x << 1) - (x >> 7) then turns each kept
+ * bit into eight ones.
+ */
+static inline __attribute_const__ unative_t MASK(unative_t v)
+{
+	const unative_t high = v & NBYTES(0x80);
+
+	/* Overflow on the top bit is OK */
+	return (high << 1) - (high >> 7);
+}
+
+
+/*
+ * raid6_gen_syndrome - compute the P and Q blocks for one stripe
+ * @disks: number of entries in @ptrs (data buffers followed by P and Q)
+ * @bytes: number of bytes to process in every buffer
+ * @ptrs: array of @disks buffer pointers
+ *
+ * ptrs[0] .. ptrs[disks - 3] are read as data.  ptrs[disks - 2] receives
+ * the XOR of all data buffers (P) and ptrs[disks - 1] receives the RS
+ * syndrome (Q).  The buffers are accessed one unative_t (NSIZE bytes) at
+ * a time.
+ *
+ * The byte offset is a size_t so that it matches @bytes: a plain int
+ * would be compared against an unsigned value and would overflow for
+ * buffers larger than INT_MAX.
+ */
+void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	uint8_t **dptr = (uint8_t **)ptrs;
+	uint8_t *p, *q;
+	size_t d;
+	int z, z0;
+
+	unative_t wd0, wq0, wp0, w10, w20;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	for ( d = 0 ; d < bytes ; d += NSIZE*1 ) {
+		/* seed both accumulators with the highest data disk */
+		wq0 = wp0 = *(unative_t *)&dptr[z0][d+0*NSIZE];
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			wd0 = *(unative_t *)&dptr[z][d+0*NSIZE];
+			wp0 ^= wd0;
+			/*
+			 * Per byte: shift wq0 left by one, XOR in 0x1d
+			 * where the old high bit was set, then add the
+			 * data word.
+			 */
+			w20 = MASK(wq0);
+			w10 = SHLBYTE(wq0);
+			w20 &= NBYTES(0x1d);
+			w10 ^= w20;
+			wq0 = w10 ^ wd0;
+		}
+		*(unative_t *)&p[d+NSIZE*0] = wp0;
+		*(unative_t *)&q[d+NSIZE*0] = wq0;
+	}
+}
+
diff --git a/random-test.c b/random-test.c
new file mode 100644
index 00000000..b7c6cdb3
--- /dev/null
+++ b/random-test.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+/* Cleared by sigstopper(); the test loops poll it and stop when it is 0. */
+int keep_running = 1;
+struct btrfs_super_block super; /* passed to open_ctree(), btrfs_commit_transaction() and close_ctree() */
+
+/*
+ * setup_key - build a string-item key with a random objectid
+ * @root: radix tree used to see which objectids are in use
+ * @key: key to fill in (flags, type, offset and objectid)
+ * @exists: non-zero to pick an objectid found in @root, zero to step away
+ *          from the objectids the lookup returned
+ *
+ * Returns 0 with @key filled in, or -EEXIST when @exists is set and the
+ * lookup starting at the random number finds nothing.
+ */
+static int setup_key(struct radix_tree_root *root, struct btrfs_key *key,
+		     int exists)
+{
+	int candidate = rand();
+	unsigned long hits[2];
+	int nr_hits;
+
+	key->flags = 0;
+	btrfs_set_key_type(key, BTRFS_STRING_ITEM_KEY);
+	key->offset = 0;
+
+	for (;;) {
+		nr_hits = radix_tree_gang_lookup(root, (void **)hits,
+						 candidate, 2);
+		if (exists) {
+			if (nr_hits == 0)
+				return -EEXIST;
+			/* use the first entry found at or after the guess */
+			candidate = hits[0];
+			break;
+		}
+
+		/* the guess itself was not returned: take it */
+		if (nr_hits == 0 || candidate != hits[0])
+			break;
+
+		candidate++;
+		if (nr_hits <= 1 || candidate != hits[1])
+			break;
+
+		/* both neighbours are taken, look again further up */
+		candidate++;
+	}
+
+	key->objectid = candidate;
+	return 0;
+}
+
+/*
+ * ins_one - insert one new random string item and mirror it in @radix
+ *
+ * The item data is the text "str-<objectid>\n".  Returns 0 on success, or
+ * the non-zero result of btrfs_insert_item() / radix_tree_insert() after
+ * printing a message.
+ */
+static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct radix_tree_root *radix)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	char buf[128];
+	unsigned long oid;
+	int ret;
+
+	btrfs_init_path(&path);
+	/* with exists == 0 setup_key() has no failure return */
+	setup_key(radix, &key, 0);
+	sprintf(buf, "str-%llu\n", (unsigned long long)key.objectid);
+
+	ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf));
+	if (!ret) {
+		oid = (unsigned long)key.objectid;
+		radix_tree_preload(GFP_KERNEL);
+		ret = radix_tree_insert(radix, oid, (void *)oid);
+		radix_tree_preload_end();
+	}
+
+	if (ret)
+		printf("failed to insert %llu\n",
+		       (unsigned long long)key.objectid);
+	return ret;
+}
+
+/*
+ * insert_dup - try to insert a key that is already recorded in @radix
+ *
+ * The insert is expected to fail with -EEXIST, in which case 0 is
+ * returned.  Any other outcome is reported and returned as a failure.
+ * That includes an insert that unexpectedly succeeds: it is mapped to
+ * -EINVAL, because returning the raw 0 would let the caller treat the
+ * duplicate insert as a pass.
+ *
+ * Returns 0 as well when setup_key() cannot find an existing key.
+ */
+static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct radix_tree_root *radix)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	int ret;
+	char buf[128];
+
+	btrfs_init_path(&path);
+	ret = setup_key(radix, &key, 1);
+	if (ret < 0)
+		return 0;
+	sprintf(buf, "str-%llu\n", (unsigned long long)key.objectid);
+	ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf));
+	if (ret == -EEXIST)
+		return 0;
+
+	printf("insert on %llu gave us %d\n",
+	       (unsigned long long)key.objectid, ret);
+	return ret ? ret : -EINVAL;
+}
+
+/*
+ * del_one - delete one existing key from the btree and from @radix
+ *
+ * Returns 0 on success, or when setup_key() cannot find an existing key.
+ * On failure a message is printed and a non-zero value is returned: the
+ * result of btrfs_search_slot() or btrfs_del_item(), or -ENOENT when the
+ * key is missing from @radix.  -ENOENT matches what empty_tree() returns
+ * for the same condition.
+ */
+static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct radix_tree_root *radix)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	int ret;
+
+	btrfs_init_path(&path);
+	ret = setup_key(radix, &key, 1);
+	if (ret < 0)
+		return 0;
+	ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
+	if (ret) {
+		/* release the path on the failure path too */
+		btrfs_release_path(&path);
+		goto error;
+	}
+	ret = btrfs_del_item(trans, root, &path);
+	btrfs_release_path(&path);
+	if (ret != 0)
+		goto error;
+	if (!radix_tree_delete(radix, key.objectid)) {
+		/* ret is 0 here; without this the miss would look like success */
+		ret = -ENOENT;
+		goto error;
+	}
+	return 0;
+error:
+	printf("failed to delete %llu\n", (unsigned long long)key.objectid);
+	return ret;
+}
+
+/*
+ * lookup_item - search the btree for a key that is recorded in @radix
+ *
+ * Returns 0 when btrfs_search_slot() returns 0, or when setup_key() cannot
+ * find an existing key.  Any other search result is printed as a failure
+ * and returned unchanged.
+ */
+static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		       *root, struct radix_tree_root *radix)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	int ret;
+
+	btrfs_init_path(&path);
+	if (setup_key(radix, &key, 1) < 0)
+		return 0;
+
+	ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
+	btrfs_release_path(&path);
+	if (ret)
+		printf("unable to find key %llu\n",
+		       (unsigned long long)key.objectid);
+	return ret;
+}
+
+/*
+ * lookup_enoent - search the btree for a key that should be absent
+ *
+ * Returns 0 when btrfs_search_slot() returns a positive value.  A zero
+ * or negative search result is reported and turned into -EEXIST.  A
+ * negative result from setup_key() is passed straight through.
+ */
+static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root
+			 *root, struct radix_tree_root *radix)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	int ret;
+
+	btrfs_init_path(&path);
+	ret = setup_key(radix, &key, 0);
+	if (ret < 0)
+		return ret;
+
+	ret = btrfs_search_slot(trans, root, &key, &path, 0, 0);
+	btrfs_release_path(&path);
+	if (ret > 0)
+		return 0;
+
+	printf("able to find key that should not exist %llu\n",
+	       (unsigned long long)key.objectid);
+	return -EEXIST;
+}
+
+/*
+ * empty_tree - delete items from the high end of the btree
+ * @trans: transaction handle passed to the btree calls
+ * @root: btree to delete from
+ * @radix: shadow radix tree that must contain every deleted objectid
+ * @nr: the loop runs while nr-- >= 0, i.e. at most @nr + 1 deletions
+ *
+ * Each round searches for the largest possible string-item key, steps
+ * back to the previous slot when there is no exact match, deletes that
+ * item and removes its objectid from @radix.  The loop also ends when the
+ * search lands on slot 0 without a match or when keep_running is cleared.
+ *
+ * Returns 0 on success, the error from btrfs_search_slot() or
+ * btrfs_del_item(), or -ENOENT when the objectid was missing from @radix.
+ * The path is released on every exit, including a failed
+ * btrfs_del_item().
+ */
+static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct radix_tree_root *radix, int nr)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	unsigned long found = 0;
+	int ret;
+	int slot;
+
+	key.offset = 0;
+	key.flags = 0;
+	btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY);
+	key.objectid = (unsigned long)-1;
+	while (nr-- >= 0) {
+		btrfs_init_path(&path);
+		ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
+		if (ret < 0) {
+			btrfs_release_path(&path);
+			return ret;
+		}
+		if (ret != 0) {
+			if (path.slots[0] == 0) {
+				btrfs_release_path(&path);
+				break;
+			}
+			path.slots[0] -= 1;
+		}
+		slot = path.slots[0];
+		found = btrfs_disk_key_objectid(
+				&path.nodes[0]->leaf.items[slot].key);
+		ret = btrfs_del_item(trans, root, &path);
+		/* released before the error check so a failure cannot leak it */
+		btrfs_release_path(&path);
+		if (ret) {
+			fprintf(stderr,
+				"failed to remove %lu from tree\n",
+				found);
+			return ret;
+		}
+		if (!radix_tree_delete(radix, found))
+			goto error;
+		if (!keep_running)
+			break;
+	}
+	return 0;
+error:
+	fprintf(stderr, "failed to delete from the radix %lu\n", found);
+	return -ENOENT;
+}
+
+/*
+ * fill_tree - insert up to @count random items through ins_one()
+ *
+ * The transaction is committed on every 1000th iteration (including the
+ * first) and progress is printed every 10000 iterations.  The loop stops
+ * early when keep_running is cleared.  Returns 0 on success or the first
+ * non-zero result from ins_one() / btrfs_commit_transaction().
+ */
+static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		     struct radix_tree_root *radix, int count)
+{
+	int ret = 0;
+	int i = 0;
+
+	while (i < count) {
+		ret = ins_one(trans, root, radix);
+		if (ret) {
+			fprintf(stderr, "fill failed\n");
+			return ret;
+		}
+
+		if (i % 1000 == 0) {
+			ret = btrfs_commit_transaction(trans, root, &super);
+			if (ret) {
+				fprintf(stderr, "fill commit failed\n");
+				return ret;
+			}
+		}
+
+		if (i != 0 && i % 10000 == 0)
+			printf("bigfill %d\n", i);
+
+		if (!keep_running)
+			break;
+		i++;
+	}
+
+	return ret;
+}
+
+/*
+ * bulk_op - occasionally delete a random number of items and refill
+ *
+ * A random count below 5000 is drawn on every call, but only every 100th
+ * call (starting with the first) does the work: empty_tree() followed by
+ * fill_tree() with that count.  Returns 0 when skipped or on success,
+ * otherwise the first non-zero result.
+ */
+static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct radix_tree_root *radix)
+{
+	static int calls = 0;
+	const int nr = rand() % 5000;
+	int ret;
+
+	/* do the bulk op much less frequently */
+	if (calls++ % 100 != 0)
+		return 0;
+
+	ret = empty_tree(trans, root, radix, nr);
+	if (ret == 0)
+		ret = fill_tree(trans, root, radix, nr);
+	return ret;
+}
+
+/* Operation table: main() draws a random index into it for each iteration. */
+int (*ops[])(struct btrfs_trans_handle *,
+ struct btrfs_root *root, struct radix_tree_root *radix) =
+ { ins_one, insert_dup, del_one, lookup_item,
+ lookup_enoent, bulk_op };
+
+/*
+ * fill_radix - mirror the objectids already in the btree into @radix
+ *
+ * Starting from the largest possible string-item key, each pass searches
+ * the btree, walks the leaf it lands in from the found slot down to slot
+ * 0 and inserts every objectid into @radix.  The next pass continues from
+ * the last objectid seen minus one.  The walk ends when the search lands
+ * on slot 0 without a match or when that subtraction wraps around.
+ *
+ * Returns 0 on completion or a negative btrfs_search_slot() result.  A
+ * failed radix insert terminates the program with exit(1).
+ */
+static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	unsigned long found = 0;
+	int ret;
+	int slot;
+	int i;
+
+	key.offset = 0;
+	key.flags = 0;
+	btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY);
+	key.objectid = (unsigned long)-1;
+
+	do {
+		btrfs_init_path(&path);
+		ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+		if (ret < 0) {
+			btrfs_release_path(&path);
+			return ret;
+		}
+
+		slot = path.slots[0];
+		if (ret != 0) {
+			/* no exact match: step back one slot, if there is one */
+			if (slot == 0) {
+				btrfs_release_path(&path);
+				return 0;
+			}
+			slot -= 1;
+		}
+
+		i = slot;
+		while (i >= 0) {
+			found = btrfs_disk_key_objectid(&path.nodes[0]->
+							leaf.items[i].key);
+			radix_tree_preload(GFP_KERNEL);
+			ret = radix_tree_insert(radix, found, (void *)found);
+			if (ret) {
+				fprintf(stderr,
+					"failed to insert %lu into radix\n",
+					found);
+				exit(1);
+			}
+
+			radix_tree_preload_end();
+			i--;
+		}
+		btrfs_release_path(&path);
+
+		/* continue below the smallest id seen; stop on wrap-around */
+		key.objectid = found - 1;
+	} while (key.objectid <= found);
+
+	return 0;
+}
+/*
+ * sigstopper - signal handler that asks the test loops to stop
+ * @ignored: signal number, unused
+ *
+ * Clears keep_running and writes a notice to standard error.  The notice
+ * goes out through write() because stdio calls such as fprintf() are not
+ * async-signal-safe and must not be used inside a signal handler.
+ */
+void sigstopper(int ignored)
+{
+	static const char msg[] = "caught exit signal, stopping\n";
+
+	keep_running = 0;
+	if (write(STDERR_FILENO, msg, sizeof(msg) - 1) < 0) {
+		/* nothing useful can be done about a failed write here */
+	}
+}
+
+/*
+ * print_usage - print the option summary to stdout and exit with status 1.
+ * The function never returns to its caller.
+ */
+int print_usage(void)
+{
+	static const char *const help[] = {
+		"usage: tester [-ih] [-c count] [-f count]\n",
+		"\t -c count -- iteration count after filling\n",
+		"\t -f count -- run this many random inserts before starting\n",
+		"\t -i -- only do initial fill\n",
+		"\t -h -- this help text\n",
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
+		printf("%s", help[i]);
+	exit(1);
+}
+/*
+ * main - randomised stress test of the btree against a radix-tree shadow
+ *
+ * Opens "dbfile", mirrors its existing keys into a radix tree, runs the
+ * initial fill and then performs @iterations rounds of a randomly chosen
+ * operation from ops[].  Options: -i, -c count, -f count; anything else
+ * prints the usage text.
+ *
+ * Returns 0 on success and 1 on failure.  Changes from the earlier
+ * version:
+ *  - "-c" / "-f" given as the last argument print the usage text instead
+ *    of reading past the end of av[];
+ *  - a failed re-open of the tree is reported as a failure and no longer
+ *    passes a NULL root to close_ctree();
+ *  - a failure while loading the existing keys is no longer ignored.
+ */
+int main(int ac, char **av)
+{
+	RADIX_TREE(radix, GFP_KERNEL);
+	struct btrfs_root *root;
+	int i;
+	int ret;
+	int count;
+	int op;
+	int iterations = 20000;
+	int init_fill_count = 800000;
+	int err = 0;
+	int initial_only = 0;
+	struct btrfs_trans_handle *trans;
+
+	radix_tree_init();
+	root = open_ctree("dbfile", &super);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	ret = fill_radix(root, &radix);
+	if (ret) {
+		fprintf(stderr, "failed to load existing keys: %d\n", ret);
+		err = ret;
+		goto out;
+	}
+
+	signal(SIGTERM, sigstopper);
+	signal(SIGINT, sigstopper);
+
+	for (i = 1 ; i < ac ; i++) {
+		if (strcmp(av[i], "-i") == 0) {
+			initial_only = 1;
+		} else if (strcmp(av[i], "-c") == 0) {
+			/* the option needs a value; print_usage() exits */
+			if (i + 1 >= ac)
+				print_usage();
+			iterations = atoi(av[i+1]);
+			i++;
+		} else if (strcmp(av[i], "-f") == 0) {
+			if (i + 1 >= ac)
+				print_usage();
+			init_fill_count = atoi(av[i+1]);
+			i++;
+		} else {
+			print_usage();
+		}
+	}
+	printf("initial fill\n");
+	trans = btrfs_start_transaction(root, 1);
+	ret = fill_tree(trans, root, &radix, init_fill_count);
+	printf("starting run\n");
+	if (ret) {
+		err = ret;
+		goto out;
+	}
+	if (initial_only == 1) {
+		goto out;
+	}
+	for (i = 0; i < iterations; i++) {
+		op = rand() % ARRAY_SIZE(ops);
+		count = rand() % 128;
+		if (i % 2000 == 0) {
+			printf("%d\n", i);
+			fflush(stdout);
+		}
+		if (i && i % 5000 == 0) {
+			printf("open & close, root level %d nritems %d\n",
+				btrfs_header_level(&root->node->node.header),
+				btrfs_header_nritems(&root->node->node.header));
+			close_ctree(root, &super);
+			root = open_ctree("dbfile", &super);
+			if (!root) {
+				fprintf(stderr, "Open ctree failed\n");
+				err = 1;
+				goto out;
+			}
+		}
+		while(count--) {
+			ret = ops[op](trans, root, &radix);
+			if (ret) {
+				fprintf(stderr, "op %d failed %d:%d\n",
+					op, i, iterations);
+				btrfs_print_tree(root, root->node, 1);
+				fprintf(stderr, "op %d failed %d:%d\n",
+					op, i, iterations);
+				err = ret;
+				goto out;
+			}
+			if (ops[op] == bulk_op)
+				break;
+			if (keep_running == 0) {
+				err = 0;
+				goto out;
+			}
+		}
+	}
+out:
+	/* root is NULL when the periodic re-open failed */
+	if (root)
+		close_ctree(root, &super);
+	return !!err;
+}
+
diff --git a/rbtree-utils.c b/rbtree-utils.c
new file mode 100644
index 00000000..7371bbb4
--- /dev/null
+++ b/rbtree-utils.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2014 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "rbtree-utils.h"
+
+/*
+ * rb_insert - link @node into @root at the position chosen by @comp
+ *
+ * @comp is called as comp(existing, node): a negative result descends to
+ * the left child, a positive result to the right child.  Returns 0 after
+ * linking and recolouring, or -EEXIST when @comp returns 0 for some
+ * existing node (the tree is left untouched in that case).
+ */
+int rb_insert(struct rb_root *root, struct rb_node *node,
+	      rb_compare_nodes comp)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		int cmp;
+
+		parent = *link;
+		cmp = comp(parent, node);
+		if (cmp == 0)
+			return -EEXIST;
+		link = (cmp < 0) ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(node, parent, link);
+	rb_insert_color(node, root);
+	return 0;
+}
+
+/*
+ * rb_search - find the node for which @comp(node, @key) returns 0
+ *
+ * The descent goes left on a negative comparison and right on a positive
+ * one.  Returns the matching node, or NULL when there is none.  On a miss
+ * with a non-NULL @next_ret, *@next_ret receives the last node visited, or
+ * rb_next() of it when the final comparison was positive; for an empty
+ * tree it receives NULL.
+ */
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+			  struct rb_node **next_ret)
+{
+	struct rb_node *cur = root->rb_node;
+	struct rb_node *last = NULL;
+	int cmp = 0;
+
+	while (cur) {
+		last = cur;
+		cmp = comp(cur, key);
+		if (cmp == 0)
+			return cur;
+		cur = (cmp < 0) ? cur->rb_left : cur->rb_right;
+	}
+
+	if (next_ret) {
+		if (last && cmp > 0)
+			last = rb_next(last);
+		*next_ret = last;
+	}
+	return NULL;
+}
+
+/*
+ * rb_free_nodes - empty @root, handing every node to @free_node
+ *
+ * Repeatedly takes rb_first(), erases it from the tree and then calls
+ * @free_node on it, until the tree has no nodes left.
+ */
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node)
+{
+	struct rb_node *victim;
+
+	for (victim = rb_first(root); victim; victim = rb_first(root)) {
+		rb_erase(victim, root);
+		free_node(victim);
+	}
+}
diff --git a/rbtree-utils.h b/rbtree-utils.h
new file mode 100644
index 00000000..718581ff
--- /dev/null
+++ b/rbtree-utils.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __RBTREE_UTILS__
+#define __RBTREE_UTILS__
+
+#include "rbtree.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The common insert/search/free functions.
+ *
+ * rb_compare_nodes: called by rb_insert() as comp(existing, new).  A
+ * negative result sends the new node into the left subtree, a positive
+ * result into the right subtree, and 0 makes rb_insert() return -EEXIST.
+ * rb_compare_keys: called by rb_search() as comp(node, key) with the same
+ * sign convention; 0 means the node matches.
+ * rb_free_node: called by rb_free_nodes() on each node after it has been
+ * erased from the tree.
+ */
+typedef int (*rb_compare_nodes)(struct rb_node *node1, struct rb_node *node2);
+typedef int (*rb_compare_keys)(struct rb_node *node, void *key);
+typedef void (*rb_free_node)(struct rb_node *node);
+
+int rb_insert(struct rb_root *root, struct rb_node *node,
+ rb_compare_nodes comp);
+/*
+ * Returns the matching node, or NULL when no node matches.  In some
+ * cases the caller needs the next node when the requested one is not
+ * found; pass a non-NULL next_ret for that.  On a miss *next_ret is set to
+ * the last node visited, or to rb_next() of it when the final comparison
+ * was positive.  It is set to NULL for an empty tree.
+ */
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+ struct rb_node **next_ret);
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node);
+/*
+ * Expands to the definition of a static function free_<name>_tree(root)
+ * that calls rb_free_nodes(root, free_func).
+ */
+#define FREE_RB_BASED_TREE(name, free_func) \
+static void free_##name##_tree(struct rb_root *root) \
+{ \
+ rb_free_nodes(root, free_func); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/rbtree.c b/rbtree.c
new file mode 100644
index 00000000..92590a57
--- /dev/null
+++ b/rbtree.c
@@ -0,0 +1,548 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/lib/rbtree.c
+*/
+
+#include "rbtree_augmented.h"
+
+/*
+ * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
+ *
+ * 1) A node is either red or black
+ * 2) The root is black
+ * 3) All leaves (NULL) are black
+ * 4) Both children of every red node are black
+ * 5) Every simple path from root to leaves contains the same number
+ * of black nodes.
+ *
+ * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
+ * consecutive red nodes in a path and every red node is therefore followed by
+ * a black. So if B is the number of black nodes on every simple path (as per
+ * 5), then the longest possible path due to 4 is 2B.
+ *
+ * We shall indicate color with case, where black nodes are uppercase and red
+ * nodes will be lowercase. Unknown color nodes shall be drawn as red within
+ * parentheses and have some accompanying text comment.
+ */
+/* Mark @rb black by OR-ing RB_BLACK into its parent-color word; the other
+ * bits of the word are left as they are. */
+static inline void rb_set_black(struct rb_node *rb)
+{
+ rb->__rb_parent_color |= RB_BLACK;
+}
+/*
+ * Parent of a node known to be red: the parent-color word is returned as a
+ * pointer without any masking.  NOTE(review): this presumably relies on
+ * the red colour being encoded as zero low bits; confirm against
+ * rbtree_augmented.h.
+ */
+static inline struct rb_node *rb_red_parent(struct rb_node *red)
+{
+ return (struct rb_node *)red->__rb_parent_color;
+}
+
+/*
+ * Helper function for rotations:
+ * - old's parent and color get assigned to new
+ * - old gets assigned new as a parent and 'color' as a color.
+ *
+ * old's parent is read first, before old's parent-color word is
+ * overwritten; that saved parent is what __rb_change_child() uses to make
+ * new take old's place (root is passed along for that call).
+ */
+static inline void
+__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
+ struct rb_root *root, int color)
+{
+ struct rb_node *parent = rb_parent(old);
+ new->__rb_parent_color = old->__rb_parent_color;
+ rb_set_parent_color(old, new, color);
+ __rb_change_child(old, new, parent, root);
+}
+/*
+ * Rebalance the tree after @node has been linked in.
+ *
+ * @node is treated as red: its parent is fetched with rb_red_parent().
+ * The loop recolours and rotates, moving up to the grandparent after a
+ * colour flip, until @node has no parent (it is then made the black root)
+ * or its parent is black.  @augment_rotate(old, new) is called after each
+ * rotation.  The else branch mirrors the first branch with left and right
+ * exchanged.
+ */
+static __always_inline void
+__rb_insert(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
+
+ while (true) {
+ /*
+ * Loop invariant: node is red
+ *
+ * If there is a black parent, we are done.
+ * Otherwise, take some corrective action as we don't
+ * want a red root or two consecutive red nodes.
+ */
+ if (!parent) {
+ rb_set_parent_color(node, NULL, RB_BLACK);
+ break;
+ } else if (rb_is_black(parent))
+ break;
+
+ gparent = rb_red_parent(parent);
+
+ tmp = gparent->rb_right;
+ if (parent != tmp) { /* parent == gparent->rb_left */
+ if (tmp && rb_is_red(tmp)) {
+ /*
+ * Case 1 - color flips
+ *
+ * G g
+ * / \ / \
+ * p u --> P U
+ * / /
+ * n n
+ *
+ * However, since g's parent might be red, and
+ * 4) does not allow this, we need to recurse
+ * at g.
+ */
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ rb_set_parent_color(parent, gparent, RB_BLACK);
+ node = gparent;
+ parent = rb_parent(node);
+ rb_set_parent_color(node, parent, RB_RED);
+ continue;
+ }
+
+ tmp = parent->rb_right;
+ if (node == tmp) {
+ /*
+ * Case 2 - left rotate at parent
+ *
+ * G G
+ * / \ / \
+ * p U --> n U
+ * \ /
+ * n p
+ *
+ * This still leaves us in violation of 4), the
+ * continuation into Case 3 will fix that.
+ */
+ parent->rb_right = tmp = node->rb_left;
+ node->rb_left = parent;
+ if (tmp)
+ rb_set_parent_color(tmp, parent,
+ RB_BLACK);
+ rb_set_parent_color(parent, node, RB_RED);
+ augment_rotate(parent, node);
+ parent = node;
+ tmp = node->rb_right;
+ }
+
+ /*
+ * Case 3 - right rotate at gparent
+ *
+ * G P
+ * / \ / \
+ * p U --> n g
+ * / \
+ * n U
+ */
+ gparent->rb_left = tmp; /* == parent->rb_right */
+ parent->rb_right = gparent;
+ if (tmp)
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ __rb_rotate_set_parents(gparent, parent, root, RB_RED);
+ augment_rotate(gparent, parent);
+ break;
+ } else {
+ tmp = gparent->rb_left;
+ if (tmp && rb_is_red(tmp)) {
+ /* Case 1 - color flips */
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ rb_set_parent_color(parent, gparent, RB_BLACK);
+ node = gparent;
+ parent = rb_parent(node);
+ rb_set_parent_color(node, parent, RB_RED);
+ continue;
+ }
+
+ tmp = parent->rb_left;
+ if (node == tmp) {
+ /* Case 2 - right rotate at parent */
+ parent->rb_left = tmp = node->rb_right;
+ node->rb_right = parent;
+ if (tmp)
+ rb_set_parent_color(tmp, parent,
+ RB_BLACK);
+ rb_set_parent_color(parent, node, RB_RED);
+ augment_rotate(parent, node);
+ parent = node;
+ tmp = node->rb_left;
+ }
+
+ /* Case 3 - left rotate at gparent */
+ gparent->rb_right = tmp; /* == parent->rb_left */
+ parent->rb_left = gparent;
+ if (tmp)
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ __rb_rotate_set_parents(gparent, parent, root, RB_RED);
+ augment_rotate(gparent, parent);
+ break;
+ }
+ }
+}
+
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ *
+ * Rebalances after an erase.  @parent is the node below which one subtree
+ * is short by one black node (see the loop invariants).  The walk starts
+ * with node == NULL and moves upwards until the deficit is repaired.
+ * @augment_rotate(old, new) is called after each rotation.  The else
+ * branch mirrors the first branch with left and right exchanged.
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
+
+ while (true) {
+ /*
+ * Loop invariants:
+ * - node is black (or NULL on first iteration)
+ * - node is not the root (parent is not NULL)
+ * - All leaf paths going through parent and node have a
+ * black node count that is 1 lower than other leaf paths.
+ */
+ sibling = parent->rb_right;
+ if (node != sibling) { /* node == parent->rb_left */
+ if (rb_is_red(sibling)) {
+ /*
+ * Case 1 - left rotate at parent
+ *
+ * P S
+ * / \ / \
+ * N s --> p Sr
+ * / \ / \
+ * Sl Sr N Sl
+ */
+ parent->rb_right = tmp1 = sibling->rb_left;
+ sibling->rb_left = parent;
+ rb_set_parent_color(tmp1, parent, RB_BLACK);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_RED);
+ augment_rotate(parent, sibling);
+ sibling = tmp1;
+ }
+ tmp1 = sibling->rb_right;
+ if (!tmp1 || rb_is_black(tmp1)) {
+ tmp2 = sibling->rb_left;
+ if (!tmp2 || rb_is_black(tmp2)) {
+ /*
+ * Case 2 - sibling color flip
+ * (p could be either color here)
+ *
+ * (p) (p)
+ * / \ / \
+ * N S --> N s
+ * / \ / \
+ * Sl Sr Sl Sr
+ *
+ * This leaves us violating 5) which
+ * can be fixed by flipping p to black
+ * if it was red, or by recursing at p.
+ * p is red when coming from Case 1.
+ */
+ rb_set_parent_color(sibling, parent,
+ RB_RED);
+ if (rb_is_red(parent))
+ rb_set_black(parent);
+ else {
+ node = parent;
+ parent = rb_parent(node);
+ if (parent)
+ continue;
+ }
+ break;
+ }
+ /*
+ * Case 3 - right rotate at sibling
+ * (p could be either color here)
+ *
+ * (p) (p)
+ * / \ / \
+ * N S --> N Sl
+ * / \ \
+ * sl Sr s
+ * \
+ * Sr
+ */
+ sibling->rb_left = tmp1 = tmp2->rb_right;
+ tmp2->rb_right = sibling;
+ parent->rb_right = tmp2;
+ if (tmp1)
+ rb_set_parent_color(tmp1, sibling,
+ RB_BLACK);
+ augment_rotate(sibling, tmp2);
+ tmp1 = sibling;
+ sibling = tmp2;
+ }
+ /*
+ * Case 4 - left rotate at parent + color flips
+ * (p and sl could be either color here.
+ * After rotation, p becomes black, s acquires
+ * p's color, and sl keeps its color)
+ *
+ * (p) (s)
+ * / \ / \
+ * N S --> P Sr
+ * / \ / \
+ * (sl) sr N (sl)
+ */
+ parent->rb_right = tmp2 = sibling->rb_left;
+ sibling->rb_left = parent;
+ rb_set_parent_color(tmp1, sibling, RB_BLACK);
+ if (tmp2)
+ rb_set_parent(tmp2, parent);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_BLACK);
+ augment_rotate(parent, sibling);
+ break;
+ } else {
+ sibling = parent->rb_left;
+ if (rb_is_red(sibling)) {
+ /* Case 1 - right rotate at parent */
+ parent->rb_left = tmp1 = sibling->rb_right;
+ sibling->rb_right = parent;
+ rb_set_parent_color(tmp1, parent, RB_BLACK);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_RED);
+ augment_rotate(parent, sibling);
+ sibling = tmp1;
+ }
+ tmp1 = sibling->rb_left;
+ if (!tmp1 || rb_is_black(tmp1)) {
+ tmp2 = sibling->rb_right;
+ if (!tmp2 || rb_is_black(tmp2)) {
+ /* Case 2 - sibling color flip */
+ rb_set_parent_color(sibling, parent,
+ RB_RED);
+ if (rb_is_red(parent))
+ rb_set_black(parent);
+ else {
+ node = parent;
+ parent = rb_parent(node);
+ if (parent)
+ continue;
+ }
+ break;
+ }
+ /* Case 3 - left rotate at sibling */
+ sibling->rb_right = tmp1 = tmp2->rb_left;
+ tmp2->rb_left = sibling;
+ parent->rb_left = tmp2;
+ if (tmp1)
+ rb_set_parent_color(tmp1, sibling,
+ RB_BLACK);
+ augment_rotate(sibling, tmp2);
+ tmp1 = sibling;
+ sibling = tmp2;
+ }
+ /* Case 4 - right rotate at parent + color flips */
+ parent->rb_left = tmp2 = sibling->rb_right;
+ sibling->rb_right = parent;
+ rb_set_parent_color(tmp1, sibling, RB_BLACK);
+ if (tmp2)
+ rb_set_parent(tmp2, parent);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_BLACK);
+ augment_rotate(parent, sibling);
+ break;
+ }
+ }
+}
+
+/*
+ * Non-inline version for rb_erase_augmented() use.
+ * All three arguments are forwarded unchanged to ____rb_erase_color().
+ */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ ____rb_erase_color(parent, root, augment_rotate);
+}
+
+/*
+ * Non-augmented rbtree manipulation functions.
+ *
+ * We use dummy augmented callbacks here, and have the compiler optimize them
+ * out of the rb_insert_color() and rb_erase() function definitions.
+ *
+ * The three dummy_* functions have empty bodies.  dummy_callbacks bundles
+ * them for __rb_erase_augmented(); dummy_rotate is also passed directly to
+ * __rb_insert() and ____rb_erase_color().
+ */
+
+static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
+static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
+static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
+
+static const struct rb_augment_callbacks dummy_callbacks = {
+ dummy_propagate, dummy_copy, dummy_rotate
+};
+/*
+ * Rebalance after @node has been linked into @root.  Runs __rb_insert()
+ * with the no-op dummy_rotate callback.
+ */
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+ __rb_insert(node, root, dummy_rotate);
+}
+
+/*
+ * rb_erase - remove @node from @root
+ *
+ * The node is unlinked by __rb_erase_augmented() using the no-op dummy
+ * callbacks.  When that call returns a node to rebalance from, the
+ * colours are repaired with ____rb_erase_color().
+ */
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *rebalance =
+		__rb_erase_augmented(node, root, &dummy_callbacks);
+
+	if (!rebalance)
+		return;
+	____rb_erase_color(rebalance, root, dummy_rotate);
+}
+
+/*
+ * Augmented rbtree manipulation functions.
+ *
+ * This instantiates the same __always_inline functions as in the non-augmented
+ * case, but this time with user-defined callbacks.
+ *
+ * __rb_insert_augmented() forwards its arguments unchanged to
+ * __rb_insert(), so @augment_rotate is invoked after every rotation.
+ */
+
+void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ __rb_insert(node, root, augment_rotate);
+}
+
+/*
+ * rb_first - return the first node (in sort order) of the tree
+ *
+ * Follows rb_left links from the root as far as they go.  Returns NULL for
+ * an empty tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node *n = root->rb_node;
+
+	if (n) {
+		while (n->rb_left)
+			n = n->rb_left;
+	}
+	return n;
+}
+
+/*
+ * rb_last - return the last node (in sort order) of the tree
+ *
+ * Follows rb_right links from the root as far as they go.  Returns NULL
+ * for an empty tree.
+ */
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node *n = root->rb_node;
+
+	if (n) {
+		while (n->rb_right)
+			n = n->rb_right;
+	}
+	return n;
+}
+
+/*
+ * rb_next - return the in-order successor of @node
+ *
+ * Returns NULL when @node satisfies RB_EMPTY_NODE() or when it has no
+ * successor.
+ */
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * With a right-hand child the successor is the leftmost node of
+	 * that right subtree.
+	 */
+	if (node->rb_right) {
+		for (node = node->rb_right; node->rb_left; node = node->rb_left)
+			;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No right-hand children.  Everything down and left is smaller than
+	 * us, so climb while we are a right-hand child.  The first parent we
+	 * reach from its left side is the successor; running out of parents
+	 * means there is none.
+	 */
+	for (parent = rb_parent(node);
+	     parent && node == parent->rb_right;
+	     parent = rb_parent(node))
+		node = parent;
+
+	return parent;
+}
+
+/*
+ * rb_prev - return the in-order predecessor of @node
+ *
+ * Returns NULL when @node satisfies RB_EMPTY_NODE() or when it has no
+ * predecessor.
+ */
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * With a left-hand child the predecessor is the rightmost node of
+	 * that left subtree.
+	 */
+	if (node->rb_left) {
+		for (node = node->rb_left; node->rb_right; node = node->rb_right)
+			;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No left-hand children.  Climb while we are a left-hand child; the
+	 * first parent we reach from its right side is the predecessor.
+	 */
+	for (parent = rb_parent(node);
+	     parent && node == parent->rb_left;
+	     parent = rb_parent(node))
+		node = parent;
+
+	return parent;
+}
+
+/*
+ * rb_replace_node - put @new in the tree position occupied by @victim
+ *
+ * The parent's child link (or the root) and both children's parent links
+ * are redirected to @new, then @victim's pointers and colour are copied
+ * into @new.  No rebalancing is done.
+ */
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *above = rb_parent(victim);
+	struct rb_node *left = victim->rb_left;
+	struct rb_node *right = victim->rb_right;
+
+	/* Set the surrounding nodes to point to the replacement */
+	__rb_change_child(victim, new, above, root);
+	if (left)
+		rb_set_parent(left, new);
+	if (right)
+		rb_set_parent(right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
+
+/*
+ * Descend from @node, preferring the left child and taking the right
+ * child only when there is no left one, until a node without children is
+ * reached.  That node is returned.
+ */
+static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
+{
+	const struct rb_node *child;
+
+	while ((child = node->rb_left ? node->rb_left : node->rb_right))
+		node = child;
+
+	return (struct rb_node *)node;
+}
+
+/*
+ * rb_next_postorder - return the node that follows @node in post-order
+ *
+ * Returns NULL for a NULL @node, and NULL after the last node (the one
+ * without a parent).
+ */
+struct rb_node *rb_next_postorder(const struct rb_node *node)
+{
+	const struct rb_node *parent;
+
+	if (!node)
+		return NULL;
+	parent = rb_parent(node);
+
+	/*
+	 * If we're sitting on node, we've already seen our children.  Unless
+	 * we are a left child with a right sibling still to visit, the
+	 * parent is next.
+	 */
+	if (!parent || node != parent->rb_left || !parent->rb_right)
+		return (struct rb_node *)parent;
+
+	/* Left child with a right sibling: visit the sibling's subtree */
+	return rb_left_deepest_node(parent->rb_right);
+}
+
+/*
+ * rb_first_postorder - return the first node of a post-order walk
+ *
+ * That is the node rb_left_deepest_node() reaches from the root.  Returns
+ * NULL for an empty tree.
+ */
+struct rb_node *rb_first_postorder(const struct rb_root *root)
+{
+	return root->rb_node ? rb_left_deepest_node(root->rb_node) : NULL;
+}
diff --git a/rbtree.h b/rbtree.h
new file mode 100644
index 00000000..47b662a3
--- /dev/null
+++ b/rbtree.h
@@ -0,0 +1,118 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/include/linux/rbtree.h
+
+ To use rbtrees you'll have to implement your own insert and search cores.
+ This avoids callbacks, which would dramatically reduce performance.
+ I know it's not the cleanest way, but in C (not in C++) that is how
+ you get both performance and genericity...
+
+ See Documentation/rbtree.txt for documentation and samples.
+*/
+
+#ifndef _LINUX_RBTREE_H
+#define _LINUX_RBTREE_H
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#else
+#include <btrfs/kerncompat.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rb_node {
+ unsigned long __rb_parent_color; /* parent pointer; rb_parent() masks off the two low bits */
+ struct rb_node *rb_right; /* right child, NULL if none */
+ struct rb_node *rb_left; /* left child, NULL if none */
+} __attribute__((aligned(sizeof(long))));
+ /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root {
+ struct rb_node *rb_node; /* top node of the tree; NULL when the tree is empty */
+};
+
+
+#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
+
+#define RB_ROOT (struct rb_root) { NULL, }
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
+
+/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
+#define RB_EMPTY_NODE(node) \
+ ((node)->__rb_parent_color == (unsigned long)(node))
+#define RB_CLEAR_NODE(node) \
+ ((node)->__rb_parent_color = (unsigned long)(node))
+
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Postorder iteration - always visit the parent after its children */
+extern struct rb_node *rb_first_postorder(const struct rb_root *);
+extern struct rb_node *rb_next_postorder(const struct rb_node *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new_node,
+ struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+				struct rb_node ** rb_link)
+{
+	/* New leaf: childless, with the parent stored as the raw pointer value */
+	node->rb_left = node->rb_right = NULL;
+	node->__rb_parent_color = (unsigned long)parent;
+	*rb_link = node;
+}
+
+#define rb_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ ____ptr ? rb_entry(____ptr, type, member) : NULL; \
+ })
+
+/**
+ * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of
+ * given type safe against removal of rb_node entry
+ *
+ * @pos: the 'type *' to use as a loop cursor.
+ * @n: another 'type *' to use as temporary storage
+ * @root: 'rb_root *' of the rbtree.
+ * @field: the name of the rb_node field within 'type'.
+ */
+#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
+ for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
+ pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
+ typeof(*pos), field); 1; }); \
+ pos = n)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LINUX_RBTREE_H */
diff --git a/rbtree_augmented.h b/rbtree_augmented.h
new file mode 100644
index 00000000..5d269784
--- /dev/null
+++ b/rbtree_augmented.h
@@ -0,0 +1,249 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/include/linux/rbtree_augmented.h
+*/
+
+#ifndef _LINUX_RBTREE_AUGMENTED_H
+#define _LINUX_RBTREE_AUGMENTED_H
+
+#include "rbtree.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Please note - only struct rb_augment_callbacks and the prototypes for
+ * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
+ * The rest are implementation details you are not expected to depend on.
+ *
+ * See Documentation/rbtree.txt for documentation and samples.
+ */
+
+struct rb_augment_callbacks { /* roles below are those of the RB_DECLARE_CALLBACKS-generated functions */
+ void (*propagate)(struct rb_node *node, struct rb_node *stop); /* refresh augmented data from node upward until stop */
+ void (*copy)(struct rb_node *old, struct rb_node *new); /* new takes over old's augmented value */
+ void (*rotate)(struct rb_node *old, struct rb_node *new); /* new takes old's value, then old is recomputed */
+};
+
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+/*
+ * Fixup the rbtree and update the augmented information when rebalancing.
+ *
+ * On insertion, the user must update the augmented information on the path
+ * leading to the inserted node, then call rb_link_node() as usual and
+ * rb_insert_augmented() instead of the usual rb_insert_color() call.
+ * If rb_insert_augmented() rebalances the rbtree, it will call back into
+ * a user provided function to update the augmented information on the
+ * affected subtrees.
+ */
+static inline void
+rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ __rb_insert_augmented(node, root, augment->rotate); /* insertion only uses the rotate callback */
+}
+
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \
+ rbtype, rbaugmented, rbcompute) /* emits rbname_propagate/_copy/_rotate plus a callbacks table called rbname */ \
+static inline void \
+rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \
+{ \
+ while (rb != stop) { /* climb via rb_parent() until stop is reached */ \
+ rbstruct *node = rb_entry(rb, rbstruct, rbfield); \
+ rbtype augmented = rbcompute(node); \
+ if (node->rbaugmented == augmented) \
+ break; /* stored value is already current: stop early */ \
+ node->rbaugmented = augmented; \
+ rb = rb_parent(&node->rbfield); \
+ } \
+} \
+static inline void \
+rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \
+{ \
+ rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
+ rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
+ new->rbaugmented = old->rbaugmented; \
+} \
+static void \
+rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
+{ \
+ rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
+ rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
+ new->rbaugmented = old->rbaugmented; \
+ old->rbaugmented = rbcompute(old); /* recomputed only after new took over the previous value */ \
+} \
+rbstatic const struct rb_augment_callbacks rbname = { \
+ rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
+};
+
+
+#define RB_RED 0
+#define RB_BLACK 1
+
+#define __rb_parent(pc) ((struct rb_node *)((pc) & ~3)) /* pc with the two low flag bits masked off */
+
+#define __rb_color(pc) ((pc) & 1)
+#define __rb_is_black(pc) __rb_color(pc)
+#define __rb_is_red(pc) (!__rb_color(pc))
+#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+ rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; /* keep rb's colour bit, replace the parent */
+}
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+ struct rb_node *p, int color)
+{
+ rb->__rb_parent_color = (unsigned long)p | color; /* overwrite parent and colour in one assignment */
+}
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+		  struct rb_node *parent, struct rb_root *root)
+{
+	struct rb_node **link;
+
+	/* No parent: the root slot. Otherwise the parent's left link when it
+	 * holds old, and its right link in every other case. */
+	link = !parent ? &root->rb_node :
+	       parent->rb_left == old ? &parent->rb_left : &parent->rb_right;
+	*link = new;
+}
+
+extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
+static __always_inline struct rb_node *
+__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *parent, *rebalance; /* rebalance: node to hand to __rb_erase_color(), or NULL */
+ unsigned long pc;
+ /* Unlink node, keep augmented data current, and return the node needing a colour fix-up (or NULL) */
+ if (!tmp) {
+ /*
+ * Case 1: node to erase has no more than 1 child (easy!)
+ *
+ * Note that if there is one child it must be red due to 5)
+ * and node must be black due to 4). We adjust colors locally
+ * so as to bypass __rb_erase_color() later on.
+ */
+ pc = node->__rb_parent_color;
+ parent = __rb_parent(pc);
+ __rb_change_child(node, child, parent, root);
+ if (child) {
+ child->__rb_parent_color = pc;
+ rebalance = NULL;
+ } else
+ rebalance = __rb_is_black(pc) ? parent : NULL;
+ tmp = parent; /* the final propagate() call starts from here */
+ } else if (!child) {
+ /* Still case 1, but this time the child is node->rb_left */
+ tmp->__rb_parent_color = pc = node->__rb_parent_color;
+ parent = __rb_parent(pc);
+ __rb_change_child(node, tmp, parent, root);
+ rebalance = NULL;
+ tmp = parent; /* the final propagate() call starts from here */
+ } else {
+ struct rb_node *successor = child, *child2;
+ tmp = child->rb_left;
+ if (!tmp) {
+ /*
+ * Case 2: node's successor is its right child
+ *
+ * (n) (s)
+ * / \ / \
+ * (x) (s) -> (x) (c)
+ * \
+ * (c)
+ */
+ parent = successor;
+ child2 = successor->rb_right;
+ augment->copy(node, successor);
+ } else {
+ /*
+ * Case 3: node's successor is leftmost under
+ * node's right child subtree
+ *
+ * (n) (s)
+ * / \ / \
+ * (x) (y) -> (x) (y)
+ * / /
+ * (p) (p)
+ * / /
+ * (s) (c)
+ * \
+ * (c)
+ */
+ do {
+ parent = successor;
+ successor = tmp;
+ tmp = tmp->rb_left;
+ } while (tmp);
+ parent->rb_left = child2 = successor->rb_right;
+ successor->rb_right = child;
+ rb_set_parent(child, successor);
+ augment->copy(node, successor);
+ augment->propagate(parent, successor);
+ }
+
+ successor->rb_left = tmp = node->rb_left;
+ rb_set_parent(tmp, successor);
+
+ pc = node->__rb_parent_color;
+ tmp = __rb_parent(pc);
+ __rb_change_child(node, successor, tmp, root);
+ if (child2) {
+ successor->__rb_parent_color = pc;
+ rb_set_parent_color(child2, parent, RB_BLACK);
+ rebalance = NULL;
+ } else {
+ unsigned long pc2 = successor->__rb_parent_color; /* saved before being overwritten on the next line */
+ successor->__rb_parent_color = pc;
+ rebalance = __rb_is_black(pc2) ? parent : NULL;
+ }
+ tmp = successor; /* the final propagate() call starts from here */
+ }
+
+ augment->propagate(tmp, NULL);
+ return rebalance; /* rb_erase_augmented() passes a non-NULL result to __rb_erase_color() */
+}
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		   const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *fixup = __rb_erase_augmented(node, root, augment);
+	if (fixup)	/* non-NULL: colours must be repaired starting here */
+		__rb_erase_color(fixup, root, augment->rotate);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LINUX_RBTREE_AUGMENTED_H */
diff --git a/repair.c b/repair.c
new file mode 100644
index 00000000..4f747421
--- /dev/null
+++ b/repair.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2012 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "ctree.h"
+#include "extent-cache.h"
+#include "utils.h"
+#include "repair.h"
+
+int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
+				    struct btrfs_key *first_key,
+				    u64 start, u64 len, int level)
+{
+	struct btrfs_corrupt_block *rec;
+	int err;
+
+	/* Recording is optional: without a corrupt_blocks tree, do nothing */
+	if (!info->corrupt_blocks)
+		return 0;
+
+	rec = malloc(sizeof(*rec));
+	if (!rec)
+		return -ENOMEM;
+	rec->key = *first_key;
+	rec->level = level;
+	rec->cache.start = start;
+	rec->cache.size = len;
+
+	/* Any non-zero result is treated as "not inserted": drop the record */
+	err = insert_cache_extent(info->corrupt_blocks, &rec->cache);
+	if (err)
+		free(rec);
+	BUG_ON(err && err != -EEXIST);
+	return err;
+}
+
diff --git a/repair.h b/repair.h
new file mode 100644
index 00000000..3fc0e8b6
--- /dev/null
+++ b/repair.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2012 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_REPAIR_H__
+#define __BTRFS_REPAIR_H__
+
+#include "ctree.h"
+
+struct btrfs_corrupt_block {
+ struct cache_extent cache; /* start/size of the corrupt block; this member is what gets inserted into info->corrupt_blocks */
+ struct btrfs_key key; /* copy of the first_key given when the block was recorded */
+ int level; /* level given when the block was recorded */
+};
+
+int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
+ struct btrfs_key *first_key,
+ u64 start, u64 len, int level);
+
+#endif
diff --git a/root-tree.c b/root-tree.c
new file mode 100644
index 00000000..934d02ef
--- /dev/null
+++ b/root-tree.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "print-tree.h"
+
+/* Find the item just before key (objectid, ROOT_ITEM_KEY, -1); if it belongs
+ * to objectid, copy it to item and its key to key. Returns 0 or -errno. */
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+			struct btrfs_root_item *item, struct btrfs_key *key)
+{
+	struct btrfs_key search_key, found_key;
+	struct btrfs_path *path;
+	struct extent_buffer *l;
+	int ret, slot;
+
+	search_key.objectid = objectid;
+	search_key.type = BTRFS_ROOT_ITEM_KEY;
+	search_key.offset = (u64)-1;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;	/* let the caller handle OOM instead of aborting */
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (path->slots[0] == 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+	BUG_ON(ret == 0);
+	l = path->nodes[0];
+	slot = path->slots[0] - 1;
+	btrfs_item_key_to_cpu(l, &found_key, slot);
+	if (found_key.objectid != objectid) {
+		ret = -ENOENT;
+		goto out;
+	}
+	read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
+			   sizeof(*item));
+	memcpy(key, &found_key, sizeof(found_key));
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/* Write item over the root item stored at key, replacing an old, shorter item
+ * first. Returns 0, -ENOMEM or another negative errno; BUGs if key is absent. */
+int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_root_item
+		      *item)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *l;
+	int ret;
+	int slot;
+	unsigned long ptr;
+	u32 old_len;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret != 0);
+	l = path->nodes[0];
+	slot = path->slots[0];
+	ptr = btrfs_item_ptr_offset(l, slot);
+	old_len = btrfs_item_size_nr(l, slot);
+
+	/*
+	 * If this is the first time we update the root item which originated
+	 * from an older kernel, we need to enlarge the item size to make room
+	 * for the added fields.
+	 */
+	if (old_len < sizeof(*item)) {
+		btrfs_release_path(path);
+		ret = btrfs_search_slot(trans, root, key, path,
+					-1, 1);
+		if (ret < 0)
+			goto out;
+
+		ret = btrfs_del_item(trans, root, path);
+		if (ret < 0)
+			goto out;
+		btrfs_release_path(path);
+		ret = btrfs_insert_empty_item(trans, root, path,
+					      key, sizeof(*item));
+		if (ret < 0)
+			goto out;
+		l = path->nodes[0];
+		slot = path->slots[0];
+		ptr = btrfs_item_ptr_offset(l, slot);
+	}
+
+	/*
+	 * Update generation_v2 so at the next mount we know the new root
+	 * fields are valid.
+	 */
+	btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
+
+	write_extent_buffer(l, item, ptr, sizeof(*item));
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/* Insert item as a new root item under key in the tree given by root. */
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_root_item
+		      *item)
+{
+	/*
+	 * Keep generation_v2 in step with the v1 generation, the same way
+	 * btrfs_update_root() does, then hand the item to the tree.
+	 */
+	btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
+
+	return btrfs_insert_item(trans, root, key, item, sizeof(*item));
+}
+
+/*
+ * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
+ * or BTRFS_ROOT_BACKREF_KEY.
+ *
+ * The dirid, sequence, name and name_len refer to the directory entry
+ * that is referencing the root.
+ *
+ * For a forward ref, the root_id is the id of the tree referencing
+ * the root and ref_id is the id of the subvol or snapshot.
+ *
+ * For a back ref the root_id is the id of the subvol or snapshot and
+ * ref_id is the id of the tree referencing it.
+ */
+int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *tree_root,
+		       u64 root_id, u8 type, u64 ref_id,
+		       u64 dirid, u64 sequence,
+		       const char *name, int name_len)
+{
+	struct btrfs_key key = {
+		.objectid = root_id,
+		.type = type,
+		.offset = ref_id,
+	};
+	struct btrfs_path *path;
+	struct btrfs_root_ref *ref;
+	struct extent_buffer *leaf;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* Reserve room for the fixed ref header plus the trailing name bytes */
+	ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
+				      sizeof(*ref) + name_len);
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
+	btrfs_set_root_ref_dirid(leaf, ref, dirid);
+	btrfs_set_root_ref_sequence(leaf, ref, sequence);
+	btrfs_set_root_ref_name_len(leaf, ref, name_len);
+
+	/* The name is stored immediately after the ref structure */
+	write_extent_buffer(leaf, name, (unsigned long)(ref + 1), name_len);
+	btrfs_mark_buffer_dirty(leaf);
+
+	btrfs_free_path(path);
+	return ret;
+}
diff --git a/send-stream.c b/send-stream.c
new file mode 100644
index 00000000..66c04884
--- /dev/null
+++ b/send-stream.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright (C) 2012 Alexander Block. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <uuid/uuid.h>
+#include <unistd.h>
+
+#include "send.h"
+#include "send-stream.h"
+#include "crc32c.h"
+
+struct btrfs_send_stream {
+ int fd; /* descriptor the stream is read from */
+ char read_buf[BTRFS_SEND_BUF_SIZE]; /* current command: header followed by its payload */
+
+ int cmd; /* command number of the last successfully parsed command */
+ struct btrfs_cmd_header *cmd_hdr; /* points at the header at the start of read_buf */
+ struct btrfs_tlv_header *cmd_attrs[BTRFS_SEND_A_MAX + 1]; /* attribute headers inside read_buf, indexed by type; NULL if absent */
+ u32 version; /* version field taken from the stream header */
+
+ struct btrfs_send_ops *ops; /* callbacks invoked for each command */
+ void *user; /* opaque pointer handed to every callback */
+};
+
+/*
+ * Read exactly len bytes from the stream descriptor into buf.
+ * Returns 0 on success, 1 if EOF is hit first, or -errno on a read error.
+ */
+static int read_buf(struct btrfs_send_stream *s, void *buf, int len)
+{
+	char *dst = buf;
+	int done;
+	int n;
+
+	/* A single read() may deliver less than requested: loop until done */
+	for (done = 0; done < len; done += n) {
+		n = read(s->fd, dst + done, len - done);
+		if (n == 0)
+			return 1;	/* end of stream before len bytes */
+		if (n < 0) {
+			n = -errno;
+			fprintf(stderr, "ERROR: read from stream failed. %s\n",
+				strerror(-n));
+			return n;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Read one command (header + payload) from the stream into s->read_buf, verify
+ * its crc32c and index its TLV attributes in s->cmd_attrs. All lengths come
+ * from the stream and are untrusted, so each is checked against the space
+ * actually available. Returns 0 on success or a negative errno.
+ */
+static int read_cmd(struct btrfs_send_stream *s)
+{
+	int ret;
+	int tlv_type;
+	u32 tlv_len, cmd_len, pos, left, crc, crc2;
+	char *data;
+	struct btrfs_tlv_header *tlv_hdr;
+
+	memset(s->cmd_attrs, 0, sizeof(s->cmd_attrs));
+
+	ret = read_buf(s, s->read_buf, sizeof(*s->cmd_hdr));
+	if (ret < 0)
+		goto out;
+	if (ret) {
+		ret = -EINVAL;
+		fprintf(stderr, "ERROR: unexpected EOF in stream.\n");
+		goto out;
+	}
+
+	s->cmd_hdr = (struct btrfs_cmd_header *)s->read_buf;
+	cmd_len = le32_to_cpu(s->cmd_hdr->len);
+	if (cmd_len > sizeof(s->read_buf) - sizeof(*s->cmd_hdr)) {
+		ret = -EINVAL;
+		fprintf(stderr, "ERROR: command length %u too big for buffer.\n",
+			cmd_len);
+		goto out;
+	}
+
+	data = s->read_buf + sizeof(*s->cmd_hdr);
+	ret = read_buf(s, data, cmd_len);
+	if (ret < 0)
+		goto out;
+	if (ret) {
+		ret = -EINVAL;
+		fprintf(stderr, "ERROR: unexpected EOF in stream.\n");
+		goto out;
+	}
+
+	/* The crc is computed with the header's crc field zeroed */
+	crc = le32_to_cpu(s->cmd_hdr->crc);
+	s->cmd_hdr->crc = 0;
+	crc2 = crc32c(0, (unsigned char*)s->read_buf,
+			sizeof(*s->cmd_hdr) + cmd_len);
+	if (crc != crc2) {
+		ret = -EINVAL;
+		fprintf(stderr, "ERROR: crc32 mismatch in command.\n");
+		goto out;
+	}
+
+	for (pos = 0; pos < cmd_len; pos += sizeof(*tlv_hdr) + tlv_len) {
+		left = cmd_len - pos;
+		if (left < sizeof(*tlv_hdr)) {
+			fprintf(stderr, "ERROR: truncated tlv header in cmd.\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		tlv_hdr = (struct btrfs_tlv_header *)(data + pos);
+		tlv_type = le16_to_cpu(tlv_hdr->tlv_type);
+		tlv_len = le16_to_cpu(tlv_hdr->tlv_len);
+		if (tlv_type <= 0 || tlv_type > BTRFS_SEND_A_MAX ||
+		    tlv_len > left - sizeof(*tlv_hdr)) {
+			fprintf(stderr, "ERROR: invalid tlv in cmd. "
+					"tlv_type = %d, tlv_len = %d\n",
+					tlv_type, (int)tlv_len);
+			ret = -EINVAL;
+			goto out;
+		}
+		s->cmd_attrs[tlv_type] = tlv_hdr;
+	}
+
+	s->cmd = le16_to_cpu(s->cmd_hdr->cmd);
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * Look up attribute attr of the current command.
+ * On success stores a pointer to the payload in *data and its length in
+ * *len and returns 0; returns -EINVAL for an out-of-range attr and -ENOENT
+ * when the command does not carry that attribute.
+ */
+static int tlv_get(struct btrfs_send_stream *s, int attr, void **data, int *len)
+{
+	struct btrfs_tlv_header *h;
+
+	if (attr <= 0 || attr > BTRFS_SEND_A_MAX) {
+		fprintf(stderr, "ERROR: invalid attribute requested. "
+				"attr = %d\n",
+				attr);
+		return -EINVAL;
+	}
+
+	h = s->cmd_attrs[attr];
+	if (!h) {
+		fprintf(stderr, "ERROR: attribute %d requested "
+				"but not present.\n", attr);
+		return -ENOENT;
+	}
+
+	/* The payload starts right behind the TLV header */
+	*data = h + 1;
+	*len = le16_to_cpu(h->tlv_len);
+	return 0;
+}
+
+#define __TLV_GOTO_FAIL(expr) \
+	if ((ret = (expr)) < 0) \
+		goto tlv_get_failed;
+
+#define __TLV_DO_WHILE_GOTO_FAIL(expr) \
+	do { \
+		__TLV_GOTO_FAIL(expr) \
+	} while (0)
+
+/* TLV_* users must provide a local 'int ret' and a tlv_get_failed label */
+#define TLV_GET(s, attr, data, len) \
+	__TLV_DO_WHILE_GOTO_FAIL(tlv_get(s, attr, data, len))
+
+#define TLV_CHECK_LEN(expected, got) \
+	do { \
+		if ((expected) != (got)) { \
+			fprintf(stderr, "ERROR: invalid size for attribute. " \
+					"expected = %d, got = %d\n", \
+					(int)(expected), (int)(got)); \
+			ret = -EINVAL; \
+			goto tlv_get_failed; \
+		} \
+	} while (0)
+
+#define TLV_GET_INT(s, attr, bits, v) \
+	do { \
+		__le##bits *__tmp; \
+		int __len; \
+		TLV_GET(s, attr, (void**)&__tmp, &__len); \
+		TLV_CHECK_LEN(sizeof(*__tmp), __len); \
+		*(v) = get_unaligned_le##bits(__tmp); \
+	} while (0)
+
+#define TLV_GET_U8(s, attr, v) TLV_GET_INT(s, attr, 8, v)
+#define TLV_GET_U16(s, attr, v) TLV_GET_INT(s, attr, 16, v)
+#define TLV_GET_U32(s, attr, v) TLV_GET_INT(s, attr, 32, v)
+#define TLV_GET_U64(s, attr, v) TLV_GET_INT(s, attr, 64, v)
+
+/* Copy attribute attr into a freshly malloc()ed, NUL-terminated string that
+ * the caller must free. Returns 0, -ENOMEM or the error from tlv_get(). */
+static int tlv_get_string(struct btrfs_send_stream *s, int attr, char **str)
+{
+	void *payload;
+	int size = 0;
+	int ret;
+
+	ret = tlv_get(s, attr, &payload, &size);
+	if (ret < 0)
+		return ret;
+
+	*str = malloc(size + 1);
+	if (!*str)
+		return -ENOMEM;
+	memcpy(*str, payload, size);
+	(*str)[size] = 0;
+	return 0;
+}
+#define TLV_GET_STRING(s, attr, str) \
+ __TLV_DO_WHILE_GOTO_FAIL(tlv_get_string(s, attr, str))
+
+static int tlv_get_timespec(struct btrfs_send_stream *s,
+			    int attr, struct timespec *ts)
+{
+	struct btrfs_timespec *raw;
+	int raw_len;
+	int ret;
+
+	TLV_GET(s, attr, (void**)&raw, &raw_len);
+	TLV_CHECK_LEN(sizeof(*raw), raw_len);
+
+	/* Stream timestamps are little-endian: 64-bit sec, 32-bit nsec */
+	ts->tv_nsec = le32_to_cpu(raw->nsec);
+	ts->tv_sec = le64_to_cpu(raw->sec);
+	ret = 0;
+tlv_get_failed:
+	return ret;
+}
+#define TLV_GET_TIMESPEC(s, attr, ts) \
+ __TLV_DO_WHILE_GOTO_FAIL(tlv_get_timespec(s, attr, ts))
+
+static int tlv_get_uuid(struct btrfs_send_stream *s, int attr, u8 *uuid)
+{
+	void *raw;
+	int raw_len;
+	int ret;
+
+	/* A UUID attribute must be exactly BTRFS_UUID_SIZE bytes long */
+	TLV_GET(s, attr, &raw, &raw_len);
+	TLV_CHECK_LEN(BTRFS_UUID_SIZE, raw_len);
+
+	memcpy(uuid, raw, BTRFS_UUID_SIZE);
+	ret = 0;
+tlv_get_failed:
+	return ret;
+}
+#define TLV_GET_UUID(s, attr, uuid) \
+ __TLV_DO_WHILE_GOTO_FAIL(tlv_get_uuid(s, attr, uuid))
+
+/* Read the next command and dispatch it to the matching s->ops callback.
+ * Returns the callback's result (0 for an unhandled command), 1 for the END
+ * command, or a negative errno if the command or an attribute is malformed. */
+static int read_and_process_cmd(struct btrfs_send_stream *s)
+{
+	int ret;
+	char *path = NULL;
+	char *path_to = NULL;
+	char *clone_path = NULL;
+	char *xattr_name = NULL;
+	void *xattr_data = NULL;
+	void *data = NULL;
+	struct timespec at, ct, mt;
+	u8 uuid[BTRFS_UUID_SIZE];
+	u8 clone_uuid[BTRFS_UUID_SIZE];
+	u64 tmp, tmp2;
+	u64 ctransid;
+	u64 clone_ctransid;
+	u64 mode, dev;
+	u64 clone_offset;
+	u64 offset;
+	int len;
+	int xattr_len;
+
+	ret = read_cmd(s);
+	if (ret)
+		goto out;
+
+	switch (s->cmd) {
+	case BTRFS_SEND_C_SUBVOL:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_UUID(s, BTRFS_SEND_A_UUID, uuid);
+		TLV_GET_U64(s, BTRFS_SEND_A_CTRANSID, &ctransid);
+		ret = s->ops->subvol(path, uuid, ctransid, s->user);
+		break;
+	case BTRFS_SEND_C_SNAPSHOT:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_UUID(s, BTRFS_SEND_A_UUID, uuid);
+		TLV_GET_U64(s, BTRFS_SEND_A_CTRANSID, &ctransid);
+		TLV_GET_UUID(s, BTRFS_SEND_A_CLONE_UUID, clone_uuid);
+		TLV_GET_U64(s, BTRFS_SEND_A_CLONE_CTRANSID, &clone_ctransid);
+		ret = s->ops->snapshot(path, uuid, ctransid, clone_uuid,
+				clone_ctransid, s->user);
+		break;
+	case BTRFS_SEND_C_MKFILE:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		ret = s->ops->mkfile(path, s->user);
+		break;
+	case BTRFS_SEND_C_MKDIR:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		ret = s->ops->mkdir(path, s->user);
+		break;
+	case BTRFS_SEND_C_MKNOD:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_U64(s, BTRFS_SEND_A_MODE, &mode);
+		TLV_GET_U64(s, BTRFS_SEND_A_RDEV, &dev);
+		ret = s->ops->mknod(path, mode, dev, s->user);
+		break;
+	case BTRFS_SEND_C_MKFIFO:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		ret = s->ops->mkfifo(path, s->user);
+		break;
+	case BTRFS_SEND_C_MKSOCK:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		ret = s->ops->mksock(path, s->user);
+		break;
+	case BTRFS_SEND_C_SYMLINK:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH_LINK, &path_to);
+		ret = s->ops->symlink(path, path_to, s->user);
+		break;
+	case BTRFS_SEND_C_RENAME:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH_TO, &path_to);
+		ret = s->ops->rename(path, path_to, s->user);
+		break;
+	case BTRFS_SEND_C_LINK:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH_LINK, &path_to);
+		ret = s->ops->link(path, path_to, s->user);
+		break;
+	case BTRFS_SEND_C_UNLINK:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		ret = s->ops->unlink(path, s->user);
+		break;
+	case BTRFS_SEND_C_RMDIR:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		ret = s->ops->rmdir(path, s->user);
+		break;
+	case BTRFS_SEND_C_WRITE:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_U64(s, BTRFS_SEND_A_FILE_OFFSET, &offset);
+		TLV_GET(s, BTRFS_SEND_A_DATA, &data, &len);
+		ret = s->ops->write(path, data, offset, len, s->user);
+		break;
+	case BTRFS_SEND_C_CLONE:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_U64(s, BTRFS_SEND_A_FILE_OFFSET, &offset);
+		/* 64-bit length: keep it in a u64, not in the int 'len' */
+		TLV_GET_U64(s, BTRFS_SEND_A_CLONE_LEN, &tmp);
+		TLV_GET_UUID(s, BTRFS_SEND_A_CLONE_UUID, clone_uuid);
+		TLV_GET_U64(s, BTRFS_SEND_A_CLONE_CTRANSID, &clone_ctransid);
+		TLV_GET_STRING(s, BTRFS_SEND_A_CLONE_PATH, &clone_path);
+		TLV_GET_U64(s, BTRFS_SEND_A_CLONE_OFFSET, &clone_offset);
+		ret = s->ops->clone(path, offset, tmp, clone_uuid,
+				clone_ctransid, clone_path, clone_offset,
+				s->user);
+		break;
+	case BTRFS_SEND_C_SET_XATTR:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_STRING(s, BTRFS_SEND_A_XATTR_NAME, &xattr_name);
+		TLV_GET(s, BTRFS_SEND_A_XATTR_DATA, &xattr_data, &xattr_len);
+		ret = s->ops->set_xattr(path, xattr_name, xattr_data,
+				xattr_len, s->user);
+		break;
+	case BTRFS_SEND_C_REMOVE_XATTR:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_STRING(s, BTRFS_SEND_A_XATTR_NAME, &xattr_name);
+		ret = s->ops->remove_xattr(path, xattr_name, s->user);
+		break;
+	case BTRFS_SEND_C_TRUNCATE:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_U64(s, BTRFS_SEND_A_SIZE, &tmp);
+		ret = s->ops->truncate(path, tmp, s->user);
+		break;
+	case BTRFS_SEND_C_CHMOD:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_U64(s, BTRFS_SEND_A_MODE, &tmp);
+		ret = s->ops->chmod(path, tmp, s->user);
+		break;
+	case BTRFS_SEND_C_CHOWN:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_U64(s, BTRFS_SEND_A_UID, &tmp);
+		TLV_GET_U64(s, BTRFS_SEND_A_GID, &tmp2);
+		ret = s->ops->chown(path, tmp, tmp2, s->user);
+		break;
+	case BTRFS_SEND_C_UTIMES:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_TIMESPEC(s, BTRFS_SEND_A_ATIME, &at);
+		TLV_GET_TIMESPEC(s, BTRFS_SEND_A_MTIME, &mt);
+		TLV_GET_TIMESPEC(s, BTRFS_SEND_A_CTIME, &ct);
+		ret = s->ops->utimes(path, &at, &mt, &ct, s->user);
+		break;
+	case BTRFS_SEND_C_UPDATE_EXTENT:
+		TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path);
+		TLV_GET_U64(s, BTRFS_SEND_A_FILE_OFFSET, &offset);
+		TLV_GET_U64(s, BTRFS_SEND_A_SIZE, &tmp);
+		ret = s->ops->update_extent(path, offset, tmp, s->user);
+		break;
+	case BTRFS_SEND_C_END:
+		ret = 1;
+		break;
+	}
+
+tlv_get_failed:
+out:
+	free(path);
+	free(path_to);
+	free(clone_path);
+	free(xattr_name);
+	return ret;
+}
+
+/*
+ * If max_errors is 0, then don't stop processing the stream if one of the
+ * callbacks in btrfs_send_ops structure returns an error. If greater than
+ * zero, stop after max_errors errors happened.
+ */
+int btrfs_read_and_process_send_stream(int fd,
+					struct btrfs_send_ops *ops, void *user,
+					int honor_end_cmd,
+					u64 max_errors)
+{
+	struct btrfs_stream_header hdr;
+	struct btrfs_send_stream s;
+	u64 errors = 0;
+	int last_err = 0;
+	int ret;
+
+	s.fd = fd;
+	s.ops = ops;
+	s.user = user;
+
+	/* A stream that ends before the header is complete yields 1 */
+	ret = read_buf(&s, &hdr, sizeof(hdr));
+	if (ret < 0)
+		return ret;
+	if (ret)
+		return 1;
+
+	if (strcmp(hdr.magic, BTRFS_SEND_STREAM_MAGIC)) {
+		fprintf(stderr, "ERROR: Unexpected header\n");
+		return -EINVAL;
+	}
+
+	s.version = le32_to_cpu(hdr.version);
+	if (s.version > BTRFS_SEND_STREAM_VERSION) {
+		fprintf(stderr, "ERROR: Stream version %d not supported. "
+				"Please upgrade btrfs-progs\n", s.version);
+		return -EINVAL;
+	}
+
+	for (;;) {
+		ret = read_and_process_cmd(&s);
+		if (ret == 0)
+			continue;
+		if (ret > 0) {
+			/* END command (or a positive callback result) */
+			if (!honor_end_cmd)
+				ret = 0;
+			break;
+		}
+
+		/* Remember the failure; give up once the budget is spent */
+		last_err = ret;
+		errors++;
+		if (max_errors > 0 && errors >= max_errors)
+			break;
+	}
+
+	/* An earlier error must not be masked by a clean finish */
+	if (last_err && !ret)
+		ret = last_err;
+	return ret;
+}
diff --git a/send-stream.h b/send-stream.h
new file mode 100644
index 00000000..39901f86
--- /dev/null
+++ b/send-stream.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2012 Alexander Block. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_SEND_STREAM_H__
+#define __BTRFS_SEND_STREAM_H__
+
+/*
+ * NOTE: this file is public API, any incompatible change has to update
+ * library version
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#else
+#include <btrfs/kerncompat.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+/*
+ * Callback table consumed by btrfs_read_and_process_send_stream(). Every
+ * member takes the caller-supplied 'user' pointer as its last argument and
+ * returns an int.
+ * NOTE(review): presumably one callback per send stream command, with the
+ * return value propagated as that command's result (negative = error); only
+ * the utimes and update_extent call sites are visible here - confirm for the
+ * remaining members.
+ */
+struct btrfs_send_ops {
+ int (*subvol)(const char *path, const u8 *uuid, u64 ctransid,
+ void *user);
+ int (*snapshot)(const char *path, const u8 *uuid, u64 ctransid,
+ const u8 *parent_uuid, u64 parent_ctransid,
+ void *user);
+ int (*mkfile)(const char *path, void *user);
+ int (*mkdir)(const char *path, void *user);
+ int (*mknod)(const char *path, u64 mode, u64 dev, void *user);
+ int (*mkfifo)(const char *path, void *user);
+ int (*mksock)(const char *path, void *user);
+ int (*symlink)(const char *path, const char *lnk, void *user);
+ int (*rename)(const char *from, const char *to, void *user);
+ int (*link)(const char *path, const char *lnk, void *user);
+ int (*unlink)(const char *path, void *user);
+ int (*rmdir)(const char *path, void *user);
+ int (*write)(const char *path, const void *data, u64 offset, u64 len,
+ void *user);
+ int (*clone)(const char *path, u64 offset, u64 len,
+ const u8 *clone_uuid, u64 clone_ctransid,
+ const char *clone_path, u64 clone_offset,
+ void *user);
+ int (*set_xattr)(const char *path, const char *name, const void *data,
+ int len, void *user);
+ int (*remove_xattr)(const char *path, const char *name, void *user);
+ int (*truncate)(const char *path, u64 size, void *user);
+ int (*chmod)(const char *path, u64 mode, void *user);
+ int (*chown)(const char *path, u64 uid, u64 gid, void *user);
+ /* at/mt/ct are decoded from the ATIME/MTIME/CTIME stream attributes */
+ int (*utimes)(const char *path, struct timespec *at,
+ struct timespec *mt, struct timespec *ct,
+ void *user);
+ /* offset/len are decoded from the FILE_OFFSET/SIZE stream attributes */
+ int (*update_extent)(const char *path, u64 offset, u64 len, void *user);
+};
+
+/*
+ * Read a send stream from 'fd' and dispatch its commands to 'ops', passing
+ * 'user' through to every callback. If 'honor_end_cmd' is zero a positive
+ * command result is reported as 0. 'max_errors' == 0 means failed commands
+ * never stop processing; a positive value stops after that many failures.
+ */
+int btrfs_read_and_process_send_stream(int fd,
+ struct btrfs_send_ops *ops, void *user,
+ int honor_end_cmd,
+ u64 max_errors);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/send-test.c b/send-test.c
new file mode 100644
index 00000000..4645b898
--- /dev/null
+++ b/send-test.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 2013 SUSE. All rights reserved.
+ *
+ * This code is adapted from cmds-send.c and cmds-receive.c,
+ * Both of which are:
+ *
+ * Copyright (C) 2012 Alexander Block. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <unistd.h>
+#include <stdint.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <math.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <libgen.h>
+#include <mntent.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <asm/types.h>
+#include <uuid/uuid.h>
+
+/*
+ * This should be compilable without the rest of the btrfs-progs
+ * source distribution.
+ */
+#if BTRFS_FLAT_INCLUDES
+#include "send-utils.h"
+#include "send-stream.h"
+#else
+#include <btrfs/send-utils.h>
+#include <btrfs/send-stream.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+/*
+ * Pipe connecting the send ioctl to the reader thread: main() hands
+ * pipefd[1] to the ioctl, process_thread() reads from pipefd[0].
+ */
+static int pipefd[2];
+/* Argument block for BTRFS_IOC_SEND; its fields are filled in by main(). */
+struct btrfs_ioctl_send_args io_send = {0, };
+/* realpath() results for argv[2] and argv[1] respectively, set in main(). */
+static char *subvol_path;
+static char *root_path;
+
+/* Context passed to every print_* callback through its 'user' pointer. */
+struct recv_args {
+ /* prefix joined to each printed path; reassigned by print_subvol() and print_snapshot() */
+ char *full_subvol_path;
+ /* base that print_subvol()/print_snapshot() join with the stream's subvolume path */
+ char *root_path;
+};
+
+/*
+ * Print the command synopsis. A non-zero @error terminates the process with
+ * that value as exit status; with 0 the function simply returns.
+ */
+void usage(int error)
+{
+	printf("send-test <btrfs root> <subvol>\n");
+	if (!error)
+		return;
+
+	exit(error);
+}
+
+/*
+ * 'subvol' callback: record the full path of the subvolume being received in
+ * the recv_args context and print its uuid, ctransid and path.
+ */
+static int print_subvol(const char *path, const u8 *uuid, u64 ctransid,
+			void *user)
+{
+	char uuid_text[BTRFS_UUID_UNPARSED_SIZE];
+	struct recv_args *ctx = user;
+
+	/* Later callbacks print their paths relative to this one. */
+	ctx->full_subvol_path = path_cat(ctx->root_path, path);
+
+	uuid_unparse(uuid, uuid_text);
+	printf("subvol\t%s\t%llu\t%s\n", uuid_text,
+	       (unsigned long long)ctransid, ctx->full_subvol_path);
+
+	return 0;
+}
+
+/*
+ * 'snapshot' callback: record the full path of the snapshot being received in
+ * the recv_args context and print both uuid/ctransid pairs plus the path.
+ */
+static int print_snapshot(const char *path, const u8 *uuid, u64 ctransid,
+			  const u8 *parent_uuid, u64 parent_ctransid,
+			  void *user)
+{
+	char own_text[BTRFS_UUID_UNPARSED_SIZE];
+	char parent_text[BTRFS_UUID_UNPARSED_SIZE];
+	struct recv_args *ctx = user;
+
+	/* Later callbacks print their paths relative to this one. */
+	ctx->full_subvol_path = path_cat(ctx->root_path, path);
+
+	uuid_unparse(uuid, own_text);
+	uuid_unparse(parent_uuid, parent_text);
+
+	printf("snapshot\t%s\t%llu\t%s\t%llu\t%s\n", own_text,
+	       (unsigned long long)ctransid, parent_text,
+	       (unsigned long long)parent_ctransid, ctx->full_subvol_path);
+
+	return 0;
+}
+
+/* 'mkfile' callback: print the full path of the file to create. */
+static int print_mkfile(const char *path, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("mkfile\t%s\n", abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'mkdir' callback: print the full path of the directory to create. */
+static int print_mkdir(const char *path, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("mkdir\t%s\n", abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'mknod' callback: print mode (octal), device (hex) and the full path. */
+static int print_mknod(const char *path, u64 mode, u64 dev, void *user)
+{
+	const struct recv_args *ctx = user;
+	unsigned long long mode_ull = mode;
+	unsigned long long dev_ull = dev;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("mknod\t%llo\t0x%llx\t%s\n", mode_ull, dev_ull, abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'mkfifo' callback: print the full path of the fifo to create. */
+static int print_mkfifo(const char *path, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("mkfifo\t%s\n", abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'mksock' callback: print the full path of the socket to create. */
+static int print_mksock(const char *path, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("mksock\t%s\n", abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'symlink' callback: print the link target followed by the full link path. */
+static int print_symlink(const char *path, const char *lnk, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("symlink\t%s\t%s\n", lnk, abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/*
+ * 'rename' callback: print the full source and destination paths.
+ *
+ * Both paths are resolved against the subvolume being received, like every
+ * other callback in this file does; the resolved strings were previously
+ * built and freed without ever being printed.
+ */
+static int print_rename(const char *from, const char *to, void *user)
+{
+	struct recv_args *r = user;
+	char *full_from = path_cat(r->full_subvol_path, from);
+	char *full_to = path_cat(r->full_subvol_path, to);
+
+	printf("rename\t%s\t%s\n", full_from, full_to);
+
+	free(full_from);
+	free(full_to);
+	return 0;
+}
+
+/* 'link' callback: print the link argument followed by the full path. */
+static int print_link(const char *path, const char *lnk, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("link\t%s\t%s\n", lnk, abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'unlink' callback: print the full path of the entry to remove. */
+static int print_unlink(const char *path, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("unlink\t%s\n", abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'rmdir' callback: print the full path of the directory to remove. */
+static int print_rmdir(const char *path, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("rmdir\t%s\n", abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/*
+ * 'write' callback: print offset, length and full path. The data buffer is
+ * not looked at.
+ */
+static int print_write(const char *path, const void *data, u64 offset,
+		       u64 len, void *user)
+{
+	const struct recv_args *ctx = user;
+	unsigned long long off_ull = offset;
+	unsigned long long len_ull = len;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("write\t%llu\t%llu\t%s\n", off_ull, len_ull, abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/*
+ * 'clone' callback: print the full destination path and the clone source
+ * path as given. Offsets, length and the source uuid/ctransid are not
+ * printed.
+ */
+static int print_clone(const char *path, u64 offset, u64 len,
+		       const u8 *clone_uuid, u64 clone_ctransid,
+		       const char *clone_path, u64 clone_offset,
+		       void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("clone\t%s\t%s\n", abs_path, clone_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/*
+ * 'set_xattr' callback: print the full path, the attribute name and the
+ * value length. The value itself is not printed.
+ */
+static int print_set_xattr(const char *path, const char *name,
+			   const void *data, int len, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("set_xattr\t%s\t%s\t%d\n", abs_path, name, len);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'remove_xattr' callback: print the full path and the attribute name. */
+static int print_remove_xattr(const char *path, const char *name, void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("remove_xattr\t%s\t%s\n", abs_path, name);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'truncate' callback: print the new size and the full path. */
+static int print_truncate(const char *path, u64 size, void *user)
+{
+	const struct recv_args *ctx = user;
+	unsigned long long size_ull = size;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("truncate\t%llu\t%s\n", size_ull, abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'chmod' callback: print the mode in octal and the full path. */
+static int print_chmod(const char *path, u64 mode, void *user)
+{
+	const struct recv_args *ctx = user;
+	unsigned long long mode_ull = mode;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("chmod\t%llo\t%s\n", mode_ull, abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/* 'chown' callback: print uid, gid and the full path. */
+static int print_chown(const char *path, u64 uid, u64 gid, void *user)
+{
+	const struct recv_args *ctx = user;
+	unsigned long long uid_ull = uid;
+	unsigned long long gid_ull = gid;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("chown\t%llu\t%llu\t%s\n", uid_ull, gid_ull, abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/*
+ * 'utimes' callback: print the full path only; the three timestamps are not
+ * printed.
+ */
+static int print_utimes(const char *path, struct timespec *at,
+			struct timespec *mt, struct timespec *ct,
+			void *user)
+{
+	const struct recv_args *ctx = user;
+	char *abs_path;
+
+	abs_path = path_cat(ctx->full_subvol_path, path);
+	printf("utimes\t%s\n", abs_path);
+	free(abs_path);
+
+	return 0;
+}
+
+/*
+ * 'update_extent' callback: print the full path followed by the extent
+ * offset and length.
+ *
+ * The u64 values are cast to unsigned long long so that they match the
+ * %llu conversions regardless of how u64 is defined on the platform.
+ */
+static int print_update_extent(const char *path, u64 offset, u64 len,
+			       void *user)
+{
+	struct recv_args *r = user;
+	char *full_path = path_cat(r->full_subvol_path, path);
+
+	printf("update_extent\t%s\t%llu\t%llu\n", full_path,
+	       (unsigned long long)offset, (unsigned long long)len);
+
+	free(full_path);
+	return 0;
+}
+
+/*
+ * Callback table handed to btrfs_read_and_process_send_stream() by
+ * process_thread(): every member of struct btrfs_send_ops is wired to the
+ * print_* function of the same name.
+ */
+static struct btrfs_send_ops send_ops_print = {
+ .subvol = print_subvol,
+ .snapshot = print_snapshot,
+ .mkfile = print_mkfile,
+ .mkdir = print_mkdir,
+ .mknod = print_mknod,
+ .mkfifo = print_mkfifo,
+ .mksock = print_mksock,
+ .symlink = print_symlink,
+ .rename = print_rename,
+ .link = print_link,
+ .unlink = print_unlink,
+ .rmdir = print_rmdir,
+ .write = print_write,
+ .clone = print_clone,
+ .set_xattr = print_set_xattr,
+ .remove_xattr = print_remove_xattr,
+ .truncate = print_truncate,
+ .chmod = print_chmod,
+ .chown = print_chown,
+ .utimes = print_utimes,
+ .update_extent = print_update_extent,
+};
+
+/*
+ * Reader thread: keep processing send streams from the read end of the pipe
+ * until btrfs_read_and_process_send_stream() returns non-zero.
+ *
+ * @arg_ is the struct recv_args passed through to the print_* callbacks.
+ *
+ * Returns ERR_PTR(0) when the loop ended with a positive result, otherwise
+ * ERR_PTR() of the negative error code.
+ */
+static void *process_thread(void *arg_)
+{
+	int ret;
+
+	while (1) {
+		/*
+		 * honor_end_cmd = 0 so an end command does not terminate the
+		 * loop; max_errors = 0 so failing callbacks do not stop
+		 * processing. The max_errors argument was previously missing,
+		 * which does not match the five-parameter prototype.
+		 */
+		ret = btrfs_read_and_process_send_stream(pipefd[0],
+				&send_ops_print, arg_, 0, 0);
+		if (ret)
+			break;
+	}
+
+	if (ret > 0)
+		ret = 0;
+
+	return ERR_PTR(ret);
+}
+
+/*
+ * send-test <btrfs root> <subvol>
+ *
+ * Issues BTRFS_IOC_SEND (with BTRFS_SEND_FLAG_NO_FILE_DATA) on <subvol>,
+ * writing into a pipe, while a second thread decodes the stream from the
+ * other end and prints one line per command.
+ *
+ * Exit status is 0 on success and 1 on any failure after argument parsing;
+ * usage() exits directly on bad arguments.
+ */
+int main(int argc, char **argv)
+{
+	int ret = 0;
+	int subvol_fd;
+	pthread_t t_read;
+	void *t_err = NULL;
+	struct recv_args r;
+
+	if (argc != 3)
+		usage(EINVAL);
+
+	root_path = realpath(argv[1], NULL);
+	if (!root_path) {
+		ret = errno;
+		usage(ret);
+	}
+
+	subvol_path = realpath(argv[2], NULL);
+	if (!subvol_path) {
+		ret = errno;
+		usage(ret);
+	}
+
+	r.full_subvol_path = subvol_path;
+	r.root_path = root_path;
+
+	subvol_fd = open(subvol_path, O_RDONLY|O_NOATIME);
+	if (subvol_fd < 0) {
+		ret = errno;
+		fprintf(stderr, "ERROR: Subvolume open failed. %s\n",
+			strerror(ret));
+		goto out;
+	}
+
+	ret = pipe(pipefd);
+	if (ret < 0) {
+		ret = errno;
+		fprintf(stderr, "ERROR: pipe failed. %s\n", strerror(ret));
+		goto out;
+	}
+
+	/*
+	 * pthread_create() reports failure by returning a positive error
+	 * number; it never returns a negative value and does not set errno.
+	 */
+	ret = pthread_create(&t_read, NULL, process_thread, &r);
+	if (ret) {
+		fprintf(stderr, "ERROR: pthread create failed. %s\n",
+			strerror(ret));
+		goto out;
+	}
+
+	io_send.send_fd = pipefd[1];
+	io_send.clone_sources_count = 0;
+	io_send.clone_sources = NULL;
+	io_send.parent_root = 0;
+	io_send.flags = BTRFS_SEND_FLAG_NO_FILE_DATA;
+
+	ret = ioctl(subvol_fd, BTRFS_IOC_SEND, &io_send);
+	if (ret < 0) {
+		ret = errno;
+		fprintf(stderr, "ERROR: send ioctl failed with %d: %s\n", ret,
+			strerror(ret));
+		goto out;
+	}
+
+	/* Closing the write end lets the reader thread see end of input. */
+	close(pipefd[1]);
+
+	ret = pthread_join(t_read, &t_err);
+	if (ret) {
+		fprintf(stderr, "ERROR: pthread_join failed: %s\n",
+			strerror(ret));
+		goto out;
+	}
+	if (t_err) {
+		/* The thread returns a negative error code via ERR_PTR(). */
+		ret = (long int)t_err;
+		fprintf(stderr, "ERROR: failed to process send stream, ret=%ld "
+			"(%s)\n", (long int)t_err, strerror(-ret));
+		goto out;
+	}
+
+out:
+	return !!ret;
+}
diff --git a/send-utils.c b/send-utils.c
new file mode 100644
index 00000000..3c369b86
--- /dev/null
+++ b/send-utils.c
@@ -0,0 +1,771 @@
+/*
+ * Copyright (C) 2012 Alexander Block. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <uuid/uuid.h>
+#include <limits.h>
+#include <errno.h>
+
+#include "ctree.h"
+#include "send-utils.h"
+#include "ioctl.h"
+#include "btrfs-list.h"
+
+static int btrfs_subvolid_resolve_sub(int fd, char *path, size_t *path_len,
+ u64 subvol_id);
+
+/*
+ * Open @sub_path relative to @mnt_fd and store the root id reported by
+ * btrfs_list_get_path_rootid() for it in @root_id.
+ *
+ * Returns -errno if the open fails, otherwise the result of
+ * btrfs_list_get_path_rootid().
+ */
+static int btrfs_get_root_id_by_sub_path(int mnt_fd, const char *sub_path,
+					 u64 *root_id)
+{
+	int err;
+	int fd = openat(mnt_fd, sub_path, O_RDONLY);
+
+	if (fd >= 0) {
+		err = btrfs_list_get_path_rootid(fd, root_id);
+		close(fd);
+		return err;
+	}
+
+	err = -errno;
+	fprintf(stderr, "ERROR: open %s failed. %s\n", sub_path,
+		strerror(-err));
+	return err;
+}
+
+/*
+ * Copy the raw ROOT_ITEM of @root_id out of the root tree into @buf.
+ *
+ * @buf_len:  capacity of @buf in bytes
+ * @read_len: set to the size of the item copied (0 if none was found)
+ *
+ * When several ROOT_ITEM keys exist for @root_id, the last one returned by
+ * the search is the one left in @buf.
+ *
+ * Returns 0 on success, -ENOENT if no item was found, -EOVERFLOW if an item
+ * is larger than @buf_len, or -errno if the search ioctl fails. A failing
+ * ioctl used to be reported as 0, which made the caller work on an unfilled
+ * buffer.
+ */
+static int btrfs_read_root_item_raw(int mnt_fd, u64 root_id, size_t buf_len,
+				    u32 *read_len, void *buf)
+{
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	unsigned long off = 0;
+	int found = 0;
+	int i;
+
+	*read_len = 0;
+	memset(&args, 0, sizeof(args));
+
+	sk->tree_id = BTRFS_ROOT_TREE_OBJECTID;
+
+	/*
+	 * there may be more than one ROOT_ITEM key if there are
+	 * snapshots pending deletion, we have to loop through
+	 * them.
+	 */
+	sk->min_objectid = root_id;
+	sk->max_objectid = root_id;
+	sk->max_type = BTRFS_ROOT_ITEM_KEY;
+	sk->min_type = BTRFS_ROOT_ITEM_KEY;
+	sk->max_offset = (u64)-1;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 4096;
+
+	while (1) {
+		ret = ioctl(mnt_fd, BTRFS_IOC_TREE_SEARCH, &args);
+		if (ret < 0) {
+			/* save errno before fprintf() can overwrite it */
+			ret = -errno;
+			fprintf(stderr,
+				"ERROR: can't perform the search - %s\n",
+				strerror(-ret));
+			return ret;
+		}
+		/* the ioctl returns the number of item it found in nr_items */
+		if (sk->nr_items == 0)
+			break;
+
+		off = 0;
+		for (i = 0; i < sk->nr_items; i++) {
+			struct btrfs_root_item *item;
+			sh = (struct btrfs_ioctl_search_header *)(args.buf +
+								  off);
+
+			off += sizeof(*sh);
+			item = (struct btrfs_root_item *)(args.buf + off);
+			off += sh->len;
+
+			/* remember where to resume the next search */
+			sk->min_objectid = sh->objectid;
+			sk->min_type = sh->type;
+			sk->min_offset = sh->offset;
+
+			if (sh->objectid > root_id)
+				break;
+
+			if (sh->objectid == root_id &&
+			    sh->type == BTRFS_ROOT_ITEM_KEY) {
+				if (sh->len > buf_len) {
+					/* btrfs-progs is too old for kernel */
+					fprintf(stderr,
+						"ERROR: buf for read_root_item_raw() is too small, get newer btrfs tools!\n");
+					return -EOVERFLOW;
+				}
+				memcpy(buf, item, sh->len);
+				*read_len = sh->len;
+				found = 1;
+			}
+		}
+
+		/*
+		 * nr_items now holds the number of results of the last
+		 * search; restore the batch size so the next search is not
+		 * limited to that count.
+		 */
+		sk->nr_items = 4096;
+
+		if (sk->min_offset < (u64)-1)
+			sk->min_offset++;
+		else
+			break;
+
+		if (sk->min_type != BTRFS_ROOT_ITEM_KEY ||
+		    sk->min_objectid != root_id)
+			break;
+	}
+
+	return found ? 0 : -ENOENT;
+}
+
+/*
+ * Fetch the root item of @root_id into @item.
+ *
+ * An item shorter than sizeof(*item) comes from an old version of the root
+ * structure, and an item whose generation and generation_v2 differ was last
+ * written by a kernel unaware of the newer fields. In both cases everything
+ * from generation_v2 to the end of the structure is zeroed.
+ *
+ * Returns 0 on success or the error from btrfs_read_root_item_raw().
+ */
+static int btrfs_read_root_item(int mnt_fd, u64 root_id,
+				struct btrfs_root_item *item)
+{
+	const size_t v2_start = offsetof(struct btrfs_root_item,
+					 generation_v2);
+	u32 got;
+	int err;
+
+	err = btrfs_read_root_item_raw(mnt_fd, root_id, sizeof(*item),
+				       &got, item);
+	if (err)
+		return err;
+
+	/* full-size item with matching generations: v2 fields are valid */
+	if (got >= sizeof(*item) &&
+	    btrfs_root_generation(item) == btrfs_root_generation_v2(item))
+		return 0;
+
+	memset(&item->generation_v2, 0, sizeof(*item) - v2_start);
+	return 0;
+}
+
+#ifdef BTRFS_COMPAT_SEND_NO_UUID_TREE
+/*
+ * Link @si into the rb-tree @root, ordered by the key selected by @type
+ * (received uuid + stransid, uuid, root id or path).
+ *
+ * Returns NULL after inserting, or the already present node that compares
+ * equal to @si, in which case nothing is inserted.
+ */
+static struct rb_node *tree_insert(struct rb_root *root,
+				   struct subvol_info *si,
+				   enum subvol_search_type type)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct subvol_info *entry;
+	__s64 comp;
+
+	while (*link) {
+		parent = *link;
+
+		switch (type) {
+		case subvol_search_by_received_uuid:
+			entry = rb_entry(parent, struct subvol_info,
+					 rb_received_node);
+			comp = memcmp(entry->received_uuid, si->received_uuid,
+				      BTRFS_UUID_SIZE);
+			/* same uuid: order by stransid */
+			if (comp == 0) {
+				if (entry->stransid < si->stransid)
+					comp = -1;
+				else
+					comp = entry->stransid > si->stransid;
+			}
+			break;
+		case subvol_search_by_uuid:
+			entry = rb_entry(parent, struct subvol_info,
+					 rb_local_node);
+			comp = memcmp(entry->uuid, si->uuid, BTRFS_UUID_SIZE);
+			break;
+		case subvol_search_by_root_id:
+			entry = rb_entry(parent, struct subvol_info,
+					 rb_root_id_node);
+			comp = entry->root_id - si->root_id;
+			break;
+		case subvol_search_by_path:
+			entry = rb_entry(parent, struct subvol_info,
+					 rb_path_node);
+			comp = strcmp(entry->path, si->path);
+			break;
+		default:
+			BUG();
+		}
+
+		if (comp == 0)
+			return parent;
+
+		link = comp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	switch (type) {
+	case subvol_search_by_received_uuid:
+		rb_link_node(&si->rb_received_node, parent, link);
+		rb_insert_color(&si->rb_received_node, root);
+		break;
+	case subvol_search_by_uuid:
+		rb_link_node(&si->rb_local_node, parent, link);
+		rb_insert_color(&si->rb_local_node, root);
+		break;
+	case subvol_search_by_root_id:
+		rb_link_node(&si->rb_root_id_node, parent, link);
+		rb_insert_color(&si->rb_root_id_node, root);
+		break;
+	case subvol_search_by_path:
+		rb_link_node(&si->rb_path_node, parent, link);
+		rb_insert_color(&si->rb_path_node, root);
+		break;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+#endif
+
+/*
+ * Build the path of subvolume @subvol_id into @path, a buffer of @path_len
+ * bytes, by delegating to btrfs_subvolid_resolve_sub().
+ *
+ * Returns -EOVERFLOW for a zero-sized buffer, otherwise the helper's result.
+ */
+int btrfs_subvolid_resolve(int fd, char *path, size_t path_len, u64 subvol_id)
+{
+	size_t room;
+
+	if (path_len < 1)
+		return -EOVERFLOW;
+
+	/* the last byte is kept for the terminator, the rest is usable room */
+	room = path_len - 1;
+	path[0] = '\0';
+	path[room] = '\0';
+
+	return btrfs_subvolid_resolve_sub(fd, path, &room, subvol_id);
+}
+
+/*
+ * Recursive worker for btrfs_subvolid_resolve(): append the path of
+ * subvol_id to the string in 'path', decrementing *path_len by the number
+ * of bytes appended.
+ *
+ * Returns 0 on success, -EOVERFLOW when *path_len is too small for the next
+ * piece, -ENOENT when the backref search finds nothing, or the raw ioctl
+ * return value when an ioctl fails.
+ */
+static int btrfs_subvolid_resolve_sub(int fd, char *path, size_t *path_len,
+ u64 subvol_id)
+{
+ int ret;
+ struct btrfs_ioctl_search_args search_arg;
+ struct btrfs_ioctl_ino_lookup_args ino_lookup_arg;
+ struct btrfs_ioctl_search_header *search_header;
+ struct btrfs_root_ref *backref_item;
+
+ /* recursion ends at the fs tree, which contributes an empty string */
+ if (subvol_id == BTRFS_FS_TREE_OBJECTID) {
+ if (*path_len < 1)
+ return -EOVERFLOW;
+ *path = '\0';
+ (*path_len)--;
+ return 0;
+ }
+
+ /* look up one ROOT_BACKREF item of subvol_id in the root tree */
+ memset(&search_arg, 0, sizeof(search_arg));
+ search_arg.key.tree_id = BTRFS_ROOT_TREE_OBJECTID;
+ search_arg.key.min_objectid = subvol_id;
+ search_arg.key.max_objectid = subvol_id;
+ search_arg.key.min_type = BTRFS_ROOT_BACKREF_KEY;
+ search_arg.key.max_type = BTRFS_ROOT_BACKREF_KEY;
+ search_arg.key.max_offset = (u64)-1;
+ search_arg.key.max_transid = (u64)-1;
+ search_arg.key.nr_items = 1;
+ ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_arg);
+ if (ret < 0) {
+ fprintf(stderr,
+ "ioctl(BTRFS_IOC_TREE_SEARCH, subvol_id %llu) ret=%d, error: %s\n",
+ (unsigned long long)subvol_id, ret, strerror(errno));
+ return ret;
+ }
+
+ if (search_arg.key.nr_items < 1) {
+ fprintf(stderr,
+ "failed to lookup subvol_id %llu!\n",
+ (unsigned long long)subvol_id);
+ return -ENOENT;
+ }
+ /* the backref item is read from directly behind the search header */
+ search_header = (struct btrfs_ioctl_search_header *)search_arg.buf;
+ backref_item = (struct btrfs_root_ref *)(search_header + 1);
+ /*
+ * search_header->offset is used as the id of the containing tree:
+ * unless it is the fs tree, resolve it first and append a separator
+ */
+ if (search_header->offset != BTRFS_FS_TREE_OBJECTID) {
+ int sub_ret;
+
+ sub_ret = btrfs_subvolid_resolve_sub(fd, path, path_len,
+ search_header->offset);
+ if (sub_ret)
+ return sub_ret;
+ if (*path_len < 1)
+ return -EOVERFLOW;
+ strcat(path, "/");
+ (*path_len)--;
+ }
+
+ /*
+ * a dirid other than BTRFS_FIRST_FREE_OBJECTID needs an INO_LOOKUP in
+ * the containing tree; the name it returns is appended first
+ */
+ if (btrfs_stack_root_ref_dirid(backref_item) !=
+ BTRFS_FIRST_FREE_OBJECTID) {
+ int len;
+
+ memset(&ino_lookup_arg, 0, sizeof(ino_lookup_arg));
+ ino_lookup_arg.treeid = search_header->offset;
+ ino_lookup_arg.objectid =
+ btrfs_stack_root_ref_dirid(backref_item);
+ ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_lookup_arg);
+ if (ret < 0) {
+ fprintf(stderr,
+ "ioctl(BTRFS_IOC_INO_LOOKUP) ret=%d, error: %s\n",
+ ret, strerror(errno));
+ return ret;
+ }
+
+ /* NOTE(review): 'len' is an int compared against a size_t below */
+ len = strlen(ino_lookup_arg.name);
+ if (*path_len < len)
+ return -EOVERFLOW;
+ strcat(path, ino_lookup_arg.name);
+ (*path_len) -= len;
+ }
+
+ /* finally append name_len bytes of the name stored after the backref item */
+ if (*path_len < btrfs_stack_root_ref_name_len(backref_item))
+ return -EOVERFLOW;
+ strncat(path, (char *)(backref_item + 1),
+ btrfs_stack_root_ref_name_len(backref_item));
+ (*path_len) -= btrfs_stack_root_ref_name_len(backref_item);
+ return 0;
+}
+
+#ifdef BTRFS_COMPAT_SEND_NO_UUID_TREE
+/* Return how many of the first @len bytes of @buf are equal to @b. */
+static int count_bytes(void *buf, int len, char b)
+{
+	const char *cur = buf;
+	int matches = 0;
+
+	while (len-- > 0) {
+		if (*cur++ == b)
+			matches++;
+	}
+
+	return matches;
+}
+
+/*
+ * Register @si in the lookup trees of @s. It always goes into the root-id
+ * and path trees; the uuid and received-uuid trees are skipped when the
+ * corresponding uuid consists only of zero bytes.
+ */
+void subvol_uuid_search_add(struct subvol_uuid_search *s,
+			    struct subvol_info *si)
+{
+	int zero_bytes;
+
+	tree_insert(&s->root_id_subvols, si, subvol_search_by_root_id);
+	tree_insert(&s->path_subvols, si, subvol_search_by_path);
+
+	zero_bytes = count_bytes(si->uuid, BTRFS_UUID_SIZE, 0);
+	if (zero_bytes != BTRFS_UUID_SIZE)
+		tree_insert(&s->local_subvols, si, subvol_search_by_uuid);
+
+	zero_bytes = count_bytes(si->received_uuid, BTRFS_UUID_SIZE, 0);
+	if (zero_bytes != BTRFS_UUID_SIZE)
+		tree_insert(&s->received_subvols, si,
+			    subvol_search_by_received_uuid);
+}
+
+/*
+ * Look up an entry in the rb-tree @root. @type selects which of @root_id,
+ * @uuid (+ @stransid for received uuids) or @path is the key; the other
+ * key arguments are ignored.
+ *
+ * Returns the matching entry or NULL.
+ */
+static struct subvol_info *tree_search(struct rb_root *root,
+				       u64 root_id, const u8 *uuid,
+				       u64 stransid, const char *path,
+				       enum subvol_search_type type)
+{
+	struct rb_node *cur = root->rb_node;
+	struct subvol_info *entry;
+	__s64 comp;
+
+	while (cur) {
+		switch (type) {
+		case subvol_search_by_received_uuid:
+			entry = rb_entry(cur, struct subvol_info,
+					 rb_received_node);
+			comp = memcmp(entry->received_uuid, uuid,
+				      BTRFS_UUID_SIZE);
+			/* same uuid: order by stransid */
+			if (comp == 0) {
+				if (entry->stransid < stransid)
+					comp = -1;
+				else
+					comp = entry->stransid > stransid;
+			}
+			break;
+		case subvol_search_by_uuid:
+			entry = rb_entry(cur, struct subvol_info,
+					 rb_local_node);
+			comp = memcmp(entry->uuid, uuid, BTRFS_UUID_SIZE);
+			break;
+		case subvol_search_by_root_id:
+			entry = rb_entry(cur, struct subvol_info,
+					 rb_root_id_node);
+			comp = entry->root_id - root_id;
+			break;
+		case subvol_search_by_path:
+			entry = rb_entry(cur, struct subvol_info,
+					 rb_path_node);
+			comp = strcmp(entry->path, path);
+			break;
+		default:
+			BUG();
+		}
+
+		if (comp == 0)
+			return entry;
+
+		cur = comp < 0 ? cur->rb_left : cur->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Fallback lookup, only called if the kernel doesn't support the uuid tree:
+ * pick the in-memory rb-tree that matches @type and search it.
+ *
+ * Returns the entry found, or NULL if there is none or @type is unknown.
+ */
+static struct subvol_info *subvol_uuid_search_old(struct subvol_uuid_search *s,
+				       u64 root_id, const u8 *uuid, u64 transid,
+				       const char *path,
+				       enum subvol_search_type type)
+{
+	struct rb_root *tree;
+
+	switch (type) {
+	case subvol_search_by_received_uuid:
+		tree = &s->received_subvols;
+		break;
+	case subvol_search_by_uuid:
+		tree = &s->local_subvols;
+		break;
+	case subvol_search_by_root_id:
+		tree = &s->root_id_subvols;
+		break;
+	case subvol_search_by_path:
+		tree = &s->path_subvols;
+		break;
+	default:
+		return NULL;
+	}
+
+	return tree_search(tree, root_id, uuid, transid, path, type);
+}
+#else
+/*
+ * Without the compat rb-trees there is nothing to add @si to, so it is
+ * released instead (path first, then the structure). A NULL @si is ignored.
+ */
+void subvol_uuid_search_add(struct subvol_uuid_search *s,
+			    struct subvol_info *si)
+{
+	if (!si)
+		return;
+
+	free(si->path);
+	free(si);
+}
+#endif
+
+/*
+ * Find a subvolume by the key selected with @type (@root_id, @uuid or @path;
+ * @transid is only passed on to the compat rb-tree search) and return a
+ * description of it.
+ *
+ * With the compat code enabled and no uuid tree present, the result comes
+ * from the in-memory trees. Otherwise the root id is resolved, the root item
+ * is read and a new subvol_info is allocated and filled in, including a
+ * newly allocated path string.
+ *
+ * Returns NULL on any failure, including allocation failure (previously the
+ * calloc/strdup/malloc results were used unchecked).
+ */
+struct subvol_info *subvol_uuid_search(struct subvol_uuid_search *s,
+				       u64 root_id, const u8 *uuid, u64 transid,
+				       const char *path,
+				       enum subvol_search_type type)
+{
+	int ret = 0;
+	struct btrfs_root_item root_item;
+	struct subvol_info *info = NULL;
+
+#ifdef BTRFS_COMPAT_SEND_NO_UUID_TREE
+	if (!s->uuid_tree_existed)
+		return subvol_uuid_search_old(s, root_id, uuid, transid,
+					     path, type);
+#endif
+	/* translate the search key into a root id */
+	switch (type) {
+	case subvol_search_by_received_uuid:
+		ret = btrfs_lookup_uuid_received_subvol_item(s->mnt_fd, uuid,
+							     &root_id);
+		break;
+	case subvol_search_by_uuid:
+		ret = btrfs_lookup_uuid_subvol_item(s->mnt_fd, uuid, &root_id);
+		break;
+	case subvol_search_by_root_id:
+		break;
+	case subvol_search_by_path:
+		ret = btrfs_get_root_id_by_sub_path(s->mnt_fd, path, &root_id);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		goto out;
+
+	ret = btrfs_read_root_item(s->mnt_fd, root_id, &root_item);
+	if (ret)
+		goto out;
+
+	info = calloc(1, sizeof(*info));
+	if (!info) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	info->root_id = root_id;
+	memcpy(info->uuid, root_item.uuid, BTRFS_UUID_SIZE);
+	memcpy(info->received_uuid, root_item.received_uuid, BTRFS_UUID_SIZE);
+	memcpy(info->parent_uuid, root_item.parent_uuid, BTRFS_UUID_SIZE);
+	info->ctransid = btrfs_root_ctransid(&root_item);
+	info->otransid = btrfs_root_otransid(&root_item);
+	info->stransid = btrfs_root_stransid(&root_item);
+	info->rtransid = btrfs_root_rtransid(&root_item);
+	if (type == subvol_search_by_path) {
+		info->path = strdup(path);
+		if (!info->path)
+			ret = -ENOMEM;
+	} else {
+		info->path = malloc(PATH_MAX);
+		if (!info->path)
+			ret = -ENOMEM;
+		else
+			ret = btrfs_subvolid_resolve(s->mnt_fd, info->path,
+						     PATH_MAX, root_id);
+	}
+
+out:
+	/* on failure nothing is handed out; free(NULL) is harmless */
+	if (ret && info) {
+		free(info->path);
+		free(info);
+		info = NULL;
+	}
+
+	return info;
+}
+
+#ifdef BTRFS_COMPAT_SEND_NO_UUID_TREE
+/*
+ * Check whether the root tree of the filesystem behind @fd has a ROOT_ITEM
+ * for the uuid tree.
+ *
+ * Returns 1 if it does, 0 if not, or the negative ioctl return value if the
+ * search fails.
+ */
+static int is_uuid_tree_supported(int fd)
+{
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *key = &args.key;
+	int ret;
+
+	memset(&args, 0, sizeof(args));
+
+	key->tree_id = BTRFS_ROOT_TREE_OBJECTID;
+	key->min_objectid = BTRFS_UUID_TREE_OBJECTID;
+	key->max_objectid = BTRFS_UUID_TREE_OBJECTID;
+	key->min_type = BTRFS_ROOT_ITEM_KEY;
+	key->max_type = BTRFS_ROOT_ITEM_KEY;
+	key->max_offset = (u64)-1;
+	key->max_transid = (u64)-1;
+	key->nr_items = 1;
+
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+	if (ret < 0)
+		return ret;
+
+	/* on return nr_items holds the number of items the search found */
+	return key->nr_items != 0;
+}
+
+/*
+ * Prepare @s for subvolume lookups on the filesystem open at @mnt_fd.
+ *
+ * If the filesystem has a uuid tree, only the flag is recorded and lookups
+ * go through the kernel later. Otherwise (older kernels without uuid tree
+ * support) all root items are read here and a subvol_info for each one is
+ * added to the in-memory search trees.
+ *
+ * Returns 0 on success or a negative value on failure. Allocation failures
+ * now return -ENOMEM instead of dereferencing a NULL pointer.
+ */
+int subvol_uuid_search_init(int mnt_fd, struct subvol_uuid_search *s)
+{
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	struct btrfs_root_item *root_item_ptr;
+	struct btrfs_root_item root_item = {};
+	struct subvol_info *si = NULL;
+	int root_item_valid = 0;
+	unsigned long off = 0;
+	int i;
+	char *path;
+
+	s->mnt_fd = mnt_fd;
+	/* make the flag well defined on the fallback path as well */
+	s->uuid_tree_existed = 0;
+
+	s->root_id_subvols = RB_ROOT;
+	s->local_subvols = RB_ROOT;
+	s->received_subvols = RB_ROOT;
+	s->path_subvols = RB_ROOT;
+
+	ret = is_uuid_tree_supported(mnt_fd);
+	if (ret < 0) {
+		fprintf(stderr,
+			"ERROR: check if we support uuid tree fails - %s\n",
+			strerror(errno));
+		return ret;
+	} else if (ret) {
+		/* uuid tree is supported */
+		s->uuid_tree_existed = 1;
+		return 0;
+	}
+	memset(&args, 0, sizeof(args));
+
+	sk->tree_id = BTRFS_ROOT_TREE_OBJECTID;
+
+	sk->max_objectid = (u64)-1;
+	sk->max_offset = (u64)-1;
+	sk->max_transid = (u64)-1;
+	sk->min_type = BTRFS_ROOT_ITEM_KEY;
+	sk->max_type = BTRFS_ROOT_BACKREF_KEY;
+	sk->nr_items = 4096;
+
+	while (1) {
+		ret = ioctl(mnt_fd, BTRFS_IOC_TREE_SEARCH, &args);
+		if (ret < 0) {
+			fprintf(stderr, "ERROR: can't perform the search - %s\n",
+				strerror(errno));
+			return ret;
+		}
+		if (sk->nr_items == 0)
+			break;
+
+		off = 0;
+
+		for (i = 0; i < sk->nr_items; i++) {
+			sh = (struct btrfs_ioctl_search_header *)(args.buf +
+								  off);
+			off += sizeof(*sh);
+
+			/* only the fs tree (5) and the free objectid range */
+			if ((sh->objectid != 5 &&
+			     sh->objectid < BTRFS_FIRST_FREE_OBJECTID) ||
+			    sh->objectid > BTRFS_LAST_FREE_OBJECTID)
+				goto skip;
+
+			if (sh->type == BTRFS_ROOT_ITEM_KEY) {
+				/* older kernels don't have uuids+times */
+				if (sh->len < sizeof(root_item)) {
+					root_item_valid = 0;
+					goto skip;
+				}
+				root_item_ptr = (struct btrfs_root_item *)
+						(args.buf + off);
+				memcpy(&root_item, root_item_ptr,
+				       sizeof(root_item));
+				root_item_valid = 1;
+			} else if (sh->type == BTRFS_ROOT_BACKREF_KEY ||
+				   root_item_valid) {
+				if (!root_item_valid)
+					goto skip;
+
+				path = btrfs_list_path_for_root(mnt_fd,
+								sh->objectid);
+				if (!path) {
+					path = strdup("");
+					if (!path) {
+						ret = -ENOMEM;
+						goto out;
+					}
+				}
+				if (IS_ERR(path)) {
+					ret = PTR_ERR(path);
+					fprintf(stderr, "ERROR: unable to "
+						"resolve path "
+						"for root %llu\n",
+						(unsigned long long)sh->objectid);
+					goto out;
+				}
+
+				si = calloc(1, sizeof(*si));
+				if (!si) {
+					free(path);
+					ret = -ENOMEM;
+					goto out;
+				}
+				si->root_id = sh->objectid;
+				memcpy(si->uuid, root_item.uuid,
+				       BTRFS_UUID_SIZE);
+				memcpy(si->parent_uuid, root_item.parent_uuid,
+				       BTRFS_UUID_SIZE);
+				memcpy(si->received_uuid,
+				       root_item.received_uuid,
+				       BTRFS_UUID_SIZE);
+				si->ctransid = btrfs_root_ctransid(&root_item);
+				si->otransid = btrfs_root_otransid(&root_item);
+				si->stransid = btrfs_root_stransid(&root_item);
+				si->rtransid = btrfs_root_rtransid(&root_item);
+				si->path = path;
+				subvol_uuid_search_add(s, si);
+				root_item_valid = 0;
+			} else {
+				goto skip;
+			}
+
+skip:
+			off += sh->len;
+
+			/*
+			 * record the mins in sk so we can make sure the
+			 * next search doesn't repeat this root
+			 */
+			sk->min_objectid = sh->objectid;
+			sk->min_offset = sh->offset;
+			sk->min_type = sh->type;
+		}
+		sk->nr_items = 4096;
+		if (sk->min_offset < (u64)-1)
+			sk->min_offset++;
+		else if (sk->min_objectid < (u64)-1) {
+			sk->min_objectid++;
+			sk->min_offset = 0;
+			sk->min_type = 0;
+		} else
+			break;
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Release every entry linked into the root-id tree of 's' (path string and
+ * the entry itself), then reset all four search trees to empty.
+ *
+ * NOTE(review): subvol_uuid_search_init() only fills the trees when the uuid
+ * tree is absent (uuid_tree_existed left unset), yet this function returns
+ * early in exactly that case - the condition below looks inverted. Confirm
+ * against the callers' ownership of entries before changing it.
+ */
+void subvol_uuid_search_finit(struct subvol_uuid_search *s)
+{
+ struct rb_root *root = &s->root_id_subvols;
+ struct rb_node *node;
+
+ if (!s->uuid_tree_existed)
+ return;
+
+ /* only the root-id tree is walked to free the entries */
+ while ((node = rb_first(root))) {
+ struct subvol_info *entry =
+ rb_entry(node, struct subvol_info, rb_root_id_node);
+
+ free(entry->path);
+ rb_erase(node, root);
+ free(entry);
+ }
+
+ s->root_id_subvols = RB_ROOT;
+ s->local_subvols = RB_ROOT;
+ s->received_subvols = RB_ROOT;
+ s->path_subvols = RB_ROOT;
+}
+#else
+/*
+ * Variant built without the compat search trees: only remembers mnt_fd in
+ * 's'. Always returns 0.
+ */
+int subvol_uuid_search_init(int mnt_fd, struct subvol_uuid_search *s)
+{
+ s->mnt_fd = mnt_fd;
+
+ return 0;
+}
+
+/* Variant built without the compat search trees: nothing to release. */
+void subvol_uuid_search_finit(struct subvol_uuid_search *s)
+{
+}
+#endif
+
+/*
+ * Write "<p1>/<p2>" into @out, dropping one trailing '/' from each part.
+ *
+ * Returns 0 on success, or -ENAMETOOLONG (leaving @out untouched) when the
+ * combined length plus two reaches PATH_MAX.
+ */
+int path_cat_out(char *out, const char *p1, const char *p2)
+{
+	int len1 = strlen(p1);
+	int len2 = strlen(p2);
+
+	if (len1 + len2 + 2 >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	/* shorten each part by one if it ends in a slash */
+	len1 -= (len1 > 0 && p1[len1 - 1] == '/');
+	len2 -= (len2 > 0 && p2[len2 - 1] == '/');
+
+	sprintf(out, "%.*s/%.*s", len1, p1, len2, p2);
+
+	return 0;
+}
+
+/*
+ * Allocating wrapper around path_cat_out(): returns a malloc'ed
+ * "<p1>/<p2>" string that the caller must free().
+ *
+ * Returns NULL if the allocation fails or path_cat_out() rejects the
+ * combination. Previously a failed malloc() was passed straight to
+ * sprintf(), and a rejected combination returned an uninitialized buffer.
+ */
+__attribute__((deprecated))
+char *path_cat(const char *p1, const char *p2)
+{
+	int p1_len = strlen(p1);
+	int p2_len = strlen(p2);
+	char *new = malloc(p1_len + p2_len + 2);
+
+	if (!new)
+		return NULL;
+
+	if (path_cat_out(new, p1, p2) < 0) {
+		free(new);
+		return NULL;
+	}
+
+	return new;
+}
+
+/*
+ * Write "<p1>/<p2>/<p3>" into @out, dropping one trailing '/' from each
+ * part.
+ *
+ * Returns 0 on success, or -ENAMETOOLONG (leaving @out untouched) when the
+ * combined length plus three reaches PATH_MAX.
+ */
+int path_cat3_out(char *out, const char *p1, const char *p2, const char *p3)
+{
+	int len1 = strlen(p1);
+	int len2 = strlen(p2);
+	int len3 = strlen(p3);
+
+	if (len1 + len2 + len3 + 3 >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	/* shorten each part by one if it ends in a slash */
+	len1 -= (len1 > 0 && p1[len1 - 1] == '/');
+	len2 -= (len2 > 0 && p2[len2 - 1] == '/');
+	len3 -= (len3 > 0 && p3[len3 - 1] == '/');
+
+	sprintf(out, "%.*s/%.*s/%.*s", len1, p1, len2, p2, len3, p3);
+
+	return 0;
+}
+
+/*
+ * Allocating wrapper around path_cat3_out(): returns a malloc'ed
+ * "<p1>/<p2>/<p3>" string that the caller must free().
+ *
+ * Returns NULL if the allocation fails or path_cat3_out() rejects the
+ * combination. Previously a failed malloc() was passed straight to
+ * sprintf(), and a rejected combination returned an uninitialized buffer.
+ */
+__attribute__((deprecated))
+char *path_cat3(const char *p1, const char *p2, const char *p3)
+{
+	int p1_len = strlen(p1);
+	int p2_len = strlen(p2);
+	int p3_len = strlen(p3);
+	char *new = malloc(p1_len + p2_len + p3_len + 3);
+
+	if (!new)
+		return NULL;
+
+	if (path_cat3_out(new, p1, p2, p3) < 0) {
+		free(new);
+		return NULL;
+	}
+
+	return new;
+}
diff --git a/send-utils.h b/send-utils.h
new file mode 100644
index 00000000..677a1db2
--- /dev/null
+++ b/send-utils.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2012 Alexander Block. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_SEND_UTILS_H__
+#define __BTRFS_SEND_UTILS_H__
+
+#if BTRFS_FLAT_INCLUDES
+#include "kerncompat.h"
+#include "ctree.h"
+#include "rbtree.h"
+#else
+#include <btrfs/kerncompat.h>
+#include <btrfs/ctree.h>
+#include <btrfs/rbtree.h>
+#endif /* BTRFS_FLAT_INCLUDES */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Compatibility code for kernels < 3.12; the UUID tree is not available there
+ * and we have to do the slow search. This should be deprecated someday.
+ */
+#define BTRFS_COMPAT_SEND_NO_UUID_TREE 1
+
+/*
+ * Selects the lookup mode of subvol_uuid_search(); it is passed as that
+ * function's 'type' argument.
+ */
+enum subvol_search_type {
+ subvol_search_by_root_id,
+ subvol_search_by_uuid,
+ subvol_search_by_received_uuid,
+ subvol_search_by_path,
+};
+
+/*
+ * Record describing one subvolume: root id, UUIDs, transaction ids and path.
+ * NOTE(review): the exact meaning of the individual *transid fields is not
+ * visible in this header; check the code that fills them in.
+ */
+struct subvol_info {
+#ifdef BTRFS_COMPAT_SEND_NO_UUID_TREE
+ /* link nodes; presumably one per rb_root kept in struct subvol_uuid_search */
+ struct rb_node rb_root_id_node;
+ struct rb_node rb_local_node;
+ struct rb_node rb_received_node;
+ struct rb_node rb_path_node;
+#endif
+
+ u64 root_id;
+ u8 uuid[BTRFS_UUID_SIZE];
+ u8 parent_uuid[BTRFS_UUID_SIZE];
+ u8 received_uuid[BTRFS_UUID_SIZE];
+ u64 ctransid;
+ u64 otransid;
+ u64 stransid;
+ u64 rtransid;
+
+ /* heap string; the compat subvol_uuid_search_finit() frees it with the entry */
+ char *path;
+};
+
+/*
+ * Search context initialized by subvol_uuid_search_init() and released by
+ * subvol_uuid_search_finit().
+ */
+struct subvol_uuid_search {
+ /* fd handed to subvol_uuid_search_init() */
+ int mnt_fd;
+#ifdef BTRFS_COMPAT_SEND_NO_UUID_TREE
+ int uuid_tree_existed;
+
+ /* rb-tree roots; subvol_uuid_search_finit() resets each one to RB_ROOT */
+ struct rb_root root_id_subvols;
+ struct rb_root local_subvols;
+ struct rb_root received_subvols;
+ struct rb_root path_subvols;
+#endif
+};
+
+int subvol_uuid_search_init(int mnt_fd, struct subvol_uuid_search *s);
+void subvol_uuid_search_finit(struct subvol_uuid_search *s);
+struct subvol_info *subvol_uuid_search(struct subvol_uuid_search *s,
+ u64 root_id, const u8 *uuid, u64 transid,
+ const char *path,
+ enum subvol_search_type type);
+void subvol_uuid_search_add(struct subvol_uuid_search *s,
+ struct subvol_info *si);
+
+int btrfs_subvolid_resolve(int fd, char *path, size_t path_len, u64 subvol_id);
+
+/*
+ * DEPRECATED: the functions path_cat and path_cat3 are unsafe and should not
+ * be used, use the _out variants and always check the return code.
+ */
+__attribute__((deprecated))
+char *path_cat(const char *p1, const char *p2);
+__attribute__((deprecated))
+char *path_cat3(const char *p1, const char *p2, const char *p3);
+
+int path_cat_out(char *out, const char *p1, const char *p2);
+int path_cat3_out(char *out, const char *p1, const char *p2, const char *p3);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/send.h b/send.h
new file mode 100644
index 00000000..ae9aa029
--- /dev/null
+++ b/send.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2012 Alexander Block. All rights reserved.
+ * Copyright (C) 2012 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_SEND_H__
+#define __BTRFS_SEND_H__
+
+#include "ctree.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
+#define BTRFS_SEND_STREAM_VERSION 1
+
+#define BTRFS_SEND_BUF_SIZE (1024 * 64)
+#define BTRFS_SEND_READ_SIZE (1024 * 48)
+
+/*
+ * Kinds of TLV payload.
+ * NOTE(review): inferred from the constant names only; no user of this enum
+ * is visible in this header.
+ */
+enum btrfs_tlv_type {
+ BTRFS_TLV_U8,
+ BTRFS_TLV_U16,
+ BTRFS_TLV_U32,
+ BTRFS_TLV_U64,
+ BTRFS_TLV_BINARY,
+ BTRFS_TLV_STRING,
+ BTRFS_TLV_UUID,
+ BTRFS_TLV_TIMESPEC,
+};
+
+/*
+ * Stream header: magic string followed by a version field. The magic array
+ * is sized with sizeof on the string literal, so it includes the
+ * terminating NUL byte. The struct is packed (no padding).
+ */
+struct btrfs_stream_header {
+ char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)];
+ __le32 version;
+} __attribute__ ((__packed__));
+
+/* Per-command header; packed, so there is no padding between the fields. */
+struct btrfs_cmd_header {
+ /* len excluding the header */
+ __le32 len;
+ __le16 cmd;
+ /* crc including the header with zero crc field */
+ __le32 crc;
+} __attribute__ ((__packed__));
+
+/* Header of a single TLV item (type, then payload length); packed. */
+struct btrfs_tlv_header {
+ __le16 tlv_type;
+ /* len excluding the header */
+ __le16 tlv_len;
+} __attribute__ ((__packed__));
+
+/*
+ * commands
+ *
+ * Values are assigned implicitly starting at 0 (BTRFS_SEND_C_UNSPEC), so the
+ * order of the entries determines their numeric values.
+ * __BTRFS_SEND_C_MAX is a sentinel that must stay last.
+ */
+enum btrfs_send_cmd {
+ BTRFS_SEND_C_UNSPEC,
+
+ BTRFS_SEND_C_SUBVOL,
+ BTRFS_SEND_C_SNAPSHOT,
+
+ BTRFS_SEND_C_MKFILE,
+ BTRFS_SEND_C_MKDIR,
+ BTRFS_SEND_C_MKNOD,
+ BTRFS_SEND_C_MKFIFO,
+ BTRFS_SEND_C_MKSOCK,
+ BTRFS_SEND_C_SYMLINK,
+
+ BTRFS_SEND_C_RENAME,
+ BTRFS_SEND_C_LINK,
+ BTRFS_SEND_C_UNLINK,
+ BTRFS_SEND_C_RMDIR,
+
+ BTRFS_SEND_C_SET_XATTR,
+ BTRFS_SEND_C_REMOVE_XATTR,
+
+ BTRFS_SEND_C_WRITE,
+ BTRFS_SEND_C_CLONE,
+
+ BTRFS_SEND_C_TRUNCATE,
+ BTRFS_SEND_C_CHMOD,
+ BTRFS_SEND_C_CHOWN,
+ BTRFS_SEND_C_UTIMES,
+
+ BTRFS_SEND_C_END,
+ BTRFS_SEND_C_UPDATE_EXTENT,
+ __BTRFS_SEND_C_MAX,
+};
+/* highest defined command value (the sentinel minus one) */
+#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
+
+/*
+ * attributes in send stream
+ *
+ * Values are assigned implicitly starting at 0 (BTRFS_SEND_A_UNSPEC), so the
+ * order of the entries determines their numeric values.
+ * __BTRFS_SEND_A_MAX is a sentinel that must stay last.
+ */
+enum {
+ BTRFS_SEND_A_UNSPEC,
+
+ BTRFS_SEND_A_UUID,
+ BTRFS_SEND_A_CTRANSID,
+
+ BTRFS_SEND_A_INO,
+ BTRFS_SEND_A_SIZE,
+ BTRFS_SEND_A_MODE,
+ BTRFS_SEND_A_UID,
+ BTRFS_SEND_A_GID,
+ BTRFS_SEND_A_RDEV,
+ BTRFS_SEND_A_CTIME,
+ BTRFS_SEND_A_MTIME,
+ BTRFS_SEND_A_ATIME,
+ BTRFS_SEND_A_OTIME,
+
+ BTRFS_SEND_A_XATTR_NAME,
+ BTRFS_SEND_A_XATTR_DATA,
+
+ BTRFS_SEND_A_PATH,
+ BTRFS_SEND_A_PATH_TO,
+ BTRFS_SEND_A_PATH_LINK,
+
+ BTRFS_SEND_A_FILE_OFFSET,
+ BTRFS_SEND_A_DATA,
+
+ BTRFS_SEND_A_CLONE_UUID,
+ BTRFS_SEND_A_CLONE_CTRANSID,
+ BTRFS_SEND_A_CLONE_PATH,
+ BTRFS_SEND_A_CLONE_OFFSET,
+ BTRFS_SEND_A_CLONE_LEN,
+
+ __BTRFS_SEND_A_MAX,
+};
+/* highest defined attribute value (the sentinel minus one) */
+#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
+
+#ifdef __KERNEL__
+long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/show-blocks b/show-blocks
new file mode 100755
index 00000000..0164be96
--- /dev/null
+++ b/show-blocks
@@ -0,0 +1,325 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Oracle. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public
+# License v2 as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 021110-1307, USA.
+#
+import sys, os, signal, time, commands, tempfile, random
+
+# numpy seems to override random() with something else. Instantiate our
+# own here
+randgen = random.Random()
+randgen.seed(50)
+
+from optparse import OptionParser
+from matplotlib import rcParams
+from matplotlib.font_manager import fontManager, FontProperties
+import numpy
+
+rcParams['numerix'] = 'numpy'
+rcParams['backend'] = 'Agg'
+rcParams['interactive'] = 'False'
+from pylab import *
+
+class AnnoteFinder:
+ """
+ callback for matplotlib to display an annotation when points are clicked on. The
+ point which is closest to the click and within xtol and ytol is identified.
+
+ Register this function like this:
+
+ scatter(xdata, ydata)
+ af = AnnoteFinder(xdata, ydata, annotes)
+ connect('button_press_event', af)
+ """
+
+ def __init__(self, axis=None):
+ if axis is None:
+ self.axis = gca()
+ else:
+ self.axis= axis
+ self.drawnAnnotations = {}
+ self.links = []
+
+ def clear(self):
+ for k in self.drawnAnnotations.keys():
+ self.drawnAnnotations[k].set_visible(False)
+
+ def __call__(self, event):
+ if event.inaxes:
+ if event.button != 1:
+ self.clear()
+ draw()
+ return
+ clickX = event.xdata
+ clickY = event.ydata
+ if (self.axis is None) or (self.axis==event.inaxes):
+ self.drawAnnote(event.inaxes, clickX, clickY)
+
+ def drawAnnote(self, axis, x, y):
+ """
+ Draw the annotation on the plot
+ """
+ if self.drawnAnnotations.has_key((x,y)):
+ markers = self.drawnAnnotations[(x,y)]
+ markers.set_visible(not markers.get_visible())
+ draw()
+ else:
+ t = axis.text(x,y, "(%3.2f, %3.2f)"%(x,y), bbox=dict(facecolor='red',
+ alpha=0.8))
+ self.drawnAnnotations[(x,y)] = t
+ draw()
+
+def loaddata(fh,delimiter=None, converters=None):
+
+ #14413824 8192 extent back ref root 5 gen 10 owner 282 num_refs 1
+ def iter(fh, delimiter, converters):
+ global total_data
+ global total_metadata
+ for i,line in enumerate(fh):
+ line = line.split(' ')
+ start = float(line[0])
+ len = float(line[1])
+ owner = float(line[10])
+ root = float(line[6])
+ if owner <= 255:
+ total_metadata += int(len)
+ else:
+ total_data += int(len)
+ if start < zoommin or (zoommax != 0 and start > zoommax):
+ continue
+ yield start
+ yield len
+ yield owner
+ yield root
+ X = numpy.fromiter(iter(fh, delimiter, converters), dtype=float)
+ return X
+
+def run_debug_tree(device):
+ p = os.popen('btrfs-debug-tree -e ' + device)
+ data = loaddata(p)
+ return data
+
+def shapeit(X):
+ lines = len(X) / 4
+ X.shape = (lines, 4)
+
+def line_picker(line, mouseevent):
+ if mouseevent.xdata is None: return False, dict()
+ print "%d %d\n", mouseevent.xdata, mouseevent.ydata
+ return False, dict()
+
+# Convert a byte offset into an (x, y) cell position: the offset is divided
+# by the module-level bytes_per_cell, then split into a column (remainder of
+# num_cells) and a row (floor of the quotient); the row is returned plus one.
+def xycalc(byte):
+ byte = byte / bytes_per_cell
+ yval = floor(byte / num_cells)
+ xval = byte % num_cells
+ return (xval, yval + 1)
+
+# record the color used for each root the first time we find it
+root_colors = {}
+# there are lots of good colormaps to choose from
+# http://www.scipy.org/Cookbook/Matplotlib/Show_colormaps
+#
+meta_cmap = get_cmap("gist_ncar")
+data_done = False
+
+def plotone(a, xvals, yvals, owner, root, lines, labels):
+ global data_done
+ add_label = False
+
+ if owner:
+ if options.meta_only:
+ return
+ color = "blue"
+ label = "Data"
+ if not data_done:
+ add_label = True
+ data_done = True
+ else:
+ if options.data_only:
+ return
+ if root not in root_colors:
+ color = meta_cmap(randgen.random())
+ label = "Meta %d" % int(root)
+ root_colors[root] = (color, label)
+ add_label = True
+ else:
+ color, label = root_colors[root]
+
+ plotlines = a.plot(xvals, yvals, 's', color=color, mfc=color, mec=color,
+ markersize=.23, label=label)
+ if add_label:
+ lines += plotlines
+ labels.append(label)
+ print "add label %s" % label
+
+def parse_zoom():
+ def parse_num(s):
+ mult = 1
+ c = s.lower()[-1]
+ if c == 't':
+ mult = 1024 * 1024 * 1024 * 1024
+ elif c == 'g':
+ mult = 1024 * 1024 * 1024
+ elif c == 'm':
+ mult = 1024 * 1024
+ elif c == 'k':
+ mult = 1024
+ else:
+ c = None
+ if c:
+ num = int(s[:-1]) * mult
+ else:
+ num = int(s)
+ return num
+
+ if not options.zoom:
+ return (0, 0)
+
+ vals = options.zoom.split(':')
+ if len(vals) != 2:
+ sys.stderr.write("warning: unable to parse zoom %s\n" % options.zoom)
+ return (0, 0)
+ zoommin = parse_num(vals[0])
+ zoommax = parse_num(vals[1])
+ return (zoommin, zoommax)
+
+# Command line handling: either a device (-d) or a saved debug-tree dump (-i)
+# must be given, otherwise the help text is printed and the script exits.
+usage = "usage: %prog [options]"
+parser = OptionParser(usage=usage)
+parser.add_option("-d", "--device", help="Btrfs device", default="")
+parser.add_option("-i", "--input-file", help="debug-tree data", default="")
+parser.add_option("-o", "--output", help="Output file", default="blocks.png")
+parser.add_option("-z", "--zoom", help="Zoom", default=None)
+parser.add_option("", "--data-only", help="Only print data blocks",
+ default=False, action="store_true")
+parser.add_option("", "--meta-only", help="Only print metadata blocks",
+ default=False, action="store_true")
+
+(options,args) = parser.parse_args()
+
+if not options.device and not options.input_file:
+ parser.print_help()
+ sys.exit(1)
+
+# Globals read and updated by loaddata(): zoom window and byte counters.
+zoommin, zoommax = parse_zoom()
+total_data = 0
+total_metadata = 0
+
+# The device takes precedence when both -d and -i are supplied.
+if options.device:
+ data = run_debug_tree(options.device)
+elif options.input_file:
+ data = loaddata(file(options.input_file))
+shapeit(data)
+
+# Grid geometry: the byte range 0..max(start offset) is spread over a
+# num_cells x num_cells grid; bytes_per_cell is the span covered by one cell.
+sectors = data[:,0]
+sizes = data[:,1]
+datalen = len(data)
+sectormax = numpy.max(sectors)
+sectormin = 0
+num_cells = 800
+total_cells = num_cells * num_cells
+byte_range = sectormax - sectormin
+bytes_per_cell = byte_range / total_cells
+
+f = figure(figsize=(8,6))
+
+# Single plot; the right margin is shrunk to leave room for the legend
+a = subplot(1, 1, 1)
+subplots_adjust(right=0.7)
+datai = 0
+xvals = []
+yvals = []
+last_owner = 0
+last_root = 0
+lines = []
+labels = []
+# Walk the extents in input order, collecting cell coordinates; a batch is
+# flushed to plotone() whenever the owner class or the root changes.
+while datai < datalen:
+ row = data[datai]
+ datai += 1
+ byte = row[0]
+ size = row[1]
+ owner = row[2]
+ root = row[3]
+
+ # collapse the owner id into a flag: 0 for owner <= 255, 1 otherwise
+ if owner <= 255:
+ owner = 0
+ else:
+ owner = 1
+
+ if len(xvals) and (owner != last_owner or last_root != root):
+ plotone(a, xvals, yvals, last_owner, last_root, lines, labels)
+ xvals = []
+ yvals = []
+ cell = 0
+ # emit one point per grid cell touched by this extent
+ while cell < size:
+ xy = xycalc(byte)
+ byte += bytes_per_cell
+ cell += bytes_per_cell
+ if xy:
+ xvals.append(xy[0])
+ yvals.append(xy[1])
+ last_owner = owner
+ last_root = root
+
+# flush the final batch
+if xvals:
+ plotone(a, xvals, yvals, last_owner, last_root, lines, labels)
+
+# No ticks on the x axis; the y ticks are relabelled as disk offsets below
+ticks = []
+a.set_xticks(ticks)
+ticks = a.get_yticks()
+
+# Pick the label unit from the byte offset represented by the second y tick
+first_tick = ticks[1] * bytes_per_cell * num_cells
+if first_tick > 1024 * 1024 * 1024 * 1024:
+ scale = 1024 * 1024 * 1024 * 1024;
+ scalestr = "TB"
+elif first_tick > 1024 * 1024 * 1024:
+ scale = 1024 * 1024 * 1024;
+ scalestr = "GB"
+elif first_tick > 1024 * 1024:
+ scale = 1024 * 1024;
+ scalestr = "MB"
+elif first_tick > 1024:
+ scale = 1024;
+ scalestr = "KB"
+else:
+ scalestr = "Bytes"
+ scale = 1
+
+# one grid row covers bytes_per_cell * num_cells bytes
+ylabels = [ str(int((x * bytes_per_cell * num_cells) / scale)) for x in ticks ]
+a.set_yticklabels(ylabels)
+a.set_ylabel('Disk offset (%s)' % scalestr)
+a.set_xlim(0, num_cells)
+a.set_title('Blocks')
+
+a.legend(lines, labels, loc=(1.05, 0.8), shadow=True, pad=0.1, numpoints=1,
+ handletextsep = 0.005,
+ labelsep = 0.01,
+ markerscale=10,
+ prop=FontProperties(size='x-small') )
+
+# The printed "ratio" is metadata bytes as a percentage of data bytes
+# (reported as 100 when there is no data at all).
+if total_data == 0:
+ percent_meta = 100
+else:
+ percent_meta = (float(total_metadata) / float(total_data)) * 100
+
+print "Total metadata bytes %d data %d ratio %.3f" % (total_metadata,
+ total_data, percent_meta)
+print "saving graph to %s" % options.output
+savefig(options.output, orientation='landscape')
+show()
+
diff --git a/string-table.c b/string-table.c
new file mode 100644
index 00000000..5eda1ac5
--- /dev/null
+++ b/string-table.c
@@ -0,0 +1,151 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "string-table.h"
+
+/*
+ * Allocate a table of columns x rows cells. The whole block is zeroed, so
+ * every cell pointer starts out as NULL.
+ *
+ * Returns the new table, or NULL if the allocation fails.
+ */
+struct string_table *table_create(int columns, int rows)
+{
+	struct string_table *tab;
+	int size = sizeof(*tab) + rows * columns * sizeof(char*);
+
+	tab = calloc(1, size);
+	if (tab) {
+		tab->ncols = columns;
+		tab->nrows = rows;
+	}
+
+	return tab;
+}
+
+/*
+ * This function is like a vprintf, but stores the result in a cell of the
+ * table, replacing (and freeing) whatever the cell held before.
+ *
+ * The first character of the formatted text is a marker interpreted by
+ * table_dump(): '<' left-aligns the rest of the text, '*' followed by a
+ * character fills the whole column width with that character, and any other
+ * first character (conventionally '>') right-aligns the rest of the text.
+ * The stored text is truncated to 98 characters.
+ *
+ * Returns the stored string (owned by the table), or NULL if the cell
+ * coordinates lie outside the table or the allocation fails; the cell is
+ * left untouched in both cases.
+ */
+char *table_vprintf(struct string_table *tab, int column, int row,
+			  char *fmt, va_list ap)
+{
+	int idx;
+	char *msg;
+
+	/* Refuse coordinates that would index past cells[]. */
+	if (column < 0 || column >= tab->ncols || row < 0 || row >= tab->nrows)
+		return NULL;
+
+	msg = calloc(100, 1);
+	if (!msg)
+		return NULL;
+
+	idx = tab->ncols * row + column;
+	free(tab->cells[idx]);
+	tab->cells[idx] = msg;
+	/* buffer is zeroed and one byte larger than the limit: always terminated */
+	vsnprintf(msg, 99, fmt, ap);
+
+	return msg;
+}
+
+/*
+ * This function is like a printf, but stores the result in a cell of
+ * the table. It forwards its variable arguments to table_vprintf() and
+ * returns whatever that function returns.
+ */
+__attribute__ ((format (printf, 4, 5)))
+char *table_printf(struct string_table *tab, int column, int row,
+ char *fmt, ...)
+{
+ va_list ap;
+ char *ret;
+
+ va_start(ap, fmt);
+ ret = table_vprintf(tab, column, row, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+/*
+ * This function dumps the table to stdout, one line per row, with the
+ * columns separated by a single space.
+ *
+ * The first character of every cell is a marker and is not printed:
+ * a cell "*c" is replaced by the character c repeated for the width of
+ * the column, '<' left-aligns the remaining text and any other marker
+ * right-aligns it. Missing or empty cells are printed as padding.
+ */
+void table_dump(struct string_table *tab)
+{
+ int sizes[tab->ncols];
+ int i, j;
+
+ /* pass 1: column width = longest text, not counting the marker char */
+ for (i = 0; i < tab->ncols; i++) {
+ sizes[i] = 0;
+ for (j = 0; j < tab->nrows; j++) {
+ int idx = i + j * tab->ncols;
+ int len;
+
+ if (!tab->cells[idx])
+ continue;
+
+ len = strlen(tab->cells[idx]) - 1;
+ /* marker-only cells and '*' fill cells do not affect the width */
+ if (len == 0 || tab->cells[idx][0] == '*')
+ continue;
+
+ if (len > sizes[i])
+ sizes[i] = len;
+ }
+ }
+
+ /* pass 2: print row by row */
+ for (j = 0; j < tab->nrows; j++) {
+ for (i = 0; i < tab->ncols; i++) {
+ int idx = i + j * tab->ncols;
+ char *cell = tab->cells[idx];
+
+ if (!cell || !strlen(cell)) {
+ printf("%*s", sizes[i], "");
+ } else if (cell && cell[0] == '*' && cell[1]) {
+ int k = sizes[i];
+
+ while (k--)
+ putchar(cell[1]);
+ } else {
+ /* a negative field width left-aligns */
+ printf("%*s",
+ cell[0] == '<' ? -sizes[i] : sizes[i],
+ cell + 1);
+ }
+ if (i != (tab->ncols - 1))
+ putchar(' ');
+ }
+ putchar('\n');
+ }
+}
+
+/*
+ * Deallocate a table and all of its content. Passing NULL is allowed and
+ * does nothing, mirroring free().
+ */
+void table_free(struct string_table *tab)
+{
+	int i, count;
+
+	if (!tab)
+		return;
+
+	count = tab->ncols * tab->nrows;
+
+	/* free(NULL) is a no-op, so unused cells need no special casing */
+	for (i = 0; i < count; i++)
+		free(tab->cells[i]);
+
+	free(tab);
+}
diff --git a/string-table.h b/string-table.h
new file mode 100644
index 00000000..c1695d8d
--- /dev/null
+++ b/string-table.h
@@ -0,0 +1,34 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __STRING_TABLE_H__
+#define __STRING_TABLE_H__
+
+/* va_list is used in the table_vprintf() prototype below */
+#include <stdarg.h>
+
+/*
+ * Table of heap-allocated strings, stored row by row: the cell at
+ * (column, row) lives at cells[ncols * row + column]. Unused cells are NULL.
+ */
+struct string_table {
+	int ncols;
+	int nrows;
+	char *cells[];
+};
+
+struct string_table *table_create(int columns, int rows);
+char *table_printf(struct string_table *tab, int column, int row,
+			  char *fmt, ...);
+char *table_vprintf(struct string_table *tab, int column, int row,
+			  char *fmt, va_list ap);
+void table_dump(struct string_table *tab);
+void table_free(struct string_table *);
+
+#endif
diff --git a/super-recover.c b/super-recover.c
new file mode 100644
index 00000000..e2c31294
--- /dev/null
+++ b/super-recover.c
@@ -0,0 +1,336 @@
+/*
+ * Copyright (C) 2013 FUJITSU LIMITED. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <uuid/uuid.h>
+#include <errno.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "list.h"
+#include "utils.h"
+#include "crc32c.h"
+#include "volumes.h"
+#include "commands.h"
+
+/* State gathered while scanning all devices of a filesystem for supers. */
+struct btrfs_recover_superblock {
+ /* filled in by btrfs_scan_fs_devices() */
+ struct btrfs_fs_devices *fs_devices;
+
+ /*
+ * Lists of struct super_block_record. After read_fs_supers(), the good
+ * list holds copies that passed check_super() and carry max_generation;
+ * every other copy is on the bad list.
+ */
+ struct list_head good_supers;
+ struct list_head bad_supers;
+
+ /* highest generation among the copies that passed check_super() */
+ u64 max_generation;
+};
+
+/* One superblock copy as read from a device. */
+struct super_block_record {
+ /* links the record into good_supers or bad_supers */
+ struct list_head list;
+
+ /* strdup'ed device path; freed together with the record */
+ char *device_name;
+ /* private copy of the superblock contents */
+ struct btrfs_super_block sb;
+
+ /* offset on the device this copy was read from */
+ u64 bytenr;
+};
+
+/* Reset recover to an empty state: no devices, empty lists, generation 0. */
+static
+void init_recover_superblock(struct btrfs_recover_superblock *recover)
+{
+	recover->fs_devices = NULL;
+	recover->max_generation = 0;
+
+	INIT_LIST_HEAD(&recover->good_supers);
+	INIT_LIST_HEAD(&recover->bad_supers);
+}
+
+/* Unlink and free every super_block_record queued on head. */
+static void free_super_records(struct list_head *head)
+{
+	struct super_block_record *rec;
+
+	while (!list_empty(head)) {
+		rec = list_entry(head->next, struct super_block_record, list);
+		list_del_init(&rec->list);
+		free(rec->device_name);
+		free(rec);
+	}
+}
+
+/*
+ * Release all records on the good and bad lists. Does nothing when no
+ * devices were ever attached to recover.
+ */
+static
+void free_recover_superblock(struct btrfs_recover_superblock *recover)
+{
+	if (!recover->fs_devices)
+		return;
+
+	free_super_records(&recover->good_supers);
+	free_super_records(&recover->bad_supers);
+}
+
+/*
+ * Validate one superblock copy read from offset bytenr.
+ *
+ * Returns 1 if the copy records the same bytenr, carries the btrfs magic
+ * and its stored checksum matches the one computed over the block (the
+ * leading BTRFS_CSUM_SIZE bytes are excluded from the computation),
+ * otherwise 0.
+ */
+static int check_super(u64 bytenr, struct btrfs_super_block *sb)
+{
+	/* fixed-size buffer: never size a stack array from on-disk data */
+	char result[BTRFS_CSUM_SIZE] = { 0 };
+	u32 crc = ~(u32)0;
+	int csum_size;
+
+	if (btrfs_super_bytenr(sb) != bytenr)
+		return 0;
+	if (sb->magic != cpu_to_le64(BTRFS_MAGIC))
+		return 0;
+
+	/*
+	 * Only look at the checksum type once the block at least looks like
+	 * a btrfs superblock, and reject sizes that cannot fit the buffer.
+	 */
+	csum_size = btrfs_super_csum_size(sb);
+	if (csum_size <= 0 || csum_size > BTRFS_CSUM_SIZE)
+		return 0;
+
+	crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE,
+			      crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+	btrfs_csum_final(crc, result);
+
+	return !memcmp(sb, result, csum_size);
+}
+
+/*
+ * Append a record for superblock sb, read from offset bytenr of device
+ * fname, to the tail of head. The record owns private copies of both the
+ * device name and the superblock.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int add_superblock_record(struct btrfs_super_block *sb, char *fname,
+			u64 bytenr, struct list_head *head)
+{
+	struct super_block_record *rec = malloc(sizeof(*rec));
+
+	if (!rec)
+		return -ENOMEM;
+
+	rec->device_name = strdup(fname);
+	if (!rec->device_name) {
+		free(rec);
+		return -ENOMEM;
+	}
+
+	rec->bytenr = bytenr;
+	memcpy(&rec->sb, sb, sizeof(*sb));
+	list_add_tail(&rec->list, head);
+
+	return 0;
+}
+
+/*
+ * Read every superblock mirror of device filename and queue each copy on
+ * recover->good_supers or recover->bad_supers depending on check_super().
+ * recover->max_generation is raised to the highest generation found among
+ * the good copies.
+ *
+ * A short read ends the scan successfully: the remaining mirrors lie at or
+ * beyond the end of the device. Returns 0 on success, a negative errno if
+ * the device cannot be opened or read, or -ENOMEM.
+ */
+static int
+read_dev_supers(char *filename, struct btrfs_recover_superblock *recover)
+{
+	int i, fd;
+	int ret = 0;
+	u8 buf[BTRFS_SUPER_INFO_SIZE];
+	u64 gen, bytenr;
+	ssize_t nread;
+	struct btrfs_super_block *sb = (struct btrfs_super_block *)buf;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+		bytenr = btrfs_sb_offset(i);
+		nread = pread64(fd, buf, sizeof(buf), bytenr);
+		/*
+		 * Test the sign before comparing with sizeof(): a direct
+		 * comparison converts -1 to a huge unsigned value and the
+		 * error would go unnoticed.
+		 */
+		if (nread < 0) {
+			ret = -errno;
+			goto out;
+		}
+		if ((size_t)nread < sizeof(buf)) {
+			/* mirror not (fully) present on this device */
+			ret = 0;
+			goto out;
+		}
+
+		if (check_super(bytenr, sb)) {
+			ret = add_superblock_record(sb, filename, bytenr,
+						    &recover->good_supers);
+			if (ret)
+				goto out;
+			gen = btrfs_super_generation(sb);
+			if (gen > recover->max_generation)
+				recover->max_generation = gen;
+		} else {
+			ret = add_superblock_record(sb, filename, bytenr,
+						    &recover->bad_supers);
+			if (ret)
+				goto out;
+		}
+	}
+out:
+	close(fd);
+	return ret;
+}
+
+/*
+ * Collect the superblock copies of every device in recover->fs_devices,
+ * then demote good copies whose generation is older than the newest one
+ * found to the bad list.
+ *
+ * Returns 0 on success or the first error from read_dev_supers().
+ */
+static int read_fs_supers(struct btrfs_recover_superblock *recover)
+{
+ struct super_block_record *record;
+ struct super_block_record *next_record;
+ struct btrfs_device *dev;
+ int ret;
+ u64 gen;
+
+ list_for_each_entry(dev, &recover->fs_devices->devices,
+ dev_list) {
+ ret = read_dev_supers(dev->name, recover);
+ if (ret)
+ return ret;
+ }
+ /* _safe iteration: records are moved to another list while walking */
+ list_for_each_entry_safe(record, next_record,
+ &recover->good_supers, list) {
+ gen = btrfs_super_generation(&record->sb);
+ if (gen < recover->max_generation)
+ list_move_tail(&record->list, &recover->bad_supers);
+ }
+
+ return 0;
+}
+
+/*
+ * Return the first record on recover->good_supers.
+ * NOTE(review): there is no emptiness check here; on an empty list the
+ * list_entry() result is computed from the list head itself and is not a
+ * valid record, so callers must ensure the list is non-empty.
+ */
+static struct super_block_record *recover_get_good_super(
+ struct btrfs_recover_superblock *recover)
+{
+ struct super_block_record *record;
+ record = list_entry(recover->good_supers.next,
+ struct super_block_record, list);
+ return record;
+}
+
+/* Print the id and name of every device on the list, one per line. */
+static void print_all_devices(struct list_head *devices)
+{
+ struct btrfs_device *dev;
+
+ printf("All Devices:\n");
+ list_for_each_entry(dev, devices, dev_list) {
+ printf("\t");
+ printf("Device: id = %llu, name = %s\n",
+ dev->devid, dev->name);
+ }
+ printf("\n");
+}
+
+/* Print the device name and superblock offset stored in one record. */
+static void print_super_info(struct super_block_record *record)
+{
+ printf("\t\tdevice name = %s\n", record->device_name);
+ printf("\t\tsuperblock bytenr = %llu\n", record->bytenr);
+}
+
+/* Print every record of the good list, then every record of the bad list. */
+static void print_all_supers(struct btrfs_recover_superblock *recover)
+{
+ struct super_block_record *record;
+
+ printf("\t[All good supers]:\n");
+ list_for_each_entry(record, &recover->good_supers, list) {
+ print_super_info(record);
+ printf("\n");
+ }
+
+ printf("\t[All bad supers]:\n");
+ list_for_each_entry(record, &recover->bad_supers, list) {
+ print_super_info(record);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+/*
+ * Print the human readable message for a recovery result code (0..4) to
+ * stdout; any other value is reported as unknown.
+ */
+static void recover_err_str(int ret)
+{
+	/* indexed by result code */
+	static const char * const messages[] = {
+		"All supers are valid, no need to recover\n",
+		"Usage or syntax errors\n",
+		"Recovered bad superblocks successful\n",
+		"Failed to recover bad superblocks\n",
+		"Aborted to recover bad superblocks\n",
+	};
+	const int count = sizeof(messages) / sizeof(messages[0]);
+
+	if (ret >= 0 && ret < count)
+		printf("%s", messages[ret]);
+	else
+		printf("Unknown recover result\n");
+}
+
+/*
+ * Rewrite the bad or outdated superblock copies of the filesystem that
+ * dname belongs to, using a good copy as the source.
+ *
+ * verbose: print the devices and the good/bad lists before recovering.
+ * yes:     skip the interactive confirmation.
+ *
+ * Returns one of the codes understood by recover_err_str():
+ *   0 - no bad superblocks were found
+ *   1 - the device could not be opened or scanned, or supers not read
+ *   2 - bad superblocks were rewritten
+ *   3 - recovery failed (this includes having no good copy to start from)
+ *   4 - the user declined
+ * The message for the code is printed, except when dname cannot be opened,
+ * which is reported on stderr instead.
+ */
+int btrfs_recover_superblocks(const char *dname,
+			int verbose, int yes)
+{
+	int fd, ret;
+	struct btrfs_recover_superblock recover;
+	struct super_block_record *record;
+	struct btrfs_root *root = NULL;
+
+	fd = open(dname, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "open %s error\n", dname);
+		return 1;
+	}
+	init_recover_superblock(&recover);
+
+	ret = btrfs_scan_fs_devices(fd, dname, &recover.fs_devices, 0, 1, 0);
+	close(fd);
+	if (ret) {
+		ret = 1;
+		goto no_recover;
+	}
+
+	if (verbose)
+		print_all_devices(&recover.fs_devices->devices);
+
+	ret = read_fs_supers(&recover);
+	if (ret) {
+		ret = 1;
+		goto no_recover;
+	}
+	if (verbose) {
+		printf("Before Recovering:\n");
+		print_all_supers(&recover);
+	}
+
+	if (list_empty(&recover.bad_supers))
+		goto no_recover;
+
+	/*
+	 * Without a single good copy there is nothing to recover from, and
+	 * recover_get_good_super() would hand back a bogus record.
+	 */
+	if (list_empty(&recover.good_supers)) {
+		ret = 3;
+		goto no_recover;
+	}
+
+	if (!yes) {
+		ret = ask_user("Make sure this is a btrfs disk otherwise the tool will destroy other fs, Are you sure?");
+		if (!ret) {
+			ret = 4;
+			goto no_recover;
+		}
+	}
+	record = recover_get_good_super(&recover);
+	root = open_ctree(record->device_name, record->bytenr,
+			  OPEN_CTREE_RECOVER_SUPER | OPEN_CTREE_WRITES);
+	if (!root) {
+		ret = 3;
+		goto no_recover;
+	}
+	/* reset super_bytenr in order that we will rewrite all supers */
+	root->fs_info->super_bytenr = BTRFS_SUPER_INFO_OFFSET;
+	ret = write_all_supers(root);
+	if (!ret)
+		ret = 2;
+	else
+		ret = 3;
+
+	close_ctree(root);
+no_recover:
+	recover_err_str(ret);
+	free_recover_superblock(&recover);
+	/* check if we have freed fs_devices in close_ctree() */
+	if (!root && recover.fs_devices)
+		btrfs_close_devices(recover.fs_devices);
+	return ret;
+}
+
diff --git a/task-utils.c b/task-utils.c
new file mode 100644
index 00000000..12b00027
--- /dev/null
+++ b/task-utils.c
@@ -0,0 +1,143 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <pthread.h>
+#include <sys/timerfd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "task-utils.h"
+
+/*
+ * Allocate a zeroed task descriptor and record the thread function, the
+ * post function and the private data passed to both.
+ *
+ * Returns the descriptor (release it with task_deinit()) or NULL if the
+ * allocation fails.
+ */
+struct task_info *task_init(void *(*threadfn)(void *), int (*postfn)(void *),
+			    void *thread_private)
+{
+	struct task_info *info;
+
+	info = calloc(1, sizeof(*info));
+	if (info) {
+		info->threadfn = threadfn;
+		info->postfn = postfn;
+		info->private_data = thread_private;
+	}
+
+	return info;
+}
+
+/*
+ * Start the task's thread function in a new thread, passing it the private
+ * data given to task_init().
+ *
+ * Returns 0 on success, -1 if info or its thread function is missing, or
+ * the error number returned by pthread_create() (the stored thread id is
+ * reset to 0 in that case).
+ */
+int task_start(struct task_info *info)
+{
+	int err;
+
+	if (!info || !info->threadfn)
+		return -1;
+
+	err = pthread_create(&info->id, NULL, info->threadfn,
+			     info->private_data);
+	if (err != 0)
+		info->id = 0;
+
+	return err;
+}
+
+/*
+ * Stop a task: cancel and join its thread if one is recorded, close the
+ * periodic timer fd if one is recorded, then call the post function (when
+ * set) with the task's private data. A NULL info is ignored.
+ */
+void task_stop(struct task_info *info)
+{
+ if (!info)
+ return;
+
+ /* an id of 0 is how this file marks "no thread" */
+ if (info->id > 0) {
+ pthread_cancel(info->id);
+ pthread_join(info->id, NULL);
+ info->id = 0;
+ }
+
+ /* a timer_fd of 0 is how this file marks "no timer" */
+ if (info->periodic.timer_fd) {
+ close(info->periodic.timer_fd);
+ info->periodic.timer_fd = 0;
+ }
+
+ if (info->postfn)
+ info->postfn(info->private_data);
+}
+
+/* Release a descriptor obtained from task_init(); NULL is accepted. */
+void task_deinit(struct task_info *info)
+{
+	/* free() already ignores NULL */
+	free(info);
+}
+
+/*
+ * Create and arm a periodic timer that fires every period_ms milliseconds;
+ * task_period_wait() blocks on it.
+ *
+ * Returns 0 on success and -1 on failure (NULL info, timer creation or
+ * timer setup failed). When the timer cannot be created, timer_fd is left
+ * at 0, the value the rest of this file treats as "no timer".
+ */
+int task_period_start(struct task_info *info, unsigned int period_ms)
+{
+	unsigned int ns;
+	unsigned int sec;
+	struct itimerspec itval;
+
+	if (!info)
+		return -1;
+
+	info->periodic.timer_fd = timerfd_create(CLOCK_MONOTONIC, 0);
+	if (info->periodic.timer_fd == -1) {
+		info->periodic.timer_fd = 0;
+		/* report the failure instead of returning the reset fd (0) */
+		return -1;
+	}
+
+	info->periodic.wakeups_missed = 0;
+
+	sec = period_ms / 1000;
+	/* remaining milliseconds expressed in nanoseconds (at most 999000000) */
+	ns = (period_ms % 1000) * 1000000;
+	itval.it_interval.tv_sec = sec;
+	itval.it_interval.tv_nsec = ns;
+	itval.it_value.tv_sec = sec;
+	itval.it_value.tv_nsec = ns;
+
+	return timerfd_settime(info->periodic.timer_fd, 0, &itval, NULL);
+}
+
+/*
+ * Read from the periodic timer fd and add the number of expirations beyond
+ * the first to periodic.wakeups_missed. Returns immediately when info is
+ * NULL, when no timer is recorded (timer_fd == 0) or when the read fails.
+ */
+void task_period_wait(struct task_info *info)
+{
+ unsigned long long missed;
+ int ret;
+
+ if (!info)
+ return;
+
+ if (info->periodic.timer_fd == 0)
+ return;
+
+ ret = read(info->periodic.timer_fd, &missed, sizeof (missed));
+ if (ret == -1)
+ return;
+
+ /* one expiration is the regular wakeup; only the surplus was missed */
+ if (missed > 0)
+ info->periodic.wakeups_missed += (missed - 1);
+}
+
+/*
+ * Disarm and close the periodic timer, if one is recorded. A NULL info is
+ * ignored.
+ */
+void task_period_stop(struct task_info *info)
+{
+	/* an all-zero it_value disarms the timer */
+	struct itimerspec disarm = { { 0 } };
+
+	if (!info)
+		return;
+
+	if (info->periodic.timer_fd) {
+		timerfd_settime(info->periodic.timer_fd, 0, &disarm, NULL);
+		close(info->periodic.timer_fd);
+		/*
+		 * 0, not -1: task_stop() and task_period_wait() test for 0
+		 * to detect "no timer" and would otherwise use the stale fd.
+		 */
+		info->periodic.timer_fd = 0;
+	}
+}
diff --git a/task-utils.h b/task-utils.h
new file mode 100644
index 00000000..91d5a646
--- /dev/null
+++ b/task-utils.h
@@ -0,0 +1,47 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __TASK_UTILS_H__
+#define __TASK_UTILS_H__
+
+#include <pthread.h>
+
+/* State of the optional periodic timer of a task. */
+struct periodic_info {
+ /* timerfd descriptor; task-utils.c uses 0 to mean "no timer" */
+ int timer_fd;
+ /* timer expirations that task_period_wait() found already elapsed */
+ unsigned long long wakeups_missed;
+};
+
+/* Descriptor of one background task, created by task_init(). */
+struct task_info {
+ struct periodic_info periodic;
+ /* thread id; task-utils.c resets it to 0 when no thread is running */
+ pthread_t id;
+ /* passed to both threadfn and postfn */
+ void *private_data;
+ /* thread entry point started by task_start() */
+ void *(*threadfn)(void *);
+ /* optional hook called at the end of task_stop() */
+ int (*postfn)(void *);
+};
+
+/* task life cycle */
+struct task_info *task_init(void *(*threadfn)(void *), int (*postfn)(void *),
+ void *thread_private);
+int task_start(struct task_info *info);
+void task_stop(struct task_info *info);
+void task_deinit(struct task_info *info);
+
+/* periodic life cycle */
+int task_period_start(struct task_info *info, unsigned int period_ms);
+void task_period_wait(struct task_info *info);
+void task_period_stop(struct task_info *info);
+
+#endif
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 00000000..be3bda82
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,136 @@
+# Btrfs-progs tests
+
+Run the tests from the top directory:
+
+```shell
+$ make test
+$ make test-fsck
+$ make test-convert
+```
+
+or selectively from the `tests/` directory:
+
+```shell
+$ ./fsck-tests.sh
+$ ./misc-tests.sh
+```
+
+The verbose output of the tests is logged into a file named after the test
+category, eg. `fsck-tests-results.txt`.
+
+## Selective testing
+
+The tests are prefixed by a number for ordering and uniqueness. To run a
+particular test use:
+
+```shell
+$ make TEST=MASK test
+```
+
+where `MASK` is a glob expression that will execute only tests
+that match the MASK. Here the test number comes in handy:
+
+```shell
+$ make TEST=001\* test-fsck
+$ TEST=001\* ./fsck-tests.sh
+```
+
+will run the first test in fsck-tests subdirectory.
+
+
+## Test structure
+
+*tests/fsck-tests/:*
+
+ * tests targeted at bugs that are fixable by fsck
+
+*tests/convert-tests/:*
+
+ * coverage tests of ext2/3/4 and btrfs-convert options
+
+*tests/fuzz-tests/:*
+
+ * collection of fuzzed or crafted images
+ * tests that are supposed to run various utilities on the images and not
+ crash
+
+*tests/misc-tests/:*
+
+ * anything that does not fit to the above, the test driver script will only
+ execute `./test.sh` in the test directory
+
+*tests/common:*
+
+ * script with helpers
+
+*tests/test.img:*
+
+ * default testing image, the file is never deleted by the scripts but
+ truncated to 0 bytes, so it keeps its permissions. It's eg. possible to
+ host it on NFS, make it `chmod a+w` for root.
+
+
+## Other tuning, environment variables
+
+### Instrumentation
+
+It's possible to wrap the tested commands to utilities that might do more
+checking or catch failures at runtime. This can be done by setting the
+`INSTRUMENT` environment variable:
+
+```shell
+INSTRUMENT=valgrind ./fuzz-tests.sh # in tests/
+make INSTRUMENT=valgrind test-fuzz # in the top directory
+```
+
+The variable is prepended to the command *unquoted*, all sorts of shell tricks
+are possible.
+
+Note: instrumentation is not applied to privileged commands (anything that uses
+the root helper).
+
+### Verbosity
+
+Setting the variable `TEST_LOG=tty` will print all commands executed by some of
+the wrappers (`run_check` etc), other commands are silent.
+
+### Permissions
+
+Some commands require root privileges (to mount/umount, access loop devices).
+It is assumed that `sudo` will work in some way (no password, password asked
+and cached). Note that instrumentation is not applied in this case, for safety
+reasons. You need to modify the test script instead.
+
+### Cleanup
+
+The tests are supposed to cleanup after themselves if they pass. In case of
+failure, the rest of the tests are skipped and intermediate files, mounts and
+loop devices are kept. This should help to investigate the test failure but at
+least the mounts and loop devices need to be cleaned before the next run.
+
+This is partially done by the script `clean-tests.sh`, you may want to check
+the loop devices as they are managed on a per-test basis.
+
+## New test
+
+1. Pick the category for the new test or fallback to `misc-tests` if not sure. For
+an easy start copy an existing `test.sh` script from some test that might be
+close to the purpose of your new test.
+
+* Use the highest unused number in the sequence, write a short descriptive title
+and join by dashes `-`.
+
+* Write a short description of the bug and how it's tested in the comment at the
+beginning of `test.sh`.
+
+* Write the test commands, comment anything that's not obvious.
+
+* Test your test. Use the `TEST` variable to jump right to your test:
+```shell
+$ make TEST=012\* test-misc # from top directory
+$ TEST=012\* ./misc-tests.sh # from tests/
+```
+
+* The commit changelog should reference a commit that either introduced or
+ fixed the bug (or both). Subject line of the commit shall mention the name of the
+ new directory for ease of search, eg. `btrfs-progs: tests: add 012-subvolume-sync-must-wait`
diff --git a/tests/clean-tests.sh b/tests/clean-tests.sh
new file mode 100755
index 00000000..f7fefdda
--- /dev/null
+++ b/tests/clean-tests.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# remove all intermediate files from tests
+
+SCRIPT_DIR=$(dirname $(readlink -f $0))
+TOP=$(readlink -f $SCRIPT_DIR/../)
+source $TOP/tests/common
+
+setup_root_helper
+
+if [ "$BUILD_VERBOSE" = 1 ]; then
+ verbose=-print
+fi
+
+$SUDO_HELPER umount "$TEST_MNT" &>/dev/null
+
+if ! cd $TOP/tests; then
+ echo "ERROR: cannot cd to $TOP/tests"
+ exit 1
+fi
+
+find fsck-tests -type f -name '*.restored' $verbose -delete
+
+# do not remove, the file could have special permissions set
+echo -n > test.img
diff --git a/tests/common b/tests/common
new file mode 100644
index 00000000..61780486
--- /dev/null
+++ b/tests/common
@@ -0,0 +1,240 @@
+#!/bin/bash
+#
+# Common routines for all tests
+#
+
+_fail() # print the message, append it to $RESULTS and exit with status 1
+{
+ echo "$*" | tee -a $RESULTS
+ exit 1
+}
+
+# print a message on stdout and append it to the results file
+_log()
+{
+ echo "$*" | tee -a $RESULTS
+}
+
+_not_run() # print a [NOTRUN] notice with the reason and exit with status 0
+{
+ echo " [NOTRUN] $*"
+ exit 0
+}
+
+run_check()
+{
+	# command prefix, dropped below for commands run through root_helper
+	local prefix=$INSTRUMENT
+
+	echo "############### $@" >> $RESULTS 2>&1
+	if [ "$TEST_LOG" = 'tty' ]; then echo "CMD: $@" > /dev/tty; fi
+	[ "$1" = 'root_helper' ] && prefix=
+	$prefix "$@" >> $RESULTS 2>&1 || _fail "failed: $@"
+}
+
+# same as run_check but the stderr+stdout output is duplicated on stdout and
+# can be processed further; the test fails when the command itself fails
+run_check_stdout()
+{
+	local prefix=$INSTRUMENT
+	echo "############### $@" >> $RESULTS 2>&1
+	if [ "$TEST_LOG" = 'tty' ]; then echo "CMD(stdout): $@" > /dev/tty; fi
+	[ "$1" = 'root_helper' ] && prefix=
+	$prefix "$@" 2>&1 | tee -a $RESULTS
+	# the pipeline reports tee's status, so look at the command itself
+	[ ${PIPESTATUS[0]} -eq 0 ] || _fail "failed: $@"
+}
+
+# like run_check, but a failing command is only logged and reported as status 1
+run_mayfail()
+{
+	# command prefix, dropped below for commands run through root_helper
+	local prefix=$INSTRUMENT
+
+	echo "############### $@" >> $RESULTS 2>&1
+	if [ "$TEST_LOG" = 'tty' ]; then echo "CMD(mayfail): $@" > /dev/tty; fi
+	[ "$1" = 'root_helper' ] && prefix=
+	if $prefix "$@" >> $RESULTS 2>&1; then
+		return 0
+	fi
+	echo "failed (ignored): $@" >> $RESULTS
+	return 1
+}
+
+check_prereq()
+{
+	# $1: path relative to $TOP, must be an existing regular file
+	[ -f $TOP/$1 ] && return
+	_fail "Failed prerequisities: $1";
+}
+
+check_image()
+{
+	local image=$1
+
+	echo "testing image $(basename $image)" >> $RESULTS
+	# the damaged image has to be rejected before it gets repaired
+	if $TOP/btrfs check $image >> $RESULTS 2>&1; then
+		_fail "btrfs check should have detected corruption"
+	fi
+	run_check $TOP/btrfs check --repair $image
+	run_check $TOP/btrfs check $image
+}
+
+# Extract a usable image from packed formats, print the resulting path on stdout
+# - raw btrfs filesystem images, suffix .raw
+# - ditto compressed by XZ, suffix .raw.xz
+# - meta-dump images with suffix .img
+# - ditto compressed by XZ, suffix .img.xz
+extract_image()
+{
+ local image
+ local cleanme # decompressed .img, deleted once it has been restored
+
+ image="$1"
+ case "$image" in
+ *.img)
+ rm -f $image.restored
+ : ;;
+ *.img.xz)
+ xz --decompress --keep "$image" || \
+ _fail "failed to decompress image $image" >&2
+ image=${image%%.xz}
+ rm -f $image.restored
+ cleanme=$image
+ ;;
+ *.raw)
+ cp --sparse=auto $image $image.restored
+ ;;
+ *.raw.xz)
+ xz --decompress --keep "$image" || \
+ _fail "failed to decompress image $image" >&2
+ image=${image%%.xz}
+ mv "$image" "$image".restored
+ ;;
+ esac
+
+ if ! [ -f $image.restored ]; then # the .raw cases already created the file
+ echo "restoring image $(basename $image)" >> $RESULTS
+ $TOP/btrfs-image -r $image $image.restored \
+ &>> $RESULTS \
+ || _fail "failed to restore image $image" >&2
+ fi
+
+ [ -f "$cleanme" ] && rm -f "$cleanme"
+
+ echo "$image.restored"
+}
+
+# Process all image dumps in a given directory
+check_all_images()
+{
+ local dir
+ local extracted
+
+ dir="$1"
+ for image in $(find $dir \( -iname '*.img' -o \
+ -iname '*.img.xz' -o \
+ -iname '*.raw' -o \
+ -iname '*.raw.xz' \) | sort)
+ do
+ extracted=$(extract_image "$image")
+ check_image "$extracted"
+ rm -f "$extracted"
+ done
+}
+
+# some tests need to mount the recovered image and do verifications call
+# 'setup_root_helper' and then check for have_root_helper == 1 if the test
+# needs to fail otherwise; using sudo by default for now
+SUDO_HELPER=
+NEED_SUDO_VALIDATE=unknown
+export SUDO_HELPER
+export NEED_SUDO_VALIDATE
+root_helper()
+{
+	# root runs the command directly
+	if [ $UID -eq 0 ]; then
+		"$@"
+		return
+	fi
+	# everybody else goes through non-interactive sudo, after re-checking
+	# that it still works the way NEED_SUDO_VALIDATE says
+	case "$NEED_SUDO_VALIDATE" in
+	yes)	sudo -v -n &>/dev/null || \
+			_not_run "Need to validate sudo credentials" ;;
+	no)	sudo -n /bin/true &> /dev/null || \
+			_not_run "Need to validate sudo user settings" ;;
+	# should not happen
+	*)	_not_run "Need to validate root privileges" ;;
+	esac
+	sudo -n "$@"
+}
+
+setup_root_helper()
+{
+	# nothing to do for root or when the helper has been set up already
+	if [ $UID -eq 0 ] || [ -n "$SUDO_HELPER" ]; then return; fi
+
+	# Test for old sudo or special settings, which make sudo -v fail even
+	# if user setting is NOPASSWD
+	if sudo -n /bin/true &>/dev/null; then NEED_SUDO_VALIDATE=no; fi
+
+	# Newer sudo or default sudo setting; 'yes' wins when both checks pass
+	if sudo -v -n &>/dev/null; then NEED_SUDO_VALIDATE=yes; fi
+
+	# neither check passed: the test cannot get root privileges
+	[ "$NEED_SUDO_VALIDATE" != 'unknown' ] || \
+		_not_run "Need to validate root privileges"
+
+	SUDO_HELPER=root_helper
+}
+
+prepare_test_dev()
+{
+	# $1: image size as num[K/M/G/T...] for truncate, 2G if not given
+	local size="$1"
+
+	[[ "$TEST_DEV" ]] && return
+	[[ "$size" ]] || size='2G'
+
+	TEST_DEV="$TOP/tests/test.img"
+	# log the path that is really used as the fallback
+	echo "\$TEST_DEV not given, use $TEST_DEV as fallback" >> $RESULTS
+
+	truncate -s "$size" "$TEST_DEV" || _not_run "create file for loop device failed"
+}
+
+run_check_mount_test_dev()
+{
+	local loop_opt
+
+	setup_root_helper
+
+	# a block device is mounted as is, a regular file needs '-o loop'
+	if [[ -b "$TEST_DEV" ]]; then
+		loop_opt=""
+	elif [[ -f "$TEST_DEV" ]]; then
+		loop_opt="-o loop"
+	else
+		_fail "Invalid \$TEST_DEV: $TEST_DEV"
+	fi
+
+	[[ -d "$TEST_MNT" ]] || _fail "Invalid \$TEST_MNT: $TEST_MNT"
+
+	run_check $SUDO_HELPER mount $loop_opt "$@" "$TEST_DEV" "$TEST_MNT"
+}
+
+run_check_umount_test_dev() # umount $TEST_DEV, arguments are passed on to umount
+{
+ setup_root_helper
+ run_check $SUDO_HELPER umount "$@" "$TEST_DEV"
+}
+
+init_env() # default TEST_MNT to $TOP/tests/mnt, export it and create the directory
+{
+ TEST_MNT="${TEST_MNT:-$TOP/tests/mnt}"
+ export TEST_MNT
+ mkdir -p "$TEST_MNT" || { echo "Failed mkdir -p $TEST_MNT"; exit 1; }
+}
+init_env
diff --git a/tests/convert-tests.sh b/tests/convert-tests.sh
new file mode 100644
index 00000000..0bfb41f8
--- /dev/null
+++ b/tests/convert-tests.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# convert ext2/3/4 images to btrfs images, and make sure the results are
+# clean.
+#
+
+unset TOP
+unset LANG
+LANG=C
+SCRIPT_DIR=$(dirname $(readlink -f $0))
+TOP=$(readlink -f $SCRIPT_DIR/../)
+RESULTS="$TOP/tests/convert-tests-results.txt"
+
+source $TOP/tests/common
+
+rm -f $RESULTS
+
+setup_root_helper
+prepare_test_dev 256M
+
+CHECKSUMTMP=$(mktemp --tmpdir btrfs-progs-convert.XXXXXXXXXX)
+
+convert_test() {
+ local features # $1: value for btrfs-convert -O, empty means defaults
+ local nodesize # $3: used as dd block size and for btrfs-convert -N
+
+ features="$1"
+ shift
+
+ if [ -z "$features" ]; then
+ echo " [TEST/conv] $1, btrfs defaults"
+ else
+ echo " [TEST/conv] $1, btrfs $features"
+ fi
+ nodesize=$2 # after the shift, $1 is the description printed above
+ shift 2
+ echo "creating ext image with: $*" >> $RESULTS
+ # TEST_DEV not removed as the file might have special permissions, eg.
+ # when test image is on NFS and would not be writable for root
+ run_check truncate -s 0 $TEST_DEV
+ # 256MB is the smallest acceptable btrfs image.
+ run_check truncate -s 256M $TEST_DEV
+ run_check $* -F $TEST_DEV # remaining arguments are the mkfs command line
+
+ # create a file and record its checksum, to check that btrfs-convert
+ # converts a regular file correctly
+ run_check_mount_test_dev
+ run_check $SUDO_HELPER dd if=/dev/zero of=$TEST_MNT/test bs=$nodesize \
+ count=1 1>/dev/null 2>&1
+ run_check_stdout md5sum $TEST_MNT/test > $CHECKSUMTMP
+ run_check_umount_test_dev
+
+ run_check $TOP/btrfs-convert ${features:+-O "$features"} -N "$nodesize" $TEST_DEV
+ run_check $TOP/btrfs check $TEST_DEV
+ run_check $TOP/btrfs-show-super $TEST_DEV
+
+ run_check_mount_test_dev
+ run_check_stdout md5sum -c $CHECKSUMTMP |
+ grep -q 'OK' || _fail "file validation failed."
+ run_check_umount_test_dev
+}
+
+if ! [ -z "$TEST" ]; then
+ echo " [TEST/conv] skipped all convert tests, TEST=$TEST"
+ exit 0
+fi
+
+for feature in '' 'extref' 'skinny-metadata' 'no-holes'; do
+ convert_test "$feature" "ext2 4k nodesize" 4096 mke2fs -b 4096
+ convert_test "$feature" "ext3 4k nodesize" 4096 mke2fs -j -b 4096
+ convert_test "$feature" "ext4 4k nodesize" 4096 mke2fs -t ext4 -b 4096
+ convert_test "$feature" "ext2 8k nodesize" 8192 mke2fs -b 4096
+ convert_test "$feature" "ext3 8k nodesize" 8192 mke2fs -j -b 4096
+ convert_test "$feature" "ext4 8k nodesize" 8192 mke2fs -t ext4 -b 4096
+ convert_test "$feature" "ext2 16k nodesize" 16384 mke2fs -b 4096
+ convert_test "$feature" "ext3 16k nodesize" 16384 mke2fs -j -b 4096
+ convert_test "$feature" "ext4 16k nodesize" 16384 mke2fs -t ext4 -b 4096
+ convert_test "$feature" "ext2 32k nodesize" 32768 mke2fs -b 4096
+ convert_test "$feature" "ext3 32k nodesize" 32768 mke2fs -j -b 4096
+ convert_test "$feature" "ext4 32k nodesize" 32768 mke2fs -t ext4 -b 4096
+ convert_test "$feature" "ext2 64k nodesize" 65536 mke2fs -b 4096
+ convert_test "$feature" "ext3 64k nodesize" 65536 mke2fs -j -b 4096
+ convert_test "$feature" "ext4 64k nodesize" 65536 mke2fs -t ext4 -b 4096
+done
+
+rm $CHECKSUMTMP
diff --git a/tests/fsck-tests.sh b/tests/fsck-tests.sh
new file mode 100755
index 00000000..2aab4ff2
--- /dev/null
+++ b/tests/fsck-tests.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+#
+# loop through all of our bad images and make sure fsck repairs them properly
+#
+# It's GPL, same as everything else in this tree.
+#
+
+unset TOP
+unset LANG
+LANG=C
+SCRIPT_DIR=$(dirname $(readlink -f $0))
+TOP=$(readlink -f $SCRIPT_DIR/../)
+TEST_DEV=${TEST_DEV:-}
+RESULTS="$TOP/tests/fsck-tests-results.txt"
+
+source $TOP/tests/common
+
+# Allow child test to use $TOP and $RESULTS
+export TOP
+export RESULTS
+# Custom test scripts may need LANG to verify the recovery
+export LANG
+
+rm -f $RESULTS
+
+# test rely on corrupting blocks tool
+check_prereq btrfs-corrupt-block
+check_prereq btrfs-image
+check_prereq btrfs
+
+run_one_test() {
+	local testname="$1"
+
+	echo " [TEST/fsck] $(basename $testname)"
+	# never run a test (or scan for images) from the wrong directory
+	cd $testname || _fail "cannot enter test directory $testname"
+	echo "=== Entering $testname" >> $RESULTS
+	if [ -x test.sh ]; then
+		# Type 2: the directory brings its own test script
+		./test.sh
+		if [ $? -ne 0 ]; then
+			_fail "test failed for case $(basename $testname)"
+		fi
+	else
+		# Type 1: generic check of all image dumps in the directory
+		check_all_images `pwd`
+	fi
+	cd $TOP
+}
+
+# Each dir contains one type of error for btrfsck test.
+# Each dir must be one of the following 2 types:
+# 1) Only btrfs-image dump
+# Only contains one or several btrfs-image dumps (.img)
+# Each image will be tested by generic test routine
+# (btrfsck --repair and btrfsck).
+# This is for case that btree-healthy images.
+# 2) Custom test script
+# This dir contains test.sh which will do custom image
+# generation/check/verification.
+# This is for case btrfs-image can't dump or case needs extra
+# check/verify
+
+for i in $(find $TOP/tests/fsck-tests -maxdepth 1 -mindepth 1 -type d \
+ ${TEST:+-name "$TEST"} | sort)
+do
+ run_one_test "$i"
+done
diff --git a/tests/fsck-tests/001-bad-file-extent-bytenr/default_case.img b/tests/fsck-tests/001-bad-file-extent-bytenr/default_case.img
new file mode 100644
index 00000000..d2a05bb8
--- /dev/null
+++ b/tests/fsck-tests/001-bad-file-extent-bytenr/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/002-bad-transid/default_case.img b/tests/fsck-tests/002-bad-transid/default_case.img
new file mode 100644
index 00000000..85bd87cd
--- /dev/null
+++ b/tests/fsck-tests/002-bad-transid/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/003-shift-offsets/default_case.img b/tests/fsck-tests/003-shift-offsets/default_case.img
new file mode 100644
index 00000000..ce23f673
--- /dev/null
+++ b/tests/fsck-tests/003-shift-offsets/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/004-no-dir-index/default_case.img b/tests/fsck-tests/004-no-dir-index/default_case.img
new file mode 100644
index 00000000..6f2483e6
--- /dev/null
+++ b/tests/fsck-tests/004-no-dir-index/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/005-bad-item-offset/default_case.img b/tests/fsck-tests/005-bad-item-offset/default_case.img
new file mode 100644
index 00000000..e11e1e32
--- /dev/null
+++ b/tests/fsck-tests/005-bad-item-offset/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/006-bad-root-items/default_case.tar.xz b/tests/fsck-tests/006-bad-root-items/default_case.tar.xz
new file mode 100644
index 00000000..125d8e7d
--- /dev/null
+++ b/tests/fsck-tests/006-bad-root-items/default_case.tar.xz
Binary files differ
diff --git a/tests/fsck-tests/006-bad-root-items/skinny_case.tar.xz b/tests/fsck-tests/006-bad-root-items/skinny_case.tar.xz
new file mode 100644
index 00000000..ed99dc4d
--- /dev/null
+++ b/tests/fsck-tests/006-bad-root-items/skinny_case.tar.xz
Binary files differ
diff --git a/tests/fsck-tests/006-bad-root-items/test.sh b/tests/fsck-tests/006-bad-root-items/test.sh
new file mode 100755
index 00000000..84332348
--- /dev/null
+++ b/tests/fsck-tests/006-bad-root-items/test.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Run the generic check_image test on the images packed as tarballs
+source $TOP/tests/common
+
+check_prereq btrfs
+
+echo "extracting image default_case.tar.xz" >> $RESULTS
+tar --no-same-owner -xJf default_case.tar.xz || \
+ _fail "failed to extract default_case.tar.xz"
+check_image test.img # presumably unpacked from the tarball above - confirm
+
+echo "extracting image skinny_case.tar.xz" >> $RESULTS
+tar --no-same-owner -xJf skinny_case.tar.xz || \
+ _fail "failed to extract skinny_case.tar.xz"
+check_image test.img # presumably overwritten by the second tarball - confirm
+
+rm test.img
diff --git a/tests/fsck-tests/007-bad-offset-snapshots/default_case.img b/tests/fsck-tests/007-bad-offset-snapshots/default_case.img
new file mode 100644
index 00000000..b87e9977
--- /dev/null
+++ b/tests/fsck-tests/007-bad-offset-snapshots/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/008-bad-dir-index-name/default_case.img b/tests/fsck-tests/008-bad-dir-index-name/default_case.img
new file mode 100644
index 00000000..e004737f
--- /dev/null
+++ b/tests/fsck-tests/008-bad-dir-index-name/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/009-no-dir-item-or-index/default_case.img b/tests/fsck-tests/009-no-dir-item-or-index/default_case.img
new file mode 100644
index 00000000..d7f22692
--- /dev/null
+++ b/tests/fsck-tests/009-no-dir-item-or-index/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/010-no-rootdir-inode-item/default_case.img b/tests/fsck-tests/010-no-rootdir-inode-item/default_case.img
new file mode 100644
index 00000000..6b66fdad
--- /dev/null
+++ b/tests/fsck-tests/010-no-rootdir-inode-item/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/011-no-inode-item/default_case.img b/tests/fsck-tests/011-no-inode-item/default_case.img
new file mode 100644
index 00000000..352fc021
--- /dev/null
+++ b/tests/fsck-tests/011-no-inode-item/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/012-leaf-corruption/no_data_extent.tar.xz b/tests/fsck-tests/012-leaf-corruption/no_data_extent.tar.xz
new file mode 100644
index 00000000..547e5455
--- /dev/null
+++ b/tests/fsck-tests/012-leaf-corruption/no_data_extent.tar.xz
Binary files differ
diff --git a/tests/fsck-tests/012-leaf-corruption/test.sh b/tests/fsck-tests/012-leaf-corruption/test.sh
new file mode 100755
index 00000000..a308727d
--- /dev/null
+++ b/tests/fsck-tests/012-leaf-corruption/test.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+source $TOP/tests/common
+
+check_prereq btrfs-image
+
+# Check file list for leaf corruption, no regular/preallocated
+# file extent case.
+# Corrupted leaf is 20832256, which contains inode 1862~1872
+#
+# 1862, ref from leaf 20828160 key 24(DIR_ITEM)
+# 1863, ref from leaf 605388 item key 11(DIR_ITEM)
+# 1864, no ref to rebuild, no need to rebuild
+# 1865, ref from leaf 19767296 key 23(DIR_ITEM)
+# 1866-1868 no ref to rebuild, all refs in corrupted leaf
+# 1869, ref from leaf 4976640 key 22(DIR_ITEM)
+# 1870 no ref to rebuild, all refs in corrupted leaf
+# 1871, ref from leaf 19746816 key 38(DIR_ITEM)
+# 1872, ref from leaf 19767296 key 14(DIR_ITEM)
+# The list format is:
+# INO SIZE MODE NAME
+# INO: inode number
+# SIZE: file size, only checked for regular file
+# MODE: raw file mode, get from stat
+# NAME: file name
+leaf_no_data_ext_list=(
+ 1862 0 40700 "install.d"
+ 1862 0 40700 "install.d"
+ 1863 0 40700 "gdb"
+ 1865 0 40700 "iptables"
+ 1869 0 40700 "snmp"
+ 1871 0 100700 "machine-id"
+ 1872 0 100700 "adjtime"
+ 1877 0 40700 "del"
+)
+
+generate_leaf_corrupt_no_data_ext()
+{
+ dest=$1 # image file that btrfs-image -r restores into
+ echo "generating leaf_corrupt_no_data_ext.btrfs-image" >> $RESULTS
+ tar --no-same-owner -xJf ./no_data_extent.tar.xz || \
+ _fail "failed to extract leaf_corrupt_no_data_ext.btrfs-image"
+ $TOP/btrfs-image -r test.img.btrfs-image $dest || \
+ _fail "failed to extract leaf_corrupt_no_data_ext.btrfs-image"
+
+ # leaves at 4206592 and 20905984 contain no regular data extent;
+ # overwrite 32 bytes at each offset (the csum) with zeros to corrupt them
+ for x in 4206592 20905984; do
+ dd if=/dev/zero of=$dest bs=1 count=32 conv=notrunc seek=$x \
+ 1>/dev/null 2>&1
+ done
+}
+
+check_inode()
+{
+ path=$1 # directory searched with find
+ ino=$2 # inode number to look for
+ size=$3 # expected size as printed by stat %s
+ mode=$4 # expected raw mode, octal digits
+ name=$5 # expected basename of the found path
+
+ # Check whether the inode exists (NOTE: none of these variables is local)
+ exists=$($SUDO_HELPER find $path -inum $ino)
+ if [ -z "$exists" ]; then
+ _fail "inode $ino not recovered correctly"
+ fi
+
+ # Check inode type: stat %f prints the raw mode in hex, convert to octal
+ found_mode=$(printf "%o" 0x$($SUDO_HELPER stat $exists -c %f))
+ if [ $found_mode -ne $mode ]; then
+ echo "$found_mode"
+ _fail "inode $ino modes not recovered"
+ fi
+
+ # Check inode size, skipped only when the expected mode is 41700
+ found_size=$($SUDO_HELPER stat $exists -c %s)
+ if [ $mode -ne 41700 -a $found_size -ne $size ]; then
+ _fail "inode $ino size not recovered correctly"
+ fi
+
+ # Check inode name
+ if [ "$(basename $exists)" != "$name" ]; then
+ _fail "inode $ino name not recovered correctly"
+ else
+ return 0
+ fi
+}
+
+# Check salvaged data in the recovered image
+check_leaf_corrupt_no_data_ext()
+{
+ image=$1
+ $SUDO_HELPER mount -o loop $image -o ro $TEST_MNT
+
+ i=0
+ while [ $i -lt ${#leaf_no_data_ext_list[@]} ]; do
+ check_inode $TEST_MNT/lost+found \
+ ${leaf_no_data_ext_list[i]} \
+ ${leaf_no_data_ext_list[i + 1]} \
+ ${leaf_no_data_ext_list[i + 2]} \
+ ${leaf_no_data_ext_list[i + 3]} \
+ ${leaf_no_data_ext_list[i + 4]}
+ ((i+=4))
+ done
+ $SUDO_HELPER umount $TEST_MNT
+}
+
+setup_root_helper
+
+generate_leaf_corrupt_no_data_ext test.img
+check_image test.img
+check_leaf_corrupt_no_data_ext test.img
+
+rm test.img
+rm test.img.btrfs-image
+# Not used, its function is the same as generate_leaf_corrupt_no_data_ext()
+rm generate_image.sh
diff --git a/tests/fsck-tests/013-extent-tree-rebuild/test.sh b/tests/fsck-tests/013-extent-tree-rebuild/test.sh
new file mode 100755
index 00000000..ff7d28e5
--- /dev/null
+++ b/tests/fsck-tests/013-extent-tree-rebuild/test.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+source $TOP/tests/common
+
+check_prereq btrfs-corrupt-block
+check_prereq btrfs-debug-tree
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev 1G
+
+# test whether fsck can rebuild a corrupted extent tree
+test_extent_tree_rebuild()
+{
+	run_check $SUDO_HELPER $TOP/mkfs.btrfs -f $TEST_DEV
+
+	run_check_mount_test_dev
+	run_check $SUDO_HELPER cp -aR /lib/modules/`uname -r`/ $TEST_MNT
+
+	for i in `seq 1 100`;do
+		run_check $SUDO_HELPER $TOP/btrfs sub snapshot $TEST_MNT \
+			$TEST_MNT/snapaaaaaaa_$i
+	done
+	run_check_umount_test_dev
+
+	# get extent root bytenr: 7th field of the 'extent' line printed by
+	# btrfs-debug-tree -r; quoted below so an odd value cannot break [ ]
+	extent_root_bytenr=`$SUDO_HELPER $TOP/btrfs-debug-tree -r $TEST_DEV | \
+			    grep extent | awk '{print $7}'`
+	if [ -z "$extent_root_bytenr" ];then
+		_fail "fail to get extent root bytenr"
+	fi
+	# corrupt extent root node block
+	run_check $SUDO_HELPER $TOP/btrfs-corrupt-block -l $extent_root_bytenr \
+		-b 4096 $TEST_DEV
+
+	$SUDO_HELPER $TOP/btrfs check $TEST_DEV >& /dev/null && \
+		_fail "btrfs check should detect failure"
+	run_check $SUDO_HELPER $TOP/btrfs check --init-extent-tree $TEST_DEV
+	run_check $SUDO_HELPER $TOP/btrfs check $TEST_DEV
+}
+
+test_extent_tree_rebuild
diff --git a/tests/fsck-tests/014-no-extent-info/default_case.img b/tests/fsck-tests/014-no-extent-info/default_case.img
new file mode 100644
index 00000000..1ff27434
--- /dev/null
+++ b/tests/fsck-tests/014-no-extent-info/default_case.img
Binary files differ
diff --git a/tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.txt b/tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.txt
new file mode 100644
index 00000000..9685ed46
--- /dev/null
+++ b/tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.txt
@@ -0,0 +1,254 @@
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=97171
+
+The btrfs-image attached to this bug causes the btrfs-userland tool to use
+uninitialized memory and ultimately overwrite what seems to be arbitrary memory
+locations, dying in the process. Reproduced on x86-64 and i686.
+
+The kernel seems to be less affected and fails to mount the image. If
+/usr/sbin/btrfs is not setuid (which it probably never is), things should be
+safe. I didn't investigate further though.
+
+gdb output:
+
+GNU gdb (GDB) Fedora 7.8.2-38.fc21
+[... lots of other errors...]
+Ignoring transid failure
+root 5 inode 260 errors 1000, some csum missing
+ unresolved ref dir 256 index 7 namelen 5 name b.bin filetype 1 errors 2, no dir index
+ unresolved ref dir 256 index 7 namelen 5 name b.fin filetype 1 errors 5, no dir item, no inode ref
+root 5 inode 261 errors 200, dir isize wrong
+
+Program received signal SIGSEGV, Segmentation fault.
+0x000000000089bb70 in ?? ()
+(gdb) bt
+#0 0x000000000089bb70 in ?? ()
+#1 0x00007fffffffdb50 in ?? ()
+#2 0x0000000000894b20 in ?? ()
+#3 0x00000032629b88e0 in _IO_2_1_stdout_ () from /lib64/libc.so.6
+#4 0x000000000088c010 in ?? ()
+#5 0x0000000000000000 in ?? ()
+
+
+valgrind output:
+
+[...lots of errors...]
+==12638== Conditional jump or move depends on uninitialised value(s)
+==12638== at 0x436E77: check_block.part.14 (ctree.c:548)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Conditional jump or move depends on uninitialised value(s)
+==12638== at 0x4A0B0E7: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Conditional jump or move depends on uninitialised value(s)
+==12638== at 0x4A0B2AC: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Conditional jump or move depends on uninitialised value(s)
+==12638== at 0x4A0B151: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Conditional jump or move depends on uninitialised value(s)
+==12638== at 0x4A0B162: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Conditional jump or move depends on uninitialised value(s)
+==12638== at 0x4A0B176: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Conditional jump or move depends on uninitialised value(s)
+==12638== at 0x4A0B2CE: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Conditional jump or move depends on uninitialised value(s)
+==12638== at 0x4A0B34A: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Use of uninitialised value of size 8
+==12638== at 0x4A0B3A0: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638==
+==12638== Invalid read of size 1
+==12638== at 0x4A0B3A0: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638== Address 0xa25c9de9 is not stack'd, malloc'd or (recently) free'd
+==12638==
+==12638==
+==12638== Process terminating with default action of signal 11 (SIGSEGV)
+==12638== Access not within mapped region at address 0xA25C9DE9
+==12638== at 0x4A0B3A0: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==12638== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==12638== by 0x436E99: check_block.part.14 (ctree.c:550)
+==12638== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==12638== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==12638== by 0x40DD1F: count_csum_range (cmds-check.c:1419)
+==12638== by 0x40DD1F: process_file_extent (cmds-check.c:1551)
+==12638== by 0x40DD1F: process_one_leaf (cmds-check.c:1617)
+==12638== by 0x40DD1F: walk_down_tree (cmds-check.c:1742)
+==12638== by 0x40DD1F: check_fs_root (cmds-check.c:3380)
+==12638== by 0x40DD1F: check_fs_roots.isra.51 (cmds-check.c:3516)
+==12638== by 0x4C64B0F: ???
+==12638== by 0x4C30A2F: ???
+==12638== by 0x4C468CF: ???
+==12638== by 0x32629B88DF: ??? (in /usr/lib64/libc-2.20.so)
+==12638== by 0x4C3657F: ???
+==12638== If you believe this happened as a result of a stack
+==12638== overflow in your program's main thread (unlikely but
+==12638== possible), you can try to increase the size of the
+==12638== main thread stack using the --main-stacksize= flag.
+==12638== The main thread stack size used in this run was 8388608.
+==12638==
+==12638== HEAP SUMMARY:
+==12638== in use at exit: 46,260 bytes in 56 blocks
+==12638== total heap usage: 380 allocs, 324 frees, 218,054 bytes allocated
+==12638==
+==12638== LEAK SUMMARY:
+==12638== definitely lost: 272 bytes in 2 blocks
+==12638== indirectly lost: 800 bytes in 8 blocks
+==12638== possibly lost: 88 bytes in 1 blocks
+==12638== still reachable: 45,100 bytes in 45 blocks
+==12638== suppressed: 0 bytes in 0 blocks
+==12638== Rerun with --leak-check=full to see details of leaked memory
+==12638==
+==12638== For counts of detected and suppressed errors, rerun with: -v
+==12638== Use --track-origins=yes to see where uninitialised values come from
+==12638== ERROR SUMMARY: 10 errors from 10 contexts (suppressed: 0 from 0)
+[1] 12638 segmentation fault valgrind btrfs check btrfs_fukked_memorycorruption.bin
diff --git a/tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.xz b/tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.xz
new file mode 100644
index 00000000..f3f0944d
--- /dev/null
+++ b/tests/fsck-tests/015-check-bad-memory-access/bko-97171-btrfs-image.raw.xz
Binary files differ
diff --git a/tests/fsck-tests/016-wrong-inode-nbytes/default_case.img.xz b/tests/fsck-tests/016-wrong-inode-nbytes/default_case.img.xz
new file mode 100644
index 00000000..d513acf5
--- /dev/null
+++ b/tests/fsck-tests/016-wrong-inode-nbytes/default_case.img.xz
Binary files differ
diff --git a/tests/fsck-tests/017-missing-all-file-extent/default_case.img.xz b/tests/fsck-tests/017-missing-all-file-extent/default_case.img.xz
new file mode 100644
index 00000000..10cd4c78
--- /dev/null
+++ b/tests/fsck-tests/017-missing-all-file-extent/default_case.img.xz
Binary files differ
diff --git a/tests/fsck-tests/018-leaf-crossing-stripes/default_case.raw.xz b/tests/fsck-tests/018-leaf-crossing-stripes/default_case.raw.xz
new file mode 100644
index 00000000..60eb2f97
--- /dev/null
+++ b/tests/fsck-tests/018-leaf-crossing-stripes/default_case.raw.xz
Binary files differ
diff --git a/tests/fsck-tests/018-leaf-crossing-stripes/test.sh b/tests/fsck-tests/018-leaf-crossing-stripes/test.sh
new file mode 100755
index 00000000..c453ab5c
--- /dev/null
+++ b/tests/fsck-tests/018-leaf-crossing-stripes/test.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+source $TOP/tests/common
+
+check_prereq btrfs
+
+image=$(extract_image "./default_case.raw.xz")
+run_check_stdout $TOP/btrfs check "$image" 2>&1 |
+ grep -q "crossing stripe boundary" ||
+ _fail "no expected error message in the output"
+
+rm -f "$image"
diff --git a/tests/fsck-tests/019-non-skinny-false-alert/default_case.img.xz b/tests/fsck-tests/019-non-skinny-false-alert/default_case.img.xz
new file mode 100644
index 00000000..c35f8bc6
--- /dev/null
+++ b/tests/fsck-tests/019-non-skinny-false-alert/default_case.img.xz
Binary files differ
diff --git a/tests/fsck-tests/019-non-skinny-false-alert/test.sh b/tests/fsck-tests/019-non-skinny-false-alert/test.sh
new file mode 100755
index 00000000..a7f8e862
--- /dev/null
+++ b/tests/fsck-tests/019-non-skinny-false-alert/test.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# $ btrfs check img
+# Checking filesystem on img
+# UUID: 17f2bf15-f4c2-4ebc-b1f7-39b7af26257a
+# checking extents
+# bad extent [29376512, 29392896), type mismatch with chunk
+# bad extent [29442048, 29458432), type mismatch with chunk
+# bad extent [29589504, 29605888), type mismatch with chunk
+# ...
+#
+# a buggy check leads to the above messages
+
+source $TOP/tests/common
+
+check_prereq btrfs
+
+image=$(extract_image "./default_case.img.xz")
+run_check_stdout $TOP/btrfs check "$image" 2>&1 |
+ grep -q "type mismatch with chunk" &&
+ _fail "unexpected error message in the output"
+
+rm -f "$image"
diff --git a/tests/fuzz-tests.sh b/tests/fuzz-tests.sh
new file mode 100755
index 00000000..204dce2d
--- /dev/null
+++ b/tests/fuzz-tests.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# misc tests on fuzzed or crafted images
+
+unset TOP
+unset LANG
+LANG=C
+SCRIPT_DIR=$(dirname $(readlink -f $0))
+TOP=$(readlink -f $SCRIPT_DIR/../)
+TEST_DEV=${TEST_DEV:-}
+RESULTS="$TOP/tests/fuzz-tests-results.txt"
+IMAGE="$TOP/tests/test.img"
+
+source $TOP/tests/common
+
+export TOP
+export RESULTS
+export LANG
+export IMAGE
+
+rm -f $RESULTS
+
+check_prereq btrfs
+
+# The tests are driven by their custom script called 'test.sh'
+
+for i in $(find $TOP/tests/fuzz-tests -maxdepth 1 -mindepth 1 -type d \
+ ${TEST:+-name "$TEST"} | sort)
+do
+ name=$(basename $i)
+ cd $i
+ if [ -x test.sh ]; then
+ echo "=== Entering $i" >> $RESULTS
+ echo " [TEST/fuzz] $name"
+ ./test.sh
+ if [ $? -ne 0 ]; then
+ _fail "test failed for case $(basename $i)"
+ fi
+ fi
+ cd $TOP
+done
diff --git a/tests/fuzz-tests/001-simple-unmounted/test.sh b/tests/fuzz-tests/001-simple-unmounted/test.sh
new file mode 100755
index 00000000..bf01a3a4
--- /dev/null
+++ b/tests/fuzz-tests/001-simple-unmounted/test.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+# iterate over all fuzzed images and run 'btrfs check'
+
+source $TOP/tests/common
+
+setup_root_helper
+check_prereq btrfs
+
+# redefine the one provided by common
+check_image() {
+ local image
+
+ image=$1
+ run_mayfail $TOP/btrfs check "$image"
+}
+
+check_all_images $TOP/tests/fuzz-tests/images
+
+exit 0
diff --git a/tests/fuzz-tests/images/bad-superblock-1.raw.xz b/tests/fuzz-tests/images/bad-superblock-1.raw.xz
new file mode 100644
index 00000000..3d6358f0
--- /dev/null
+++ b/tests/fuzz-tests/images/bad-superblock-1.raw.xz
Binary files differ
diff --git a/tests/fuzz-tests/images/bad-superblock-2.raw.xz b/tests/fuzz-tests/images/bad-superblock-2.raw.xz
new file mode 100644
index 00000000..7db7610b
--- /dev/null
+++ b/tests/fuzz-tests/images/bad-superblock-2.raw.xz
Binary files differ
diff --git a/tests/fuzz-tests/images/bad-superblock-3.raw.xz b/tests/fuzz-tests/images/bad-superblock-3.raw.xz
new file mode 100644
index 00000000..4aa31483
--- /dev/null
+++ b/tests/fuzz-tests/images/bad-superblock-3.raw.xz
Binary files differ
diff --git a/tests/fuzz-tests/images/bad-superblock.txt b/tests/fuzz-tests/images/bad-superblock.txt
new file mode 100644
index 00000000..f7dd9aa0
--- /dev/null
+++ b/tests/fuzz-tests/images/bad-superblock.txt
@@ -0,0 +1,17 @@
+bad-superblock-*.raw.xz
+
+Crafted images from Jiri Slaby, produced by some symbolic execution framework
+that finds unhandled cases at mount time.
+
+Relevant kernel patches to backport:
+
+e3540eab29e1b2260bc4b9b3979a49a00e3e3af8
+btrfs: add more checks to btrfs_read_sys_array
+
+ce7fca5f57ed0fcd7e7b3d7b1a3e1791f8e56fa3
+btrfs: add checks for sys_chunk_array sizes
+
+75d6ad382bb91f363452119d34238e156589ca2d
+btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
+
+(and more from fs/btrfs/super.c)
diff --git a/tests/fuzz-tests/images/bko-104131-fsck-oob-read.raw.xz b/tests/fuzz-tests/images/bko-104131-fsck-oob-read.raw.xz
new file mode 100644
index 00000000..7848f8b1
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-104131-fsck-oob-read.raw.xz
Binary files differ
diff --git a/tests/fuzz-tests/images/bko-104131-fsck-oob-read.txt b/tests/fuzz-tests/images/bko-104131-fsck-oob-read.txt
new file mode 100644
index 00000000..0e829c2e
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-104131-fsck-oob-read.txt
@@ -0,0 +1,31 @@
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=104131
+Hanno Boeck 2015-09-07 07:24:32 UTC
+
+Created attachment 186941 [details]
+malformed btrfs filesystem causing oob read
+
+The attached malformed filesystem image will cause an invalid heap out of bounds memory read in btrfsck.
+
+This was found while fuzzing btrfs-progs with american fuzzy lop.
+
+Stack trace from Address Sanitizer:
+==31289==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60f00000f003 at pc 0x0000005d0dbb bp 0x7ffdf444c180 sp 0x7ffdf444c178
+READ of size 8 at 0x60f00000f003 thread T0
+ #0 0x5d0dba in btrfs_header_bytenr /mnt/ram/btrfs-progs-v4.1.2/./ctree.h:1797:1
+ #1 0x5d0dba in check_tree_block /mnt/ram/btrfs-progs-v4.1.2/disk-io.c:60
+ #2 0x5d0dba in read_tree_block /mnt/ram/btrfs-progs-v4.1.2/disk-io.c:337
+ #3 0x5dc00e in btrfs_setup_chunk_tree_and_device_map /mnt/ram/btrfs-progs-v4.1.2/disk-io.c:1169:30
+ #4 0x5dcf89 in __open_ctree_fd /mnt/ram/btrfs-progs-v4.1.2/disk-io.c:1261:8
+ #5 0x5dc50a in open_ctree_fs_info /mnt/ram/btrfs-progs-v4.1.2/disk-io.c:1302:9
+ #6 0x52f22f in cmd_check /mnt/ram/btrfs-progs-v4.1.2/cmds-check.c:9333:9
+ #7 0x4e7bcc in main /mnt/ram/btrfs-progs-v4.1.2/btrfs.c:245:7
+ #8 0x7f98bb101f9f in __libc_start_main /var/tmp/portage/sys-libs/glibc-2.20-r2/work/glibc-2.20/csu/libc-start.c:289
+ #9 0x41f748 in _start (/mnt/ram/btrfs/btrfs+0x41f748)
+
+0x60f00000f003 is located 3 bytes to the right of 176-byte region [0x60f00000ef50,0x60f00000f000)
+allocated by thread T0 here:
+ #0 0x4bade8 in malloc (/mnt/ram/btrfs/btrfs+0x4bade8)
+ #1 0x622c24 in __alloc_extent_buffer /mnt/ram/btrfs-progs-v4.1.2/extent_io.c:541:7
+ #2 0x622c24 in alloc_extent_buffer /mnt/ram/btrfs-progs-v4.1.2/extent_io.c:648
+ #3 0x5cf436 in btrfs_find_create_tree_block /mnt/ram/btrfs-progs-v4.1.2/disk-io.c:186:9
+ #4 0x5cf436 in read_tree_block /mnt/ram/btrfs-progs-v4.1.2/disk-io.c:314
diff --git a/tests/fuzz-tests/images/bko-104141-fsck-exception.raw.xz b/tests/fuzz-tests/images/bko-104141-fsck-exception.raw.xz
new file mode 100644
index 00000000..d24a32f8
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-104141-fsck-exception.raw.xz
Binary files differ
diff --git a/tests/fuzz-tests/images/bko-104141-fsck-exception.txt b/tests/fuzz-tests/images/bko-104141-fsck-exception.txt
new file mode 100644
index 00000000..aed91909
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-104141-fsck-exception.txt
@@ -0,0 +1,9 @@
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=104141
+Hanno Boeck 2015-09-07 07:27:58 UTC
+
+Created attachment 186951 [details]
+malformed filesystem causing floating point exception
+
+The attached file will cause a floating point exception in btrfsck.
+
+This was found while fuzzing btrfs-progs with american fuzzy lop.
diff --git a/tests/fuzz-tests/images/bko-97191-btrfs-image.raw.txt b/tests/fuzz-tests/images/bko-97191-btrfs-image.raw.txt
new file mode 100644
index 00000000..f0d81894
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-97191-btrfs-image.raw.txt
@@ -0,0 +1,137 @@
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=97191
+Lukas Lueg 2015-04-23 22:20:35 UTC
+
+Running btrfs-progs v3.19.1
+
+The btrfs-image attached to this bug causes the btrfs-userland tool to
+overflow some data structures, leading to unallocated memory being written to
+and read from. A segfault results shortly after. Reproduced on x86-64 and
+i686.
+
+The kernel seems to be less affected and fails to mount the image. I didn't
+investigate whether the reads/writes could be used to gain control over $EIP.
+Since the first invalid write of 8 bytes seems to run into adjacent heap
+blocks (crash in unlink()), it may be possible though.
+
+gdb output:
+
+Program received signal SIGSEGV, Segmentation fault.
+malloc_consolidate (av=av@entry=0x32629b7cc0 <main_arena>) at malloc.c:4151
+4151 unlink(av, p, bck, fwd);
+(gdb) bt
+#0 malloc_consolidate (av=av@entry=0x32629b7cc0 <main_arena>) at malloc.c:4151
+#1 0x0000003262680628 in _int_malloc (av=av@entry=0x32629b7cc0 <main_arena>, bytes=bytes@entry=4224) at malloc.c:3420
+#2 0x000000326268315e in __GI___libc_malloc (bytes=4224) at malloc.c:2896
+#3 0x0000000000449d15 in __alloc_extent_buffer (tree=0x88c078, bytenr=4288512, blocksize=4096) at extent_io.c:541
+#4 0x000000000044a8b4 in alloc_extent_buffer (tree=0x88c078, bytenr=4288512, blocksize=4096) at extent_io.c:648
+#5 0x000000000043b1a0 in btrfs_find_create_tree_block (root=root@entry=0x895840, bytenr=<optimized out>,
+ blocksize=<optimized out>) at disk-io.c:159
+#6 0x000000000043ca4e in read_tree_block (root=root@entry=0x895840, bytenr=<optimized out>, blocksize=<optimized out>,
+ parent_transid=13) at disk-io.c:287
+#7 0x000000000043ccb7 in find_and_setup_root (tree_root=0x88c250, fs_info=<optimized out>, objectid=5, root=0x895840)
+ at disk-io.c:557
+#8 0x000000000043ce92 in btrfs_read_fs_root_no_cache (fs_info=fs_info@entry=0x88c010, location=location@entry=0x7fffffffd960)
+ at disk-io.c:640
+#9 0x000000000043d060 in btrfs_read_fs_root (fs_info=fs_info@entry=0x88c010, location=location@entry=0x7fffffffd960)
+ at disk-io.c:739
+#10 0x000000000043d48c in btrfs_setup_all_roots (fs_info=fs_info@entry=0x88c010, root_tree_bytenr=<optimized out>,
+ root_tree_bytenr@entry=0, flags=flags@entry=OPEN_CTREE_EXCLUSIVE) at disk-io.c:988
+#11 0x000000000043d802 in __open_ctree_fd (fp=fp@entry=3, path=path@entry=0x7fffffffe20d "ramdisk/btrfs_fukked.bin",
+ sb_bytenr=65536, sb_bytenr@entry=0, root_tree_bytenr=root_tree_bytenr@entry=0, flags=flags@entry=OPEN_CTREE_EXCLUSIVE)
+ at disk-io.c:1199
+#12 0x000000000043d965 in open_ctree_fs_info (filename=0x7fffffffe20d "ramdisk/btrfs_fukked.bin", sb_bytenr=sb_bytenr@entry=0,
+ root_tree_bytenr=root_tree_bytenr@entry=0, flags=flags@entry=OPEN_CTREE_EXCLUSIVE) at disk-io.c:1231
+#13 0x0000000000427bf5 in cmd_check (argc=1, argv=0x7fffffffdea0) at cmds-check.c:9326
+#14 0x000000000040e5a2 in main (argc=2, argv=0x7fffffffdea0) at btrfs.c:245
+
+
+valgrind output:
+
+==32463== Memcheck, a memory error detector
+==32463== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
+==32463== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info
+==32463== Command: btrfs check ramdisk/btrfs_fukked.bin
+==32463==
+==32463== Invalid write of size 8
+==32463== at 0x4386FB: btrfs_search_slot (ctree.c:1119)
+==32463== by 0x4427F7: UnknownInlinedFun (extent-tree.c:3117)
+==32463== by 0x4427F7: btrfs_read_block_groups (extent-tree.c:3167)
+==32463== by 0x43D4F2: btrfs_setup_all_roots (disk-io.c:983)
+==32463== by 0x43D801: __open_ctree_fd (disk-io.c:1199)
+==32463== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==32463== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==32463== by 0x40E5A1: main (btrfs.c:245)
+==32463== Address 0x4c409f0 is 16 bytes after a block of size 144 alloc'd
+==32463== at 0x4A08946: calloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==32463== by 0x4427AB: btrfs_read_block_groups (extent-tree.c:3162)
+==32463== by 0x43D4F2: btrfs_setup_all_roots (disk-io.c:983)
+==32463== by 0x43D801: __open_ctree_fd (disk-io.c:1199)
+==32463== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==32463== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==32463== by 0x40E5A1: main (btrfs.c:245)
+==32463==
+==32463== Invalid read of size 8
+==32463== at 0x436E70: check_block.part.14 (ctree.c:548)
+==32463== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==32463== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==32463== by 0x4427F7: UnknownInlinedFun (extent-tree.c:3117)
+==32463== by 0x4427F7: btrfs_read_block_groups (extent-tree.c:3167)
+==32463== by 0x43D4F2: btrfs_setup_all_roots (disk-io.c:983)
+==32463== by 0x43D801: __open_ctree_fd (disk-io.c:1199)
+==32463== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==32463== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==32463== by 0x40E5A1: main (btrfs.c:245)
+==32463== Address 0x4c409f8 is 24 bytes after a block of size 144 in arena "client"
+==32463==
+==32463== Invalid read of size 4
+==32463== at 0x436E84: UnknownInlinedFun (ctree.h:1605)
+==32463== by 0x436E84: UnknownInlinedFun (ctree.h:1612)
+==32463== by 0x436E84: check_block.part.14 (ctree.c:550)
+==32463== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==32463== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==32463== by 0x4427F7: UnknownInlinedFun (extent-tree.c:3117)
+==32463== by 0x4427F7: btrfs_read_block_groups (extent-tree.c:3167)
+==32463== by 0x43D4F2: btrfs_setup_all_roots (disk-io.c:983)
+==32463== by 0x43D801: __open_ctree_fd (disk-io.c:1199)
+==32463== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==32463== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==32463== by 0x40E5A1: main (btrfs.c:245)
+==32463== Address 0x4c409e4 is 4 bytes after a block of size 144 alloc'd
+==32463== at 0x4A08946: calloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==32463== by 0x4427AB: btrfs_read_block_groups (extent-tree.c:3162)
+==32463== by 0x43D4F2: btrfs_setup_all_roots (disk-io.c:983)
+==32463== by 0x43D801: __open_ctree_fd (disk-io.c:1199)
+==32463== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==32463== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==32463== by 0x40E5A1: main (btrfs.c:245)
+==32463==
+==32463== Invalid read of size 1
+==32463== at 0x4A0B3A0: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==32463== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==32463== by 0x436E99: check_block.part.14 (ctree.c:550)
+==32463== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==32463== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==32463== by 0x4427F7: UnknownInlinedFun (extent-tree.c:3117)
+==32463== by 0x4427F7: btrfs_read_block_groups (extent-tree.c:3167)
+==32463== by 0x43D4F2: btrfs_setup_all_roots (disk-io.c:983)
+==32463== by 0x43D801: __open_ctree_fd (disk-io.c:1199)
+==32463== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==32463== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==32463== by 0x40E5A1: main (btrfs.c:245)
+==32463== Address 0x1b1 is not stack'd, malloc'd or (recently) free'd
+==32463==
+==32463==
+==32463== Process terminating with default action of signal 11 (SIGSEGV)
+==32463== Access not within mapped region at address 0x1B1
+==32463== at 0x4A0B3A0: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==32463== by 0x436E99: UnknownInlinedFun (ctree.h:1613)
+==32463== by 0x436E99: check_block.part.14 (ctree.c:550)
+==32463== by 0x438954: UnknownInlinedFun (kerncompat.h:91)
+==32463== by 0x438954: btrfs_search_slot (ctree.c:1120)
+==32463== by 0x4427F7: UnknownInlinedFun (extent-tree.c:3117)
+==32463== by 0x4427F7: btrfs_read_block_groups (extent-tree.c:3167)
+==32463== by 0x43D4F2: btrfs_setup_all_roots (disk-io.c:983)
+==32463== by 0x43D801: __open_ctree_fd (disk-io.c:1199)
+==32463== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==32463== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==32463== by 0x40E5A1: main (btrfs.c:245)
diff --git a/tests/fuzz-tests/images/bko-97191-btrfs-image.raw.xz b/tests/fuzz-tests/images/bko-97191-btrfs-image.raw.xz
new file mode 100644
index 00000000..b2e48c08
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-97191-btrfs-image.raw.xz
Binary files differ
diff --git a/tests/fuzz-tests/images/bko-97271-btrfs-image.raw.txt b/tests/fuzz-tests/images/bko-97271-btrfs-image.raw.txt
new file mode 100644
index 00000000..67f20968
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-97271-btrfs-image.raw.txt
@@ -0,0 +1,54 @@
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=97271
+Lukas Lueg 2015-04-25 20:34:39 UTC
+
+The attached btrfs-image causes "btrfs check" to write outside of allocated
+memory locations and ultimately die due to a segfault. An adjacent heap block's
+control structure is overwritten with a `struct extent_buffer *`, which is not
+controllable by the user.
+
+"btrfs version" is v3.19.1. Running "btrfs check" immediately dies with
+
+*** Error in `btrfs': double free or corruption (!prev): 0x0000000002396ec0 ***
+*** Error in `btrfs': malloc(): memory corruption: 0x0000000002396f60 ***
+
+Debugging with valgrind and gdb gives
+
+==11670== Invalid write of size 8
+==11670== at 0x4386FB: btrfs_search_slot (ctree.c:1119)
+==11670== by 0x44E16E: btrfs_read_chunk_tree (volumes.c:1814)
+==11670== by 0x43D654: btrfs_setup_chunk_tree_and_device_map (disk-io.c:1115)
+==11670== by 0x43D7D0: __open_ctree_fd (disk-io.c:1190)
+==11670== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==11670== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==11670== by 0x40E5A1: main (btrfs.c:245)
+==11670== Address 0x4c3bb98 is 8 bytes after a block of size 144 alloc'd
+==11670== at 0x4A08946: calloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+==11670== by 0x44E133: btrfs_read_chunk_tree (volumes.c:1801)
+==11670== by 0x43D654: btrfs_setup_chunk_tree_and_device_map (disk-io.c:1115)
+==11670== by 0x43D7D0: __open_ctree_fd (disk-io.c:1190)
+==11670== by 0x43D964: open_ctree_fs_info (disk-io.c:1231)
+==11670== by 0x427BF4: cmd_check (cmds-check.c:9326)
+==11670== by 0x40E5A1: main (btrfs.c:245)
+
+Program received signal SIGTRAP, Trace/breakpoint trap.
+btrfs_search_slot (trans=trans@entry=0x0, root=root@entry=0x4c36d30, key=key@entry=0xffefff830, p=p@entry=0x4c3bb00,
+ ins_len=ins_len@entry=0, cow=cow@entry=0) at ctree.c:1119
+1119 p->nodes[level] = b;
+(gdb) p p->nodes
+$1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}
+(gdb) p p
+$2 = (struct btrfs_path *) 0x4c3bb00
+(gdb) p b
+$3 = (struct extent_buffer *) 0x4c3a990
+
+
+The corresponding part in ctree.c:btrfs_search_slot() seems to fail to check if `level` overflows outside of `node`:
+
+level = btrfs_header_level(b);
+...
+if (level != btrfs_header_level(b))
+ WARN_ON(1);
+level = btrfs_header_level(b);
+p->nodes[level] = b; // <- Illegal write
+
+Maybe the repeated calls to btrfs_header_level() were meant to do something once, they seem to be noise.
diff --git a/tests/fuzz-tests/images/bko-97271-btrfs-image.raw.xz b/tests/fuzz-tests/images/bko-97271-btrfs-image.raw.xz
new file mode 100644
index 00000000..3c79edb5
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-97271-btrfs-image.raw.xz
Binary files differ
diff --git a/tests/fuzz-tests/images/sys-array-num-stripes-0.raw.txt b/tests/fuzz-tests/images/sys-array-num-stripes-0.raw.txt
new file mode 100644
index 00000000..bdde4e70
--- /dev/null
+++ b/tests/fuzz-tests/images/sys-array-num-stripes-0.raw.txt
@@ -0,0 +1,30 @@
+URL: http://article.gmane.org/gmane.comp.file-systems.btrfs/50230
+Vegard Nossum, 2015-11-15
+
+If sys_array::num_stripes == 0, we hit a BUG_ON during mount:
+
+BTRFS: device fsid 9006933e-2a9a-44f0-917f-514252aeec2c devid 1 transid 7 /dev/loop0
+BTRFS info (device loop0): disk space caching is enabled
+BUG: failure at fs/btrfs/ctree.h:337/btrfs_chunk_item_size()!
+Kernel panic - not syncing: BUG!
+CPU: 0 PID: 313 Comm: mount Not tainted 4.2.5-00657-ge047887-dirty #25
+Stack:
+ 637af890 60062489 602aeb2e 604192ba
+ 60387961 00000011 637af8a0 6038a835
+ 637af9c0 6038776b 634ef32b 00000000
+Call Trace:
+ [<6001c86d>] show_stack+0xfe/0x15b
+ [<6038a835>] dump_stack+0x2a/0x2c
+ [<6038776b>] panic+0x13e/0x2b3
+ [<6020f099>] btrfs_read_sys_array+0x25d/0x2ff
+ [<601cfbbe>] open_ctree+0x192d/0x27af
+ [<6019c2c1>] btrfs_mount+0x8f5/0xb9a
+ [<600bc9a7>] mount_fs+0x11/0xf3
+ [<600d5167>] vfs_kern_mount+0x75/0x11a
+ [<6019bcb0>] btrfs_mount+0x2e4/0xb9a
+ [<600bc9a7>] mount_fs+0x11/0xf3
+ [<600d5167>] vfs_kern_mount+0x75/0x11a
+ [<600d710b>] do_mount+0xa35/0xbc9
+ [<600d7557>] SyS_mount+0x95/0xc8
+
+Fixed by patch (kernel and btrfs-progs): btrfs: handle invalid num_stripes in sys_array
diff --git a/tests/fuzz-tests/images/sys-array-num-stripes-0.raw.xz b/tests/fuzz-tests/images/sys-array-num-stripes-0.raw.xz
new file mode 100644
index 00000000..d64fb300
--- /dev/null
+++ b/tests/fuzz-tests/images/sys-array-num-stripes-0.raw.xz
Binary files differ
diff --git a/tests/misc-tests.sh b/tests/misc-tests.sh
new file mode 100755
index 00000000..2a7f57c5
--- /dev/null
+++ b/tests/misc-tests.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Misc tests
+
+unset TOP
+unset LANG
+LANG=C
+SCRIPT_DIR=$(dirname $(readlink -f $0))
+TOP=$(readlink -f $SCRIPT_DIR/../)
+TEST_DEV=${TEST_DEV:-}
+RESULTS="$TOP/tests/misc-tests-results.txt"
+IMAGE="$TOP/tests/test.img"
+
+source $TOP/tests/common
+
+# Allow child test to use $TOP and $RESULTS
+export TOP
+export RESULTS
+# For custom scripts that need to verify recovery
+export LANG
+# For tests that only use a loop device
+export IMAGE
+
+rm -f $RESULTS
+
+# tests rely on the block corrupting tool
+check_prereq btrfs-corrupt-block
+check_prereq btrfs-image
+check_prereq btrfstune
+check_prereq btrfs
+
+# The tests are driven by their custom script called 'test.sh'
+
+for i in $(find $TOP/tests/misc-tests -maxdepth 1 -mindepth 1 -type d \
+ ${TEST:+-name "$TEST"} | sort)
+do
+ echo " [TEST/misc] $(basename $i)"
+ cd $i
+ echo "=== Entering $i" >> $RESULTS
+ if [ -x test.sh ]; then
+ ./test.sh
+ if [ $? -ne 0 ]; then
+ _fail "test failed for case $(basename $i)"
+ fi
+ fi
+ cd $TOP
+done
diff --git a/tests/misc-tests/001-btrfstune-features/test.sh b/tests/misc-tests/001-btrfstune-features/test.sh
new file mode 100755
index 00000000..c858d701
--- /dev/null
+++ b/tests/misc-tests/001-btrfstune-features/test.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# test btrfstune options that enable filesystem features
+
+source $TOP/tests/common
+
+check_prereq btrfs-debug-tree
+check_prereq btrfs-show-super
+check_prereq mkfs.btrfs
+check_prereq btrfstune
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev
+
+# test whether btrfstune sets a feature flag that is not set after mkfs
+# parameters:
+# - option for mkfs.btrfs -O, empty for defaults
+# - option for btrfstune
+# - string representing the feature in btrfs-show-super dump
+test_feature()
+{
+ local mkfsfeatures
+ local tuneopt
+ local sbflag
+
+ mkfsfeatures=${1:+-O ^$1}
+ tuneopt="$2"
+ sbflag="$3"
+
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f $mkfsfeatures $TEST_DEV
+ if run_check_stdout $TOP/btrfs-show-super $TEST_DEV | \
+ grep -q "$sbflag"; then
+ _fail "FAIL: feature $sbflag must not be set on the base image"
+ fi
+ run_check $TOP/btrfstune $tuneopt $TEST_DEV
+ if ! run_check_stdout $TOP/btrfs-show-super $TEST_DEV | \
+ grep -q "$sbflag"; then
+ _fail "FAIL: feature $sbflag not set"
+ fi
+ run_check $SUDO_HELPER $TOP/btrfs check $TEST_DEV
+}
+
+test_feature extref -r EXTENDED_IREF
+test_feature skinny-metadata -x SKINNY_METADATA
+test_feature no-holes -n NO_HOLES
+test_feature '' '-S 1' SEEDING
diff --git a/tests/misc-tests/002-uuid-rewrite/test.sh b/tests/misc-tests/002-uuid-rewrite/test.sh
new file mode 100755
index 00000000..d84ec6ca
--- /dev/null
+++ b/tests/misc-tests/002-uuid-rewrite/test.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# test btrfstune uuid rewriting options
+
+source $TOP/tests/common
+
+check_prereq btrfs-debug-tree
+check_prereq btrfs-show-super
+check_prereq mkfs.btrfs
+check_prereq btrfstune
+check_prereq btrfs
+
+prepare_test_dev
+
+get_fs_uuid() {
+ local image
+
+ image="$1"
+ run_check_stdout $TOP/btrfs-show-super "$image" | \
+ grep '^fsid' | awk '{print $2}'
+}
+
+test_uuid_random()
+{
+ local origuuid
+
+ origuuid=11111111-a101-4031-b29a-379d4f8b7a2d
+
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f \
+ --uuid $origuuid \
+ --rootdir $TOP/Documentation \
+ $TEST_DEV
+ run_check $TOP/btrfs-show-super "$TEST_DEV"
+ currentfsid=$(run_check_stdout $TOP/btrfstune -f -u $TEST_DEV | \
+ grep -i 'current fsid:' | awk '{print $3}')
+ if ! [ "$currentfsid" = "$origuuid" ]; then
+ _fail "FAIL: current UUID mismatch"
+ fi
+ run_check $TOP/btrfs-show-super "$TEST_DEV"
+ run_check $SUDO_HELPER $TOP/btrfs check $TEST_DEV
+}
+
+test_uuid_user()
+{
+ local origuuid
+ local newuuid
+
+ origuuid=22222222-d324-4f92-80e9-7658bf3b845f
+ newuuid=33333333-bfc9-4045-9399-a396dc6893b3
+
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f \
+ --uuid $origuuid \
+ --rootdir $TOP/Documentation \
+ $TEST_DEV
+ run_check $TOP/btrfs-show-super "$TEST_DEV"
+ run_check $TOP/btrfstune -f -U $newuuid \
+ $TEST_DEV
+ # btrfs-show-super is called within get_fs_uuid
+ fsid=$(get_fs_uuid $TEST_DEV)
+ if ! [ "$fsid" = "$newuuid" ]; then
+ _fail "FAIL: UUID not rewritten"
+ fi
+ run_check $SUDO_HELPER $TOP/btrfs check $TEST_DEV
+}
+
+test_uuid_random
+test_uuid_user
diff --git a/tests/misc-tests/003-zero-log/test.sh b/tests/misc-tests/003-zero-log/test.sh
new file mode 100755
index 00000000..b650930e
--- /dev/null
+++ b/tests/misc-tests/003-zero-log/test.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# test zero-log
+
+source $TOP/tests/common
+
+check_prereq btrfs-show-super
+check_prereq mkfs.btrfs
+check_prereq btrfs
+prepare_test_dev
+
+get_log_root()
+{
+ local image
+
+ image="$1"
+ $TOP/btrfs-show-super "$image" | \
+ grep '^log_root\>' | awk '{print $2}'
+}
+get_log_root_level() {
+ local image
+
+ image="$1"
+ $TOP/btrfs-show-super "$image" | \
+ grep '^log_root_level' | awk '{print $2}'
+}
+
+test_zero_log()
+{
+ # Zero the log of a fresh fs ($1 picks the tool) and verify both log_root fields. FIXME: we need an image with existing log_root
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f \
+ --rootdir $TOP/Documentation \
+ $TEST_DEV
+ run_check $TOP/btrfs-show-super $TEST_DEV
+ if [ "$1" = 'standalone' ]; then
+ run_check $TOP/btrfs rescue zero-log $TEST_DEV
+ else
+ run_check $TOP/btrfs-zero-log $TEST_DEV
+ fi
+ log_root=$(get_log_root $TEST_DEV)
+ log_root_level=$(get_log_root_level $TEST_DEV) # read the level field itself, not log_root again
+ if [ "$log_root" != 0 ]; then
+ _fail "FAIL: log_root not reset"
+ fi
+ if [ "$log_root_level" != 0 ]; then
+ _fail "FAIL: log_root_level not reset"
+ fi
+ run_check $TOP/btrfs-show-super $TEST_DEV
+ run_check $SUDO_HELPER $TOP/btrfs check $TEST_DEV
+}
+
+test_zero_log standalone
+test_zero_log internal
diff --git a/tests/misc-tests/004-shrink-fs/test.sh b/tests/misc-tests/004-shrink-fs/test.sh
new file mode 100755
index 00000000..88740358
--- /dev/null
+++ b/tests/misc-tests/004-shrink-fs/test.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Test getting the minimum size a filesystem can be resized to and verify we
+# are able to resize (shrink) it to that size.
+#
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+
+# Optionally take id of the device to shrink
+shrink_test()
+{
+ min_size=$(run_check_stdout $SUDO_HELPER $TOP/btrfs inspect-internal min-dev-size ${1:+--id $1} $TEST_MNT)
+ min_size=$(echo $min_size | cut -d ' ' -f 1)
+ echo "min size = ${min_size}" >> $RESULTS
+ if [ -z "$min_size" ]; then
+ _fail "Failed to parse minimum size"
+ fi
+ run_check $SUDO_HELPER $TOP/btrfs filesystem resize $min_size $TEST_MNT
+}
+
+run_check truncate -s 20G $IMAGE
+run_check $TOP/mkfs.btrfs -f $IMAGE
+run_check $SUDO_HELPER mount $IMAGE $TEST_MNT
+run_check $SUDO_HELPER chmod a+rw $TEST_MNT
+
+# Create 7 data block groups, each with a size of 1Gb.
+for ((i = 1; i <= 7; i++)); do
+ run_check fallocate -l 1G $TEST_MNT/foo$i
+done
+
+# Make sure they are persisted (all the chunk, device and block group items
+# added to the chunk/dev/extent trees).
+run_check $TOP/btrfs filesystem sync $TEST_MNT
+
+# Now remove 3 of those 1G files. This will result in 3 block groups becoming
+# unused, which will be automatically deleted by the cleaner kthread, and this
+# will result in 3 holes (unallocated space) in the device (each with a size
+# of 1Gb).
+
+run_check rm -f $TEST_MNT/foo2
+run_check rm -f $TEST_MNT/foo4
+run_check rm -f $TEST_MNT/foo6
+
+# Sync once to wake up the cleaner kthread which will delete the unused block
+# groups - it could have been sleeping when they became unused. Then wait a bit
+# to allow the cleaner kthread to delete them and then finally ensure the
+# transaction started by the cleaner kthread is committed.
+run_check $TOP/btrfs filesystem sync $TEST_MNT
+sleep 3
+run_check $TOP/btrfs filesystem sync $TEST_MNT
+
+# Now attempt to get the minimum size we can resize the filesystem to and verify
+# the resize operation succeeds. This size closely matches the sum of the size
+# of all the allocated device extents.
+for ((i = 1; i <= 3; i++)); do
+ shrink_test
+done
+
+# Now convert metadata and system chunks to the single profile and check we are
+# still able to get a correct minimum size and shrink to that size.
+run_check $SUDO_HELPER $TOP/btrfs balance start -mconvert=single \
+ -sconvert=single -f $TEST_MNT
+for ((i = 1; i <= 3; i++)); do
+ shrink_test 1
+done
+
+run_check $SUDO_HELPER umount $TEST_MNT
diff --git a/tests/misc-tests/005-convert-progress-thread-crash/test.sh b/tests/misc-tests/005-convert-progress-thread-crash/test.sh
new file mode 100755
index 00000000..054069c2
--- /dev/null
+++ b/tests/misc-tests/005-convert-progress-thread-crash/test.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# test convert-thread-conflict
+
+source $TOP/tests/common
+
+check_prereq btrfs-convert
+
+mkfs.ext4 -V &>/dev/null || _not_run "mkfs.ext4 not found"
+prepare_test_dev 1G
+
+for ((i = 0; i < 20; i++)); do
+ echo "loop $i" >>$RESULTS
+ mkfs.ext4 -F "$TEST_DEV" &>>$RESULTS || _not_run "mkfs.ext4 failed"
+ run_check $TOP/btrfs-convert "$TEST_DEV"
+done
diff --git a/tests/misc-tests/006-image-on-missing-device/test.sh b/tests/misc-tests/006-image-on-missing-device/test.sh
new file mode 100755
index 00000000..8680a707
--- /dev/null
+++ b/tests/misc-tests/006-image-on-missing-device/test.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# test btrfs-image with a missing device (uses loop devices)
+#
+# - btrfs-image must not loop indefinitely
+# - btrfs-image will expectedly fail to produce the dump
+
+source $TOP/tests/common
+
+check_prereq btrfs-show-super
+check_prereq btrfs-image
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+ndevs=2
+declare -a devs
+dev1=
+dev2=
+
+setup_root_helper
+
+
+# TODO: move the helpers to common
+
+prepare_devices()
+{
+ for i in `seq $ndevs`; do
+ touch img$i
+ chmod a+rw img$i
+ truncate -s0 img$i
+ truncate -s2g img$i
+ devs[$i]=`run_check_stdout $SUDO_HELPER losetup --find --show img$i`
+ done
+}
+
+cleanup_devices()
+{
+ for dev in ${devs[@]}; do
+ run_mayfail $SUDO_HELPER losetup -d $dev
+ done
+ for i in `seq $ndevs`; do
+ truncate -s0 img$i
+ done
+ run_check $SUDO_HELPER losetup --list
+}
+
+test_image_dump()
+{
+ run_check $SUDO_HELPER $TOP/btrfs check $dev1
+ # the output file will be deleted
+ run_mayfail $SUDO_HELPER $TOP/btrfs-image $dev1 /tmp/test-img.dump
+}
+
+test_run()
+{
+ # raid1 on two loop devices: dump the image with both present, then with dev2 detached
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f -d raid1 -m raid1 $dev1 $dev2
+ # we need extents to trigger reading from all devices
+ run_check $SUDO_HELPER mount $dev1 $TEST_MNT
+ run_check $SUDO_HELPER dd if=/dev/zero of=$TEST_MNT/a bs=1M count=10
+ run_check $SUDO_HELPER dd if=/dev/zero of=$TEST_MNT/b bs=4k count=1000 conv=sync
+ run_check $SUDO_HELPER umount $TEST_MNT
+
+ test_image_dump
+ run_check $SUDO_HELPER $TOP/btrfs fi show $dev1 # use the built binary, not one from $PATH
+ # create a degraded raid1 filesystem, check must succeed
+ # btrfs-image must not loop
+ run_mayfail wipefs -a $dev2
+ run_check $SUDO_HELPER losetup -d $dev2
+ run_check $SUDO_HELPER $TOP/btrfs fi show $dev1
+
+ test_image_dump
+}
+
+prepare_devices
+dev1=${devs[1]}
+dev2=${devs[2]}
+test_run
+cleanup_devices
diff --git a/tests/misc-tests/007-subvolume-sync/test.sh b/tests/misc-tests/007-subvolume-sync/test.sh
new file mode 100755
index 00000000..a745fb56
--- /dev/null
+++ b/tests/misc-tests/007-subvolume-sync/test.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# test btrfs subvolume run normally with more than one subvolume
+#
+# - btrfs subvolume must not loop indefinitely
+# - btrfs subvolume returns 0 in the normal case
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev
+
+run_check $SUDO_HELPER $TOP/mkfs.btrfs -f "$TEST_DEV"
+run_check_mount_test_dev
+
+# check the following in both the single and the multiple subvolume case:
+# 1: subvolume sync does not loop indefinitely
+# 2: the return value is right
+#
+run_check $SUDO_HELPER $TOP/btrfs subvolume create "$TEST_MNT"/mysubvol1
+run_check $SUDO_HELPER $TOP/btrfs subvolume create "$TEST_MNT"/mysubvol2
+run_check $SUDO_HELPER $TOP/btrfs subvolume delete "$TEST_MNT"/mysubvol1
+run_check $SUDO_HELPER $TOP/btrfs subvolume delete "$TEST_MNT"/mysubvol2
+run_check $SUDO_HELPER $TOP/btrfs subvolume sync "$TEST_MNT"
+
+run_check $SUDO_HELPER $TOP/btrfs subvolume create "$TEST_MNT"/mysubvol
+run_check $SUDO_HELPER $TOP/btrfs subvolume delete "$TEST_MNT"/mysubvol
+run_check $SUDO_HELPER $TOP/btrfs subvolume sync "$TEST_MNT"
+
+run_check_umount_test_dev
diff --git a/tests/misc-tests/008-leaf-crossing-stripes/test.sh b/tests/misc-tests/008-leaf-crossing-stripes/test.sh
new file mode 100755
index 00000000..03818062
--- /dev/null
+++ b/tests/misc-tests/008-leaf-crossing-stripes/test.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# test if btrfs-convert creates a filesystem without leaf crossing stripes
+
+source $TOP/tests/common
+
+check_prereq btrfs-convert
+check_prereq btrfs
+
+# In my test, it happened in 514M~560M, 737M~769M, 929M~917M,
+# and HAVE_ERROR=((size + 1) / 2) % 2 if size >= 970
+#
+SIZE_FROM=514
+SIZE_END=560
+A_PRIME_NUM=17
+for ((size = SIZE_FROM; size <= SIZE_END; size += A_PRIME_NUM)); do
+ run_check truncate -s "$size"M "$IMAGE"
+ run_check mkfs.ext4 -F "$IMAGE"
+ run_check $TOP/btrfs-convert "$IMAGE"
+ run_check_stdout $TOP/btrfs check "$IMAGE" 2>&1 |
+ grep -q "crossing stripe boundary" &&
+ _fail "leaf crossing stripes after btrfs-convert"
+done
+
+# grep will expectedly fail
+exit 0
diff --git a/tests/misc-tests/009-subvolume-sync-must-wait/test.sh b/tests/misc-tests/009-subvolume-sync-must-wait/test.sh
new file mode 100755
index 00000000..056584e5
--- /dev/null
+++ b/tests/misc-tests/009-subvolume-sync-must-wait/test.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Verify that subvolume sync waits until the subvolume is cleaned
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+
+run_check truncate -s 2G $IMAGE
+run_check $TOP/mkfs.btrfs -f $IMAGE
+run_check $SUDO_HELPER mount $IMAGE $TEST_MNT
+run_check $SUDO_HELPER chmod a+rw $TEST_MNT
+
+cd $TEST_MNT
+
+for i in `seq 5`; do
+ run_check dd if=/dev/zero of=file$i bs=1M count=10
+done
+
+for sn in `seq 4`;do
+ run_check $SUDO_HELPER $TOP/btrfs subvolume snapshot . snap$sn
+ for i in `seq 10`; do
+ run_check dd if=/dev/zero of=snap$sn/file$i bs=1M count=10
+ done
+done
+
+run_check $SUDO_HELPER $TOP/btrfs subvolume list .
+run_check $SUDO_HELPER $TOP/btrfs subvolume list -d .
+
+idtodel=`run_check_stdout $SUDO_HELPER $TOP/btrfs inspect-internal rootid snap3`
+
+# delete, sync after some time
+run_check $SUDO_HELPER $TOP/btrfs subvolume delete -c snap3
+{ sleep 5; run_check $TOP/btrfs filesystem sync $TEST_MNT; } &
+
+run_check $SUDO_HELPER $TOP/btrfs subvolume sync . $idtodel
+
+if run_check_stdout $SUDO_HELPER $TOP/btrfs subvolume list -d . |
+ grep -q "ID $idtodel.*DELETED"; then
+ _fail "sync did not wait for the subvolume cleanup"
+fi
+
+run_check $TOP/btrfs filesystem sync $TEST_MNT
+run_check $SUDO_HELPER $TOP/btrfs subvolume list -d .
+
+wait
+cd ..
+
+run_check $SUDO_HELPER umount $TEST_MNT
diff --git a/tests/misc-tests/010-convert-delete-ext2-subvol/test.sh b/tests/misc-tests/010-convert-delete-ext2-subvol/test.sh
new file mode 100755
index 00000000..451e453a
--- /dev/null
+++ b/tests/misc-tests/010-convert-delete-ext2-subvol/test.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# verify that convert rollback finds the ext2_subvolume intact and fails if it
+# was partially deleted
+
+source $TOP/tests/common
+
+check_prereq btrfs-convert
+check_prereq btrfs-debug-tree
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev
+
+run_check truncate -s 2G "$TEST_DEV"
+run_check mkfs.ext4 -F "$TEST_DEV"
+run_check $TOP/btrfs-convert "$TEST_DEV"
+run_check $TOP/btrfs-debug-tree "$TEST_DEV"
+run_check_mount_test_dev
+run_check $SUDO_HELPER $TOP/btrfs subvolume delete -c "$TEST_MNT/ext2_saved"
+run_check_umount_test_dev
+run_check $TOP/btrfs-debug-tree "$TEST_DEV"
+run_check_stdout $TOP/btrfs-convert --rollback "$TEST_DEV" |
+ grep -q 'is it deleted' || _fail "unexpected rollback"
+
+exit 0
diff --git a/tests/misc-tests/011-delete-missing-device/test.sh b/tests/misc-tests/011-delete-missing-device/test.sh
new file mode 100755
index 00000000..26645f10
--- /dev/null
+++ b/tests/misc-tests/011-delete-missing-device/test.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# make sure that 'missing' is accepted for device deletion
+
+source $TOP/tests/common
+
+check_prereq btrfs-show-super
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+ndevs=4
+declare -a devs
+dev1=
+devtodel=
+
+setup_root_helper
+
+prepare_devices()
+{
+ for i in `seq $ndevs`; do
+ touch img$i
+ chmod a+rw img$i
+ truncate -s0 img$i
+ truncate -s2g img$i
+ devs[$i]=`run_check_stdout $SUDO_HELPER losetup --find --show img$i`
+ done
+}
+
+cleanup_devices()
+{
+ for dev in ${devs[@]}; do
+ run_mayfail $SUDO_HELPER losetup -d $dev
+ done
+ for i in `seq $ndevs`; do
+ truncate -s0 img$i
+ done
+ run_check $SUDO_HELPER losetup --list
+}
+
+test_do_mkfs()
+{
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f $@ ${devs[@]}
+ run_check $TOP/btrfs-show-super $dev1
+ run_check $SUDO_HELPER $TOP/btrfs check $dev1
+ run_check $TOP/btrfs filesystem show
+}
+
+test_wipefs()
+{
+ run_check wipefs -a $devtodel
+ run_check $SUDO_HELPER losetup -d $devtodel
+ run_check losetup -a
+ run_check $TOP/btrfs filesystem show
+}
+test_delete_missing()
+{
+ run_check_mount_test_dev -o degraded
+ run_check $SUDO_HELPER $TOP/btrfs filesystem show $TEST_MNT
+ run_check $SUDO_HELPER $TOP/btrfs device delete missing $TEST_MNT
+ run_check $SUDO_HELPER $TOP/btrfs filesystem show $TEST_MNT
+ run_check_umount_test_dev
+
+ run_check_mount_test_dev
+ local out
+ out="$(run_check_stdout $SUDO_HELPER $TOP/btrfs filesystem show $TEST_MNT)"
+ if echo "$out" | grep -q -- "$devtodel"; then
+ _fail "device $devtodel not deleted"
+ fi
+ if echo "$out" | grep -q missing; then
+ _fail "missing device still present"
+ fi
+ run_check_umount_test_dev
+}
+
+prepare_devices
+dev1=${devs[1]}
+devtodel=${devs[3]}
+TEST_DEV=$dev1
+
+test_do_mkfs
+test_wipefs
+test_delete_missing
+
+cleanup_devices
diff --git a/tests/misc-tests/012-find-root-no-result/first_meta_chunk.btrfs-image b/tests/misc-tests/012-find-root-no-result/first_meta_chunk.btrfs-image
new file mode 100644
index 00000000..7bf6c509
--- /dev/null
+++ b/tests/misc-tests/012-find-root-no-result/first_meta_chunk.btrfs-image
Binary files differ
diff --git a/tests/misc-tests/012-find-root-no-result/test.sh b/tests/misc-tests/012-find-root-no-result/test.sh
new file mode 100755
index 00000000..983a8a1e
--- /dev/null
+++ b/tests/misc-tests/012-find-root-no-result/test.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Regression test for case btrfs-find-root may print no result on a
+# recent fs or balanced fs, whose metadata chunk is the first chunk
+# and the only metadata chunk
+
+source $TOP/tests/common
+
+check_prereq btrfs-find-root
+check_prereq btrfs-image
+
+run_check $TOP/btrfs-image -r first_meta_chunk.btrfs-image test.img || \
+ _fail "failed to extract first_meta_chunk.btrfs-image"
+
+result=$(run_check_stdout $TOP/btrfs-find-root test.img | sed '/^Superblock/d')
+
+if [ -z "$result" ]; then
+ _fail "btrfs-find-root failed to find tree root"
+fi
+
+if ! echo "$result" | grep -q 'Found tree root at'; then
+ _fail "btrfs-find-root failed to find tree root, unexpected output"
+fi
+
+rm test.img
diff --git a/tests/mkfs-tests.sh b/tests/mkfs-tests.sh
new file mode 100755
index 00000000..c0635ad1
--- /dev/null
+++ b/tests/mkfs-tests.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# mkfs.btrfs tests
+
+unset TOP
+unset LANG
+LANG=C
+SCRIPT_DIR=$(dirname $(readlink -f $0))
+TOP=$(readlink -f $SCRIPT_DIR/../)
+TEST_DEV=${TEST_DEV:-}
+RESULTS="$TOP/tests/mkfs-tests-results.txt"
+IMAGE="$TOP/tests/test.img"
+
+source $TOP/tests/common
+
+# Allow child test to use $TOP and $RESULTS
+export TOP
+export RESULTS
+# For custom scripts that need to verify recovery
+export LANG
+# For tests that only use a loop device
+export IMAGE
+
+rm -f $RESULTS
+
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+# The tests are driven by their custom script called 'test.sh'
+
+for i in $(find $TOP/tests/mkfs-tests -maxdepth 1 -mindepth 1 -type d \
+ ${TEST:+-name "$TEST"} | sort)
+do
+ echo " [TEST/mkfs] $(basename $i)"
+ cd $i
+ echo "=== Entering $i" >> $RESULTS
+ if [ -x test.sh ]; then
+ ./test.sh
+ if [ $? -ne 0 ]; then
+ _fail "test failed for case $(basename $i)"
+ fi
+ fi
+ cd $TOP
+done
diff --git a/tests/mkfs-tests/001-basic-profiles/test.sh b/tests/mkfs-tests/001-basic-profiles/test.sh
new file mode 100755
index 00000000..2747d429
--- /dev/null
+++ b/tests/mkfs-tests/001-basic-profiles/test.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+# test various blockgroup profile combinations, use loop devices as block
+# devices
+
+source $TOP/tests/common
+
+check_prereq btrfs-show-super
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+ndevs=4
+declare -a devs
+dev1=
+
+setup_root_helper
+
+prepare_devices()
+{
+ for i in `seq $ndevs`; do
+ touch img$i
+ chmod a+rw img$i
+ truncate -s0 img$i
+ truncate -s2g img$i
+ devs[$i]=`run_check_stdout $SUDO_HELPER losetup --find --show img$i`
+ done
+}
+
+cleanup_devices()
+{
+ for dev in ${devs[@]}; do
+ run_check $SUDO_HELPER losetup -d $dev
+ done
+ for i in `seq $ndevs`; do
+ truncate -s0 img$i
+ done
+ run_check $SUDO_HELPER losetup --list
+}
+
+test_get_info()
+{
+ run_check $TOP/btrfs-show-super $dev1
+ run_check $SUDO_HELPER $TOP/btrfs check $dev1
+ run_check $SUDO_HELPER mount $dev1 $TEST_MNT
+ run_check $TOP/btrfs filesystem df $TEST_MNT
+ run_check $SUDO_HELPER $TOP/btrfs filesystem usage $TEST_MNT
+ run_check $SUDO_HELPER $TOP/btrfs device usage $TEST_MNT
+ run_check $SUDO_HELPER umount "$TEST_MNT"
+}
+test_do_mkfs()
+{
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f \
+ $@
+}
+
+test_mkfs_single()
+{
+ test_do_mkfs $@ $dev1
+ test_get_info
+}
+test_mkfs_multi()
+{
+ test_do_mkfs $@ ${devs[@]}
+ test_get_info
+}
+
+prepare_devices
+dev1=${devs[1]}
+
+test_mkfs_single
+test_mkfs_single -d single -m single
+test_mkfs_single -d single -m single --mixed
+test_mkfs_single -d single -m dup
+test_mkfs_single -d dup -m single
+test_mkfs_single -d dup -m dup
+test_mkfs_single -d dup -m dup --mixed
+
+test_mkfs_multi
+test_mkfs_multi -d single -m single
+test_mkfs_multi -d single -m single --mixed
+test_mkfs_multi -d raid0 -m raid0
+test_mkfs_multi -d raid0 -m raid0 --mixed
+test_mkfs_multi -d raid1 -m raid1
+test_mkfs_multi -d raid1 -m raid1 --mixed
+test_mkfs_multi -d raid10 -m raid10
+test_mkfs_multi -d raid10 -m raid10 --mixed
+test_mkfs_multi -d raid5 -m raid5
+test_mkfs_multi -d raid5 -m raid5 --mixed
+test_mkfs_multi -d raid6 -m raid6
+test_mkfs_multi -d raid6 -m raid6 --mixed
+
+cleanup_devices
diff --git a/tests/mkfs-tests/002-no-force-mixed-on-small-volume/test.sh b/tests/mkfs-tests/002-no-force-mixed-on-small-volume/test.sh
new file mode 100755
index 00000000..855fbd18
--- /dev/null
+++ b/tests/mkfs-tests/002-no-force-mixed-on-small-volume/test.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# Verify that we do not force mixed block groups on small volumes anymore
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+
+setup_root_helper
+
+run_check truncate -s 512M $IMAGE
+mixed=$(run_check_stdout $TOP/mkfs.btrfs -n 64k -f $IMAGE | egrep 'Data|Metadata')
+echo "$mixed" | grep -q -v 'Data+Metadata:' || _fail "unexpected: created a mixed-bg filesystem"
diff --git a/tests/mkfs-tests/003-mixed-with-wrong-nodesize/test.sh b/tests/mkfs-tests/003-mixed-with-wrong-nodesize/test.sh
new file mode 100755
index 00000000..289d5ff0
--- /dev/null
+++ b/tests/mkfs-tests/003-mixed-with-wrong-nodesize/test.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+#
+# Mixed mode needs equal sectorsize and nodesize
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+
+run_check truncate -s 512M $IMAGE
+run_mayfail $TOP/mkfs.btrfs -f -M -s 4096 -n 16384 "$IMAGE" && _fail
+
+exit 0
diff --git a/tests/mkfs-tests/004-rootdir-keeps-size/test.sh b/tests/mkfs-tests/004-rootdir-keeps-size/test.sh
new file mode 100755
index 00000000..7038c8ea
--- /dev/null
+++ b/tests/mkfs-tests/004-rootdir-keeps-size/test.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# make sure that mkfs.btrfs --rootdir does not change size of the image
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+
+prepare_test_dev
+
+test_mkfs_with_size() {
+ local size
+ local imgsize
+ local tmp
+
+ size="$1"
+ run_check truncate -s$size $TEST_DEV
+ imgsize=$(run_check_stdout stat --format=%s $TEST_DEV)
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f \
+ --rootdir $TOP/Documentation \
+ $TEST_DEV
+ tmp=$(run_check_stdout stat --format=%s $TEST_DEV)
+ if ! [ "$imgsize" = "$tmp" ]; then
+ _fail "image size changed from $imgsize to $tmp"
+ fi
+}
+
+test_mkfs_with_size 128M
+test_mkfs_with_size 256M
+test_mkfs_with_size 512M
+test_mkfs_with_size 1G
+test_mkfs_with_size 2G
diff --git a/tests/mkfs-tests/005-long-device-name-for-ssd/test.sh b/tests/mkfs-tests/005-long-device-name-for-ssd/test.sh
new file mode 100755
index 00000000..c89ee0e1
--- /dev/null
+++ b/tests/mkfs-tests/005-long-device-name-for-ssd/test.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# a long device name must pass the SSD test
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+check_prereq btrfs-show-super
+
+setup_root_helper
+prepare_test_dev
+
+# prep device
+dmname=\
+btrfs-test-with-very-long-name-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+dmdev=/dev/mapper/$dmname
+
+run_check truncate -s0 img
+chmod a+w img
+run_check truncate -s2g img
+
+loopdev=`run_check_stdout $SUDO_HELPER losetup --find --show img`
+run_check $SUDO_HELPER dmsetup create $dmname --table "0 1048576 linear $loopdev 0"
+
+base=`basename "$loopdev"`
+rot=/sys/class/block/$base/queue/rotational
+
+# switch rotational
+run_check cat $rot
+echo 0 | run_check $SUDO_HELPER tee $rot
+run_check cat $rot
+
+# test
+run_check_stdout $SUDO_HELPER $TOP/mkfs.btrfs -f $@ $dmdev |
+ grep -q 'SSD detected:.*yes' || _fail 'SSD not detected'
+run_check $TOP/btrfs-show-super $dmdev
+
+# cleanup
+run_check $SUDO_HELPER dmsetup remove $dmname
+run_mayfail $SUDO_HELPER losetup -d $loopdev
+run_check truncate -s0 img
diff --git a/tests/mkfs-tests/006-partitioned-loopdev/partition-1g-1g b/tests/mkfs-tests/006-partitioned-loopdev/partition-1g-1g
new file mode 100644
index 00000000..eb057769
--- /dev/null
+++ b/tests/mkfs-tests/006-partitioned-loopdev/partition-1g-1g
Binary files differ
diff --git a/tests/mkfs-tests/006-partitioned-loopdev/test.sh b/tests/mkfs-tests/006-partitioned-loopdev/test.sh
new file mode 100755
index 00000000..7c9fb829
--- /dev/null
+++ b/tests/mkfs-tests/006-partitioned-loopdev/test.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# recognize partitioned loop devices
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+check_prereq btrfs-show-super
+
+setup_root_helper
+
+run_check truncate -s0 img
+chmod a+w img
+cp partition-1g-1g img
+run_check truncate -s2g img
+
+loopdev=$(run_check_stdout $SUDO_HELPER losetup --partscan --find --show img)
+base=$(basename $loopdev)
+
+# expect partitions named like loop0p1 etc
+for looppart in $(ls /dev/$base?*); do
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f $looppart
+ run_check $TOP/btrfs-show-super $looppart
+done
+
+# cleanup
+run_check $SUDO_HELPER losetup -d $loopdev
+run_check truncate -s0 img
diff --git a/tests/mkfs-tests/007-mix-nodesize-sectorsize/test.sh b/tests/mkfs-tests/007-mix-nodesize-sectorsize/test.sh
new file mode 100755
index 00000000..d5374cbd
--- /dev/null
+++ b/tests/mkfs-tests/007-mix-nodesize-sectorsize/test.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# iterate over nodesize and sectorsize combinations
+
+source $TOP/tests/common
+
+check_prereq btrfs-show-super
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev
+
+test_mkfs_single()
+{
+ run_check $SUDO_HELPER $TOP/mkfs.btrfs -f "$@" $TEST_DEV
+ run_check $TOP/btrfs-show-super $TEST_DEV
+ run_check $SUDO_HELPER $TOP/btrfs check $TEST_DEV
+}
+
+# default
+test_mkfs_single
+
+# nodesize >= sectorsize
+for nodesize in 4096 8192 16384 32768 65536; do
+ for sectorsize in 4096 8192 16384 32768 65536; do
+ [ $nodesize -lt $sectorsize ] && continue
+ test_mkfs_single -n $nodesize -s $sectorsize -d single -m single
+ test_mkfs_single -n $nodesize -s $sectorsize -d single -m dup
+ done
+done
+
+# nodesize, mixed mode
+for nodesize in 4k 8k 16k 32k 64k; do
+ test_mkfs_single -n $nodesize -s $nodesize -d single -m single --mixed
+ test_mkfs_single -n $nodesize -s $nodesize -d dup -m dup --mixed
+done
diff --git a/tests/mkfs-tests/008-secorsize-nodesize-combination/test.sh b/tests/mkfs-tests/008-secorsize-nodesize-combination/test.sh
new file mode 100755
index 00000000..79cc2b22
--- /dev/null
+++ b/tests/mkfs-tests/008-secorsize-nodesize-combination/test.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# test various sectorsize and node size combinations
+# including valid and invalid ones
+# only do mkfs and fsck check, no mounting as
+# sub/multi-pagesize is not supported yet
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+prepare_test_dev
+
+# disable mixed bg to avoid sectorsize == nodesize check
+features="^mixed-bg"
+
+# caller needs to check whether the combination is valid
+do_test()
+{
+ sectorsize=$1
+ nodesize=$2
+ run_mayfail $TOP/mkfs.btrfs -O $features -n $nodesize -s $sectorsize \
+ $TEST_DEV
+ ret=$?
+ if [ $ret == 0 ]; then
+ run_check $TOP/btrfs check $TEST_DEV
+ fi
+ return $ret
+}
+
+# Invalid: Unaligned sectorsize and nodesize
+do_test 8191 8191 && _fail
+
+# Invalid: Aligned sectorsize with unaligned nodesize
+do_test 4k 16385 && _fail
+
+# Invalid: Unaligned sectorsize with aligned nodesize
+do_test 8191 16k && _fail
+
+# Valid: Aligned sectorsize and nodesize
+do_test 4k 16k || _fail
+
+# Invalid: Sectorsize larger than nodesize
+do_test 8k 4k && _fail
+
+# Invalid: too large nodesize
+do_test 16k 128k && _fail
+
+# Valid: large sectorsize
+do_test 64k 64k || _fail
diff --git a/transaction.h b/transaction.h
new file mode 100644
index 00000000..13e09a64
--- /dev/null
+++ b/transaction.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_TRANSACTION_H__
+#define __BTRFS_TRANSACTION_H__
+
+#include "kerncompat.h"
+#include "ctree.h"
+
+struct btrfs_trans_handle {
+ u64 transid;
+ u64 alloc_exclude_start;
+ u64 alloc_exclude_nr;
+ unsigned long blocks_reserved;
+ unsigned long blocks_used;
+ struct btrfs_block_group_cache *block_group;
+};
+
+static inline struct btrfs_trans_handle *
+btrfs_start_transaction(struct btrfs_root *root, int num_blocks)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_trans_handle *h = malloc(sizeof(*h));
+
+ BUG_ON(!h);
+ BUG_ON(root->commit_root);
+ BUG_ON(fs_info->running_transaction);
+ fs_info->running_transaction = h;
+ fs_info->generation++;
+ h->transid = fs_info->generation;
+ h->alloc_exclude_start = 0;
+ h->alloc_exclude_nr = 0;
+ h->blocks_reserved = num_blocks;
+ h->blocks_used = 0;
+ h->block_group = NULL;
+ root->last_trans = h->transid;
+ root->commit_root = root->node;
+ extent_buffer_get(root->node);
+ return h;
+}
+
+static inline void btrfs_free_transaction(struct btrfs_root *root,
+ struct btrfs_trans_handle *handle)
+{
+ memset(handle, 0, sizeof(*handle));
+ free(handle);
+}
+
+#endif
diff --git a/ulist.c b/ulist.c
new file mode 100644
index 00000000..60fdc091
--- /dev/null
+++ b/ulist.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2011 STRATO AG
+ * written by Arne Jansen <sensille@gmx.net>
+ * Distributed under the GNU GPL license version 2.
+ */
+
+//#include <linux/slab.h>
+#include <stdlib.h>
+#include "kerncompat.h"
+#include "ulist.h"
+#include "ctree.h"
+
+/*
+ * ulist is a generic data structure to hold a collection of unique u64
+ * values. The only operations it supports are adding to the list and
+ * enumerating it.
+ * It is possible to store an auxiliary value along with the key.
+ *
+ * A sample usage for ulists is the enumeration of directed graphs without
+ * visiting a node twice. The pseudo-code could look like this:
+ *
+ * ulist = ulist_alloc();
+ * ulist_add(ulist, root);
+ * ULIST_ITER_INIT(&uiter);
+ *
+ * while ((elem = ulist_next(ulist, &uiter))) {
+ * for (all child nodes n in elem)
+ * ulist_add(ulist, n);
+ * do something useful with the node;
+ * }
+ * ulist_free(ulist);
+ *
+ * This assumes the graph nodes are addressable by u64. This stems from the
+ * usage for tree enumeration in btrfs, where the logical addresses are
+ * 64 bit.
+ *
+ * It is also useful for tree enumeration which could be done elegantly
+ * recursively, but is not possible due to kernel stack limitations. The
+ * loop would be similar to the above.
+ */
+
+/**
+ * ulist_init - freshly initialize a ulist
+ * @ulist: the ulist to initialize
+ *
+ * Note: don't use this function to init an already used ulist, use
+ * ulist_reinit instead.
+ */
+void ulist_init(struct ulist *ulist)
+{
+ INIT_LIST_HEAD(&ulist->nodes);
+ ulist->root = RB_ROOT;
+ ulist->nnodes = 0;
+}
+
+/**
+ * ulist_fini - free up additionally allocated memory for the ulist
+ * @ulist: the ulist from which to free the additional memory
+ *
+ * This is useful in cases where the base 'struct ulist' has been statically
+ * allocated.
+ */
+static void ulist_fini(struct ulist *ulist)
+{
+ struct ulist_node *node;
+ struct ulist_node *next;
+
+ list_for_each_entry_safe(node, next, &ulist->nodes, list) {
+ kfree(node);
+ }
+ ulist->root = RB_ROOT;
+ INIT_LIST_HEAD(&ulist->nodes);
+}
+
+/**
+ * ulist_reinit - prepare a ulist for reuse
+ * @ulist: ulist to be reused
+ *
+ * Free up all additional memory allocated for the list elements and reinit
+ * the ulist.
+ */
+void ulist_reinit(struct ulist *ulist)
+{
+ ulist_fini(ulist);
+ ulist_init(ulist);
+}
+
+/**
+ * ulist_alloc - dynamically allocate a ulist
+ * @gfp_mask: allocation flags to use for the base allocation
+ *
+ * The allocated ulist will be returned in an initialized state.
+ */
+struct ulist *ulist_alloc(gfp_t gfp_mask)
+{
+ struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
+
+ if (!ulist)
+ return NULL;
+
+ ulist_init(ulist);
+
+ return ulist;
+}
+
+/**
+ * ulist_free - free dynamically allocated ulist
+ * @ulist: ulist to free
+ *
+ * It is not necessary to call ulist_fini before.
+ */
+void ulist_free(struct ulist *ulist)
+{
+ if (!ulist)
+ return;
+ ulist_fini(ulist);
+ kfree(ulist);
+}
+
+static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
+{
+ struct rb_node *n = ulist->root.rb_node;
+ struct ulist_node *u = NULL;
+
+ while (n) {
+ u = rb_entry(n, struct ulist_node, rb_node);
+ if (u->val < val)
+ n = n->rb_right;
+ else if (u->val > val)
+ n = n->rb_left;
+ else
+ return u;
+ }
+ return NULL;
+}
+
+static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
+{
+ struct rb_node **p = &ulist->root.rb_node;
+ struct rb_node *parent = NULL;
+ struct ulist_node *cur = NULL;
+
+ while (*p) {
+ parent = *p;
+ cur = rb_entry(parent, struct ulist_node, rb_node);
+
+ if (cur->val < ins->val)
+ p = &(*p)->rb_right;
+ else if (cur->val > ins->val)
+ p = &(*p)->rb_left;
+ else
+ return -EEXIST;
+ }
+ rb_link_node(&ins->rb_node, parent, p);
+ rb_insert_color(&ins->rb_node, &ulist->root);
+ return 0;
+}
+
+/**
+ * ulist_add - add an element to the ulist
+ * @ulist: ulist to add the element to
+ * @val: value to add to ulist
+ * @aux: auxiliary value to store along with val
+ * @gfp_mask: flags to use for allocation
+ *
+ * Note: locking must be provided by the caller. In case of rwlocks write
+ * locking is needed
+ *
+ * Add an element to a ulist. The @val will only be added if it doesn't
+ * already exist. If it is added, the auxiliary value @aux is stored along with
+ * it. In case @val already exists in the ulist, @aux is ignored, even if
+ * it differs from the already stored value.
+ *
+ * ulist_add returns 0 if @val already exists in ulist and 1 if @val has been
+ * inserted.
+ * In case of allocation failure -ENOMEM is returned and the ulist stays
+ * unaltered.
+ */
+int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
+{
+ return ulist_add_merge(ulist, val, aux, NULL, gfp_mask);
+}
+
+int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
+ u64 *old_aux, gfp_t gfp_mask)
+{
+ int ret;
+ struct ulist_node *node;
+
+ node = ulist_rbtree_search(ulist, val);
+ if (node) {
+ if (old_aux)
+ *old_aux = node->aux;
+ return 0;
+ }
+ node = kmalloc(sizeof(*node), gfp_mask);
+ if (!node)
+ return -ENOMEM;
+
+ node->val = val;
+ node->aux = aux;
+#ifdef CONFIG_BTRFS_DEBUG
+ node->seqnum = ulist->nnodes;
+#endif
+
+ ret = ulist_rbtree_insert(ulist, node);
+ ASSERT(!ret);
+ list_add_tail(&node->list, &ulist->nodes);
+ ulist->nnodes++;
+
+ return 1;
+}
+
+/**
+ * ulist_next - iterate ulist
+ * @ulist: ulist to iterate
+ * @uiter: iterator variable, initialized with ULIST_ITER_INIT(&iterator)
+ *
+ * Note: locking must be provided by the caller. In case of rwlocks only read
+ * locking is needed
+ *
+ * This function is used to iterate an ulist.
+ * It returns the next element from the ulist or %NULL when the
+ * end is reached. No guarantee is made with respect to the order in which
+ * the elements are returned. They might neither be returned in order of
+ * addition nor in ascending order.
+ * It is allowed to call ulist_add during an enumeration. Newly added items
+ * are guaranteed to show up in the running enumeration.
+ */
+struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
+{
+ struct ulist_node *node;
+
+ if (list_empty(&ulist->nodes))
+ return NULL;
+ if (uiter->cur_list && uiter->cur_list->next == &ulist->nodes)
+ return NULL;
+ if (uiter->cur_list) {
+ uiter->cur_list = uiter->cur_list->next;
+ } else {
+ uiter->cur_list = ulist->nodes.next;
+#ifdef CONFIG_BTRFS_DEBUG
+ uiter->i = 0;
+#endif
+ }
+ node = list_entry(uiter->cur_list, struct ulist_node, list);
+#ifdef CONFIG_BTRFS_DEBUG
+ ASSERT(node->seqnum == uiter->i);
+ ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
+ uiter->i++;
+#endif
+ return node;
+}
diff --git a/ulist.h b/ulist.h
new file mode 100644
index 00000000..88f5b826
--- /dev/null
+++ b/ulist.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2011 STRATO AG
+ * written by Arne Jansen <sensille@gmx.net>
+ * Distributed under the GNU GPL license version 2.
+ *
+ */
+
+#ifndef __ULIST_H__
+#define __ULIST_H__
+
+#include "kerncompat.h"
+#include "list.h"
+#include "rbtree.h"
+
+/*
+ * ulist is a generic data structure to hold a collection of unique u64
+ * values. The only operations it supports are adding to the list and
+ * enumerating it.
+ * It is possible to store an auxiliary value along with the key.
+ *
+ */
+
+/*
+ * Iteration state for ulist_next(). Initialize with ULIST_ITER_INIT() before
+ * the first call.
+ */
+struct ulist_iterator {
+#ifdef CONFIG_BTRFS_DEBUG
+ int i; /* number of elements returned so far, checked against seqnum */
+#endif
+ struct list_head *cur_list; /* element returned last, NULL before the first call */
+};
+
+/*
+ * element of the list; each node is linked both into the list used for
+ * enumeration and into the rbtree used for lookup
+ */
+struct ulist_node {
+ u64 val; /* value to store, the key that must be unique */
+ u64 aux; /* auxiliary value saved along with the val */
+
+#ifdef CONFIG_BTRFS_DEBUG
+ int seqnum; /* number of nodes already in the ulist when this one was added */
+#endif
+
+ struct list_head list; /* links the node into ulist->nodes */
+ struct rb_node rb_node; /* links the node into ulist->root to speed up search */
+};
+
+/*
+ * the container itself: a list of nodes for enumeration plus an rbtree
+ * over the same nodes for lookup by value
+ */
+struct ulist {
+ /*
+ * number of elements stored in list
+ */
+ unsigned long nnodes;
+
+ struct list_head nodes; /* all nodes, new ones are appended at the tail */
+ struct rb_root root; /* the same nodes, searchable by val */
+};
+
+void ulist_init(struct ulist *ulist);
+void ulist_reinit(struct ulist *ulist);
+struct ulist *ulist_alloc(gfp_t gfp_mask);
+void ulist_free(struct ulist *ulist);
+int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
+int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
+ u64 *old_aux, gfp_t gfp_mask);
+
+/*
+ * Just like ulist_add_merge() but take a pointer for the aux data.
+ *
+ * @aux is stored converted to an integer.  When @val is already present
+ * (return value 0) and @old_aux is not NULL, the previously stored aux value
+ * is returned through *@old_aux converted back to a pointer; otherwise
+ * *@old_aux is left untouched.  @old_aux may be NULL, exactly as for
+ * ulist_add_merge().
+ *
+ * The conversion goes through a local u64 on every architecture, so the
+ * caller's pointer object is never accessed as an u64 and a NULL @old_aux is
+ * never dereferenced.
+ */
+static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
+				      void **old_aux, gfp_t gfp_mask)
+{
+	u64 old64 = 0;
+	int ret;
+
+	ret = ulist_add_merge(ulist, val, (uintptr_t)aux,
+			      old_aux ? &old64 : NULL, gfp_mask);
+	/* ulist_add_merge() only reports an old aux value when it returns 0 */
+	if (ret == 0 && old_aux)
+		*old_aux = (void *)((uintptr_t)old64);
+	return ret;
+}
+
+struct ulist_node *ulist_next(struct ulist *ulist,
+ struct ulist_iterator *uiter);
+
+#define ULIST_ITER_INIT(uiter) ((uiter)->cur_list = NULL)
+
+#endif
diff --git a/utils-lib.c b/utils-lib.c
new file mode 100644
index 00000000..79ef35e3
--- /dev/null
+++ b/utils-lib.c
@@ -0,0 +1,40 @@
+#include "kerncompat.h"
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#if BTRFS_FLAT_INCLUDES
+#else
+#endif /* BTRFS_FLAT_INCLUDES */
+
+/*
+ * Parse a command line argument as an unsigned 64bit number (decimal, octal
+ * with a leading 0 or hexadecimal with a leading 0x).
+ *
+ * This function should be only used when parsing command arg, it won't return
+ * error to its caller and rather exit directly just like usage().
+ *
+ * The program exits with status 1 when @str contains no number at all, has
+ * trailing garbage, is negative or does not fit into 64 bits.
+ */
+u64 arg_strtou64(const char *str)
+{
+	const char *first = str;
+	char *ptr_parse_end = NULL;
+	u64 value;
+	int parse_errno;
+
+	errno = 0;
+	value = strtoull(str, &ptr_parse_end, 0);
+	parse_errno = errno;
+
+	/*
+	 * ptr_parse_end == str means that no digits were consumed at all
+	 * (empty string or only white space), which is not a number either.
+	 */
+	if (ptr_parse_end == str || *ptr_parse_end != '\0') {
+		fprintf(stderr, "ERROR: %s is not a valid numeric value.\n",
+			str);
+		exit(1);
+	}
+
+	/*
+	 * if we pass a negative number to strtoull, it will return an
+	 * unexpected number to us, so let's do the check ourselves.
+	 * strtoull skips leading white space before the sign, so do the same.
+	 */
+	while (*first == ' ' || (*first >= '\t' && *first <= '\r'))
+		first++;
+	if (*first == '-') {
+		fprintf(stderr, "ERROR: %s: negative value is invalid.\n",
+			str);
+		exit(1);
+	}
+
+	/* overflow is reported through errno, ULLONG_MAX itself is valid */
+	if (parse_errno == ERANGE) {
+		fprintf(stderr, "ERROR: %s is too large.\n", str);
+		exit(1);
+	}
+	return value;
+}
diff --git a/utils.c b/utils.c
new file mode 100644
index 00000000..eabc36dc
--- /dev/null
+++ b/utils.c
@@ -0,0 +1,3150 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ * Copyright (C) 2008 Morey Roof. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <uuid/uuid.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <mntent.h>
+#include <ctype.h>
+#include <linux/loop.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <limits.h>
+#include <blkid/blkid.h>
+#include <sys/vfs.h>
+#include <sys/statfs.h>
+#include <linux/magic.h>
+#include <getopt.h>
+
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "crc32c.h"
+#include "utils.h"
+#include "volumes.h"
+#include "ioctl.h"
+#include "commands.h"
+
+#ifndef BLKDISCARD
+#define BLKDISCARD _IO(0x12,119)
+#endif
+
+static int btrfs_scan_done = 0;
+
+static char argv0_buf[ARGV0_BUF_SIZE] = "btrfs";
+
+/*
+ * Return the program name used as prefix in messages.  The returned pointer
+ * refers to a static buffer that set_argv0() and fixup_argv0() modify.
+ */
+const char *get_argv0_buf(void)
+{
+	return &argv0_buf[0];
+}
+
+/*
+ * Append " <token>" to the stored program name (truncating if the buffer is
+ * full) and make argv[0] point to the stored name.
+ */
+void fixup_argv0(char **argv, const char *token)
+{
+	int used = strlen(argv0_buf);
+	char *tail = argv0_buf + used;
+
+	snprintf(tail, sizeof(argv0_buf) - used, " %s", token);
+	argv[0] = argv0_buf;
+}
+
+/*
+ * Remember argv[0] as the program name, truncated to the size of the static
+ * buffer and always NUL terminated.
+ */
+void set_argv0(char **argv)
+{
+	const size_t last = sizeof(argv0_buf) - 1;
+
+	/* strncpy does not terminate on truncation, so do it by hand */
+	strncpy(argv0_buf, argv[0], last);
+	argv0_buf[last] = '\0';
+}
+
+/*
+ * Check that exactly @expected arguments were given.  Returns 0 if so,
+ * otherwise prints a message to stderr and returns 1.
+ */
+int check_argc_exact(int nargs, int expected)
+{
+	if (nargs == expected)
+		return 0;
+
+	if (nargs < expected)
+		fprintf(stderr, "%s: too few arguments\n", argv0_buf);
+	else
+		fprintf(stderr, "%s: too many arguments\n", argv0_buf);
+
+	return 1;
+}
+
+/*
+ * Check that at least @expected arguments were given.  Returns 0 if so,
+ * otherwise prints a message to stderr and returns 1.
+ */
+int check_argc_min(int nargs, int expected)
+{
+	if (nargs >= expected)
+		return 0;
+
+	fprintf(stderr, "%s: too few arguments\n", argv0_buf);
+	return 1;
+}
+
+/*
+ * Check that at most @expected arguments were given.  Returns 0 if so,
+ * otherwise prints a message to stderr and returns 1.
+ */
+int check_argc_max(int nargs, int expected)
+{
+	if (nargs <= expected)
+		return 0;
+
+	fprintf(stderr, "%s: too many arguments\n", argv0_buf);
+	return 1;
+}
+
+
+/*
+ * Discard the given range with a single BLKDISCARD ioctl.
+ *
+ * Returns 0 on success or the (positive) errno of the failed ioctl.
+ */
+static int discard_range(int fd, u64 start, u64 len)
+{
+	u64 range[2];
+
+	range[0] = start;
+	range[1] = len;
+
+	return ioctl(fd, BLKDISCARD, &range) < 0 ? errno : 0;
+}
+
+/*
+ * Discard the given range piecewise, issuing one discard_range() call per
+ * chunk of at most 1G.
+ *
+ * Stops at the first failing chunk and returns its error, 0 if the whole
+ * range was discarded.
+ */
+static int discard_blocks(int fd, u64 start, u64 len)
+{
+	u64 pos = start;
+	u64 remaining = len;
+
+	while (remaining > 0) {
+		/* 1G granularity */
+		u64 step = min_t(u64, remaining, 1*1024*1024*1024);
+		int err = discard_range(fd, pos, step);
+
+		if (err)
+			return err;
+		pos += step;
+		remaining -= step;
+	}
+
+	return 0;
+}
+
+/*
+ * Owner root of each initial tree block written by make_btrfs(), indexed the
+ * same way as cfg->blocks[]: entry i is the root referencing the block at
+ * cfg->blocks[i]. Index 0 (the super block) has no entry and stays 0.
+ */
+static u64 reference_root_table[] = {
+ [1] = BTRFS_ROOT_TREE_OBJECTID,
+ [2] = BTRFS_EXTENT_TREE_OBJECTID,
+ [3] = BTRFS_CHUNK_TREE_OBJECTID,
+ [4] = BTRFS_DEV_TREE_OBJECTID,
+ [5] = BTRFS_FS_TREE_OBJECTID,
+ [6] = BTRFS_CSUM_TREE_OBJECTID,
+};
+
+/*
+ * Check via the blkid cache whether any known device already carries the
+ * filesystem UUID @fs_uuid.
+ *
+ * Returns 1 if no verified device with that UUID was found (also when the
+ * blkid cache cannot be obtained), 0 if the UUID is already in use.
+ */
+int test_uuid_unique(char *fs_uuid)
+{
+	blkid_cache cache = NULL;
+	blkid_dev_iterate iter = NULL;
+	blkid_dev dev = NULL;
+	int unique = 1;
+
+	if (blkid_get_cache(&cache, NULL) < 0) {
+		printf("ERROR: lblkid cache get failed\n");
+		return 1;
+	}
+
+	blkid_probe_all(cache);
+	iter = blkid_dev_iterate_begin(cache);
+	blkid_dev_set_search(iter, "UUID", fs_uuid);
+
+	/* the first cache entry that still verifies settles the question */
+	while (unique && blkid_dev_next(iter, &dev) == 0) {
+		dev = blkid_verify(cache, dev);
+		if (dev != NULL)
+			unique = 0;
+	}
+
+	blkid_dev_iterate_end(iter);
+	blkid_put_cache(cache);
+
+	return unique;
+}
+
+/*
+ * Write the initial, minimal filesystem described by @cfg to @fd.
+ *
+ * One tree block of cfg->nodesize bytes is written for each of the root,
+ * extent, chunk, device, fs and csum trees at cfg->blocks[1] .. cfg->blocks[6]
+ * (in that order), followed by the super block at cfg->blocks[0]. A single
+ * buffer is reused for all blocks, so the order of the statements matters.
+ *
+ * cfg->fs_uuid: if it points to a non-empty string, it is parsed and used as
+ * the filesystem UUID after checking that no other device uses it. Otherwise
+ * a UUID is generated and, if cfg->fs_uuid is not NULL, written back to it in
+ * text form.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, -EINVAL for
+ * an unparsable UUID, -EBUSY for a UUID that is already in use, and -errno or
+ * -EIO when a block cannot be written completely.
+ */
+int make_btrfs(int fd, struct btrfs_mkfs_config *cfg)
+{
+ struct btrfs_super_block super;
+ struct extent_buffer *buf;
+ struct btrfs_root_item root_item;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_extent_item *extent_item;
+ struct btrfs_inode_item *inode_item;
+ struct btrfs_chunk *chunk;
+ struct btrfs_dev_item *dev_item;
+ struct btrfs_dev_extent *dev_extent;
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+ u8 *ptr;
+ int i;
+ int ret;
+ u32 itemoff;
+ u32 nritems = 0;
+ u64 first_free;
+ u64 ref_root;
+ u32 array_size;
+ u32 item_size;
+ int skinny_metadata = !!(cfg->features &
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
+ u64 num_bytes;
+
+ /* one buffer, large enough for a tree block as well as the super block */
+ buf = malloc(sizeof(*buf) + max(cfg->sectorsize, cfg->nodesize));
+ if (!buf)
+ return -ENOMEM;
+
+ /*
+ * lowest allowed block address: BTRFS_SUPER_INFO_OFFSET plus
+ * 2 * sectorsize - 1, rounded down to a sectorsize boundary
+ */
+ first_free = BTRFS_SUPER_INFO_OFFSET + cfg->sectorsize * 2 - 1;
+ first_free &= ~((u64)cfg->sectorsize - 1);
+
+ memset(&super, 0, sizeof(super));
+
+ /* the filesystem size is rounded down to a multiple of sectorsize */
+ num_bytes = (cfg->num_bytes / cfg->sectorsize) * cfg->sectorsize;
+ if (cfg->fs_uuid && *cfg->fs_uuid) {
+ if (uuid_parse(cfg->fs_uuid, super.fsid) != 0) {
+ fprintf(stderr, "could not parse UUID: %s\n",
+ cfg->fs_uuid);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!test_uuid_unique(cfg->fs_uuid)) {
+ fprintf(stderr, "non-unique UUID: %s\n", cfg->fs_uuid);
+ ret = -EBUSY;
+ goto out;
+ }
+ } else {
+ uuid_generate(super.fsid);
+ if (cfg->fs_uuid)
+ uuid_unparse(super.fsid, cfg->fs_uuid);
+ }
+ uuid_generate(super.dev_item.uuid);
+ uuid_generate(chunk_tree_uuid);
+
+ btrfs_set_super_bytenr(&super, cfg->blocks[0]);
+ btrfs_set_super_num_devices(&super, 1);
+ btrfs_set_super_magic(&super, BTRFS_MAGIC);
+ btrfs_set_super_generation(&super, 1);
+ btrfs_set_super_root(&super, cfg->blocks[1]);
+ btrfs_set_super_chunk_root(&super, cfg->blocks[3]);
+ btrfs_set_super_total_bytes(&super, num_bytes);
+ /* six tree blocks are written below */
+ btrfs_set_super_bytes_used(&super, 6 * cfg->nodesize);
+ btrfs_set_super_sectorsize(&super, cfg->sectorsize);
+ btrfs_set_super_leafsize(&super, cfg->nodesize);
+ btrfs_set_super_nodesize(&super, cfg->nodesize);
+ btrfs_set_super_stripesize(&super, cfg->stripesize);
+ btrfs_set_super_csum_type(&super, BTRFS_CSUM_TYPE_CRC32);
+ btrfs_set_super_chunk_root_generation(&super, 1);
+ btrfs_set_super_cache_generation(&super, -1);
+ btrfs_set_super_incompat_flags(&super, cfg->features);
+ /* super was zeroed above, so the label stays NUL terminated */
+ if (cfg->label)
+ strncpy(super.label, cfg->label, BTRFS_LABEL_SIZE - 1);
+
+ /* create the tree of root objects */
+ memset(buf->data, 0, cfg->nodesize);
+ buf->len = cfg->nodesize;
+ btrfs_set_header_bytenr(buf, cfg->blocks[1]);
+ btrfs_set_header_nritems(buf, 4);
+ btrfs_set_header_generation(buf, 1);
+ btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(buf, BTRFS_ROOT_TREE_OBJECTID);
+ write_extent_buffer(buf, super.fsid, btrfs_header_fsid(),
+ BTRFS_FSID_SIZE);
+
+ write_extent_buffer(buf, chunk_tree_uuid,
+ btrfs_header_chunk_tree_uuid(buf),
+ BTRFS_UUID_SIZE);
+
+ /* create the items for the root tree */
+ memset(&root_item, 0, sizeof(root_item));
+ inode_item = &root_item.inode;
+ btrfs_set_stack_inode_generation(inode_item, 1);
+ btrfs_set_stack_inode_size(inode_item, 3);
+ btrfs_set_stack_inode_nlink(inode_item, 1);
+ btrfs_set_stack_inode_nbytes(inode_item, cfg->nodesize);
+ btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
+ btrfs_set_root_refs(&root_item, 1);
+ btrfs_set_root_used(&root_item, cfg->nodesize);
+ btrfs_set_root_generation(&root_item, 1);
+
+ memset(&disk_key, 0, sizeof(disk_key));
+ btrfs_set_disk_key_type(&disk_key, BTRFS_ROOT_ITEM_KEY);
+ btrfs_set_disk_key_offset(&disk_key, 0);
+ nritems = 0;
+
+ /*
+ * The same root_item is written four times, only the bytenr and the
+ * key objectid differ. Item data is laid out from the end of the leaf
+ * towards the front, hence the decreasing itemoff.
+ */
+ itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize) - sizeof(root_item);
+ btrfs_set_root_bytenr(&root_item, cfg->blocks[2]);
+ btrfs_set_disk_key_objectid(&disk_key, BTRFS_EXTENT_TREE_OBJECTID);
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems),
+ sizeof(root_item));
+ write_extent_buffer(buf, &root_item, btrfs_item_ptr_offset(buf,
+ nritems), sizeof(root_item));
+ nritems++;
+
+ itemoff = itemoff - sizeof(root_item);
+ btrfs_set_root_bytenr(&root_item, cfg->blocks[4]);
+ btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_TREE_OBJECTID);
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems),
+ sizeof(root_item));
+ write_extent_buffer(buf, &root_item,
+ btrfs_item_ptr_offset(buf, nritems),
+ sizeof(root_item));
+ nritems++;
+
+ itemoff = itemoff - sizeof(root_item);
+ btrfs_set_root_bytenr(&root_item, cfg->blocks[5]);
+ btrfs_set_disk_key_objectid(&disk_key, BTRFS_FS_TREE_OBJECTID);
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems),
+ sizeof(root_item));
+ write_extent_buffer(buf, &root_item,
+ btrfs_item_ptr_offset(buf, nritems),
+ sizeof(root_item));
+ nritems++;
+
+ itemoff = itemoff - sizeof(root_item);
+ btrfs_set_root_bytenr(&root_item, cfg->blocks[6]);
+ btrfs_set_disk_key_objectid(&disk_key, BTRFS_CSUM_TREE_OBJECTID);
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems),
+ sizeof(root_item));
+ write_extent_buffer(buf, &root_item,
+ btrfs_item_ptr_offset(buf, nritems),
+ sizeof(root_item));
+ nritems++;
+
+
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[1]);
+ if (ret != cfg->nodesize) {
+ ret = (ret < 0 ? -errno : -EIO);
+ goto out;
+ }
+
+ /* create the items for the extent tree */
+ /* from here on the header is kept and only the rest of the block is cleared */
+ memset(buf->data + sizeof(struct btrfs_header), 0,
+ cfg->nodesize - sizeof(struct btrfs_header));
+ nritems = 0;
+ itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize);
+ /* two items per tree block: the extent item and a zero-sized backref */
+ for (i = 1; i < 7; i++) {
+ item_size = sizeof(struct btrfs_extent_item);
+ if (!skinny_metadata)
+ item_size += sizeof(struct btrfs_tree_block_info);
+
+ /* blocks must lie at or above first_free and be in ascending order */
+ BUG_ON(cfg->blocks[i] < first_free);
+ BUG_ON(cfg->blocks[i] < cfg->blocks[i - 1]);
+
+ /* create extent item */
+ itemoff -= item_size;
+ btrfs_set_disk_key_objectid(&disk_key, cfg->blocks[i]);
+ if (skinny_metadata) {
+ btrfs_set_disk_key_type(&disk_key,
+ BTRFS_METADATA_ITEM_KEY);
+ btrfs_set_disk_key_offset(&disk_key, 0);
+ } else {
+ btrfs_set_disk_key_type(&disk_key,
+ BTRFS_EXTENT_ITEM_KEY);
+ btrfs_set_disk_key_offset(&disk_key, cfg->nodesize);
+ }
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems),
+ itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems),
+ item_size);
+ extent_item = btrfs_item_ptr(buf, nritems,
+ struct btrfs_extent_item);
+ btrfs_set_extent_refs(buf, extent_item, 1);
+ btrfs_set_extent_generation(buf, extent_item, 1);
+ btrfs_set_extent_flags(buf, extent_item,
+ BTRFS_EXTENT_FLAG_TREE_BLOCK);
+ nritems++;
+
+ /* create extent ref */
+ ref_root = reference_root_table[i];
+ btrfs_set_disk_key_objectid(&disk_key, cfg->blocks[i]);
+ btrfs_set_disk_key_offset(&disk_key, ref_root);
+ btrfs_set_disk_key_type(&disk_key, BTRFS_TREE_BLOCK_REF_KEY);
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems),
+ itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems), 0);
+ nritems++;
+ }
+ btrfs_set_header_bytenr(buf, cfg->blocks[2]);
+ btrfs_set_header_owner(buf, BTRFS_EXTENT_TREE_OBJECTID);
+ btrfs_set_header_nritems(buf, nritems);
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[2]);
+ if (ret != cfg->nodesize) {
+ ret = (ret < 0 ? -errno : -EIO);
+ goto out;
+ }
+
+ /* create the chunk tree */
+ memset(buf->data + sizeof(struct btrfs_header), 0,
+ cfg->nodesize - sizeof(struct btrfs_header));
+ nritems = 0;
+ item_size = sizeof(*dev_item);
+ itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize) - item_size;
+
+ /* first device 1 (there is no device 0) */
+ btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID);
+ btrfs_set_disk_key_offset(&disk_key, 1);
+ btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY);
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems), item_size);
+
+ dev_item = btrfs_item_ptr(buf, nritems, struct btrfs_dev_item);
+ btrfs_set_device_id(buf, dev_item, 1);
+ btrfs_set_device_generation(buf, dev_item, 0);
+ btrfs_set_device_total_bytes(buf, dev_item, num_bytes);
+ btrfs_set_device_bytes_used(buf, dev_item,
+ BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+ btrfs_set_device_io_align(buf, dev_item, cfg->sectorsize);
+ btrfs_set_device_io_width(buf, dev_item, cfg->sectorsize);
+ btrfs_set_device_sector_size(buf, dev_item, cfg->sectorsize);
+ btrfs_set_device_type(buf, dev_item, 0);
+
+ write_extent_buffer(buf, super.dev_item.uuid,
+ (unsigned long)btrfs_device_uuid(dev_item),
+ BTRFS_UUID_SIZE);
+ write_extent_buffer(buf, super.fsid,
+ (unsigned long)btrfs_device_fsid(dev_item),
+ BTRFS_UUID_SIZE);
+ /* the finished device item is also copied into the super block */
+ read_extent_buffer(buf, &super.dev_item, (unsigned long)dev_item,
+ sizeof(*dev_item));
+
+ nritems++;
+ item_size = btrfs_chunk_item_size(1);
+ itemoff = itemoff - item_size;
+
+ /* then we have chunk 0 */
+ btrfs_set_disk_key_objectid(&disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_disk_key_offset(&disk_key, 0);
+ btrfs_set_disk_key_type(&disk_key, BTRFS_CHUNK_ITEM_KEY);
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems), item_size);
+
+ chunk = btrfs_item_ptr(buf, nritems, struct btrfs_chunk);
+ btrfs_set_chunk_length(buf, chunk, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+ btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID);
+ btrfs_set_chunk_stripe_len(buf, chunk, 64 * 1024);
+ btrfs_set_chunk_type(buf, chunk, BTRFS_BLOCK_GROUP_SYSTEM);
+ btrfs_set_chunk_io_align(buf, chunk, cfg->sectorsize);
+ btrfs_set_chunk_io_width(buf, chunk, cfg->sectorsize);
+ btrfs_set_chunk_sector_size(buf, chunk, cfg->sectorsize);
+ btrfs_set_chunk_num_stripes(buf, chunk, 1);
+ btrfs_set_stripe_devid_nr(buf, chunk, 0, 1);
+ btrfs_set_stripe_offset_nr(buf, chunk, 0, 0);
+ nritems++;
+
+ write_extent_buffer(buf, super.dev_item.uuid,
+ (unsigned long)btrfs_stripe_dev_uuid(&chunk->stripe),
+ BTRFS_UUID_SIZE);
+
+ /* copy the key for the chunk to the system array */
+ ptr = super.sys_chunk_array;
+ array_size = sizeof(disk_key);
+
+ memcpy(ptr, &disk_key, sizeof(disk_key));
+ ptr += sizeof(disk_key);
+
+ /* copy the chunk to the system array */
+ read_extent_buffer(buf, ptr, (unsigned long)chunk, item_size);
+ array_size += item_size;
+ ptr += item_size;
+ btrfs_set_super_sys_array_size(&super, array_size);
+
+ btrfs_set_header_bytenr(buf, cfg->blocks[3]);
+ btrfs_set_header_owner(buf, BTRFS_CHUNK_TREE_OBJECTID);
+ btrfs_set_header_nritems(buf, nritems);
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[3]);
+ if (ret != cfg->nodesize) {
+ ret = (ret < 0 ? -errno : -EIO);
+ goto out;
+ }
+
+ /* create the device tree */
+ memset(buf->data + sizeof(struct btrfs_header), 0,
+ cfg->nodesize - sizeof(struct btrfs_header));
+ nritems = 0;
+ itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize) -
+ sizeof(struct btrfs_dev_extent);
+
+ /* a single dev extent on device 1 at offset 0, backing the system chunk */
+ btrfs_set_disk_key_objectid(&disk_key, 1);
+ btrfs_set_disk_key_offset(&disk_key, 0);
+ btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_EXTENT_KEY);
+ btrfs_set_item_key(buf, &disk_key, nritems);
+ btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff);
+ btrfs_set_item_size(buf, btrfs_item_nr(nritems),
+ sizeof(struct btrfs_dev_extent));
+ dev_extent = btrfs_item_ptr(buf, nritems, struct btrfs_dev_extent);
+ btrfs_set_dev_extent_chunk_tree(buf, dev_extent,
+ BTRFS_CHUNK_TREE_OBJECTID);
+ btrfs_set_dev_extent_chunk_objectid(buf, dev_extent,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_dev_extent_chunk_offset(buf, dev_extent, 0);
+
+ write_extent_buffer(buf, chunk_tree_uuid,
+ (unsigned long)btrfs_dev_extent_chunk_tree_uuid(dev_extent),
+ BTRFS_UUID_SIZE);
+
+ btrfs_set_dev_extent_length(buf, dev_extent,
+ BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+ nritems++;
+
+ btrfs_set_header_bytenr(buf, cfg->blocks[4]);
+ btrfs_set_header_owner(buf, BTRFS_DEV_TREE_OBJECTID);
+ btrfs_set_header_nritems(buf, nritems);
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[4]);
+ if (ret != cfg->nodesize) {
+ ret = (ret < 0 ? -errno : -EIO);
+ goto out;
+ }
+
+ /* create the FS root */
+ /* an empty leaf */
+ memset(buf->data + sizeof(struct btrfs_header), 0,
+ cfg->nodesize - sizeof(struct btrfs_header));
+ btrfs_set_header_bytenr(buf, cfg->blocks[5]);
+ btrfs_set_header_owner(buf, BTRFS_FS_TREE_OBJECTID);
+ btrfs_set_header_nritems(buf, 0);
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[5]);
+ if (ret != cfg->nodesize) {
+ ret = (ret < 0 ? -errno : -EIO);
+ goto out;
+ }
+ /* finally create the csum root */
+ /* an empty leaf as well */
+ memset(buf->data + sizeof(struct btrfs_header), 0,
+ cfg->nodesize - sizeof(struct btrfs_header));
+ btrfs_set_header_bytenr(buf, cfg->blocks[6]);
+ btrfs_set_header_owner(buf, BTRFS_CSUM_TREE_OBJECTID);
+ btrfs_set_header_nritems(buf, 0);
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[6]);
+ if (ret != cfg->nodesize) {
+ ret = (ret < 0 ? -errno : -EIO);
+ goto out;
+ }
+
+ /* and write out the super block */
+ BUG_ON(sizeof(super) > cfg->sectorsize);
+ memset(buf->data, 0, BTRFS_SUPER_INFO_SIZE);
+ memcpy(buf->data, &super, sizeof(super));
+ buf->len = BTRFS_SUPER_INFO_SIZE;
+ csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+ ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE, cfg->blocks[0]);
+ if (ret != BTRFS_SUPER_INFO_SIZE) {
+ ret = (ret < 0 ? -errno : -EIO);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ free(buf);
+ return ret;
+}
+
+/*
+ * Table of the filesystem features that can be selected by name: the name
+ * as typed by the user, the corresponding flag bit and a short description.
+ * The final "list-all" entry is a pseudo feature without description;
+ * btrfs_list_all_fs_features() relies on it being the last entry.
+ */
+static const struct btrfs_fs_feature {
+ const char *name;
+ u64 flag;
+ const char *desc;
+} mkfs_features[] = {
+ { "mixed-bg", BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS,
+ "mixed data and metadata block groups" },
+ { "extref", BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF,
+ "increased hardlink limit per file to 65536" },
+ { "raid56", BTRFS_FEATURE_INCOMPAT_RAID56,
+ "raid56 extended format" },
+ { "skinny-metadata", BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA,
+ "reduced-size metadata extent refs" },
+ { "no-holes", BTRFS_FEATURE_INCOMPAT_NO_HOLES,
+ "no explicit hole extents for files" },
+ /* Keep this one last */
+ { "list-all", BTRFS_FEATURE_LIST_ALL, NULL }
+};
+
+/*
+ * Apply a single feature name to @flags: "name" sets the flag of the
+ * matching table entry, "^name" clears it.
+ *
+ * Returns 0 if @name matched an entry of mkfs_features, 1 otherwise.
+ */
+static int parse_one_fs_feature(const char *name, u64 *flags)
+{
+	const int negate = (name[0] == '^');
+	int matched = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mkfs_features); i++) {
+		const char *feat = mkfs_features[i].name;
+
+		if (negate && strcmp(feat, name + 1) == 0) {
+			*flags &= ~ mkfs_features[i].flag;
+			matched = 1;
+			continue;
+		}
+		if (strcmp(feat, name) == 0) {
+			*flags |= mkfs_features[i].flag;
+			matched = 1;
+		}
+	}
+
+	return matched ? 0 : 1;
+}
+
+/*
+ * Write the names of all features set in @flags to @buf, separated by ", ".
+ * @buf becomes an empty string if no known feature is set.  The caller has
+ * to provide a buffer that is large enough, no bounds are checked here.
+ */
+void btrfs_parse_features_to_string(char *buf, u64 flags)
+{
+	const char *sep = "";
+	int i;
+
+	buf[0] = 0;
+
+	for (i = 0; i < ARRAY_SIZE(mkfs_features); i++) {
+		if (!(flags & mkfs_features[i].flag))
+			continue;
+		/* no separator in front of the first name */
+		strcat(buf, sep);
+		strcat(buf, mkfs_features[i].name);
+		sep = ", ";
+	}
+}
+
+/*
+ * Print one line to stdout for every feature of mkfs_features that is set
+ * in @flags.
+ *
+ * Entries without a description (the "list-all" pseudo feature) are skipped:
+ * they are not real features and their NULL description must not be passed
+ * to printf.
+ */
+void btrfs_process_fs_features(u64 flags)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mkfs_features); i++) {
+		if (!mkfs_features[i].desc)
+			continue;
+		if (flags & mkfs_features[i].flag) {
+			printf("Turning ON incompat feature '%s': %s\n",
+				mkfs_features[i].name,
+				mkfs_features[i].desc);
+		}
+	}
+}
+
+/*
+ * Print the table of selectable features to stderr, one per line with name,
+ * description and flag value; features that are part of
+ * BTRFS_MKFS_DEFAULT_FEATURES are marked as default.  Features with a bit in
+ * @mask_disallowed are left out, and so is the trailing "list-all" entry.
+ */
+void btrfs_list_all_fs_features(u64 mask_disallowed)
+{
+	int i;
+
+	fprintf(stderr, "Filesystem features available:\n");
+	/* the last table entry is the list-all pseudo feature */
+	for (i = 0; i < ARRAY_SIZE(mkfs_features) - 1; i++) {
+		const struct btrfs_fs_feature *feat = &mkfs_features[i];
+		char *is_default;
+
+		if (feat->flag & mask_disallowed)
+			continue;
+
+		is_default = (feat->flag & BTRFS_MKFS_DEFAULT_FEATURES) ?
+				", default" : "";
+		fprintf(stderr, "%-20s- %s (0x%llx%s)\n",
+			feat->name, feat->desc, feat->flag, is_default);
+	}
+}
+
+/*
+ * Parse the comma separated feature list @namelist and apply every entry to
+ * @flags.  @namelist is modified in the process (split with strtok_r).
+ *
+ * Returns NULL if all features were parsed fine, otherwise a pointer to the
+ * name of the first one that is not known; parsing stops there.
+ */
+char* btrfs_parse_fs_features(char *namelist, u64 *flags)
+{
+	char *save_ptr = NULL; /* Satisfy static checkers */
+	char *token = strtok_r(namelist, ",", &save_ptr);
+
+	while (token != NULL) {
+		if (parse_one_fs_feature(token, flags))
+			return token;
+		token = strtok_r(NULL, ",", &save_ptr);
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the size in bytes of the file or block device open at @fd, whose
+ * stat data the caller passes in @st.
+ *
+ * Regular files report st_size, block devices are queried with BLKGETSIZE64.
+ * Returns 0 for any other file type or if the ioctl fails.
+ */
+u64 btrfs_device_size(int fd, struct stat *st)
+{
+	u64 size;
+
+	if (S_ISREG(st->st_mode))
+		return st->st_size;
+
+	if (S_ISBLK(st->st_mode) && ioctl(fd, BLKGETSIZE64, &size) >= 0)
+		return size;
+
+	return 0;
+}
+
+/*
+ * Overwrite @len bytes of @fd starting at offset @start with zeros.
+ *
+ * Returns 0 on success (an empty range is trivially successful), -ENOMEM if
+ * the zero buffer cannot be allocated, -errno if the write fails and -EIO if
+ * fewer than @len bytes were written.
+ */
+static int zero_blocks(int fd, off_t start, size_t len)
+{
+	char *buf;
+	ssize_t written;
+	int ret = 0;
+
+	/*
+	 * Nothing to do for an empty range. Also avoids a zero sized
+	 * allocation, which is allowed to return NULL and would be
+	 * misreported as -ENOMEM.
+	 */
+	if (len == 0)
+		return 0;
+
+	buf = calloc(1, len);
+	if (!buf)
+		return -ENOMEM;
+
+	written = pwrite(fd, buf, len, start);
+	if (written < 0)
+		ret = -errno;	/* keep the real reason for the caller's message */
+	else if ((size_t)written != len)
+		ret = -EIO;
+	free(buf);
+	return ret;
+}
+
+#define ZERO_DEV_BYTES (2 * 1024 * 1024)
+
+/*
+ * Zero the range [start, start + len) of @fd, clamped so that nothing is
+ * written at or beyond @dev_size.  A range whose end would lie before its
+ * start is treated as empty.  Returns whatever zero_blocks() returns for the
+ * clamped range.
+ */
+static int zero_dev_clamped(int fd, off_t start, ssize_t len, u64 dev_size)
+{
+	off_t stop = start + len;
+
+	if (stop < start)
+		stop = start;
+
+#ifdef __sparc__
+	/* and don't overwrite the disk labels on sparc */
+	if (start < 1024)
+		start = 1024;
+	if (stop < 1024)
+		stop = 1024;
+#endif
+
+	/* don't write outside the device */
+	start = min_t(u64, start, dev_size);
+	stop = min_t(u64, stop, dev_size);
+
+	return zero_blocks(fd, start, stop - start);
+}
+
+/*
+ * Add the device open at @fd (named @path) to the filesystem of @root.
+ *
+ * A new btrfs_device is set up and registered with btrfs_add_device(), the
+ * total size and device count in the in-memory super block are increased,
+ * and a copy of that super block carrying the new device's dev_item is
+ * written to the device at BTRFS_SUPER_INFO_OFFSET.
+ *
+ * @device_total_bytes is rounded down to a multiple of @sectorsize.
+ *
+ * Returns 0 on success and -ENOMEM if an allocation fails. Failure of
+ * btrfs_add_device() or of the super block write triggers BUG_ON().
+ */
+int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int fd, char *path,
+ u64 device_total_bytes, u32 io_width, u32 io_align,
+ u32 sectorsize)
+{
+ struct btrfs_super_block *disk_super;
+ struct btrfs_super_block *super = root->fs_info->super_copy;
+ struct btrfs_device *device;
+ struct btrfs_dev_item *dev_item;
+ char *buf = NULL;
+ u64 fs_total_bytes;
+ u64 num_devs;
+ int ret;
+
+ device_total_bytes = (device_total_bytes / sectorsize) * sectorsize;
+
+ device = kzalloc(sizeof(*device), GFP_NOFS);
+ if (!device)
+ goto err_nomem;
+ /* one sector that will hold the super block written to the new device */
+ buf = kzalloc(sectorsize, GFP_NOFS);
+ if (!buf)
+ goto err_nomem;
+ BUG_ON(sizeof(*disk_super) > sectorsize);
+
+ disk_super = (struct btrfs_super_block *)buf;
+ dev_item = &disk_super->dev_item;
+
+ uuid_generate(device->uuid);
+ /* NOTE(review): devid starts as 0 here; presumably btrfs_add_device() assigns the real id - confirm */
+ device->devid = 0;
+ device->type = 0;
+ device->io_width = io_width;
+ device->io_align = io_align;
+ device->sector_size = sectorsize;
+ device->fd = fd;
+ device->writeable = 1;
+ device->total_bytes = device_total_bytes;
+ device->bytes_used = 0;
+ device->total_ios = 0;
+ device->dev_root = root->fs_info->dev_root;
+ device->name = strdup(path);
+ if (!device->name)
+ goto err_nomem;
+
+ INIT_LIST_HEAD(&device->dev_list);
+ ret = btrfs_add_device(trans, root, device);
+ BUG_ON(ret);
+
+ /* account for the new device in the in-memory super block */
+ fs_total_bytes = btrfs_super_total_bytes(super) + device_total_bytes;
+ btrfs_set_super_total_bytes(super, fs_total_bytes);
+
+ num_devs = btrfs_super_num_devices(super) + 1;
+ btrfs_set_super_num_devices(super, num_devs);
+
+ /* the on-disk copy starts from the updated super block ... */
+ memcpy(disk_super, super, sizeof(*disk_super));
+
+ /* ... and gets the dev_item describing the new device */
+ btrfs_set_super_bytenr(disk_super, BTRFS_SUPER_INFO_OFFSET);
+ btrfs_set_stack_device_id(dev_item, device->devid);
+ btrfs_set_stack_device_type(dev_item, device->type);
+ btrfs_set_stack_device_io_align(dev_item, device->io_align);
+ btrfs_set_stack_device_io_width(dev_item, device->io_width);
+ btrfs_set_stack_device_sector_size(dev_item, device->sector_size);
+ btrfs_set_stack_device_total_bytes(dev_item, device->total_bytes);
+ btrfs_set_stack_device_bytes_used(dev_item, device->bytes_used);
+ memcpy(&dev_item->uuid, device->uuid, BTRFS_UUID_SIZE);
+
+ ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
+ BUG_ON(ret != sectorsize);
+
+ kfree(buf);
+ list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
+ device->fs_devices = root->fs_info->fs_devices;
+ return 0;
+
+err_nomem:
+ /* device->name is never set when this label is reached */
+ kfree(device);
+ kfree(buf);
+ return -ENOMEM;
+}
+
+/*
+ * Let libblkid probe @fd and overwrite the magic string of the super block
+ * it finds with zeros (at most BUFSIZ bytes).
+ *
+ * Returns a negative value on error, 1 if the probe reported no super block
+ * magic (soft error), and the number of bytes wiped (a positive value) on
+ * success.  Callers are expected to test for a negative result only.
+ */
+static int btrfs_wipe_existing_sb(int fd)
+{
+	char zeroes[BUFSIZ];
+	const char *magic_off = NULL;
+	size_t magic_len = 0;
+	loff_t where;
+	int ret = 0;
+	blkid_probe probe = blkid_new_probe();
+
+	if (probe == NULL)
+		return -1;
+
+	if (blkid_probe_set_device(probe, fd, 0, 0) != 0) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = blkid_probe_lookup_value(probe, "SBMAGIC_OFFSET", &magic_off,
+				       NULL);
+	if (ret == 0)
+		ret = blkid_probe_lookup_value(probe, "SBMAGIC", NULL,
+					       &magic_len);
+
+	if (ret != 0 || magic_len == 0 || magic_off == NULL) {
+		/*
+		 * If lookup fails, the probe did not find any values, eg. for
+		 * a file image or a loop device. Soft error.
+		 */
+		ret = 1;
+		goto out;
+	}
+
+	where = strtoll(magic_off, NULL, 10);
+	if (magic_len > sizeof(zeroes))
+		magic_len = sizeof(zeroes);
+
+	memset(zeroes, 0, magic_len);
+	ret = pwrite(fd, zeroes, magic_len, where);
+	if (ret != magic_len) {
+		fprintf(stderr, "ERROR: cannot wipe existing superblock\n");
+		ret = -1;
+	}
+	fsync(fd);
+
+out:
+	blkid_free_probe(probe);
+	return ret;
+}
+
+/*
+ * Prepare the device or image open at @fd (named @file in messages) for
+ * mkfs: determine its size, optionally discard it, zero the first
+ * ZERO_DEV_BYTES, every super block mirror location and, if @zero_end is
+ * set, the last ZERO_DEV_BYTES, then wipe the magic of any super block
+ * libblkid still detects.
+ *
+ * The size in bytes, limited to @max_block_count if that is non-zero, is
+ * stored in *@block_count_ret on success.
+ *
+ * Returns 0 on success and 1 on failure, after printing a message to stderr.
+ */
+int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret,
+			 u64 max_block_count, int discard)
+{
+	struct stat st;
+	u64 dev_bytes;
+	int mirror;
+	int ret;
+
+	if (fstat(fd, &st) < 0) {
+		fprintf(stderr, "unable to stat %s\n", file);
+		return 1;
+	}
+
+	dev_bytes = btrfs_device_size(fd, &st);
+	if (!dev_bytes) {
+		fprintf(stderr, "unable to find %s size\n", file);
+		return 1;
+	}
+	if (max_block_count)
+		dev_bytes = min(dev_bytes, max_block_count);
+
+	/*
+	 * We intentionally ignore errors from the discard ioctl. It
+	 * is not necessary for the mkfs functionality but just an
+	 * optimization. The zero length discard only probes for support.
+	 */
+	if (discard && discard_range(fd, 0, 0) == 0) {
+		printf("Performing full device TRIM (%s) ...\n",
+		       pretty_size(dev_bytes));
+		discard_blocks(fd, 0, dev_bytes);
+	}
+
+	/* each step only runs if all the previous ones succeeded */
+	ret = zero_dev_clamped(fd, 0, ZERO_DEV_BYTES, dev_bytes);
+	for (mirror = 0; ret == 0 && mirror < BTRFS_SUPER_MIRROR_MAX; mirror++)
+		ret = zero_dev_clamped(fd, btrfs_sb_offset(mirror),
+				       BTRFS_SUPER_INFO_SIZE, dev_bytes);
+	if (ret == 0 && zero_end)
+		ret = zero_dev_clamped(fd, dev_bytes - ZERO_DEV_BYTES,
+				       ZERO_DEV_BYTES, dev_bytes);
+
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: failed to zero device '%s' - %s\n",
+			file, strerror(-ret));
+		return 1;
+	}
+
+	if (btrfs_wipe_existing_sb(fd) < 0) {
+		fprintf(stderr, "ERROR: cannot wipe superblocks on '%s'\n",
+			file);
+		return 1;
+	}
+
+	*block_count_ret = dev_bytes;
+	return 0;
+}
+
+/*
+ * Create the top level directory inode @objectid in @root: a directory with
+ * mode 0755, one link, size 0 and atime/ctime/mtime set to the current time,
+ * plus its ".." inode ref pointing to itself.  The inode becomes the root
+ * dirid of @root and, if @root is the tree root, also the root dir recorded
+ * in the super block.
+ *
+ * Returns 0 on success or the error of the failing insert.
+ */
+int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, u64 objectid)
+{
+	struct btrfs_inode_item dir;
+	const time_t now = time(NULL);
+	int err;
+
+	memset(&dir, 0, sizeof(dir));
+	btrfs_set_stack_inode_generation(&dir, trans->transid);
+	btrfs_set_stack_inode_size(&dir, 0);
+	btrfs_set_stack_inode_nlink(&dir, 1);
+	btrfs_set_stack_inode_nbytes(&dir, root->nodesize);
+	btrfs_set_stack_inode_mode(&dir, S_IFDIR | 0755);
+
+	btrfs_set_stack_timespec_sec(&dir.atime, now);
+	btrfs_set_stack_timespec_nsec(&dir.atime, 0);
+	btrfs_set_stack_timespec_sec(&dir.ctime, now);
+	btrfs_set_stack_timespec_nsec(&dir.ctime, 0);
+	btrfs_set_stack_timespec_sec(&dir.mtime, now);
+	btrfs_set_stack_timespec_nsec(&dir.mtime, 0);
+	/* the creation time is left at zero */
+	btrfs_set_stack_timespec_sec(&dir.otime, 0);
+	btrfs_set_stack_timespec_nsec(&dir.otime, 0);
+
+	if (root->fs_info->tree_root == root)
+		btrfs_set_super_root_dir(root->fs_info->super_copy, objectid);
+
+	err = btrfs_insert_inode(trans, root, objectid, &dir);
+	if (err)
+		return err;
+
+	err = btrfs_insert_inode_ref(trans, root, "..", 2, objectid, objectid, 0);
+	if (err)
+		return err;
+
+	btrfs_set_root_dirid(&root->root_item, objectid);
+	return 0;
+}
+
+/*
+ * Tell whether @path names a block device node.
+ *
+ * Returns 1 for a block device, 0 for anything else and the negative errno
+ * of stat() if @path cannot be examined.
+ */
+int is_block_device(const char *path)
+{
+	struct stat st;
+
+	if (stat(path, &st) < 0)
+		return -errno;
+
+	return S_ISBLK(st.st_mode) ? 1 : 0;
+}
+
+/*
+ * Tell whether @path is listed as a mount point in /proc/self/mounts.  The
+ * comparison is a plain string compare against the mount directories.
+ *
+ * Returns 1 if yes, 0 if no and -1 if the mount table cannot be opened.
+ */
+int is_mount_point(const char *path)
+{
+	FILE *mounts = setmntent("/proc/self/mounts", "r");
+	struct mntent *entry;
+	int found = 0;
+
+	if (mounts == NULL)
+		return -1;
+
+	/* stop reading at the first match */
+	while (!found && (entry = getmntent(mounts)) != NULL)
+		found = (strcmp(entry->mnt_dir, path) == 0);
+
+	endmntent(mounts);
+	return found;
+}
+
+/*
+ * Tell whether @path names a regular file.
+ *
+ * Returns non-zero for a regular file, 0 for anything else and the negative
+ * errno of stat() if @path cannot be examined.
+ */
+static int is_reg_file(const char *path)
+{
+	struct stat st;
+	int err = stat(path, &st);
+
+	if (err < 0)
+		return -errno;
+
+	return S_ISREG(st.st_mode);
+}
+
+/*
+ * This function checks if the given input parameter is
+ * an uuid or a path
+ * return <0 : some error in the given input
+ * return BTRFS_ARG_UNKNOWN:	unknown input
+ * return BTRFS_ARG_UUID:	given input is uuid
+ * return BTRFS_ARG_MNTPOINT:	given input is path
+ * return BTRFS_ARG_REG:	given input is regular file
+ * return BTRFS_ARG_BLKDEV:	given input is block device
+ *
+ * The input is first resolved with realpath(); only when that fails is
+ * it tried as a textual UUID.
+ */
+int check_arg_type(const char *input)
+{
+	uuid_t uuid;
+	char path[PATH_MAX];
+
+	if (!input)
+		return -EINVAL;
+
+	if (realpath(input, path)) {
+		if (is_block_device(path) == 1)
+			return BTRFS_ARG_BLKDEV;
+
+		if (is_mount_point(path) == 1)
+			return BTRFS_ARG_MNTPOINT;
+
+		/*
+		 * is_reg_file() returns a negative errno when stat() fails;
+		 * only a positive answer means "regular file".
+		 */
+		if (is_reg_file(path) > 0)
+			return BTRFS_ARG_REG;
+
+		return BTRFS_ARG_UNKNOWN;
+	}
+
+	if (strlen(input) == (BTRFS_UUID_UNPARSED_SIZE - 1) &&
+		!uuid_parse(input, uuid))
+		return BTRFS_ARG_UUID;
+
+	return BTRFS_ARG_UNKNOWN;
+}
+
+/*
+ * Find the mount point for a mounted device.
+ * On success, returns 0 with mountpoint in *mp.
+ * On failure, returns a negative value (not mounted yields -EINVAL, a
+ * failed mount-table check returns the error from check_mounted_where()).
+ * Is noisy on failures, expects to be given a mounted device.
+ */
+int get_btrfs_mount(const char *dev, char *mp, size_t mp_size)
+{
+	int ret;
+	int fd = -1;
+
+	ret = is_block_device(dev);
+	if (ret <= 0) {
+		if (!ret) {
+			fprintf(stderr, "%s is not a block device\n", dev);
+			ret = -EINVAL;
+		} else {
+			fprintf(stderr, "Could not check %s: %s\n",
+				dev, strerror(-ret));
+		}
+		goto out;
+	}
+
+	fd = open(dev, O_RDONLY);
+	if (fd < 0) {
+		ret = -errno;
+		fprintf(stderr, "Could not open %s: %s\n", dev, strerror(-ret));
+		goto out;
+	}
+
+	ret = check_mounted_where(fd, dev, mp, (int)mp_size, NULL);
+	if (ret < 0) {
+		/*
+		 * The check itself failed: *mp was not filled in, so this
+		 * must not be reported as "mounted".
+		 */
+		fprintf(stderr, "Could not check mount status of %s (error %d)\n",
+			dev, ret);
+	} else if (ret == 0) {
+		/* not mounted */
+		ret = -EINVAL;
+	} else {
+		/* mounted, all good */
+		ret = 0;
+	}
+out:
+	if (fd != -1)
+		close(fd);
+	return ret;
+}
+
+/*
+ * Given a pathname, return a filehandle to:
+ * 	the original pathname or,
+ * 	if the pathname is a mounted btrfs device, to its mountpoint.
+ *
+ * @dirstream receives the directory stream when a directory was opened.
+ * @verbose   controls the messages printed here for the block device case.
+ *
+ * On error, return a negative value, errno should be set.
+ */
+int open_path_or_dev_mnt(const char *path, DIR **dirstream, int verbose)
+{
+	char mp[PATH_MAX];
+	int ret;
+
+	/*
+	 * Compare against 1 explicitly: is_block_device() returns a negative
+	 * errno when stat() fails, and that must not be mistaken for "yes".
+	 * Such paths fall through to btrfs_open_dir(), which reports the
+	 * access error itself.
+	 */
+	if (is_block_device(path) == 1) {
+		ret = get_btrfs_mount(path, mp, sizeof(mp));
+		if (ret < 0) {
+			/* not a mounted btrfs dev */
+			error_on(verbose, "'%s' is not a mounted btrfs device",
+				 path);
+			errno = EINVAL;
+			return -1;
+		}
+		ret = open_file_or_dir(mp, dirstream);
+		error_on(verbose && ret < 0, "can't access '%s': %s",
+			 path, strerror(errno));
+	} else {
+		ret = btrfs_open_dir(path, dirstream, 1);
+	}
+
+	return ret;
+}
+
+/*
+ * Open @path through open_file_or_dir(), but only after verifying that
+ *  1: path is in a btrfs filesystem
+ *  2: path is a directory
+ *
+ * Returns the descriptor from open_file_or_dir() (negative if that open
+ * fails), -1 when @path cannot be accessed, -2 when it is not on btrfs
+ * and -3 when it is not a directory.  Messages are printed to stderr
+ * only when @verbose is non-zero.
+ */
+int btrfs_open_dir(const char *path, DIR **dirstream, int verbose)
+{
+	struct statfs fs_st;
+	struct stat file_st;
+	int fd;
+
+	if (statfs(path, &fs_st) != 0) {
+		if (verbose)
+			fprintf(stderr, "ERROR: can't access '%s': %s\n",
+				path, strerror(errno));
+		return -1;
+	}
+
+	if (fs_st.f_type != BTRFS_SUPER_MAGIC) {
+		if (verbose)
+			fprintf(stderr, "ERROR: not a btrfs filesystem: %s\n",
+				path);
+		return -2;
+	}
+
+	if (stat(path, &file_st) != 0) {
+		if (verbose)
+			fprintf(stderr, "ERROR: can't access '%s': %s\n",
+				path, strerror(errno));
+		return -1;
+	}
+
+	if (!S_ISDIR(file_st.st_mode)) {
+		if (verbose)
+			fprintf(stderr, "ERROR: not a directory: %s\n", path);
+		return -3;
+	}
+
+	fd = open_file_or_dir(path, dirstream);
+	if (fd < 0 && verbose)
+		fprintf(stderr, "ERROR: can't access '%s': %s\n",
+			path, strerror(errno));
+
+	return fd;
+}
+
+/*
+ * Checks if a device is a loop device: a block device node whose major
+ * number is LOOP_MAJOR.  Returns 1 or 0, or a negative errno value when
+ * stat() fails.
+ */
+static int is_loop_device (const char* device) {
+	struct stat st;
+
+	if (stat(device, &st) < 0)
+		return -errno;
+
+	if (!S_ISBLK(st.st_mode))
+		return 0;
+
+	return MAJOR(st.st_rdev) == LOOP_MAJOR;
+}
+
+/*
+ * Takes a loop device path (e.g. /dev/loop0) and returns
+ * the associated file (e.g. /images/my_btrfs.img) using
+ * loopdev API
+ *
+ * The name is obtained with the LOOP_GET_STATUS64 ioctl.  Exactly
+ * sizeof(lo_file_name) bytes are copied into @loop_file and one more
+ * byte is written as terminator, so @loop_file must have room for
+ * sizeof(lo_file_name) + 1 bytes; no length is passed in.
+ *
+ * Returns 0 on success or a negative errno value if open() or the
+ * ioctl fails.
+ */
+static int resolve_loop_device_with_loopdev(const char* loop_dev, char* loop_file)
+{
+ int fd;
+ int ret;
+ struct loop_info64 lo64;
+
+ fd = open(loop_dev, O_RDONLY | O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+ ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ memcpy(loop_file, lo64.lo_file_name, sizeof(lo64.lo_file_name));
+ loop_file[sizeof(lo64.lo_file_name)] = 0; /* terminate one past the copied field */
+
+out:
+ close(fd);
+
+ return ret;
+}
+
+/*
+ * Takes a loop device path (e.g. /dev/loop0) and returns
+ * the associated file (e.g. /images/my_btrfs.img)
+ *
+ * The backing file name is read from
+ * /sys/block/<name>/loop/backing_file.  If that file does not exist the
+ * loopdev ioctl API is used instead.
+ *
+ * @loop_file receives at most @max_len bytes including the terminator;
+ * it is set to the empty string when the sysfs file holds no name.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int resolve_loop_device(const char* loop_dev, char* loop_file,
+		int max_len)
+{
+	int ret;
+	int saved_errno;
+	FILE *f;
+	char fmt[20];
+	char p[PATH_MAX];
+	char real_loop_dev[PATH_MAX];
+	const char *name;
+
+	/* need room for at least one character plus the terminator */
+	if (max_len < 2)
+		return -EINVAL;
+
+	if (!realpath(loop_dev, real_loop_dev))
+		return -errno;
+
+	/* use the last path component, without its leading '/' */
+	name = strrchr(real_loop_dev, '/');
+	name = name ? name + 1 : real_loop_dev;
+	snprintf(p, PATH_MAX, "/sys/block/%s/loop/backing_file", name);
+	if (!(f = fopen(p, "r"))) {
+		if (errno == ENOENT)
+			/*
+			 * It's possibly a partitioned loop device, which is
+			 * resolvable with loopdev API.
+			 */
+			return resolve_loop_device_with_loopdev(loop_dev, loop_file);
+		return -errno;
+	}
+
+	/* fscanf() stores nothing when the line is empty */
+	loop_file[0] = '\0';
+
+	snprintf(fmt, sizeof(fmt), "%%%i[^\n]", max_len-1);
+	errno = 0;
+	ret = fscanf(f, fmt, loop_file);
+	/* fclose() may overwrite errno, keep the value from fscanf() */
+	saved_errno = errno;
+	fclose(f);
+	if (ret == EOF)
+		return -saved_errno;
+
+	return 0;
+}
+
+/*
+ * Checks whether a and b are identical or device
+ * files associated with the same block device
+ *
+ * The comparison is done in this order:
+ *  - the realpath() of both names (or the raw name when realpath()
+ *    fails) compare equal as strings;
+ *  - both are block devices with the same st_rdev;
+ *  - both have the same st_dev and st_ino (hard links).
+ *
+ * Returns 1 if one of the tests matches, 0 if none does or if stat()
+ * fails with ENOENT, and a negative errno value for any other stat()
+ * failure.
+ */
+static int is_same_blk_file(const char* a, const char* b)
+{
+ struct stat st_buf_a, st_buf_b;
+ char real_a[PATH_MAX];
+ char real_b[PATH_MAX];
+
+ if (!realpath(a, real_a))
+ strncpy_null(real_a, a); /* fall back to the name as given */
+
+ if (!realpath(b, real_b))
+ strncpy_null(real_b, b); /* fall back to the name as given */
+
+ /* Identical path? */
+ if (strcmp(real_a, real_b) == 0)
+ return 1;
+
+ if (stat(a, &st_buf_a) < 0 || stat(b, &st_buf_b) < 0) {
+ if (errno == ENOENT)
+ return 0;
+ return -errno;
+ }
+
+ /* Same blockdevice? */
+ if (S_ISBLK(st_buf_a.st_mode) && S_ISBLK(st_buf_b.st_mode) &&
+ st_buf_a.st_rdev == st_buf_b.st_rdev) {
+ return 1;
+ }
+
+ /* Hardlink? */
+ if (st_buf_a.st_dev == st_buf_b.st_dev &&
+ st_buf_a.st_ino == st_buf_b.st_ino) {
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Choose the name that stands in for @path when comparing files.
+ *
+ * For a loop device this is its backing file, written into @buf.  If
+ * the backing file cannot be resolved because of EPERM, or @path is not
+ * a loop device, @path itself is used.
+ *
+ * Returns 1 with *target set, 0 when @path does not exist, and a
+ * negative errno value on any other error.
+ */
+static int loop_compare_target(const char *path, char *buf, int buf_len,
+			       const char **target)
+{
+	int ret;
+
+	ret = is_loop_device(path);
+	if (ret < 0)
+		return ret == -ENOENT ? 0 : ret;
+
+	*target = path;
+	if (!ret)
+		return 1;
+
+	ret = resolve_loop_device(path, buf, buf_len);
+	if (ret < 0) {
+		if (ret != -EPERM)
+			return ret;
+		/* not permitted to look behind the loop device: keep @path */
+	} else {
+		*target = buf;
+	}
+	return 1;
+}
+
+/* checks if a and b are identical or device
+ * files associated with the same block device or
+ * if one file is a loop device that uses the other
+ * file.
+ *
+ * Returns 1 if they are the same, 0 if not (or if one of them does not
+ * exist), and a negative errno value on error.
+ */
+static int is_same_loop_file(const char* a, const char* b)
+{
+	char res_a[PATH_MAX];
+	char res_b[PATH_MAX];
+	const char* final_a = NULL;
+	const char* final_b = NULL;
+	int ret;
+
+	/* Resolve a if it is a loop device */
+	ret = loop_compare_target(a, res_a, sizeof(res_a), &final_a);
+	if (ret <= 0)
+		return ret;
+
+	/* Resolve b if it is a loop device */
+	ret = loop_compare_target(b, res_b, sizeof(res_b), &final_b);
+	if (ret <= 0)
+		return ret;
+
+	return is_same_blk_file(final_a, final_b);
+}
+
+/*
+ * Checks if a file exists and is a block or regular file.
+ * Returns 1 if so, 0 if it is another kind of file or does not exist,
+ * and a negative errno value for any other stat() failure.
+ */
+static int is_existing_blk_or_reg_file(const char* filename)
+{
+	struct stat st;
+
+	if (stat(filename, &st) >= 0)
+		return (S_ISBLK(st.st_mode) || S_ISREG(st.st_mode));
+
+	return (errno == ENOENT) ? 0 : -errno;
+}
+
+/*
+ * Checks if a file is used (directly or indirectly via a loop device)
+ * by a device in fs_devices.
+ *
+ * Returns the first non-zero result of is_same_loop_file() over the
+ * device list (positive for a match, negative for an error), or 0 when
+ * no device matches.
+ */
+static int blk_file_in_dev_list(struct btrfs_fs_devices* fs_devices,
+		const char* file)
+{
+	struct btrfs_device *dev;
+	struct list_head *pos;
+	int match = 0;
+
+	list_for_each(pos, &fs_devices->devices) {
+		dev = list_entry(pos, struct btrfs_device, dev_list);
+
+		match = is_same_loop_file(dev->name, file);
+		if (match)
+			break;
+	}
+
+	return match;
+}
+
+/*
+ * Resolve a device mapper node name (e.g. "dm-0") to /dev/mapper/<name>,
+ * where <name> is read from /sys/block/<ptname>/dm/name.
+ *
+ * Returns a newly allocated string, or NULL when the input is NULL or
+ * empty, the sysfs file cannot be read, the resulting /dev/mapper path
+ * does not exist, or strdup() fails.  Callers handle NULL by using the
+ * input pathname.
+ */
+char *canonicalize_dm_name(const char *ptname)
+{
+	char path[PATH_MAX];
+	char dm_name[PATH_MAX];
+	char *resolved = NULL;
+	size_t len;
+	FILE *fp;
+
+	if (ptname == NULL || ptname[0] == '\0')
+		return NULL;
+
+	snprintf(path, sizeof(path), "/sys/block/%s/dm/name", ptname);
+	fp = fopen(path, "r");
+	if (fp == NULL)
+		return NULL;
+
+	/* read <name>\n from sysfs */
+	if (fgets(dm_name, sizeof(dm_name), fp) != NULL) {
+		len = strlen(dm_name);
+		if (len > 1) {
+			/* drop the last character of the line */
+			dm_name[len - 1] = '\0';
+			snprintf(path, sizeof(path), "/dev/mapper/%s", dm_name);
+
+			if (access(path, F_OK) == 0)
+				resolved = strdup(path);
+		}
+	}
+
+	fclose(fp);
+	return resolved;
+}
+
+/*
+ * Resolve a pathname to a canonical device node, e.g. /dev/sda1 or
+ * to a device mapper pathname.
+ *
+ * Returns a newly allocated string.  NULL is returned for NULL or empty
+ * input and on allocation failure.  When realpath() fails, a copy of
+ * the input pathname is returned.
+ */
+char *canonicalize_path(const char *path)
+{
+	char *resolved;
+	char *base;
+	char *dm;
+
+	if (path == NULL || *path == '\0')
+		return NULL;
+
+	resolved = realpath(path, NULL);
+	if (resolved == NULL)
+		return strdup(path);
+
+	/* only names of the form ".../dm-<digit>..." get the dm lookup */
+	base = strrchr(resolved, '/');
+	if (base == NULL || strncmp(base, "/dm-", 4) != 0 || !isdigit(base[4]))
+		return resolved;
+
+	dm = canonicalize_dm_name(base + 1);
+	if (dm == NULL)
+		return resolved;
+
+	free(resolved);
+	return dm;
+}
+
+/*
+ * returns 1 if the device was mounted, < 0 on error or 0 if everything
+ * is safe to continue.
+ *
+ * @file is opened read-only and handed to check_mounted_where(); a
+ * failed open() is reported on stderr and returned as a negative errno.
+ */
+int check_mounted(const char* file)
+{
+	int fd;
+	int ret;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		/* capture errno first, fprintf() may change it */
+		ret = -errno;
+		fprintf (stderr, "check_mounted(): Could not open %s\n", file);
+		return ret;
+	}
+
+	ret =  check_mounted_where(fd, file, NULL, 0, NULL);
+	close(fd);
+
+	return ret;
+}
+
+/*
+ * Check whether the device/file behind @fd (named @file) is in use by a
+ * filesystem listed in /proc/self/mounts.
+ *
+ * If the device scans as btrfs, every btrfs mount is compared against
+ * the scanned device list; otherwise @file is compared against each
+ * mount source that is an existing block or regular file.
+ *
+ * On a match the mount directory is copied to @where (at most @size
+ * bytes, always terminated) when both are given.  @fs_dev_ret, if not
+ * NULL, receives the scanned device list.
+ *
+ * Returns 1 if mounted, 0 if not, and a negative value on error.
+ */
+int check_mounted_where(int fd, const char *file, char *where, int size,
+			struct btrfs_fs_devices **fs_dev_ret)
+{
+	struct btrfs_fs_devices *scanned = NULL;
+	struct mntent *entry;
+	FILE *mounts;
+	u64 ndevs = 1;
+	int btrfs_dev;
+	int ret;
+
+	/* scan the initial device */
+	ret = btrfs_scan_one_device(fd, file, &scanned,
+				    &ndevs, BTRFS_SUPER_INFO_OFFSET, 0);
+	btrfs_dev = (ret >= 0);
+
+	/* scan other devices */
+	if (btrfs_dev && ndevs > 1) {
+		ret = btrfs_scan_lblkid();
+		if (ret)
+			return ret;
+	}
+
+	/* iterate over the list of currently mounted filesystems */
+	mounts = setmntent ("/proc/self/mounts", "r");
+	if (mounts == NULL)
+		return -errno;
+
+	for (;;) {
+		entry = getmntent (mounts);
+		if (entry == NULL)
+			break;
+
+		if (btrfs_dev) {
+			if (strcmp(entry->mnt_type, "btrfs") != 0)
+				continue;
+
+			ret = blk_file_in_dev_list(scanned, entry->mnt_fsname);
+		} else {
+			/* ignore entries in the mount table that are not
+			   associated with a file*/
+			ret = is_existing_blk_or_reg_file(entry->mnt_fsname);
+			if (ret < 0)
+				goto out_mntloop_err;
+			if (ret == 0)
+				continue;
+
+			ret = is_same_loop_file(file, entry->mnt_fsname);
+		}
+
+		if (ret < 0)
+			goto out_mntloop_err;
+		if (ret > 0)
+			break;
+	}
+
+	/* Did we find an entry in mnt table? */
+	if (entry != NULL && size && where) {
+		strncpy(where, entry->mnt_dir, size);
+		where[size-1] = 0;
+	}
+	if (fs_dev_ret)
+		*fs_dev_ret = scanned;
+
+	ret = (entry != NULL);
+
+out_mntloop_err:
+	endmntent (mounts);
+
+	return ret;
+}
+
+struct pending_dir { /* NOTE(review): no user of this struct is visible in this part of the file */
+ struct list_head list; /* linkage into a list of pending_dir entries */
+ char name[PATH_MAX]; /* name buffer of PATH_MAX bytes; presumably a directory path - TODO confirm */
+};
+
+/*
+ * Announce the device @fname to the kernel through the
+ * BTRFS_IOC_SCAN_DEV ioctl on /dev/btrfs-control.
+ *
+ * Returns the ioctl result on success or a negative errno value when
+ * the control device cannot be opened or the ioctl fails; failures are
+ * reported on stderr.
+ */
+int btrfs_register_one_device(const char *fname)
+{
+	struct btrfs_ioctl_vol_args args;
+	int fd;
+	int ret;
+
+	fd = open("/dev/btrfs-control", O_RDWR);
+	if (fd < 0) {
+		/* save errno before the stdio calls can overwrite it */
+		ret = -errno;
+		fprintf(stderr, "failed to open /dev/btrfs-control "
+			"skipping device registration: %s\n",
+			strerror(-ret));
+		return ret;
+	}
+	memset(&args, 0, sizeof(args));
+	strncpy_null(args.name, fname);
+	ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args);
+	if (ret < 0) {
+		ret = -errno;
+		fprintf(stderr, "ERROR: device scan failed '%s' - %s\n",
+			fname, strerror(-ret));
+	}
+	close(fd);
+	return ret;
+}
+
+/*
+ * Register all devices in the fs_uuid list created in the user
+ * space. Ensure btrfs_scan_lblkid() is called before this func.
+ *
+ * Devices with an empty name are skipped.  Stops at the first failing
+ * registration and returns its result as a negative value; returns 0
+ * when every device was registered.
+ */
+int btrfs_register_all_devices(void)
+{
+	struct list_head *uuid_list = btrfs_scanned_uuids();
+	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_device *device;
+	int err;
+
+	list_for_each_entry(fs_devices, uuid_list, list) {
+		list_for_each_entry(device, &fs_devices->devices, dev_list) {
+			if (!*device->name)
+				continue;
+
+			err = btrfs_register_one_device(device->name);
+			if (err != 0)
+				return (err < 0) ? err : -err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Check whether the device open at @fd already belongs to the
+ * filesystem of @root, by reading the super block at @super_offset and
+ * comparing its fsid with the one in root's super copy.
+ *
+ * Returns 1 if the fsids match, 0 if they do not, if there is no btrfs
+ * magic, or if the super block could not be read completely, and a
+ * negative errno value on allocation or read errors.
+ */
+int btrfs_device_already_in_root(struct btrfs_root *root, int fd,
+				 int super_offset)
+{
+	struct btrfs_super_block *disk_super;
+	char *buf;
+	int ret = 0;
+
+	buf = malloc(BTRFS_SUPER_INFO_SIZE);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ret = pread(fd, buf, BTRFS_SUPER_INFO_SIZE, super_offset);
+	if (ret != BTRFS_SUPER_INFO_SIZE) {
+		/*
+		 * Do not leak the raw byte count to the caller: a short
+		 * read must not look like "already in root".
+		 */
+		ret = (ret < 0) ? -errno : 0;
+		goto brelse;
+	}
+
+	ret = 0;
+	disk_super = (struct btrfs_super_block *)buf;
+	if (btrfs_super_magic(disk_super) != BTRFS_MAGIC)
+		goto brelse;
+
+	if (!memcmp(disk_super->fsid, root->fs_info->super_copy->fsid,
+		    BTRFS_FSID_SIZE))
+		ret = 1;
+brelse:
+	free(buf);
+out:
+	return ret;
+}
+
+/*
+ * Format @size according to @mode and return the resulting string.
+ *
+ * Note: this function uses a static per-thread buffer. Do not call this
+ * function more than 10 times within one argument list!  The result
+ * lives in one of 10 rotating 32 byte slots.
+ */
+const char *pretty_size_mode(u64 size, unsigned mode)
+{
+	static __thread int ps_index = 0;
+	static __thread char ps_array[10][32];
+	char *slot = ps_array[ps_index];
+
+	/* advance to the next slot for the following call */
+	ps_index = (ps_index + 1) % 10;
+	(void)pretty_size_snprintf(size, slot, 32, mode);
+
+	return slot;
+}
+
+static const char* unit_suffix_binary[] =
+	{ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"};
+static const char* unit_suffix_decimal[] =
+	{ "B", "kB", "MB", "GB", "TB", "PB", "EB"};
+
+/*
+ * Format @size into @str (at most @str_size bytes) according to
+ * @unit_mode.
+ *
+ * The bits outside UNITS_MODE_MASK select raw, binary (1024) or decimal
+ * (1000) output; the bits inside the mask select a fixed unit, or
+ * automatic scaling to the largest unit that keeps the value >= 1.
+ *
+ * Returns 0 for raw mode or when @str_size is 0, the snprintf() result
+ * otherwise, and -1 on an unknown mode or unit.
+ */
+int pretty_size_snprintf(u64 size, char *str, size_t str_size, unsigned unit_mode)
+{
+	int num_divs;
+	float fraction;
+	u64 base = 0;
+	int mult = 0;
+	const char** suffix = NULL;
+	u64 last_size;
+
+	if (str_size == 0)
+		return 0;
+
+	if ((unit_mode & ~UNITS_MODE_MASK) == UNITS_RAW) {
+		snprintf(str, str_size, "%llu", (unsigned long long)size);
+		return 0;
+	}
+
+	if ((unit_mode & ~UNITS_MODE_MASK) == UNITS_BINARY) {
+		base = 1024;
+		mult = 1024;
+		suffix = unit_suffix_binary;
+	} else if ((unit_mode & ~UNITS_MODE_MASK) == UNITS_DECIMAL) {
+		base = 1000;
+		mult = 1000;
+		suffix = unit_suffix_decimal;
+	}
+
+	/* Unknown mode */
+	if (!base) {
+		fprintf(stderr, "INTERNAL ERROR: unknown unit base, mode %u\n",
+				unit_mode);
+		assert(0);
+		return -1;
+	}
+
+	num_divs = 0;
+	last_size = size;
+	switch (unit_mode & UNITS_MODE_MASK) {
+	case UNITS_TBYTES: base *= mult; num_divs++;
+		/* fallthrough */
+	case UNITS_GBYTES: base *= mult; num_divs++;
+		/* fallthrough */
+	case UNITS_MBYTES: base *= mult; num_divs++;
+		/* fallthrough */
+	case UNITS_KBYTES: num_divs++;
+			   break;
+	case UNITS_BYTES:
+			   base = 1;
+			   num_divs = 0;
+			   break;
+	default:
+		while (size >= (u64)mult) {
+			last_size = size;
+			size /= mult;
+			num_divs++;
+		}
+		/*
+		 * A value smaller than the base was never divided, so it is
+		 * already in bytes and must be printed undivided.
+		 */
+		if (num_divs == 0)
+			base = 1;
+	}
+
+	if (num_divs >= (int)ARRAY_SIZE(unit_suffix_binary)) {
+		str[0] = '\0';
+		/* internal errors go to stderr, like the one above */
+		fprintf(stderr,
+			"INTERNAL ERROR: unsupported unit suffix, index %d\n",
+			num_divs);
+		assert(0);
+		return -1;
+	}
+	fraction = (float)last_size / base;
+
+	return snprintf(str, str_size, "%.2f%s", fraction, suffix[num_divs]);
+}
+
+/*
+ * __strncpy__null - strncpy with null termination
+ * @dest:	the target array
+ * @src:	the source string
+ * @n:		maximum bytes to copy (size of *dest)
+ *
+ * Behaves like strncpy(dest, src, n) and then forces the last byte of
+ * the buffer, dest[n - 1], to '\0', so that the result is always a
+ * terminated string.  Nothing is written when n is 0.
+ *
+ * Returns dest.
+ */
+char *__strncpy__null(char *dest, const char *src, size_t n)
+{
+	strncpy(dest, src, n);
+
+	if (n == 0)
+		return dest;
+
+	dest[n - 1] = '\0';
+	return dest;
+}
+
+/*
+ * Checks to make sure that the label matches our requirements.
+ * Returns:
+ *      0    if everything is safe and usable
+ *     -1    if the label is too long (more than BTRFS_LABEL_SIZE - 1
+ *           characters); a message is printed to stderr
+ */
+static int check_label(const char *input)
+{
+	int label_len = strlen(input);
+
+	if (label_len <= BTRFS_LABEL_SIZE - 1)
+		return 0;
+
+	fprintf(stderr, "ERROR: Label %s is too long (max %d)\n",
+		input, BTRFS_LABEL_SIZE - 1);
+	return -1;
+}
+
+/*
+ * Write @label into the super block of the unmounted filesystem on @dev.
+ *
+ * Refuses to work on a mounted device.  Returns 0 on success and -1 when
+ * the mount check fails, the device is mounted, or the filesystem cannot
+ * be opened.
+ */
+static int set_label_unmounted(const char *dev, const char *label)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
+	int mounted = check_mounted(dev);
+
+	if (mounted < 0) {
+	       fprintf(stderr, "FATAL: error checking %s mount status\n", dev);
+	       return -1;
+	}
+	if (mounted > 0) {
+		fprintf(stderr, "ERROR: dev %s is mounted, use mount point\n",
+			dev);
+		return -1;
+	}
+
+	/* Open the super_block at the default location
+	 * and as read-write.
+	 */
+	root = open_ctree(dev, 0, OPEN_CTREE_WRITES);
+	if (root == NULL) /* errors are printed by open_ctree() */
+		return -1;
+
+	/* the label lives in the super block copy, committed below */
+	trans = btrfs_start_transaction(root, 1);
+	snprintf(root->fs_info->super_copy->label, BTRFS_LABEL_SIZE, "%s",
+		 label);
+	btrfs_commit_transaction(trans, root);
+
+	/* Now we close it since we are done. */
+	close_ctree(root);
+	return 0;
+}
+
+/*
+ * Set the label of the mounted filesystem at @mount_path with the
+ * BTRFS_IOC_SET_FSLABEL ioctl.  Returns 0 on success, -1 on failure
+ * (a message is printed to stderr).
+ */
+static int set_label_mounted(const char *mount_path, const char *label)
+{
+	int rc = 0;
+	int fd = open(mount_path, O_RDONLY | O_NOATIME);
+
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: unable to access '%s'\n", mount_path);
+		return -1;
+	}
+
+	if (ioctl(fd, BTRFS_IOC_SET_FSLABEL, label) < 0) {
+		fprintf(stderr, "ERROR: unable to set label %s\n",
+			strerror(errno));
+		rc = -1;
+	}
+
+	close(fd);
+	return rc;
+}
+
+/*
+ * Read the label from the super block of the filesystem on @dev.
+ *
+ * BTRFS_LABEL_SIZE bytes are copied into @label.  Returns 0 on success
+ * and -1 when the mount check fails or the filesystem cannot be opened.
+ */
+int get_label_unmounted(const char *dev, char *label)
+{
+	struct btrfs_root *root;
+
+	if (check_mounted(dev) < 0) {
+	       fprintf(stderr, "FATAL: error checking %s mount status\n", dev);
+	       return -1;
+	}
+
+	/* Open the super_block at the default location
+	 * and as read-only.
+	 */
+	root = open_ctree(dev, 0, 0);
+	if (root == NULL)
+		return -1;
+
+	memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE);
+
+	/* Now we close it since we are done. */
+	close_ctree(root);
+	return 0;
+}
+
+/*
+ * Get the label of a mounted filesystem through its mount path, using
+ * the BTRFS_IOC_GET_FSLABEL ioctl.
+ *
+ * @labelp must have room for BTRFS_LABEL_SIZE bytes; the result is
+ * always terminated.
+ *
+ * Returns 0 on success, -1 if @mount_path cannot be opened, and a
+ * negative errno value if the ioctl fails.  ENOTTY is returned without
+ * printing a message.
+ */
+int get_label_mounted(const char *mount_path, char *labelp)
+{
+	char label[BTRFS_LABEL_SIZE];
+	int fd;
+	int ret;
+
+	fd = open(mount_path, O_RDONLY | O_NOATIME);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: unable to access '%s'\n", mount_path);
+		return -1;
+	}
+
+	memset(label, '\0', sizeof(label));
+	ret = ioctl(fd, BTRFS_IOC_GET_FSLABEL, label);
+	if (ret < 0) {
+		/* take errno before fprintf()/close() can change it */
+		ret = -errno;
+		if (ret != -ENOTTY)
+			fprintf(stderr, "ERROR: unable to get label %s\n",
+				strerror(-ret));
+		close(fd);
+		return ret;
+	}
+
+	strncpy(labelp, label, sizeof(label));
+	/* strncpy() does not terminate when the source fills the buffer */
+	labelp[sizeof(label) - 1] = '\0';
+	close(fd);
+	return 0;
+}
+
+/*
+ * Fetch the filesystem label for @btrfs_dev into @label.
+ *
+ * An existing block device or regular file is read directly; anything
+ * else is treated as a mount path.  Returns the result of the chosen
+ * helper, or the negative error from the file type check.
+ */
+int get_label(const char *btrfs_dev, char *label)
+{
+	int kind = is_existing_blk_or_reg_file(btrfs_dev);
+
+	if (kind < 0)
+		return kind;
+
+	return kind ? get_label_unmounted(btrfs_dev, label)
+		    : get_label_mounted(btrfs_dev, label);
+}
+
+/*
+ * Set the filesystem label of @btrfs_dev to @label.
+ *
+ * The label is validated first (-1 if too long).  An existing block
+ * device or regular file is written directly; anything else is treated
+ * as a mount path.  Returns the result of the chosen helper, or the
+ * negative error from the file type check.
+ */
+int set_label(const char *btrfs_dev, const char *label)
+{
+	int kind;
+
+	if (check_label(label) != 0)
+		return -1;
+
+	kind = is_existing_blk_or_reg_file(btrfs_dev);
+	if (kind < 0)
+		return kind;
+
+	return kind ? set_label_unmounted(btrfs_dev, label)
+		    : set_label_mounted(btrfs_dev, label);
+}
+
+/*
+ * Unsafe subvolume check.
+ *
+ * This only checks ino == BTRFS_FIRST_FREE_OBJECTID, even it is not in a
+ * btrfs mount point.
+ * Must use together with other reliable method like btrfs ioctl.
+ *
+ * Returns 1 if the inode number matches, 0 if not, and the negative
+ * lstat() result on failure.
+ */
+static int __is_subvol(const char *path)
+{
+	struct stat st;
+	int rc = lstat(path, &st);
+
+	if (rc < 0)
+		return rc;
+
+	return (st.st_ino == BTRFS_FIRST_FREE_OBJECTID) ? 1 : 0;
+}
+
+/*
+ * A not-so-good version fls64. No fascinating optimization since
+ * no one except parse_size use it
+ *
+ * Returns the 1-based position of the highest set bit of @x, or 0 when
+ * @x is 0.
+ */
+static int fls64(u64 x)
+{
+	int bits = 64;
+
+	/* shift left until the top bit is set, counting down the position */
+	while (bits > 0 && !(x & (1ULL << 63))) {
+		x <<= 1;
+		bits--;
+	}
+	return bits;
+}
+
+/*
+ * Parse a size given as a decimal number with an optional single
+ * letter suffix (b, k, m, g, t, p, e; case insensitive, powers of 1024).
+ *
+ * Returns the size in bytes.  On any invalid input (NULL, leading '-',
+ * no digits, more than one suffix character, unknown suffix, value too
+ * large for u64) an error is printed and the program exits with
+ * status 1.
+ */
+u64 parse_size(char *s)
+{
+	char c;
+	char *endptr;
+	u64 mult = 1;
+	u64 ret;
+
+	if (!s) {
+		fprintf(stderr, "ERROR: Size value is empty\n");
+		exit(1);
+	}
+	if (s[0] == '-') {
+		fprintf(stderr,
+			"ERROR: Size value '%s' is less equal than 0\n", s);
+		exit(1);
+	}
+	/* clear errno so that a stale ERANGE cannot be mistaken for ours */
+	errno = 0;
+	ret = strtoull(s, &endptr, 10);
+	if (endptr == s) {
+		fprintf(stderr, "ERROR: Size value '%s' is invalid\n", s);
+		exit(1);
+	}
+	if (endptr[0] && endptr[1]) {
+		fprintf(stderr, "ERROR: Illegal suffix contains character '%c' in wrong position\n",
+			endptr[1]);
+		exit(1);
+	}
+	/*
+	 * strtoull returns ULLONG_MAX and sets errno to ERANGE when the
+	 * value does not fit
+	 */
+	if (errno == ERANGE && ret == ULLONG_MAX) {
+		fprintf(stderr,
+			"ERROR: Size value '%s' is too large for u64\n", s);
+		exit(1);
+	}
+	if (endptr[0]) {
+		/* tolower() needs a value representable as unsigned char */
+		c = tolower((unsigned char)endptr[0]);
+		switch (c) {
+		case 'e':
+			mult *= 1024;
+			/* fallthrough */
+		case 'p':
+			mult *= 1024;
+			/* fallthrough */
+		case 't':
+			mult *= 1024;
+			/* fallthrough */
+		case 'g':
+			mult *= 1024;
+			/* fallthrough */
+		case 'm':
+			mult *= 1024;
+			/* fallthrough */
+		case 'k':
+			mult *= 1024;
+			/* fallthrough */
+		case 'b':
+			break;
+		default:
+			fprintf(stderr, "ERROR: Unknown size descriptor '%c'\n",
+				c);
+			exit(1);
+		}
+	}
+	/* Check whether ret * mult overflow */
+	if (fls64(ret) + fls64(mult) - 1 > 64) {
+		fprintf(stderr,
+			"ERROR: Size value '%s' is too large for u64\n", s);
+		exit(1);
+	}
+	ret *= mult;
+	return ret;
+}
+
+/*
+ * Turn @p into a qgroup id.
+ *
+ * Accepted forms are a plain number, "level/id", or the path of a
+ * subvolume, whose root id is then looked up.  Anything that does not
+ * parse completely as a number is tried as a path.  On failure an error
+ * is printed and the program exits.
+ */
+u64 parse_qgroupid(const char *p)
+{
+	const char *end = p + strlen(p);
+	char *slash = strchr(p, '/');
+	char *stop = NULL;
+	u64 level;
+	u64 id;
+	int fd;
+	int ret = 0;
+
+	if (p[0] == '/')
+		goto path;
+
+	/* Numeric format like '0/257' is the primary case */
+	if (slash == NULL) {
+		id = strtoull(p, &stop, 10);
+		if (stop == end)
+			return id;
+		goto path;
+	}
+
+	/* the level must end exactly at the slash */
+	level = strtoull(p, &stop, 10);
+	if (stop != slash)
+		goto path;
+
+	/* the id must use up the rest of the string */
+	id = strtoull(slash + 1, &stop, 10);
+	if (stop != end)
+		goto path;
+
+	return (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
+
+path:
+	/* Path format like subv at 'my_subvol' is the fallback case */
+	ret = __is_subvol(p);
+	if (ret <= 0)
+		goto err;
+	fd = open(p, O_RDONLY);
+	if (fd < 0)
+		goto err;
+	ret = lookup_ino_rootid(fd, &id);
+	close(fd);
+	if (ret < 0)
+		goto err;
+	return id;
+
+err:
+	fprintf(stderr, "ERROR: invalid qgroupid or subvolume path: %s\n", p);
+	exit(-1);
+}
+
+/*
+ * Open @fname, which may be a directory or a regular file.
+ *
+ * A directory is opened with opendir(); the stream is stored in
+ * *@dirstream and its descriptor is returned.  A regular file is opened
+ * with open(@fname, @open_flags); *@dirstream is not touched in that
+ * case.  Other file types are rejected with errno set to EINVAL.
+ *
+ * Returns the descriptor, or -1 on failure.
+ */
+int open_file_or_dir3(const char *fname, DIR **dirstream, int open_flags)
+{
+	int ret;
+	struct stat st;
+	DIR *dir = NULL;
+	int fd;
+
+	ret = stat(fname, &st);
+	if (ret < 0) {
+		return -1;
+	}
+	if (S_ISDIR(st.st_mode)) {
+		*dirstream = opendir(fname);
+		dir = *dirstream;
+		if (!dir)
+			return -1;
+		fd = dirfd(dir);
+	} else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
+		fd = open(fname, open_flags);
+	} else {
+		/*
+		 * we set this on purpose, in case the caller output
+		 * strerror(errno) as success
+		 */
+		errno = EINVAL;
+		return -1;
+	}
+	if (fd < 0) {
+		fd = -1;
+		/*
+		 * Only close a stream opened here; the caller's *dirstream
+		 * may be uninitialised on the regular file path.
+		 */
+		if (dir) {
+			closedir(dir);
+			*dirstream = NULL;
+		}
+	}
+	return fd;
+}
+
+/*
+ * Open @fname read-write; same as open_file_or_dir3() with O_RDWR.
+ */
+int open_file_or_dir(const char *fname, DIR **dirstream)
+{
+	int fd = open_file_or_dir3(fname, dirstream, O_RDWR);
+
+	return fd;
+}
+
+/*
+ * Release what open_file_or_dir() returned: close the directory stream
+ * if there is one, otherwise close @fd when it is a valid descriptor.
+ */
+void close_file_or_dir(int fd, DIR *dirstream)
+{
+	if (dirstream != NULL) {
+		closedir(dirstream);
+		return;
+	}
+
+	if (fd >= 0)
+		close(fd);
+}
+
+/*
+ * Query the device with id @devid of the filesystem open at @fd using
+ * the BTRFS_IOC_DEV_INFO ioctl.  The uuid field of @di_args is cleared
+ * before the call.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+int get_device_info(int fd, u64 devid,
+		struct btrfs_ioctl_dev_info_args *di_args)
+{
+	memset(&di_args->uuid, '\0', sizeof(di_args->uuid));
+	di_args->devid = devid;
+
+	if (ioctl(fd, BTRFS_IOC_DEV_INFO, di_args) < 0)
+		return -errno;
+
+	return 0;
+}
+
+/*
+ * Return the device id of the last of the @nr_items dev items in the
+ * result buffer of @search_args.  The buffer is read as a sequence of
+ * (search header, dev item) pairs.
+ */
+static u64 find_max_device_id(struct btrfs_ioctl_search_args *search_args,
+		int nr_items)
+{
+	const size_t hdr_size = sizeof(struct btrfs_ioctl_search_header);
+	const size_t stride = hdr_size + sizeof(struct btrfs_dev_item);
+	char *last_item;
+
+	/* skip all earlier pairs, then the header of the last one */
+	last_item = search_args->buf;
+	last_item += (nr_items - 1) * stride;
+	last_item += hdr_size;
+
+	return btrfs_stack_device_id((struct btrfs_dev_item *)last_item);
+}
+
+/*
+ * Count the dev items in the chunk tree of the filesystem open at @fd
+ * and find the highest device id.
+ *
+ * The tree is searched in batches with BTRFS_IOC_TREE_SEARCH; each
+ * batch starts after the last device id of the previous one.  The
+ * results are stored in fi_args->num_devices and fi_args->max_id.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int search_chunk_tree_for_fs_info(int fd,
+				struct btrfs_ioctl_fs_info_args *fi_args)
+{
+	struct btrfs_ioctl_search_args search_args;
+	struct btrfs_ioctl_search_key *key = &search_args.key;
+	u64 next_devid = 1;
+	int batch_size;
+	int ret;
+
+	fi_args->num_devices = 0;
+
+	batch_size = BTRFS_SEARCH_ARGS_BUFSIZE
+	       / (sizeof(struct btrfs_ioctl_search_header)
+			       + sizeof(struct btrfs_dev_item));
+
+	key->tree_id = BTRFS_CHUNK_TREE_OBJECTID;
+	key->min_objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key->max_objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key->min_type = BTRFS_DEV_ITEM_KEY;
+	key->max_type = BTRFS_DEV_ITEM_KEY;
+	key->min_transid = 0;
+	key->max_transid = (u64)-1;
+	key->nr_items = batch_size;
+	key->max_offset = (u64)-1;
+
+	for (;;) {
+		key->min_offset = next_devid;
+
+		ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args);
+		if (ret < 0)
+			return -errno;
+
+		fi_args->num_devices += (u64)key->nr_items;
+
+		/* a batch that is not full was the last one */
+		if (key->nr_items != batch_size)
+			break;
+
+		next_devid = find_max_device_id(&search_args,
+						key->nr_items) + 1;
+	}
+
+	/* get the latest max_id to stay consistent with the num_devices */
+	if (key->nr_items == 0)
+		/*
+		 * last tree_search returns an empty buf, use the devid of
+		 * the last dev_item of the previous tree_search
+		 */
+		fi_args->max_id = next_devid - 1;
+	else
+		fi_args->max_id = find_max_device_id(&search_args,
+						     key->nr_items);
+
+	return 0;
+}
+
+/*
+ * For a given path, fill in the ioctl fs_ and info_ args.
+ * If the path is a btrfs mountpoint, fill info for all devices.
+ * If the path is a btrfs device, fill in only that device.
+ *
+ * The path provided must be either on a mounted btrfs fs,
+ * or be a mounted btrfs device.
+ *
+ * *di_ret receives a malloc()ed array of fi_args->num_devices entries;
+ * it is not freed here, also not on the error paths after allocation.
+ *
+ * Returns 0 on success, or a negative errno.
+ */
+int get_fs_info(char *path, struct btrfs_ioctl_fs_info_args *fi_args,
+		struct btrfs_ioctl_dev_info_args **di_ret)
+{
+	int fd = -1;
+	int ret = 0;
+	int ndevs = 0;
+	int i = 0;
+	int replacing = 0;
+	struct btrfs_fs_devices *fs_devices_mnt = NULL;
+	struct btrfs_ioctl_dev_info_args *di_args;
+	struct btrfs_ioctl_dev_info_args tmp;
+	char mp[PATH_MAX];
+	DIR *dirstream = NULL;
+
+	memset(fi_args, 0, sizeof(*fi_args));
+
+	if (is_block_device(path) == 1) {
+		struct btrfs_super_block *disk_super;
+		char buf[BTRFS_SUPER_INFO_SIZE];
+		u64 devid;
+
+		/* Ensure it's mounted, then set path to the mountpoint */
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			ret = -errno;
+			fprintf(stderr, "Couldn't open %s: %s\n",
+				path, strerror(errno));
+			goto out;
+		}
+		ret = check_mounted_where(fd, path, mp, sizeof(mp),
+					  &fs_devices_mnt);
+		if (!ret) {
+			ret = -EINVAL;
+			goto out;
+		}
+		if (ret < 0)
+			goto out;
+		path = mp;
+		/* Only fill in this one device */
+		fi_args->num_devices = 1;
+
+		disk_super = (struct btrfs_super_block *)buf;
+		ret = btrfs_read_dev_super(fd, disk_super,
+					   BTRFS_SUPER_INFO_OFFSET, 0);
+		if (ret < 0) {
+			ret = -EIO;
+			goto out;
+		}
+		devid = btrfs_stack_device_id(&disk_super->dev_item);
+
+		fi_args->max_id = devid;
+		i = devid;
+
+		memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE);
+		close(fd);
+	}
+
+	/* at this point path must not be for a block device */
+	fd = open_file_or_dir(path, &dirstream);
+	if (fd < 0) {
+		ret = -errno;
+		goto out;
+	}
+
+	/* fill in fi_args if not just a single device */
+	if (fi_args->num_devices != 1) {
+		ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args);
+		if (ret < 0) {
+			ret = -errno;
+			goto out;
+		}
+
+		/*
+		 * The fs_args->num_devices does not include seed devices
+		 */
+		ret = search_chunk_tree_for_fs_info(fd, fi_args);
+		if (ret)
+			goto out;
+
+		/*
+		 * search_chunk_tree_for_fs_info() lacks the devid 0,
+		 * so probe for it manually here.
+		 */
+		ret = get_device_info(fd, 0, &tmp);
+		if (!ret) {
+			fi_args->num_devices++;
+			ndevs++;
+			replacing = 1;
+			if (i == 0)
+				i++;
+		}
+	}
+
+	if (!fi_args->num_devices)
+		goto out;
+
+	di_args = *di_ret = malloc((fi_args->num_devices) * sizeof(*di_args));
+	if (!di_args) {
+		ret = -errno;
+		goto out;
+	}
+
+	if (replacing)
+		memcpy(di_args, &tmp, sizeof(tmp));
+	/*
+	 * Never fill more slots than were allocated, even if more devices
+	 * answer now than were counted above.
+	 */
+	for (; i <= fi_args->max_id && (u64)ndevs < fi_args->num_devices; ++i) {
+		ret = get_device_info(fd, i, &di_args[ndevs]);
+		if (ret == -ENODEV)
+			continue;
+		if (ret)
+			goto out;
+		ndevs++;
+	}
+
+	/*
+	* only when the only dev we wanted to find is not there then
+	* let any error be returned
+	*/
+	if (fi_args->num_devices != 1) {
+		BUG_ON(ndevs == 0);
+		ret = 0;
+	}
+
+out:
+	close_file_or_dir(fd, dirstream);
+	return ret;
+}
+
+/* true for the characters '0' to '7' */
+#define isoctal(c)	(((c) & ~7) == '0')
+
+/*
+ * Copy the string @f to @t, replacing every backslash followed by three
+ * octal digits with the character that has this octal code.  Any other
+ * character is copied as it is.  @t is always terminated.
+ */
+static inline void translate(char *f, char *t)
+{
+	const char *in = f;
+	char *out = t;
+
+	for (;;) {
+		if (*in == '\0')
+			break;
+
+		if (*in != '\\' ||
+		    !(isoctal(in[1]) && isoctal(in[2]) && isoctal(in[3]))) {
+			*out++ = *in++;
+			continue;
+		}
+
+		*out++ = 64*(in[1] & 7) + 8*(in[2] & 7) + (in[3] & 7);
+		in += 4;
+	}
+	*out = '\0';
+}
+
+/*
+ * Checks if the given file is listed in /proc/swaps.
+ * Returns 1 if swap device, < 0 on error or 0 if not swap device.
+ *
+ * A block device is matched by its device number (st_rdev), a regular
+ * file by the pair (st_dev, st_ino).  Other file types are never swap.
+ * A negative errno value is returned only when stat() on @file fails;
+ * if /proc/swaps cannot be opened, 0 is returned.
+ *
+ * Each line of /proc/swaps is cut at the first space and the first tab,
+ * octal escapes are decoded with translate(), and the resulting path is
+ * compared through stat().  Entries that cannot be stat()ed are skipped.
+ */
+static int is_swap_device(const char *file)
+{
+ FILE *f;
+ struct stat st_buf;
+ dev_t dev;
+ ino_t ino = 0;
+ char tmp[PATH_MAX];
+ char buf[PATH_MAX];
+ char *cp;
+ int ret = 0;
+
+ if (stat(file, &st_buf) < 0)
+ return -errno;
+ if (S_ISBLK(st_buf.st_mode))
+ dev = st_buf.st_rdev;
+ else if (S_ISREG(st_buf.st_mode)) {
+ dev = st_buf.st_dev;
+ ino = st_buf.st_ino;
+ } else
+ return 0;
+
+ if ((f = fopen("/proc/swaps", "r")) == NULL)
+ return 0;
+
+ /* skip the first line */
+ if (fgets(tmp, sizeof(tmp), f) == NULL)
+ goto out;
+
+ while (fgets(tmp, sizeof(tmp), f) != NULL) {
+ if ((cp = strchr(tmp, ' ')) != NULL)
+ *cp = '\0';
+ if ((cp = strchr(tmp, '\t')) != NULL)
+ *cp = '\0';
+ translate(tmp, buf);
+ if (stat(buf, &st_buf) != 0)
+ continue;
+ if (S_ISBLK(st_buf.st_mode)) {
+ if (dev == st_buf.st_rdev) {
+ ret = 1;
+ break;
+ }
+ } else if (S_ISREG(st_buf.st_mode)) {
+ if (dev == st_buf.st_dev && ino == st_buf.st_ino) {
+ ret = 1;
+ break;
+ }
+ }
+ }
+
+out:
+ fclose(f);
+
+ return ret;
+}
+
+/*
+ * Check for existing filesystem or partition table on device.
+ * Returns:
+ * 1 for existing fs or partition
+ * 0 for nothing found
+ * -1 for internal error
+ *
+ * A NULL or empty device name and a device of size 0 yield 0.
+ * When something is found, a message naming the filesystem type or
+ * partition table type is printed to stderr.  When the return value
+ * is -1, a "probe failed" message is printed to stderr.
+ */
+static int
+check_overwrite(
+ char *device)
+{
+ const char *type;
+ blkid_probe pr = NULL;
+ int ret;
+ blkid_loff_t size;
+
+ if (!device || !*device)
+ return 0;
+
+ ret = -1; /* will reset on success of all setup calls */
+
+ pr = blkid_new_probe_from_filename(device);
+ if (!pr)
+ goto out;
+
+ size = blkid_probe_get_size(pr);
+ if (size < 0)
+ goto out;
+
+ /* nothing to overwrite on a 0-length device */
+ if (size == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = blkid_probe_enable_partitions(pr, 1);
+ if (ret < 0)
+ goto out;
+
+ ret = blkid_do_fullprobe(pr);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Blkid returns 1 for nothing found and 0 when it finds a signature,
+ * but we want the exact opposite, so reverse the return value here.
+ *
+ * In addition print some useful diagnostics about what actually is
+ * on the device.
+ */
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ if (!blkid_probe_lookup_value(pr, "TYPE", &type, NULL)) {
+ fprintf(stderr,
+ "%s appears to contain an existing "
+ "filesystem (%s).\n", device, type);
+ } else if (!blkid_probe_lookup_value(pr, "PTTYPE", &type, NULL)) {
+ fprintf(stderr,
+ "%s appears to contain a partition "
+ "table (%s).\n", device, type);
+ } else {
+ fprintf(stderr,
+ "%s appears to contain something weird "
+ "according to blkid\n", device);
+ }
+ ret = 1;
+
+out:
+ if (pr)
+ blkid_free_probe(pr);
+ if (ret == -1) /* NOTE(review): assumes the blkid calls above fail with exactly -1 - confirm */
+ fprintf(stderr,
+ "probe of %s failed, cannot detect "
+ "existing filesystem.\n", device);
+ return ret;
+}
+
+/*
+ * Return the minimum number of devices required by the block group
+ * profile in @flag, or -1 if the profile bits are not recognized.
+ */
+static int group_profile_devs_min(u64 flag)
+{
+	const u64 profile = flag & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+	/* single (no profile bit) and DUP work on one device */
+	if (profile == 0 || profile == BTRFS_BLOCK_GROUP_DUP)
+		return 1;
+
+	if (profile == BTRFS_BLOCK_GROUP_RAID0 ||
+	    profile == BTRFS_BLOCK_GROUP_RAID1 ||
+	    profile == BTRFS_BLOCK_GROUP_RAID5)
+		return 2;
+
+	if (profile == BTRFS_BLOCK_GROUP_RAID6)
+		return 3;
+
+	if (profile == BTRFS_BLOCK_GROUP_RAID10)
+		return 4;
+
+	return -1;
+}
+
+/*
+ * Check that the requested metadata and data profiles can be used with
+ * @dev_cnt devices.
+ *
+ * Returns 0 if both profiles are allowed and 1 otherwise, after printing
+ * an error.  A warning is printed for DUP data on a non-mixed filesystem
+ * when @ssd is set.
+ */
+int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile,
+	u64 dev_cnt, int mixed, int ssd)
+{
+	u64 allowed;
+
+	if (dev_cnt == 1) {
+		allowed = BTRFS_BLOCK_GROUP_DUP;
+	} else {
+		/* every other count gets at least the two-device profiles */
+		allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+			BTRFS_BLOCK_GROUP_RAID5;
+		if (dev_cnt != 2)
+			allowed |= BTRFS_BLOCK_GROUP_RAID6;
+		if (dev_cnt != 2 && dev_cnt != 3)
+			allowed |= BTRFS_BLOCK_GROUP_RAID10;
+	}
+
+	if (dev_cnt > 1 &&
+	    ((metadata_profile | data_profile) & BTRFS_BLOCK_GROUP_DUP)) {
+		fprintf(stderr,
+			"ERROR: DUP is not allowed when FS has multiple devices\n");
+		return 1;
+	}
+	if (metadata_profile & ~allowed) {
+		fprintf(stderr,
+			"ERROR: unable to create FS with metadata profile %s "
+			"(have %llu devices but %d devices are required)\n",
+			btrfs_group_profile_str(metadata_profile), dev_cnt,
+			group_profile_devs_min(metadata_profile));
+		return 1;
+	}
+	if (data_profile & ~allowed) {
+		fprintf(stderr,
+			"ERROR: unable to create FS with data profile %s "
+			"(have %llu devices but %d devices are required)\n",
+			btrfs_group_profile_str(data_profile), dev_cnt,
+			group_profile_devs_min(data_profile));
+		return 1;
+	}
+
+	warning_on(!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP) && ssd,
+		   "DUP may not actually lead to 2 copies on the device, see manual page");
+
+	return 0;
+}
+
+/*
+ * Number of devices that may be lost without losing data for the profile
+ * selected in @flags. Returns -1 for profile bits that match no known value.
+ */
+int group_profile_max_safe_loss(u64 flags)
+{
+	const u64 profile = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+	/* single (no bit set), DUP and RAID0 tolerate no device loss */
+	if (profile == 0 ||
+	    profile == BTRFS_BLOCK_GROUP_DUP ||
+	    profile == BTRFS_BLOCK_GROUP_RAID0)
+		return 0;
+	if (profile == BTRFS_BLOCK_GROUP_RAID1 ||
+	    profile == BTRFS_BLOCK_GROUP_RAID5 ||
+	    profile == BTRFS_BLOCK_GROUP_RAID10)
+		return 1;
+	if (profile == BTRFS_BLOCK_GROUP_RAID6)
+		return 2;
+	return -1;
+}
+
+/*
+ * Decide whether @file may be used as a target for mkfs.
+ *
+ * The checks run in this order: swap device, existing signature (skipped when
+ * @force_overwrite is set), mounted, exclusively openable, block device.
+ *
+ * Returns:
+ *   0 - the device passed every check
+ *   1 - a check failed; the reason has been printed to stderr
+ */
+int test_dev_for_mkfs(char *file, int force_overwrite)
+{
+	struct stat sb;
+	int status;
+	int devfd;
+	int failed = 1;
+
+	status = is_swap_device(file);
+	if (status < 0) {
+		fprintf(stderr, "ERROR: checking status of %s: %s\n", file,
+			strerror(-status));
+		return 1;
+	}
+	if (status == 1) {
+		fprintf(stderr, "ERROR: %s is a swap device\n", file);
+		return 1;
+	}
+
+	if (!force_overwrite && check_overwrite(file)) {
+		fprintf(stderr, "Use the -f option to force overwrite.\n");
+		return 1;
+	}
+
+	status = check_mounted(file);
+	if (status < 0) {
+		fprintf(stderr, "ERROR: checking mount status of %s: %s\n",
+			file, strerror(-status));
+		return 1;
+	}
+	if (status == 1) {
+		fprintf(stderr, "ERROR: %s is mounted\n", file);
+		return 1;
+	}
+
+	/* An exclusive open fails while somebody else keeps the device busy. */
+	devfd = open(file, O_RDWR|O_EXCL);
+	if (devfd < 0) {
+		fprintf(stderr, "ERROR: unable to open %s: %s\n", file,
+			strerror(errno));
+		return 1;
+	}
+
+	if (fstat(devfd, &sb)) {
+		fprintf(stderr, "ERROR: unable to stat %s: %s\n", file,
+			strerror(errno));
+	} else if (!S_ISBLK(sb.st_mode)) {
+		fprintf(stderr, "ERROR: %s is not a block device\n", file);
+	} else {
+		failed = 0;
+	}
+
+	close(devfd);
+	return failed;
+}
+
+/*
+ * Scan every device that the blkid cache reports with TYPE "btrfs" and hand
+ * it to btrfs_scan_one_device(). The scan runs only once per process; later
+ * calls return immediately because btrfs_scan_done is set.
+ *
+ * Returns 0 on success (individual devices that cannot be opened or scanned
+ * are reported and skipped) and 1 when the blkid cache cannot be obtained.
+ */
+int btrfs_scan_lblkid(void)
+{
+	struct btrfs_fs_devices *found_devices;
+	blkid_dev_iterate it = NULL;
+	blkid_cache blk_cache = NULL;
+	blkid_dev blk_dev = NULL;
+	char path[PATH_MAX];
+	u64 nr_devices;
+	int devfd = -1;
+	int err;
+
+	if (btrfs_scan_done)
+		return 0;
+
+	if (blkid_get_cache(&blk_cache, NULL) < 0) {
+		printf("ERROR: lblkid cache get failed\n");
+		return 1;
+	}
+	blkid_probe_all(blk_cache);
+
+	it = blkid_dev_iterate_begin(blk_cache);
+	blkid_dev_set_search(it, "TYPE", "btrfs");
+	while (blkid_dev_next(it, &blk_dev) == 0) {
+		blk_dev = blkid_verify(blk_cache, blk_dev);
+		if (!blk_dev)
+			continue;
+
+		/* Only devices matching the TYPE=btrfs search get here. */
+		strncpy_null(path, blkid_dev_devname(blk_dev));
+
+		devfd = open(path, O_RDONLY);
+		if (devfd < 0) {
+			printf("ERROR: could not open %s\n", path);
+			continue;
+		}
+
+		err = btrfs_scan_one_device(devfd, path, &found_devices,
+				&nr_devices, BTRFS_SUPER_INFO_OFFSET, 0);
+		if (err)
+			printf("ERROR: could not scan %s\n", path);
+
+		/* The descriptor is released whether or not the scan worked. */
+		close(devfd);
+	}
+	blkid_dev_iterate_end(it);
+	blkid_put_cache(blk_cache);
+
+	btrfs_scan_done = 1;
+
+	return 0;
+}
+
+/*
+ * Tell whether the volume behind @file is smaller than
+ * BTRFS_MKFS_SMALL_VOLUME_SIZE.
+ *
+ * Returns 1 when it is smaller, 0 when it is not, -errno when the file cannot
+ * be opened or stat'ed, and -1 when btrfs_device_size() reports a size of 0.
+ */
+int is_vol_small(char *file)
+{
+	struct stat sb;
+	u64 bytes;
+	int saved;
+	int devfd = -1;
+
+	devfd = open(file, O_RDONLY);
+	if (devfd < 0)
+		return -errno;
+
+	if (fstat(devfd, &sb) < 0) {
+		/* Remember errno before close() gets a chance to change it. */
+		saved = -errno;
+		close(devfd);
+		return saved;
+	}
+
+	bytes = btrfs_device_size(devfd, &sb);
+	close(devfd);
+
+	if (bytes == 0)
+		return -1;
+
+	return bytes < BTRFS_MKFS_SMALL_VOLUME_SIZE ? 1 : 0;
+}
+
+/*
+ * Print @question followed by " [y/N]: " and read one line from stdin.
+ *
+ * Returns non-zero only when the first whitespace-delimited word of the
+ * answer equals "yes" or "y", compared without regard to case. A read
+ * failure or an empty line counts as "no".
+ */
+int ask_user(char *question)
+{
+	char line[30] = {0,};
+	char *state = NULL;
+	char *word;
+
+	printf("%s [y/N]: ", question);
+
+	if (!fgets(line, sizeof(line) - 1, stdin))
+		return 0;
+
+	word = strtok_r(line, " \t\n\r", &state);
+	if (!word)
+		return 0;
+
+	return !strcasecmp(word, "yes") || !strcasecmp(word, "y");
+}
+
+/*
+ * Ask the kernel (BTRFS_IOC_INO_LOOKUP) which tree the object behind @fd
+ * belongs to and store the answer in @rootid:
+ * - for a file or directory this is the id of the containing tree root
+ * - for a subvolume it is the subvolume's own tree id
+ * - for BTRFS_EMPTY_SUBVOL_DIR_OBJECTID (directory with ino == 2) the result
+ *   is undefined and the function returns -1
+ *
+ * Returns 0 on success, or the negative ioctl return value after printing
+ * an error to stderr.
+ */
+int lookup_ino_rootid(int fd, u64 *rootid)
+{
+	struct btrfs_ioctl_ino_lookup_args lookup;
+	int err;
+
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.treeid = 0;
+	lookup.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+	err = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &lookup);
+	if (err < 0) {
+		fprintf(stderr, "ERROR: Failed to lookup root id - %s\n",
+			strerror(errno));
+		return err;
+	}
+
+	/* The kernel filled in the tree id during the lookup. */
+	*rootid = lookup.treeid;
+
+	return 0;
+}
+
+/*
+ * Find the mount point that contains @path by scanning /proc/self/mounts for
+ * the longest mount directory that is a path-component prefix of @path.
+ *
+ * On success *@mount_root receives a malloc()ed, realpath()-resolved copy of
+ * the mount directory; the caller must free() it.
+ *
+ * return 0 if a btrfs mount point is found
+ * return 1 if a mount point is found but not btrfs
+ * return <0 if something goes wrong (-errno, -ENOENT when nothing matches,
+ *           -ENOMEM when the match cannot be duplicated)
+ */
+int find_mount_root(const char *path, char **mount_root)
+{
+	FILE *mnttab;
+	int fd;
+	struct mntent *ent;
+	int len;
+	int ret;
+	int not_btrfs = 1;
+	int longest_matchlen = 0;
+	char *longest_match = NULL;
+	char *candidate;
+
+	/* Opening the path is only a cheap existence/permission check. */
+	fd = open(path, O_RDONLY | O_NOATIME);
+	if (fd < 0)
+		return -errno;
+	close(fd);
+
+	mnttab = setmntent("/proc/self/mounts", "r");
+	if (!mnttab)
+		return -errno;
+
+	while ((ent = getmntent(mnttab))) {
+		len = strlen(ent->mnt_dir);
+		if (strncmp(ent->mnt_dir, path, len) != 0)
+			continue;
+
+		/*
+		 * A plain string prefix is not enough: mount "/mnt/a" must not
+		 * claim path "/mnt/ab/file". Unless the mount directory itself
+		 * ends in '/' (as "/" does), the prefix has to stop at the end
+		 * of @path or at a '/' separator. path[len] is readable here
+		 * because the first len bytes compared equal.
+		 */
+		if (len > 0 && ent->mnt_dir[len - 1] != '/' &&
+		    path[len] != '\0' && path[len] != '/')
+			continue;
+
+		/* match found and use the latest match */
+		if (longest_matchlen <= len) {
+			candidate = strdup(ent->mnt_dir);
+			if (!candidate) {
+				free(longest_match);
+				endmntent(mnttab);
+				return -ENOMEM;
+			}
+			free(longest_match);
+			longest_matchlen = len;
+			longest_match = candidate;
+			not_btrfs = strcmp(ent->mnt_type, "btrfs");
+		}
+	}
+	endmntent(mnttab);
+
+	if (!longest_match)
+		return -ENOENT;
+	if (not_btrfs) {
+		free(longest_match);
+		return 1;
+	}
+
+	ret = 0;
+	*mount_root = realpath(longest_match, NULL);
+	if (!*mount_root)
+		ret = -errno;
+
+	free(longest_match);
+	return ret;
+}
+
+/*
+ * Check whether the device or file @file is large enough for a filesystem
+ * with the given @nodesize, i.e. at least btrfs_min_dev_size(@nodesize).
+ *
+ * Returns 1 when it is too small, 0 when it is large enough and -errno when
+ * the file cannot be opened or stat'ed.
+ */
+int test_minimum_size(const char *file, u32 nodesize)
+{
+	struct stat statbuf;
+	int fd;
+	int ret;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	/*
+	 * Stat the descriptor that was just opened rather than resolving the
+	 * path a second time, and capture errno before close() can change it.
+	 */
+	if (fstat(fd, &statbuf) < 0) {
+		ret = -errno;
+		close(fd);
+		return ret;
+	}
+
+	ret = btrfs_device_size(fd, &statbuf) < btrfs_min_dev_size(nodesize);
+	close(fd);
+	return ret;
+}
+
+/*
+ * Validate a subvolume name.
+ *
+ * A name is rejected when it is empty, contains a '/', or is one of the
+ * special directory entries "." and "..".
+ *
+ * Returns 1 for an acceptable name and 0 otherwise.
+ */
+int test_issubvolname(const char *name)
+{
+	if (name[0] == '\0')
+		return 0;
+	if (strchr(name, '/') != NULL)
+		return 0;
+	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * Report whether @path names a directory (symbolic links are followed,
+ * because stat() is used).
+ *
+ * Returns:
+ *   1   - path exists and is a directory
+ *   0   - path exists but is something else
+ *   < 0 - -errno from stat()
+ */
+int test_isdir(const char *path)
+{
+	struct stat sb;
+
+	if (stat(path, &sb) < 0)
+		return -errno;
+
+	return S_ISDIR(sb.st_mode) ? 1 : 0;
+}
+
+/*
+ * Replace the mode part of *@units with @mode while keeping the bits selected
+ * by UNITS_MODE_MASK (the low bits that hold the unit base) untouched.
+ */
+void units_set_mode(unsigned *units, unsigned mode)
+{
+	*units = (*units & UNITS_MODE_MASK) | mode;
+}
+
+/*
+ * Replace the base part of *@units with @base while keeping every bit outside
+ * UNITS_MODE_MASK (the mode bits) untouched.
+ */
+void units_set_base(unsigned *units, unsigned base)
+{
+	*units = (*units & ~UNITS_MODE_MASK) | base;
+}
+
+/*
+ * Look up the key that follows the current position of @path.
+ *
+ * Starting at level 0 and walking upwards, the first level whose current
+ * slot is not the last item supplies the key in the following slot. The
+ * walk ends at the first level without a node.
+ *
+ * Returns 0 and fills @key when such a key exists, 1 otherwise.
+ */
+int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+{
+	int level;
+
+	for (level = 0;
+	     level < BTRFS_MAX_LEVEL && path->nodes[level];
+	     level++) {
+		/* Already on the last item of this node: try the parent. */
+		if (path->slots[level] + 1 >=
+		    btrfs_header_nritems(path->nodes[level]))
+			continue;
+
+		/* Level 0 stores items, every other level stores node keys. */
+		if (level != 0)
+			btrfs_node_key_to_cpu(path->nodes[level], key,
+					      path->slots[level] + 1);
+		else
+			btrfs_item_key_to_cpu(path->nodes[level], key,
+					      path->slots[level] + 1);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Human readable name of the block group type in @flag. Only the bits in
+ * BTRFS_BLOCK_GROUP_TYPE_MASK and BTRFS_SPACE_INFO_GLOBAL_RSV are examined;
+ * combinations without a name yield "unknown". The returned string is a
+ * literal and must not be modified or freed.
+ */
+char* btrfs_group_type_str(u64 flag)
+{
+	const u64 type = flag & (BTRFS_BLOCK_GROUP_TYPE_MASK |
+				 BTRFS_SPACE_INFO_GLOBAL_RSV);
+
+	if (type == BTRFS_BLOCK_GROUP_DATA)
+		return "Data";
+	if (type == BTRFS_BLOCK_GROUP_SYSTEM)
+		return "System";
+	if (type == BTRFS_BLOCK_GROUP_METADATA)
+		return "Metadata";
+	if (type == (BTRFS_BLOCK_GROUP_DATA|BTRFS_BLOCK_GROUP_METADATA))
+		return "Data+Metadata";
+	if (type == BTRFS_SPACE_INFO_GLOBAL_RSV)
+		return "GlobalReserve";
+	return "unknown";
+}
+
+/*
+ * Human readable name of the block group profile in @flag. Only the bits in
+ * BTRFS_BLOCK_GROUP_PROFILE_MASK are examined; no bit set means "single" and
+ * unrecognised combinations yield "unknown". The returned string is a literal
+ * and must not be modified or freed.
+ */
+char* btrfs_group_profile_str(u64 flag)
+{
+	const u64 profile = flag & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+	if (profile == 0)
+		return "single";
+	if (profile == BTRFS_BLOCK_GROUP_RAID0)
+		return "RAID0";
+	if (profile == BTRFS_BLOCK_GROUP_RAID1)
+		return "RAID1";
+	if (profile == BTRFS_BLOCK_GROUP_RAID5)
+		return "RAID5";
+	if (profile == BTRFS_BLOCK_GROUP_RAID6)
+		return "RAID6";
+	if (profile == BTRFS_BLOCK_GROUP_DUP)
+		return "DUP";
+	if (profile == BTRFS_BLOCK_GROUP_RAID10)
+		return "RAID10";
+	return "unknown";
+}
+
+/*
+ * Total size in bytes of the filesystem containing @path, computed from
+ * statfs() as block size times block count.
+ *
+ * Returns 0 when statfs() fails.
+ */
+u64 disk_size(char *path)
+{
+	struct statfs sfs;
+
+	if (statfs(path, &sfs) < 0)
+		return 0;
+
+	/*
+	 * Widen before multiplying: both statfs fields are only word sized,
+	 * so on 32-bit systems the product would wrap for large filesystems.
+	 */
+	return (u64)sfs.f_bsize * (u64)sfs.f_blocks;
+}
+
+/*
+ * Size in bytes of the block device @dev as reported by the BLKGETSIZE64
+ * ioctl. Returns 0 when the device cannot be opened or the ioctl fails.
+ */
+u64 get_partition_size(char *dev)
+{
+	u64 bytes;
+	int failed;
+	int devfd;
+
+	devfd = open(dev, O_RDONLY);
+	if (devfd < 0)
+		return 0;
+
+	failed = ioctl(devfd, BLKGETSIZE64, &bytes) < 0;
+	close(devfd);
+
+	return failed ? 0 : bytes;
+}
+
+/*
+ * Probe whether the kernel behind @fd implements BTRFS_IOC_TREE_SEARCH_V2 by
+ * issuing a one-item search for the extent tree root item.
+ *
+ * A definite answer is cached in a static variable, so only the first call
+ * (and calls after an inconclusive failure) actually issue the ioctl.
+ *
+ * Returns 1 when supported, 0 when the kernel rejects the ioctl as
+ * unsupported, and the negative ioctl return value for any other failure.
+ */
+int btrfs_tree_search2_ioctl_supported(int fd)
+{
+	struct btrfs_ioctl_search_args_v2 *args2;
+	struct btrfs_ioctl_search_key *sk;
+	int args2_size = 1024;
+	char args2_buf[args2_size];
+	int ret;
+	static int v2_supported = -1;
+
+	if (v2_supported != -1)
+		return v2_supported;
+
+	/* Do not hand uninitialised stack bytes to the kernel. */
+	memset(args2_buf, 0, args2_size);
+	args2 = (struct btrfs_ioctl_search_args_v2 *)args2_buf;
+	sk = &(args2->key);
+
+	/*
+	 * Search for the extent tree item in the root tree.
+	 */
+	sk->tree_id = BTRFS_ROOT_TREE_OBJECTID;
+	sk->min_objectid = BTRFS_EXTENT_TREE_OBJECTID;
+	sk->max_objectid = BTRFS_EXTENT_TREE_OBJECTID;
+	sk->min_type = BTRFS_ROOT_ITEM_KEY;
+	sk->max_type = BTRFS_ROOT_ITEM_KEY;
+	sk->min_offset = 0;
+	sk->max_offset = (u64)-1;
+	sk->min_transid = 0;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 1;
+	args2->buf_size = args2_size - sizeof(struct btrfs_ioctl_search_args_v2);
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args2);
+
+	/*
+	 * ioctl() signals failure by returning -1 with the cause in errno; it
+	 * never returns a negated error code, so the cause must be read from
+	 * errno. ENOTTY is the generic "ioctl not known for this file" error
+	 * and is treated like EOPNOTSUPP.
+	 */
+	if (ret == 0)
+		v2_supported = 1;
+	else if (errno == EOPNOTSUPP || errno == ENOTTY)
+		v2_supported = 0;
+	else
+		return ret;
+
+	return v2_supported;
+}
+
+/*
+ * Validate @nodesize against @sectorsize and the feature set.
+ *
+ * The node size must not be smaller than the sector size, must not exceed
+ * BTRFS_MAX_METADATA_BLOCKSIZE, must have none of the bits in
+ * (@sectorsize - 1) set, and must equal the sector size when
+ * BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS is set in @features.
+ *
+ * Returns 0 when valid, -1 after printing the first violated rule to stderr.
+ */
+int btrfs_check_nodesize(u32 nodesize, u32 sectorsize, u64 features)
+{
+	if (nodesize < sectorsize) {
+		fprintf(stderr,
+			"ERROR: Illegal nodesize %u (smaller than %u)\n",
+			nodesize, sectorsize);
+		return -1;
+	}
+
+	if (nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+		fprintf(stderr,
+			"ERROR: Illegal nodesize %u (larger than %u)\n",
+			nodesize, BTRFS_MAX_METADATA_BLOCKSIZE);
+		return -1;
+	}
+
+	if (nodesize & (sectorsize - 1)) {
+		fprintf(stderr,
+			"ERROR: Illegal nodesize %u (not aligned to %u)\n",
+			nodesize, sectorsize);
+		return -1;
+	}
+
+	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS &&
+	    nodesize != sectorsize) {
+		fprintf(stderr,
+			"ERROR: Illegal nodesize %u (not equal to %u for mixed block group)\n",
+			nodesize, sectorsize);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy a path argument from SRC to DEST and check the SRC length if it's at
+ * most PATH_MAX and fits into DEST. DESTLEN is supposed to be exact size of
+ * the buffer.
+ * The destination buffer is zero terminated.
+ * Return < 0 for error (-ENAMETOOLONG when SRC does not fit, which includes
+ * a DESTLEN that is zero or negative), 0 otherwise.
+ */
+int arg_copy_path(char *dest, const char *src, int destlen)
+{
+	size_t len = strlen(src);
+
+	/*
+	 * Reject non-positive sizes first: compared against a size_t, a
+	 * negative DESTLEN would turn into a huge unsigned value and let
+	 * any length through.
+	 */
+	if (destlen <= 0 || len >= PATH_MAX || len >= (size_t)destlen)
+		return -ENAMETOOLONG;
+
+	__strncpy__null(dest, src, destlen);
+
+	return 0;
+}
+
+/*
+ * Extract the size-unit options from @argv and return the resulting unit
+ * mode, starting from UNITS_DEFAULT.
+ *
+ * The long options (--raw, --human-readable, --iec, --si, --kbytes,
+ * --mbytes, --gbytes, --tbytes) are always recognised; the short ones
+ * (-b, -h, -H, -k, -m, -g, -t) only when @df_mode is non-zero. Recognised
+ * options are removed from @argv, the remaining entries are moved to the
+ * front in their original order and *@argc is updated to the new count.
+ */
+unsigned int get_unit_mode_from_arg(int *argc, char *argv[], int df_mode)
+{
+	/* How a recognised option changes the unit mode. */
+	enum { SET_WHOLE, SET_MODE, SET_BASE };
+	static const struct {
+		const char *name;
+		int df_only;		/* recognised only when df_mode is set */
+		int action;
+		unsigned int value;
+	} known[] = {
+		{ "--raw",		0, SET_WHOLE,	UNITS_RAW },
+		{ "--human-readable",	0, SET_WHOLE,	UNITS_HUMAN_BINARY },
+		{ "--iec",		0, SET_MODE,	UNITS_BINARY },
+		{ "--si",		0, SET_MODE,	UNITS_DECIMAL },
+		{ "--kbytes",		0, SET_BASE,	UNITS_KBYTES },
+		{ "--mbytes",		0, SET_BASE,	UNITS_MBYTES },
+		{ "--gbytes",		0, SET_BASE,	UNITS_GBYTES },
+		{ "--tbytes",		0, SET_BASE,	UNITS_TBYTES },
+		{ "-b",			1, SET_WHOLE,	UNITS_RAW },
+		{ "-h",			1, SET_WHOLE,	UNITS_HUMAN_BINARY },
+		{ "-H",			1, SET_WHOLE,	UNITS_HUMAN_DECIMAL },
+		{ "-k",			1, SET_BASE,	UNITS_KBYTES },
+		{ "-m",			1, SET_BASE,	UNITS_MBYTES },
+		{ "-g",			1, SET_BASE,	UNITS_GBYTES },
+		{ "-t",			1, SET_BASE,	UNITS_TBYTES },
+	};
+	const int nr_known = sizeof(known) / sizeof(known[0]);
+	unsigned int unit_mode = UNITS_DEFAULT;
+	int src;
+	int dst;
+	int k;
+
+	/* Pass 1: apply every recognised option and blank out its slot. */
+	for (src = 0; src < *argc; src++) {
+		for (k = 0; k < nr_known; k++) {
+			if (known[k].df_only && !df_mode)
+				continue;
+			if (strcmp(argv[src], known[k].name))
+				continue;
+
+			switch (known[k].action) {
+			case SET_WHOLE:
+				unit_mode = known[k].value;
+				break;
+			case SET_MODE:
+				units_set_mode(&unit_mode, known[k].value);
+				break;
+			case SET_BASE:
+				units_set_base(&unit_mode, known[k].value);
+				break;
+			}
+			argv[src] = NULL;
+			break;
+		}
+	}
+
+	/* Pass 2: close the gaps left by the removed options. */
+	dst = 0;
+	for (src = 0; src < *argc; src++) {
+		if (argv[src])
+			argv[dst++] = argv[src];
+	}
+
+	*argc = dst;
+
+	return unit_mode;
+}
+
+/*
+ * Return 1 when @str is a non-empty string made up only of the ASCII digits
+ * '0'..'9', and 0 otherwise (empty string, sign, blanks, any other character).
+ */
+int string_is_numerical(const char *str)
+{
+	const char *p = str;
+
+	/* At least one leading digit is mandatory. */
+	if (*p < '0' || *p > '9')
+		return 0;
+
+	do {
+		p++;
+	} while (*p >= '0' && *p <= '9');
+
+	/* Valid only if the run of digits reached the terminator. */
+	return *p == '\0';
+}
+
+/*
+ * Run getopt_long over @argv for a command that accepts no options at all.
+ * This lets getopt reorder the arguments and consume the "--" separator.
+ * Because neither short nor long options are defined, every option getopt
+ * returns is an unknown one (getopt reports it itself); when @usagestr is
+ * given, usage() is called with it for each of them.
+ */
+void clean_args_no_options(int argc, char *argv[], const char * const *usagestr)
+{
+	static const struct option no_long_options[] = {
+		{NULL, 0, NULL, 0}
+	};
+
+	while (getopt_long(argc, argv, "", no_long_options, NULL) >= 0) {
+		if (usagestr)
+			usage(usagestr);
+	}
+}
diff --git a/utils.h b/utils.h
new file mode 100644
index 00000000..d53357a2
--- /dev/null
+++ b/utils.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_UTILS_H__
+#define __BTRFS_UTILS_H__
+
+#include <sys/stat.h>
+#include "ctree.h"
+#include <dirent.h>
+#include <stdarg.h>
+
+/*
+ * mkfs sizing constants and defaults:
+ * - BTRFS_MKFS_SYSTEM_GROUP_SIZE: 4 MiB, part of the minimum device size
+ *   computed by btrfs_min_dev_size()
+ * - BTRFS_MKFS_SMALL_VOLUME_SIZE: 1 GiB, the threshold is_vol_small() compares
+ *   the device size against
+ * - BTRFS_MKFS_DEFAULT_NODE_SIZE: 16 KiB
+ * - BTRFS_MKFS_DEFAULT_FEATURES: incompat feature bits set by default
+ */
+#define BTRFS_MKFS_SYSTEM_GROUP_SIZE (4 * 1024 * 1024)
+#define BTRFS_MKFS_SMALL_VOLUME_SIZE (1024 * 1024 * 1024)
+#define BTRFS_MKFS_DEFAULT_NODE_SIZE 16384
+#define BTRFS_MKFS_DEFAULT_FEATURES \
+ (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF \
+ | BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+
+/*
+ * Avoid multi-device features (RAID56) and mixed block groups
+ */
+#define BTRFS_CONVERT_ALLOWED_FEATURES \
+ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF \
+ | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL \
+ | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO \
+ | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 \
+ | BTRFS_FEATURE_INCOMPAT_BIG_METADATA \
+ | BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF \
+ | BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA \
+ | BTRFS_FEATURE_INCOMPAT_NO_HOLES)
+
+/* Highest bit of a u64 feature mask; NOTE(review): consumer not visible here */
+#define BTRFS_FEATURE_LIST_ALL (1ULL << 63)
+
+/* Bit flags selecting a scan method; NOTE(review): consumer not visible here */
+#define BTRFS_SCAN_MOUNTED (1ULL << 0)
+#define BTRFS_SCAN_LBLKID (1ULL << 1)
+
+#define BTRFS_UPDATE_KERNEL 1
+
+/* Argument kinds; presumably the results of check_arg_type() - TODO confirm */
+#define BTRFS_ARG_UNKNOWN 0
+#define BTRFS_ARG_MNTPOINT 1
+#define BTRFS_ARG_UUID 2
+#define BTRFS_ARG_BLKDEV 3
+#define BTRFS_ARG_REG 4
+
+/* 36 characters of textual UUID plus the terminating NUL */
+#define BTRFS_UUID_UNPARSED_SIZE 37
+
+#define ARGV0_BUF_SIZE PATH_MAX
+
+/*
+ * Codes for long-only options. They start at 256, i.e. above every value a
+ * single option character can have.
+ */
+#define GETOPT_VAL_SI 256
+#define GETOPT_VAL_IEC 257
+#define GETOPT_VAL_RAW 258
+#define GETOPT_VAL_HUMAN_READABLE 259
+#define GETOPT_VAL_KBYTES 260
+#define GETOPT_VAL_MBYTES 261
+#define GETOPT_VAL_GBYTES 262
+#define GETOPT_VAL_TBYTES 263
+
+#define GETOPT_VAL_HELP 270
+
+/* Argument count checks; NOTE(review): return convention not visible here */
+int check_argc_exact(int nargs, int expected);
+int check_argc_min(int nargs, int expected);
+int check_argc_max(int nargs, int expected);
+
+/* Helpers that manage the argv[0] text; defined outside this header */
+void fixup_argv0(char **argv, const char *token);
+void set_argv0(char **argv);
+
+/*
+ * Output modes of size
+ *
+ * A unit mode value has two parts:
+ * - the low UNITS_MODE_SHIFT (8) bits hold the base, one of UNITS_BYTES ..
+ *   UNITS_TBYTES (0 is reserved)
+ * - the bits above hold the mode: UNITS_RAW, UNITS_BINARY or UNITS_DECIMAL
+ *
+ * Despite its name, UNITS_MODE_MASK selects the low (base) bits; the mode
+ * bits are the complement, ~UNITS_MODE_MASK.
+ */
+#define UNITS_RESERVED (0)
+#define UNITS_BYTES (1)
+#define UNITS_KBYTES (2)
+#define UNITS_MBYTES (3)
+#define UNITS_GBYTES (4)
+#define UNITS_TBYTES (5)
+#define UNITS_RAW (1U << UNITS_MODE_SHIFT)
+#define UNITS_BINARY (2U << UNITS_MODE_SHIFT)
+#define UNITS_DECIMAL (3U << UNITS_MODE_SHIFT)
+#define UNITS_MODE_MASK ((1U << UNITS_MODE_SHIFT) - 1)
+#define UNITS_MODE_SHIFT (8)
+/* "Human" modes are aliases: binary is the default, decimal the alternative */
+#define UNITS_HUMAN_BINARY (UNITS_BINARY)
+#define UNITS_HUMAN_DECIMAL (UNITS_DECIMAL)
+#define UNITS_HUMAN (UNITS_HUMAN_BINARY)
+#define UNITS_DEFAULT (UNITS_HUMAN)
+
+/* Replace only the mode part, or only the base part, of *units */
+void units_set_mode(unsigned *units, unsigned mode);
+void units_set_base(unsigned *units, unsigned base);
+
+/* Filesystem feature helpers; defined outside this header */
+void btrfs_list_all_fs_features(u64 mask_disallowed);
+char* btrfs_parse_fs_features(char *namelist, u64 *flags);
+void btrfs_process_fs_features(u64 flags);
+void btrfs_parse_features_to_string(char *buf, u64 flags);
+
+/*
+ * Configuration passed to make_btrfs().
+ * NOTE(review): the exact meaning of the individual fields is defined by
+ * make_btrfs(), which is not visible from this header.
+ */
+struct btrfs_mkfs_config {
+ char *label;
+ char *fs_uuid;
+ char *chunk_uuid;
+ u64 blocks[8];
+ u64 num_bytes;
+ u32 nodesize;
+ u32 sectorsize;
+ u32 stripesize;
+ u64 features;
+
+ /* Super bytenr after make_btrfs */
+ u64 super_bytenr;
+};
+
+/* Filesystem creation and device registration/scanning */
+int make_btrfs(int fd, struct btrfs_mkfs_config *cfg);
+int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid);
+int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret,
+ u64 max_block_count, int discard);
+int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int fd, char *path,
+ u64 block_count, u32 io_width, u32 io_align,
+ u32 sectorsize);
+int btrfs_scan_for_fsid(int run_ioctls);
+int btrfs_register_one_device(const char *fname);
+int btrfs_register_all_devices(void);
+char *canonicalize_dm_name(const char *ptname);
+char *canonicalize_path(const char *path);
+int check_mounted(const char *devicename);
+int check_mounted_where(int fd, const char *file, char *where, int size,
+ struct btrfs_fs_devices **fs_devices_mnt);
+int btrfs_device_already_in_root(struct btrfs_root *root, int fd,
+ int super_offset);
+
+/* Size formatting; pretty_size() uses the default unit mode */
+int pretty_size_snprintf(u64 size, char *str, size_t str_bytes, unsigned unit_mode);
+#define pretty_size(size) pretty_size_mode(size, UNITS_DEFAULT)
+const char *pretty_size_mode(u64 size, unsigned mode);
+
+/* Argument parsing, path opening and label access */
+int get_mountpt(char *dev, char *mntpt, size_t size);
+u64 parse_size(char *s);
+u64 parse_qgroupid(const char *p);
+u64 arg_strtou64(const char *str);
+int arg_copy_path(char *dest, const char *src, int destlen);
+int open_file_or_dir(const char *fname, DIR **dirstream);
+int open_file_or_dir3(const char *fname, DIR **dirstream, int open_flags);
+void close_file_or_dir(int fd, DIR *dirstream);
+int get_fs_info(char *path, struct btrfs_ioctl_fs_info_args *fi_args,
+ struct btrfs_ioctl_dev_info_args **di_ret);
+int get_label(const char *btrfs_dev, char *label);
+int set_label(const char *btrfs_dev, const char *label);
+
+char *__strncpy__null(char *dest, const char *src, size_t n);
+int is_block_device(const char *file);
+int is_mount_point(const char *file);
+int check_arg_type(const char *input);
+int open_path_or_dev_mnt(const char *path, DIR **dirstream, int verbose);
+int btrfs_open_dir(const char *path, DIR **dirstream, int verbose);
+u64 btrfs_device_size(int fd, struct stat *st);
+/*
+ * Helper to always get proper size of the destination string.
+ * Only valid when dest is a real array: sizeof(dest) on a pointer would give
+ * the pointer size instead of the buffer size.
+ */
+#define strncpy_null(dest, src) __strncpy__null(dest, src, sizeof(dest))
+/* Device and profile checks used by mkfs and related commands */
+int test_dev_for_mkfs(char *file, int force_overwrite);
+int get_label_mounted(const char *mount_path, char *labelp);
+int get_label_unmounted(const char *dev, char *label);
+int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile,
+	u64 dev_cnt, int mixed, int ssd);
+int group_profile_max_safe_loss(u64 flags);
+int is_vol_small(char *file);
+int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+	int verify);
+int ask_user(char *question);
+int lookup_ino_rootid(int fd, u64 *rootid);
+int btrfs_scan_lblkid(void);
+int get_btrfs_mount(const char *dev, char *mp, size_t mp_size);
+int find_mount_root(const char *path, char **mount_root);
+/* get_device_info() used to be declared twice in this list; once is enough */
+int get_device_info(int fd, u64 devid,
+	struct btrfs_ioctl_dev_info_args *di_args);
+int test_uuid_unique(char *fs_uuid);
+u64 disk_size(char *path);
+u64 get_partition_size(char *dev);
+const char* group_type_str(u64 flags);
+const char* group_profile_str(u64 flags);
+
+/* Parameter is named nodesize to match the definition of the function */
+int test_minimum_size(const char *file, u32 nodesize);
+int test_issubvolname(const char *name);
+int test_isdir(const char *path);
+
+/*
+ * Btrfs minimum size calculation is complicated, it should include at least:
+ * 1. system group size
+ * 2. minimum global block reserve
+ * 3. metadata used at mkfs
+ * 4. space reservation to create uuid for first mount.
+ * Also, raid factor should also be taken into consideration.
+ * To avoid the overkill calculation, (system group + global block rsv) * 2
+ * for *EACH* device should be good enough.
+ */
+
+/* Minimum global block reserve: 1024 tree blocks of @leafsize bytes each. */
+static inline u64 btrfs_min_global_blk_rsv_size(u32 leafsize)
+{
+	/* Widen first so that the shift cannot overflow 32 bits. */
+	return (u64)leafsize << 10;
+}
+
+/* Minimum size in bytes of each device for the given @leafsize. */
+static inline u64 btrfs_min_dev_size(u32 leafsize)
+{
+	return 2 * (BTRFS_MKFS_SYSTEM_GROUP_SIZE +
+		    btrfs_min_global_blk_rsv_size(leafsize));
+}
+
+/* Returns 0 and fills *key when a following key exists, 1 otherwise */
+int find_next_key(struct btrfs_path *path, struct btrfs_key *key);
+/* Both return string literals; unrecognised flags give "unknown" */
+char* btrfs_group_type_str(u64 flag);
+char* btrfs_group_profile_str(u64 flag);
+
+/*
+ * Number of decimal digits needed to print @num.
+ *
+ * Equals floor(log10(num)) + 1 for non-zero values and 1 for zero, computed
+ * by repeated division so that the math library is not needed.
+ */
+static inline int count_digits(u64 num)
+{
+	int digits = 0;
+
+	/* The body runs at least once, which covers num == 0. */
+	do {
+		digits++;
+		num /= 10;
+	} while (num != 0);
+
+	return digits;
+}
+
+/*
+ * Scale @num by @factor tenths, i.e. num * factor / 10 with integer
+ * arithmetic. A factor of 10 returns @num unchanged without any arithmetic.
+ */
+static inline u64 div_factor(u64 num, int factor)
+{
+	if (factor != 10) {
+		num *= factor;
+		num /= 10;
+	}
+	return num;
+}
+
+/* Returns 1 if supported, 0 if not, negative when the probe itself failed */
+int btrfs_tree_search2_ioctl_supported(int fd);
+/* Returns 0 for a valid nodesize, -1 (with a message on stderr) otherwise */
+int btrfs_check_nodesize(u32 nodesize, u32 sectorsize, u64 features);
+
+const char *get_argv0_buf(void);
+
+/*
+ * Help text lines for the unit options, written as a comma separated list of
+ * string literals. This variant lists the long options only, matching
+ * get_unit_mode_from_arg() called with df_mode == 0.
+ */
+#define HELPINFO_UNITS_LONG \
+ "--raw raw numbers in bytes", \
+ "--human-readable human friendly numbers, base 1024 (default)", \
+ "--iec use 1024 as a base (KiB, MiB, GiB, TiB)", \
+ "--si use 1000 as a base (kB, MB, GB, TB)", \
+ "--kbytes show sizes in KiB, or kB with --si", \
+ "--mbytes show sizes in MiB, or MB with --si", \
+ "--gbytes show sizes in GiB, or GB with --si", \
+ "--tbytes show sizes in TiB, or TB with --si"
+
+/*
+ * Same as above, including the short options that get_unit_mode_from_arg()
+ * accepts only when df_mode is non-zero.
+ */
+#define HELPINFO_UNITS_SHORT_LONG \
+ "-b|--raw raw numbers in bytes", \
+ "-h|--human-readable", \
+ " human friendly numbers, base 1024 (default)", \
+ "-H human friendly numbers, base 1000", \
+ "--iec use 1024 as a base (KiB, MiB, GiB, TiB)", \
+ "--si use 1000 as a base (kB, MB, GB, TB)", \
+ "-k|--kbytes show sizes in KiB, or kB with --si", \
+ "-m|--mbytes show sizes in MiB, or MB with --si", \
+ "-g|--gbytes show sizes in GiB, or GB with --si", \
+ "-t|--tbytes show sizes in TiB, or TB with --si"
+
+/* Strips unit options from argv, updates *argc, returns the unit mode */
+unsigned int get_unit_mode_from_arg(int *argc, char *argv[], int df_mode);
+void clean_args_no_options(int argc, char *argv[], const char * const *usage);
+/* Returns 1 when str is a non-empty string of decimal digits, else 0 */
+int string_is_numerical(const char *str);
+
+/*
+ * Print a printf-style message to stderr, prefixed with "WARNING: " and
+ * followed by a newline.
+ */
+__attribute__ ((format (printf, 1, 2)))
+static inline void warning(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fputs("WARNING: ", stderr);
+	vfprintf(stderr, fmt, ap);
+	fputc('\n', stderr);
+	va_end(ap);
+}
+
+/*
+ * Print a printf-style message to stderr, prefixed with "ERROR: " and
+ * followed by a newline.
+ */
+__attribute__ ((format (printf, 1, 2)))
+static inline void error(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fputs("ERROR: ", stderr);
+	vfprintf(stderr, fmt, ap);
+	fputc('\n', stderr);
+	va_end(ap);
+}
+
+/*
+ * Conditional warning: when @condition is non-zero, print the printf-style
+ * message to stderr with a "WARNING: " prefix and a trailing newline.
+ *
+ * Returns 1 when the message was printed and 0 when @condition was zero.
+ */
+__attribute__ ((format (printf, 2, 3)))
+static inline int warning_on(int condition, const char *fmt, ...)
+{
+	va_list ap;
+
+	if (condition) {
+		va_start(ap, fmt);
+		fputs("WARNING: ", stderr);
+		vfprintf(stderr, fmt, ap);
+		fputc('\n', stderr);
+		va_end(ap);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Conditional error: when @condition is non-zero, print the printf-style
+ * message to stderr with an "ERROR: " prefix and a trailing newline.
+ *
+ * Returns 1 when the message was printed and 0 when @condition was zero.
+ */
+__attribute__ ((format (printf, 2, 3)))
+static inline int error_on(int condition, const char *fmt, ...)
+{
+	va_list ap;
+
+	if (condition) {
+		va_start(ap, fmt);
+		fputs("ERROR: ", stderr);
+		vfprintf(stderr, fmt, ap);
+		fputc('\n', stderr);
+		va_end(ap);
+		return 1;
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/uuid-tree.c b/uuid-tree.c
new file mode 100644
index 00000000..39c3f3fd
--- /dev/null
+++ b/uuid-tree.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) STRATO AG 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <uuid/uuid.h>
+#include <sys/ioctl.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "print-tree.h"
+
+
+/*
+ * Split a 16 byte UUID into the two 64-bit halves used as a uuid tree key:
+ * the first 8 bytes, read as little-endian, become the objectid and the
+ * following 8 bytes become the offset.
+ */
+static void btrfs_uuid_to_key(const u8 *uuid, u64 *key_objectid,
+			      u64 *key_offset)
+{
+	const u8 *second_half = uuid + sizeof(u64);
+
+	*key_objectid = get_unaligned_le64(uuid);
+	*key_offset = get_unaligned_le64(second_half);
+}
+
+
+/*
+ * Look up @uuid with key type @type in the uuid tree of the filesystem
+ * behind @fd, using the BTRFS_IOC_TREE_SEARCH ioctl, and store the first id
+ * recorded in the item in *@subid.
+ *
+ * Returns 0 if an item was found, -ENOENT otherwise. A failing ioctl and an
+ * item with an illegal size are reported and also mapped to -ENOENT.
+ */
+static int btrfs_uuid_tree_lookup_any(int fd, const u8 *uuid, u8 type,
+				      u64 *subid)
+{
+	struct btrfs_ioctl_search_args search;
+	struct btrfs_ioctl_search_header *header;
+	u64 objectid = 0;
+	u64 offset;
+	u32 item_size;
+	__le64 first_id;
+	int err;
+
+	btrfs_uuid_to_key(uuid, &objectid, &offset);
+
+	/* Ask for exactly the one key (objectid, type, offset). */
+	memset(&search, 0, sizeof(search));
+	search.key.tree_id = BTRFS_UUID_TREE_OBJECTID;
+	search.key.min_objectid = objectid;
+	search.key.max_objectid = objectid;
+	search.key.min_type = type;
+	search.key.max_type = type;
+	search.key.min_offset = offset;
+	search.key.max_offset = offset;
+	search.key.max_transid = (u64)-1;
+	search.key.nr_items = 1;
+
+	err = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search);
+	if (err < 0) {
+		fprintf(stderr,
+			"ioctl(BTRFS_IOC_TREE_SEARCH, uuid, key %016llx, UUID_KEY, %016llx) ret=%d, error: %s\n",
+			(unsigned long long)objectid,
+			(unsigned long long)offset, err, strerror(errno));
+		return -ENOENT;
+	}
+
+	/* nr_items was updated by the kernel to the number of items returned. */
+	if (search.key.nr_items < 1)
+		return -ENOENT;
+
+	header = (struct btrfs_ioctl_search_header *)(search.buf);
+	item_size = header->len;
+	/* The payload is a non-empty array of 64-bit ids. */
+	if ((item_size & (sizeof(u64) - 1)) || item_size == 0) {
+		printf("btrfs: uuid item with illegal size %lu!\n",
+		       (unsigned long)item_size);
+		return -ENOENT;
+	}
+
+	/* return first stored id; the data starts right after the header */
+	memcpy(&first_id, header + 1, sizeof(first_id));
+	*subid = le64_to_cpu(first_id);
+
+	return 0;
+}
+
+/*
+ * Look up @uuid under key type BTRFS_UUID_KEY_SUBVOL and store the first id
+ * found in *@subvol_id. Returns 0 on success and -ENOENT otherwise (see
+ * btrfs_uuid_tree_lookup_any()).
+ */
+int btrfs_lookup_uuid_subvol_item(int fd, const u8 *uuid, u64 *subvol_id)
+{
+ return btrfs_uuid_tree_lookup_any(fd, uuid, BTRFS_UUID_KEY_SUBVOL,
+ subvol_id);
+}
+
+/*
+ * Look up @uuid under key type BTRFS_UUID_KEY_RECEIVED_SUBVOL and store the
+ * first id found in *@subvol_id. Returns 0 on success and -ENOENT otherwise
+ * (see btrfs_uuid_tree_lookup_any()).
+ */
+int btrfs_lookup_uuid_received_subvol_item(int fd, const u8 *uuid,
+ u64 *subvol_id)
+{
+ return btrfs_uuid_tree_lookup_any(fd, uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ subvol_id);
+}
diff --git a/version.h.in b/version.h.in
new file mode 100644
index 00000000..db5ad51f
--- /dev/null
+++ b/version.h.in
@@ -0,0 +1,14 @@
+/*
+ * Template for version.h: every @NAME@ placeholder below is substituted when
+ * the real header is generated from this file.
+ */
+#ifndef __LIBBTRFS_VERSION_H__
+#define __LIBBTRFS_VERSION_H__
+
+#define BTRFS_LIB_MAJOR @LIBBTRFS_MAJOR@
+#define BTRFS_LIB_MINOR @LIBBTRFS_MINOR@
+#define BTRFS_LIB_PATCHLEVEL @LIBBTRFS_PATCHLEVEL@
+
+/*
+ * Single comparable number: major * 10000 + minor * 100 + patchlevel.
+ * The encoding stays unambiguous only while minor and patchlevel are below
+ * 100.
+ */
+#define BTRFS_LIB_VERSION ( BTRFS_LIB_MAJOR * 10000 + \
+ BTRFS_LIB_MINOR * 100 + \
+ BTRFS_LIB_PATCHLEVEL )
+
+/* Version string built from the package version */
+#define BTRFS_BUILD_VERSION "Btrfs @PACKAGE_VERSION@"
+
+#endif
diff --git a/version.sh b/version.sh
new file mode 100755
index 00000000..3ad00e3a
--- /dev/null
+++ b/version.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# determine-version -- report a useful version for releases
+#
+# Copyright 2008, Aron Griffis <agriffis@n01se.net>
+# Copyright 2008, Oracle
+# Released under the GNU GPLv2
+#
+# Usage: version.sh [--configure]
+#   Prints the version string. With --configure the trailing newline is
+#   omitted. The script always exits with status 0.
+
+# Fallback version, used when git or a .git directory is not available
+v="v4.4.1"
+
+opt=$1
+
+# Only consult git when the tool exists and we are in the top of a checkout
+which git &> /dev/null
+if [ $? == 0 -a -d .git ]; then
+ if head=`git rev-parse --verify HEAD 2>/dev/null`; then
+ # Prefer the description derived from tags when there is one
+ if tag=`git describe --tags 2>/dev/null`; then
+ v="$tag"
+ fi
+
+ # Are there uncommitted changes?
+ # "read" succeeds only if at least one changed path outside
+ # scripts/package is left after the grep filter.
+ git update-index --refresh --unmerged > /dev/null
+ if git diff-index --name-only HEAD | grep -v "^scripts/package" \
+ | read dummy; then
+ v="$v"-dirty
+ fi
+ fi
+fi
+
+if [ "$opt" = "--configure" ]; then
+ # Omit the trailing newline, so that m4_esyscmd can use the result directly.
+ echo -n "$v"
+else
+ echo "$v"
+fi
+
+exit 0
+
diff --git a/volumes.c b/volumes.c
new file mode 100644
index 00000000..cc3e6b7f
--- /dev/null
+++ b/volumes.c
@@ -0,0 +1,2133 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <uuid/uuid.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "volumes.h"
+#include "utils.h"
+
+/*
+ * A (device, physical byte offset) pair.
+ * NOTE(review): no user of this struct is visible in this part of the
+ * file -- confirm it is still needed.
+ */
+struct stripe {
+ struct btrfs_device *dev;
+ u64 physical;
+};
+
+/*
+ * Number of parity stripes implied by the profile bits in map->type:
+ * 1 when the RAID5 bit is set, otherwise 2 when the RAID6 bit is set,
+ * otherwise 0.
+ */
+static inline int nr_parity_stripes(struct map_lookup *map)
+{
+	int parity = 0;
+
+	if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+		parity = 1;
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+		parity = 2;
+
+	return parity;
+}
+
+/* Stripes of @map that are left once the parity stripes are subtracted. */
+static inline int nr_data_stripes(struct map_lookup *map)
+{
+	int parity = nr_parity_stripes(map);
+
+	return map->num_stripes - parity;
+}
+
+/*
+ * Non-zero when x equals BTRFS_RAID5_P_STRIPE or BTRFS_RAID6_Q_STRIPE.
+ * x is expanded twice, so it should not have side effects.
+ */
+#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) )
+
+/*
+ * File-local list of every struct btrfs_fs_devices known so far, linked
+ * through btrfs_fs_devices::list.
+ */
+static LIST_HEAD(fs_uuids);
+
+/*
+ * Walk the device list @head and return the first entry whose devid equals
+ * @devid and whose uuid matches the BTRFS_UUID_SIZE bytes at @uuid.
+ * Returns NULL when no entry matches.
+ */
+static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
+					  u8 *uuid)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, head) {
+		struct btrfs_device *candidate;
+
+		candidate = list_entry(pos, struct btrfs_device, dev_list);
+		if (candidate->devid != devid)
+			continue;
+		if (memcmp(candidate->uuid, uuid, BTRFS_UUID_SIZE) == 0)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Search the fs_uuids list for an entry whose fsid matches the
+ * BTRFS_FSID_SIZE bytes at @fsid.  Returns the entry, or NULL if none.
+ */
+static struct btrfs_fs_devices *find_fsid(u8 *fsid)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &fs_uuids) {
+		struct btrfs_fs_devices *candidate;
+
+		candidate = list_entry(pos, struct btrfs_fs_devices, list);
+		if (!memcmp(fsid, candidate->fsid, BTRFS_FSID_SIZE))
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Record the device at @path, described by @disk_super, in the fs_uuids
+ * bookkeeping.
+ *
+ * A btrfs_fs_devices entry is created for the super block's fsid if none
+ * exists yet.  Within it, a btrfs_device is created for (@devid, device
+ * uuid) if missing; for an existing device only the stored path is replaced
+ * when it is unset or differs from @path.  The entry's latest_devid /
+ * latest_trans / lowest_devid trackers are then refreshed.
+ *
+ * On success 0 is returned and *@fs_devices_ret points at the entry.
+ * Returns -ENOMEM on allocation failure (*@fs_devices_ret is not written).
+ */
+static int device_list_add(const char *path,
+			   struct btrfs_super_block *disk_super,
+			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
+{
+	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_device *device = NULL;
+	u64 found_transid = btrfs_super_generation(disk_super);
+
+	fs_devices = find_fsid(disk_super->fsid);
+	if (fs_devices) {
+		device = __find_device(&fs_devices->devices, devid,
+				       disk_super->dev_item.uuid);
+	} else {
+		/* first device seen for this filesystem */
+		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
+		if (!fs_devices)
+			return -ENOMEM;
+		INIT_LIST_HEAD(&fs_devices->devices);
+		list_add(&fs_devices->list, &fs_uuids);
+		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
+		fs_devices->latest_devid = devid;
+		fs_devices->latest_trans = found_transid;
+		fs_devices->lowest_devid = (u64)-1;
+	}
+
+	if (device) {
+		/* known device: only refresh the path if it changed */
+		if (!device->name || strcmp(device->name, path)) {
+			char *new_name = strdup(path);
+
+			if (!new_name)
+				return -ENOMEM;
+			kfree(device->name);
+			device->name = new_name;
+		}
+	} else {
+		device = kzalloc(sizeof(*device), GFP_NOFS);
+		if (!device) {
+			/* we can safely leave the fs_devices entry around */
+			return -ENOMEM;
+		}
+		device->fd = -1;
+		device->devid = devid;
+		device->generation = found_transid;
+		memcpy(device->uuid, disk_super->dev_item.uuid,
+		       BTRFS_UUID_SIZE);
+
+		device->name = kstrdup(path, GFP_NOFS);
+		if (!device->name) {
+			kfree(device);
+			return -ENOMEM;
+		}
+
+		device->label = kstrdup(disk_super->label, GFP_NOFS);
+		if (!device->label) {
+			kfree(device->name);
+			kfree(device);
+			return -ENOMEM;
+		}
+
+		device->total_devs = btrfs_super_num_devices(disk_super);
+		device->super_bytes_used = btrfs_super_bytes_used(disk_super);
+		device->total_bytes =
+			btrfs_stack_device_total_bytes(&disk_super->dev_item);
+		device->bytes_used =
+			btrfs_stack_device_bytes_used(&disk_super->dev_item);
+		list_add(&device->dev_list, &fs_devices->devices);
+		device->fs_devices = fs_devices;
+	}
+
+	/* remember which device carries the newest generation */
+	if (found_transid > fs_devices->latest_trans) {
+		fs_devices->latest_devid = devid;
+		fs_devices->latest_trans = found_transid;
+	}
+	if (devid < fs_devices->lowest_devid)
+		fs_devices->lowest_devid = devid;
+
+	*fs_devices_ret = fs_devices;
+	return 0;
+}
+
+/*
+ * Close and free every device of @fs_devices, then unlink and free
+ * @fs_devices itself.  The same is repeated for each entry reachable
+ * through the ->seed pointer.
+ *
+ * Open descriptors are fsync()ed, their cached pages are dropped with
+ * posix_fadvise(POSIX_FADV_DONTNEED) (a warning is printed on failure) and
+ * then closed.  Always returns 0; @fs_devices must not be used afterwards.
+ */
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+{
+	for (;;) {
+		struct btrfs_fs_devices *seed;
+		struct list_head *devs = &fs_devices->devices;
+
+		while (!list_empty(devs)) {
+			struct btrfs_device *dev;
+
+			dev = list_entry(devs->next, struct btrfs_device,
+					 dev_list);
+			if (dev->fd != -1) {
+				fsync(dev->fd);
+				if (posix_fadvise(dev->fd, 0, 0,
+						  POSIX_FADV_DONTNEED))
+					fprintf(stderr, "Warning, could not drop caches\n");
+				close(dev->fd);
+				dev->fd = -1;
+			}
+			dev->writeable = 0;
+			list_del(&dev->dev_list);
+			/* free the memory */
+			free(dev->name);
+			free(dev->label);
+			free(dev);
+		}
+
+		/* detach the seed chain before releasing this entry */
+		seed = fs_devices->seed;
+		fs_devices->seed = NULL;
+		list_del(&fs_devices->list);
+		free(fs_devices);
+
+		if (!seed)
+			break;
+		fs_devices = seed;
+	}
+
+	return 0;
+}
+
+/*
+ * Drain the fs_uuids list by calling btrfs_close_devices() on its first
+ * entry until the list is empty.
+ */
+void btrfs_close_all_devices(void)
+{
+	while (!list_empty(&fs_uuids)) {
+		struct list_head *first = fs_uuids.next;
+
+		btrfs_close_devices(list_entry(first, struct btrfs_fs_devices,
+					       list));
+	}
+}
+
+/*
+ * open() every named device of @fs_devices with @flags.
+ *
+ * Devices without a name are reported and skipped.  For each opened device
+ * the descriptor is stored in device->fd, cached pages are dropped with
+ * posix_fadvise() (a warning is printed on failure), latest_bdev /
+ * lowest_bdev are updated when the devid matches, and the device is marked
+ * writeable if O_RDWR is set in @flags.
+ *
+ * Returns 0 on success.  If an open() fails, btrfs_close_devices() is
+ * called on @fs_devices (which frees it) and -errno is returned.
+ */
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &fs_devices->devices) {
+		struct btrfs_device *device;
+		int fd;
+
+		device = list_entry(pos, struct btrfs_device, dev_list);
+		if (!device->name) {
+			printk("no name for device %llu, skip it now\n", device->devid);
+			continue;
+		}
+
+		fd = open(device->name, flags);
+		if (fd < 0) {
+			int ret = -errno;
+
+			btrfs_close_devices(fs_devices);
+			return ret;
+		}
+
+		if (posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED))
+			fprintf(stderr, "Warning, could not drop caches\n");
+
+		if (device->devid == fs_devices->latest_devid)
+			fs_devices->latest_bdev = fd;
+		if (device->devid == fs_devices->lowest_devid)
+			fs_devices->lowest_bdev = fd;
+		device->fd = fd;
+		if (flags & O_RDWR)
+			device->writeable = 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Read the super block of the device open on @fd and register the device
+ * (under @path) through device_list_add().
+ *
+ * *@total_devs is set to 1 when the super block carries
+ * BTRFS_SUPER_FLAG_METADUMP, otherwise to the super block's device count.
+ *
+ * Returns -EIO if the super block cannot be read, otherwise the result of
+ * device_list_add().
+ */
+int btrfs_scan_one_device(int fd, const char *path,
+			  struct btrfs_fs_devices **fs_devices_ret,
+			  u64 *total_devs, u64 super_offset, int super_recover)
+{
+	char buf[BTRFS_SUPER_INFO_SIZE];
+	struct btrfs_super_block *disk_super = (struct btrfs_super_block *)buf;
+	u64 devid;
+	int ret;
+
+	ret = btrfs_read_dev_super(fd, disk_super, super_offset, super_recover);
+	if (ret < 0)
+		return -EIO;
+
+	devid = btrfs_stack_device_id(&disk_super->dev_item);
+	if (!(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP))
+		*total_devs = btrfs_super_num_devices(disk_super);
+	else
+		*total_devs = 1;
+
+	return device_list_add(path, disk_super, devid, fs_devices_ret);
+}
+
+/*
+ * Find a free range of at least @num_bytes on @device and store its start
+ * offset in *@start.
+ *
+ * The search window is [max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER,
+ * fs_info->alloc_start), device->total_bytes).  The items keyed by the
+ * device's devid in device->dev_root are walked in order and the first gap
+ * that is large enough is taken; if none is found the space after the last
+ * dev extent is used.
+ *
+ * Returns 0 on success, -ENOSPC when nothing fits inside the window, or a
+ * negative value propagated from the tree search helpers.  @path is
+ * released before returning in every case.
+ *
+ * this uses a pretty simple search, the expectation is that it is
+ * called very infrequently and that a given device has a small number
+ * of extents
+ */
+static int find_free_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device,
+ struct btrfs_path *path,
+ u64 num_bytes, u64 *start)
+{
+ struct btrfs_key key;
+ struct btrfs_root *root = device->dev_root;
+ struct btrfs_dev_extent *dev_extent = NULL;
+ u64 hole_size = 0;
+ /* end offset of the most recently seen dev extent */
+ u64 last_byte = 0;
+ u64 search_start = root->fs_info->alloc_start;
+ u64 search_end = device->total_bytes;
+ int ret;
+ int slot = 0;
+ /* set once at least one dev extent of this device has been seen */
+ int start_found;
+ struct extent_buffer *l;
+
+ start_found = 0;
+ path->reada = 2;
+
+ /* FIXME use last free of some kind */
+
+ /* we don't want to overwrite the superblock on the drive,
+ * so we make sure to start at an offset of at least 1MB
+ */
+ search_start = max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER, search_start);
+
+ if (search_start >= search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
+
+ key.objectid = device->devid;
+ key.offset = search_start;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto error;
+ /*
+ * NOTE(review): presumably steps back so that an extent beginning
+ * before search_start is examined too -- confirm against
+ * btrfs_previous_item().
+ */
+ ret = btrfs_previous_item(root, path, 0, key.type);
+ if (ret < 0)
+ goto error;
+ /*
+ * NOTE(review): key is read again at the top of every loop iteration
+ * before it is used, so this read looks redundant.
+ */
+ l = path->nodes[0];
+ btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+ while (1) {
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(l)) {
+ /* ran off the end of this leaf, try the next one */
+ ret = btrfs_next_leaf(root, path);
+ if (ret == 0)
+ continue;
+ if (ret < 0)
+ goto error;
+ /* also entered from below once the devid has been passed */
+no_more_items:
+ if (!start_found) {
+ /* no dev extent seen at all: use the window start */
+ if (search_start >= search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
+ *start = search_start;
+ start_found = 1;
+ goto check_pending;
+ }
+ /* otherwise use the space after the last extent seen */
+ *start = last_byte > search_start ?
+ last_byte : search_start;
+ if (search_end <= *start) {
+ ret = -ENOSPC;
+ goto error;
+ }
+ goto check_pending;
+ }
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ /* skip items of lower devids */
+ if (key.objectid < device->devid)
+ goto next;
+
+ /* past this device's items: nothing more to scan */
+ if (key.objectid > device->devid)
+ goto no_more_items;
+
+ /* is the gap between the previous extent and this key large enough? */
+ if (key.offset >= search_start && key.offset > last_byte &&
+ start_found) {
+ if (last_byte < search_start)
+ last_byte = search_start;
+ hole_size = key.offset - last_byte;
+ if (key.offset > last_byte &&
+ hole_size >= num_bytes) {
+ *start = last_byte;
+ goto check_pending;
+ }
+ }
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
+ goto next;
+ }
+
+ /* remember where this dev extent ends */
+ start_found = 1;
+ dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+ last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+check_pending:
+ /* we have to make sure we didn't find an extent that has already
+ * been allocated by the map tree or the original allocation
+ */
+ btrfs_release_path(path);
+ BUG_ON(*start < search_start);
+
+ /* the candidate range must end inside the device */
+ if (*start + num_bytes > search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
+ /* check for pending inserts here */
+ return 0;
+
+error:
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
+ * Reserve @num_bytes on @device for the chunk identified by @chunk_tree,
+ * @chunk_objectid and @chunk_offset.
+ *
+ * A free range is located with find_free_dev_extent() and a dev extent item
+ * describing it is inserted into device->dev_root.  The start of the range
+ * is returned in *@start.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
+ * from find_free_dev_extent().  A failing item insertion trips BUG_ON().
+ */
+static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+				  struct btrfs_device *device,
+				  u64 chunk_tree, u64 chunk_objectid,
+				  u64 chunk_offset,
+				  u64 num_bytes, u64 *start)
+{
+	struct btrfs_root *root = device->dev_root;
+	struct btrfs_path *path;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = find_free_dev_extent(trans, device, path, num_bytes, start);
+	if (ret == 0) {
+		struct btrfs_dev_extent *extent;
+		struct extent_buffer *leaf;
+		struct btrfs_key key;
+
+		key.objectid = device->devid;
+		key.offset = *start;
+		key.type = BTRFS_DEV_EXTENT_KEY;
+		ret = btrfs_insert_empty_item(trans, root, path, &key,
+					      sizeof(*extent));
+		BUG_ON(ret);
+
+		/* fill in the freshly inserted item */
+		leaf = path->nodes[0];
+		extent = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_dev_extent);
+		btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
+		btrfs_set_dev_extent_chunk_objectid(leaf, extent,
+						    chunk_objectid);
+		btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
+
+		write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+			(unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
+			BTRFS_UUID_SIZE);
+
+		btrfs_set_dev_extent_length(leaf, extent, num_bytes);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Compute the logical offset at which the next chunk of @objectid may
+ * start: the end (key offset + chunk length) of the last chunk item with
+ * that objectid in @root, or 0 when there is none.
+ *
+ * Returns 0 with the result in *@offset, or a negative error from
+ * btrfs_search_slot() (in which case *@offset is left untouched).
+ */
+static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
+{
+	struct btrfs_path *path;
+	struct btrfs_key search_key;
+	int ret;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	/* aim past the last possible chunk item of this objectid */
+	search_key.objectid = objectid;
+	search_key.offset = (u64)-1;
+	search_key.type = BTRFS_CHUNK_ITEM_KEY;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret >= 0) {
+		u64 next = 0;
+
+		BUG_ON(ret == 0);
+
+		ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
+		if (!ret) {
+			struct btrfs_key found_key;
+
+			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+					      path->slots[0]);
+			if (found_key.objectid == objectid) {
+				struct btrfs_chunk *chunk;
+
+				chunk = btrfs_item_ptr(path->nodes[0],
+						       path->slots[0],
+						       struct btrfs_chunk);
+				next = found_key.offset +
+					btrfs_chunk_length(path->nodes[0],
+							   chunk);
+			}
+		}
+		*offset = next;
+		ret = 0;
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Determine the next unused device id: one more than the key offset of the
+ * last BTRFS_DEV_ITEM_KEY item under BTRFS_DEV_ITEMS_OBJECTID in @root, or
+ * 1 when no such item is found.
+ *
+ * Returns 0 with the id in *@objectid, or a negative error from
+ * btrfs_search_slot().  @path is released before returning.
+ */
+static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
+			   u64 *objectid)
+{
+	struct btrfs_key search_key;
+	int ret;
+
+	search_key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	search_key.type = BTRFS_DEV_ITEM_KEY;
+	search_key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret >= 0) {
+		BUG_ON(ret == 0);
+
+		ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
+					  BTRFS_DEV_ITEM_KEY);
+		if (!ret) {
+			struct btrfs_key last_key;
+
+			btrfs_item_key_to_cpu(path->nodes[0], &last_key,
+					      path->slots[0]);
+			*objectid = last_key.offset + 1;
+		} else {
+			*objectid = 1;
+		}
+		ret = 0;
+	}
+
+	btrfs_release_path(path);
+	return ret;
+}
+
+/*
+ * Insert a device item for @device into the chunk root of @root's
+ * filesystem.
+ *
+ * The device gets the next free devid (stored back into device->devid).
+ * type, io_align, io_width, sector_size, total_bytes, bytes_used and uuid
+ * are copied from @device, so the caller is expected to have filled them
+ * in; generation, group, seek speed, bandwidth and start offset are written
+ * as 0.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
+ * from find_next_devid() / btrfs_insert_empty_item().
+ */
+int btrfs_add_device(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_device *device)
+{
+	struct btrfs_dev_item *item;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	u64 new_devid = 0;
+	int ret;
+
+	/* device items live in the chunk tree */
+	root = root->fs_info->chunk_root;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = find_next_devid(root, path, &new_devid);
+	if (ret)
+		goto done;
+
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = new_devid;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*item));
+	if (ret)
+		goto done;
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
+
+	device->devid = new_devid;
+	btrfs_set_device_id(leaf, item, device->devid);
+	btrfs_set_device_generation(leaf, item, 0);
+	btrfs_set_device_type(leaf, item, device->type);
+	btrfs_set_device_io_align(leaf, item, device->io_align);
+	btrfs_set_device_io_width(leaf, item, device->io_width);
+	btrfs_set_device_sector_size(leaf, item, device->sector_size);
+	btrfs_set_device_total_bytes(leaf, item, device->total_bytes);
+	btrfs_set_device_bytes_used(leaf, item, device->bytes_used);
+	btrfs_set_device_group(leaf, item, 0);
+	btrfs_set_device_seek_speed(leaf, item, 0);
+	btrfs_set_device_bandwidth(leaf, item, 0);
+	btrfs_set_device_start_offset(leaf, item, 0);
+
+	write_extent_buffer(leaf, device->uuid,
+			    (unsigned long)btrfs_device_uuid(item),
+			    BTRFS_UUID_SIZE);
+	write_extent_buffer(leaf, root->fs_info->fsid,
+			    (unsigned long)btrfs_device_fsid(item),
+			    BTRFS_UUID_SIZE);
+	btrfs_mark_buffer_dirty(leaf);
+	ret = 0;
+
+done:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Write the current devid, type, io_align, io_width, sector_size,
+ * total_bytes and bytes_used of @device into its existing device item in
+ * the chunk root.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if
+ * the device item does not exist, or a negative error from
+ * btrfs_search_slot().
+ */
+int btrfs_update_device(struct btrfs_trans_handle *trans,
+			struct btrfs_device *device)
+{
+	struct btrfs_root *chunk_root = device->dev_root->fs_info->chunk_root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = device->devid;
+
+	ret = btrfs_search_slot(trans, chunk_root, &key, path, 0, 1);
+	if (ret > 0)
+		ret = -ENOENT;	/* no exact match for this devid */
+
+	if (ret == 0) {
+		struct extent_buffer *leaf = path->nodes[0];
+		struct btrfs_dev_item *item;
+
+		item = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_dev_item);
+
+		btrfs_set_device_id(leaf, item, device->devid);
+		btrfs_set_device_type(leaf, item, device->type);
+		btrfs_set_device_io_align(leaf, item, device->io_align);
+		btrfs_set_device_io_width(leaf, item, device->io_width);
+		btrfs_set_device_sector_size(leaf, item, device->sector_size);
+		btrfs_set_device_total_bytes(leaf, item, device->total_bytes);
+		btrfs_set_device_bytes_used(leaf, item, device->bytes_used);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Append a (disk key, chunk item) pair to the sys_chunk_array of the
+ * in-memory super block copy and grow the recorded array size accordingly.
+ *
+ * @key is converted to its on-disk form; @chunk is copied verbatim for
+ * @item_size bytes.
+ *
+ * Returns 0 on success, or -EFBIG when the pair would not fit within
+ * BTRFS_SYSTEM_CHUNK_ARRAY_SIZE.
+ */
+int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct btrfs_key *key,
+			   struct btrfs_chunk *chunk, int item_size)
+{
+	struct btrfs_super_block *super_copy = root->fs_info->super_copy;
+	struct btrfs_disk_key disk_key;
+	u32 array_size = btrfs_super_sys_array_size(super_copy);
+	u8 *dst;
+
+	if (array_size + item_size + sizeof(disk_key)
+	    > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
+		return -EFBIG;
+
+	btrfs_cpu_key_to_disk(&disk_key, key);
+
+	/* key first, chunk item right behind it */
+	dst = super_copy->sys_chunk_array + array_size;
+	memcpy(dst, &disk_key, sizeof(disk_key));
+	memcpy(dst + sizeof(disk_key), chunk, item_size);
+
+	item_size += sizeof(disk_key);
+	btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
+	return 0;
+}
+
+/*
+ * Logical size of a chunk whose per-device stripe size is @calc_size, for
+ * the profile bits in @type (checked in this order):
+ *   RAID1 / DUP: calc_size
+ *   RAID10:      calc_size * (num_stripes / sub_stripes)
+ *   RAID5:       calc_size * (num_stripes - 1)
+ *   RAID6:       calc_size * (num_stripes - 2)
+ *   otherwise:   calc_size * num_stripes
+ */
+static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
+			       int sub_stripes)
+{
+	if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
+		return calc_size;
+
+	if (type & BTRFS_BLOCK_GROUP_RAID10)
+		return calc_size * (num_stripes / sub_stripes);
+
+	if (type & BTRFS_BLOCK_GROUP_RAID5)
+		return calc_size * (num_stripes - 1);
+
+	if (type & BTRFS_BLOCK_GROUP_RAID6)
+		return calc_size * (num_stripes - 2);
+
+	return calc_size * num_stripes;
+}
+
+
+/*
+ * Stripe length to use for RAID5/6 chunks.  Both arguments are currently
+ * ignored and BTRFS_STRIPE_LEN is always returned.
+ */
+static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
+{
+	/* TODO, add a way to store the preferred stripe size */
+	u32 len = BTRFS_STRIPE_LEN;
+
+	return len;
+}
+
+/*
+ * btrfs_device_avail_bytes - count bytes available for alloc_chunk
+ *
+ * It is not equal to "device->total_bytes - device->bytes_used".
+ * We do not allocate any chunk in 1M at beginning of device, and not
+ * allowed to allocate any chunk before alloc_start if it is specified.
+ * So search holes from max(1M, alloc_start) to device->total_bytes.
+ *
+ * The sum of all holes found is stored in *avail_bytes.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or a
+ * negative value propagated from the tree search helpers (in which case
+ * *avail_bytes is not written).
+ */
+static int btrfs_device_avail_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device,
+ u64 *avail_bytes)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root = device->dev_root;
+ struct btrfs_key key;
+ struct btrfs_dev_extent *dev_extent = NULL;
+ struct extent_buffer *l;
+ /* start of the not yet accounted part of the window */
+ u64 search_start = root->fs_info->alloc_start;
+ u64 search_end = device->total_bytes;
+ u64 extent_end = 0;
+ u64 free_bytes = 0;
+ int ret;
+ int slot = 0;
+
+ search_start = max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER, search_start);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /* note: the search key uses alloc_start, not the clamped search_start */
+ key.objectid = device->devid;
+ key.offset = root->fs_info->alloc_start;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+
+ path->reada = 2;
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto error;
+ ret = btrfs_previous_item(root, path, 0, key.type);
+ if (ret < 0)
+ goto error;
+
+ while (1) {
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(l)) {
+ /* end of leaf: continue with the next one, if any */
+ ret = btrfs_next_leaf(root, path);
+ if (ret == 0)
+ continue;
+ if (ret < 0)
+ goto error;
+ break;
+ }
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ /* only dev extent items of this device are of interest */
+ if (key.objectid < device->devid)
+ goto next;
+ if (key.objectid > device->devid)
+ break;
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ goto next;
+ if (key.offset > search_end)
+ break;
+ /* hole between the accounted part and this extent */
+ if (key.offset > search_start)
+ free_bytes += key.offset - search_start;
+
+ /* move the accounted part to the end of this extent */
+ dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+ extent_end = key.offset + btrfs_dev_extent_length(l,
+ dev_extent);
+ if (extent_end > search_start)
+ search_start = extent_end;
+ if (search_start > search_end)
+ break;
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+
+ /* trailing hole after the last extent */
+ if (search_start < search_end)
+ free_bytes += search_end - search_start;
+
+ *avail_bytes = free_bytes;
+ ret = 0;
+error:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Stripe count limit derived from the leaf data size of root r: the space
+ * left after one item header and one struct btrfs_chunk, divided by the
+ * size of a stripe, plus one.
+ * NOTE(review): the "+ 1" presumably accounts for the stripe already
+ * embedded in struct btrfs_chunk -- confirm against the struct definition.
+ */
+#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \
+ - sizeof(struct btrfs_item) \
+ - sizeof(struct btrfs_chunk)) \
+ / sizeof(struct btrfs_stripe) + 1)
+
+/*
+ * Same idea for system chunks, bounded by BTRFS_SYSTEM_CHUNK_ARRAY_SIZE
+ * with room for two disk keys and two chunk items subtracted.
+ * NOTE(review): the reason for reserving two entries is not visible here.
+ */
+#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \
+ - 2 * sizeof(struct btrfs_disk_key) \
+ - 2 * sizeof(struct btrfs_chunk)) \
+ / sizeof(struct btrfs_stripe) + 1)
+
+/*
+ * Allocate a new chunk with the block group flags in @type.
+ *
+ * Stripe count and sizes are derived from @type and the number of devices
+ * recorded in the super block.  Devices with enough free space are picked
+ * from the filesystem's device list; if too few qualify, the request is
+ * retried with fewer stripes or a smaller stripe size.  A dev extent is
+ * then allocated on every picked device, the chunk item is inserted into
+ * the chunk tree, the mapping is added to the mapping tree cache and, for
+ * SYSTEM chunks, the chunk is also appended to the super block's system
+ * chunk array.
+ *
+ * @start:     receives the logical start of the new chunk
+ * @num_bytes: receives the logical size of the new chunk
+ *
+ * Returns 0 on success, -ENOSPC when no suitable set of devices exists,
+ * -ENOMEM on allocation failure, or an error from the helpers.  On every
+ * error return the devices that had been picked are put back onto the
+ * filesystem's device list.  Failures while writing the dev extents or the
+ * chunk item trip BUG_ON().
+ */
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *extent_root, u64 *start,
+		      u64 *num_bytes, u64 type)
+{
+	u64 dev_offset;
+	struct btrfs_fs_info *info = extent_root->fs_info;
+	struct btrfs_root *chunk_root = info->chunk_root;
+	struct btrfs_stripe *stripes;
+	struct btrfs_device *device = NULL;
+	struct btrfs_chunk *chunk;
+	struct list_head private_devs;
+	struct list_head *dev_list = &info->fs_devices->devices;
+	struct list_head *cur;
+	struct map_lookup *map;
+	int min_stripe_size = 1 * 1024 * 1024;
+	u64 calc_size = 8 * 1024 * 1024;
+	u64 min_free;
+	u64 max_chunk_size = 4 * calc_size;
+	u64 avail = 0;
+	u64 max_avail = 0;
+	u64 percent_max;
+	int num_stripes = 1;
+	int max_stripes = 0;
+	int min_stripes = 1;
+	int sub_stripes = 0;
+	int looped = 0;
+	int ret;
+	int index;
+	int stripe_len = BTRFS_STRIPE_LEN;
+	struct btrfs_key key;
+	u64 offset;
+
+	if (list_empty(dev_list)) {
+		return -ENOSPC;
+	}
+
+	/* size limits depend on what the chunk will hold */
+	if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+		    BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
+		    BTRFS_BLOCK_GROUP_RAID10 |
+		    BTRFS_BLOCK_GROUP_DUP)) {
+		if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+			calc_size = 8 * 1024 * 1024;
+			max_chunk_size = calc_size * 2;
+			min_stripe_size = 1 * 1024 * 1024;
+			max_stripes = BTRFS_MAX_DEVS_SYS_CHUNK;
+		} else if (type & BTRFS_BLOCK_GROUP_DATA) {
+			calc_size = 1024 * 1024 * 1024;
+			max_chunk_size = 10 * calc_size;
+			min_stripe_size = 64 * 1024 * 1024;
+			max_stripes = BTRFS_MAX_DEVS(chunk_root);
+		} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+			calc_size = 1024 * 1024 * 1024;
+			max_chunk_size = 4 * calc_size;
+			min_stripe_size = 32 * 1024 * 1024;
+			max_stripes = BTRFS_MAX_DEVS(chunk_root);
+		}
+	}
+
+	/* stripe counts per profile */
+	if (type & BTRFS_BLOCK_GROUP_RAID1) {
+		num_stripes = min_t(u64, 2,
+				  btrfs_super_num_devices(info->super_copy));
+		if (num_stripes < 2)
+			return -ENOSPC;
+		min_stripes = 2;
+	}
+	if (type & BTRFS_BLOCK_GROUP_DUP) {
+		num_stripes = 2;
+		min_stripes = 2;
+	}
+	if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
+		num_stripes = btrfs_super_num_devices(info->super_copy);
+		if (num_stripes > max_stripes)
+			num_stripes = max_stripes;
+		min_stripes = 2;
+	}
+	if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+		num_stripes = btrfs_super_num_devices(info->super_copy);
+		if (num_stripes > max_stripes)
+			num_stripes = max_stripes;
+		if (num_stripes < 4)
+			return -ENOSPC;
+		num_stripes &= ~(u32)1;
+		sub_stripes = 2;
+		min_stripes = 4;
+	}
+	if (type & (BTRFS_BLOCK_GROUP_RAID5)) {
+		num_stripes = btrfs_super_num_devices(info->super_copy);
+		if (num_stripes > max_stripes)
+			num_stripes = max_stripes;
+		if (num_stripes < 2)
+			return -ENOSPC;
+		min_stripes = 2;
+		stripe_len = find_raid56_stripe_len(num_stripes - 1,
+				    btrfs_super_stripesize(info->super_copy));
+	}
+	if (type & (BTRFS_BLOCK_GROUP_RAID6)) {
+		num_stripes = btrfs_super_num_devices(info->super_copy);
+		if (num_stripes > max_stripes)
+			num_stripes = max_stripes;
+		if (num_stripes < 3)
+			return -ENOSPC;
+		min_stripes = 3;
+		stripe_len = find_raid56_stripe_len(num_stripes - 2,
+				    btrfs_super_stripesize(info->super_copy));
+	}
+
+	/* we don't want a chunk larger than 10% of the FS */
+	percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1);
+	max_chunk_size = min(percent_max, max_chunk_size);
+
+again:
+	if (chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes) >
+	    max_chunk_size) {
+		calc_size = max_chunk_size;
+		calc_size /= num_stripes;
+		calc_size /= stripe_len;
+		calc_size *= stripe_len;
+	}
+	/* we don't want tiny stripes */
+	calc_size = max_t(u64, calc_size, min_stripe_size);
+
+	/* round down to a multiple of the stripe length */
+	calc_size /= stripe_len;
+	calc_size *= stripe_len;
+	INIT_LIST_HEAD(&private_devs);
+	cur = dev_list->next;
+	index = 0;
+
+	if (type & BTRFS_BLOCK_GROUP_DUP)
+		min_free = calc_size * 2;
+	else
+		min_free = calc_size;
+
+	/* build a private list of devices we will allocate from */
+	while(index < num_stripes) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		ret = btrfs_device_avail_bytes(trans, device, &avail);
+		if (ret)
+			goto out_restore_devs;
+		cur = cur->next;
+		if (avail >= min_free) {
+			list_move_tail(&device->dev_list, &private_devs);
+			index++;
+			if (type & BTRFS_BLOCK_GROUP_DUP)
+				index++;
+		} else if (avail > max_avail)
+			max_avail = avail;
+		if (cur == dev_list)
+			break;
+	}
+	if (index < num_stripes) {
+		/* not enough devices: hand them back and retry smaller */
+		list_splice(&private_devs, dev_list);
+		if (index >= min_stripes) {
+			num_stripes = index;
+			if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+				num_stripes /= sub_stripes;
+				num_stripes *= sub_stripes;
+			}
+			looped = 1;
+			goto again;
+		}
+		if (!looped && max_avail > 0) {
+			looped = 1;
+			calc_size = max_avail;
+			goto again;
+		}
+		return -ENOSPC;
+	}
+	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+			      &offset);
+	if (ret)
+		goto out_restore_devs;
+	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+	key.offset = offset;
+
+	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
+	if (!chunk) {
+		ret = -ENOMEM;
+		goto out_restore_devs;
+	}
+
+	map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map) {
+		kfree(chunk);
+		ret = -ENOMEM;
+		goto out_restore_devs;
+	}
+
+	stripes = &chunk->stripe;
+	*num_bytes = chunk_bytes_by_type(type, calc_size,
+					 num_stripes, sub_stripes);
+	index = 0;
+	while(index < num_stripes) {
+		struct btrfs_stripe *stripe;
+		BUG_ON(list_empty(&private_devs));
+		cur = private_devs.next;
+		device = list_entry(cur, struct btrfs_device, dev_list);
+
+		/* loop over this device again if we're doing a dup group */
+		if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
+		    (index == num_stripes - 1))
+			list_move_tail(&device->dev_list, dev_list);
+
+		ret = btrfs_alloc_dev_extent(trans, device,
+			     info->chunk_root->root_key.objectid,
+			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
+			     calc_size, &dev_offset);
+		BUG_ON(ret);
+
+		device->bytes_used += calc_size;
+		ret = btrfs_update_device(trans, device);
+		BUG_ON(ret);
+
+		map->stripes[index].dev = device;
+		map->stripes[index].physical = dev_offset;
+		stripe = stripes + index;
+		btrfs_set_stack_stripe_devid(stripe, device->devid);
+		btrfs_set_stack_stripe_offset(stripe, dev_offset);
+		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
+		index++;
+	}
+	BUG_ON(!list_empty(&private_devs));
+
+	/* key was set above */
+	btrfs_set_stack_chunk_length(chunk, *num_bytes);
+	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
+	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
+	btrfs_set_stack_chunk_type(chunk, type);
+	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
+	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
+	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
+	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
+	btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
+	map->sector_size = extent_root->sectorsize;
+	map->stripe_len = stripe_len;
+	map->io_align = stripe_len;
+	map->io_width = stripe_len;
+	map->type = type;
+	map->num_stripes = num_stripes;
+	map->sub_stripes = sub_stripes;
+
+	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
+				btrfs_chunk_item_size(num_stripes));
+	BUG_ON(ret);
+	*start = key.offset;
+
+	map->ce.start = key.offset;
+	map->ce.size = *num_bytes;
+
+	ret = insert_cache_extent(&info->mapping_tree.cache_tree, &map->ce);
+	BUG_ON(ret);
+
+	if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+		ret = btrfs_add_system_chunk(trans, chunk_root, &key,
+				    chunk, btrfs_chunk_item_size(num_stripes));
+		BUG_ON(ret);
+	}
+
+	kfree(chunk);
+	return ret;
+
+out_restore_devs:
+	/*
+	 * private_devs lives on our stack; devices still linked to it must
+	 * go back onto the filesystem's list before we return.
+	 */
+	list_splice(&private_devs, dev_list);
+	return ret;
+}
+
+/*
+ * Allocate a single-stripe chunk of exactly @num_bytes on the first device
+ * of the filesystem's device list.
+ *
+ * One dev extent of @num_bytes is allocated, the chunk item is inserted
+ * into the chunk tree and the mapping is added to the mapping tree cache.
+ *
+ * @start: receives the logical start of the new chunk
+ * @type:  block group flags stored in the chunk item and the mapping
+ *
+ * Returns 0 on success, -ENOSPC when the filesystem has no devices,
+ * -ENOMEM on allocation failure, or the error from find_next_chunk().
+ * Failures while writing the dev extent or the chunk item trip BUG_ON().
+ */
+int btrfs_alloc_data_chunk(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *extent_root, u64 *start,
+			   u64 num_bytes, u64 type)
+{
+	u64 dev_offset;
+	struct btrfs_fs_info *info = extent_root->fs_info;
+	struct btrfs_root *chunk_root = info->chunk_root;
+	struct btrfs_stripe *stripes;
+	struct btrfs_device *device = NULL;
+	struct btrfs_chunk *chunk;
+	struct list_head *dev_list = &info->fs_devices->devices;
+	struct list_head *cur;
+	struct map_lookup *map;
+	u64 calc_size = 8 * 1024 * 1024;
+	int num_stripes = 1;
+	int sub_stripes = 0;
+	int ret;
+	int index;
+	int stripe_len = BTRFS_STRIPE_LEN;
+	struct btrfs_key key;
+
+	/*
+	 * Without this check list_entry() below would turn the list head
+	 * itself into a bogus device pointer.
+	 */
+	if (list_empty(dev_list))
+		return -ENOSPC;
+
+	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+			      &key.offset);
+	if (ret)
+		return ret;
+
+	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
+	if (!chunk)
+		return -ENOMEM;
+
+	map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map) {
+		kfree(chunk);
+		return -ENOMEM;
+	}
+
+	stripes = &chunk->stripe;
+	calc_size = num_bytes;
+
+	/* always allocate from the first device on the list */
+	index = 0;
+	cur = dev_list->next;
+	device = list_entry(cur, struct btrfs_device, dev_list);
+
+	while (index < num_stripes) {
+		struct btrfs_stripe *stripe;
+
+		ret = btrfs_alloc_dev_extent(trans, device,
+			     info->chunk_root->root_key.objectid,
+			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
+			     calc_size, &dev_offset);
+		BUG_ON(ret);
+
+		device->bytes_used += calc_size;
+		ret = btrfs_update_device(trans, device);
+		BUG_ON(ret);
+
+		map->stripes[index].dev = device;
+		map->stripes[index].physical = dev_offset;
+		stripe = stripes + index;
+		btrfs_set_stack_stripe_devid(stripe, device->devid);
+		btrfs_set_stack_stripe_offset(stripe, dev_offset);
+		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
+		index++;
+	}
+
+	/* key was set above */
+	btrfs_set_stack_chunk_length(chunk, num_bytes);
+	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
+	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
+	btrfs_set_stack_chunk_type(chunk, type);
+	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
+	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
+	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
+	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
+	btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
+	map->sector_size = extent_root->sectorsize;
+	map->stripe_len = stripe_len;
+	map->io_align = stripe_len;
+	map->io_width = stripe_len;
+	map->type = type;
+	map->num_stripes = num_stripes;
+	map->sub_stripes = sub_stripes;
+
+	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
+				btrfs_chunk_item_size(num_stripes));
+	BUG_ON(ret);
+	*start = key.offset;
+
+	map->ce.start = key.offset;
+	map->ce.size = num_bytes;
+
+	ret = insert_cache_extent(&info->mapping_tree.cache_tree, &map->ce);
+	BUG_ON(ret);
+
+	kfree(chunk);
+	return ret;
+}
+
+/*
+ * Number of copies of the data at @logical, derived from the profile of
+ * the mapping that covers it:
+ *   DUP / RAID1: the mapping's stripe count
+ *   RAID10:      the mapping's sub stripe count
+ *   RAID5:       2
+ *   RAID6:       3
+ *   otherwise:   1
+ *
+ * When no mapping is found, or the one found does not cover @logical, a
+ * message is printed to stderr and 1 is returned.  @len is only used in
+ * those messages.
+ */
+int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
+{
+	struct cache_extent *ce;
+	struct map_lookup *map;
+
+	ce = search_cache_extent(&map_tree->cache_tree, logical);
+	if (!ce) {
+		fprintf(stderr, "No mapping for %llu-%llu\n",
+			(unsigned long long)logical,
+			(unsigned long long)logical + len);
+		return 1;
+	}
+	if (logical < ce->start || logical > ce->start + ce->size) {
+		fprintf(stderr, "Invalid mapping for %llu-%llu, got "
+			"%llu-%llu\n", (unsigned long long)logical,
+			(unsigned long long)logical + len,
+			(unsigned long long)ce->start,
+			(unsigned long long)ce->start + ce->size);
+		return 1;
+	}
+
+	map = container_of(ce, struct map_lookup, ce);
+
+	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
+		return map->num_stripes;
+	if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+		return map->sub_stripes;
+	if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+		return 2;
+	if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+		return 3;
+
+	return 1;
+}
+
+/*
+ * Find the next block group mapping whose type shares a bit with @type.
+ *
+ * The search starts from the cache extent returned for *@logical.  When
+ * *@logical is 0 that first extent is itself a candidate; otherwise the
+ * search begins with the extent following it.  Every later step advances
+ * to the following extent.
+ *
+ * On a match *@logical and *@size are set to the start and size of the
+ * mapping and 0 is returned.  Returns -ENOENT when no further mapping
+ * matches.
+ */
+int btrfs_next_bg(struct btrfs_mapping_tree *map_tree, u64 *logical,
+		  u64 *size, u64 type)
+{
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	/*
+	 * only jump to next bg if our starting logical is not 0
+	 * As the initial logical for btrfs_next_bg() is 0, and
+	 * if we jump to next bg, we skipped a valid bg.
+	 */
+	int advance = (*logical != 0);
+
+	ce = search_cache_extent(&map_tree->cache_tree, *logical);
+
+	while (ce) {
+		if (advance) {
+			ce = next_cache_extent(ce);
+			if (!ce)
+				return -ENOENT;
+		}
+		/*
+		 * Always move on after the first candidate.  Deciding this
+		 * from the extent's start would re-examine a non-matching
+		 * extent that starts at 0 forever.
+		 */
+		advance = 1;
+
+		map = container_of(ce, struct map_lookup, ce);
+		if (map->type & type) {
+			*logical = ce->start;
+			*size = ce->size;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Reverse-map a physical device offset to the logical address(es) that a
+ * chunk places there.
+ *
+ * @chunk_start: logical address used to look the chunk up in @map_tree;
+ *               a failed lookup is treated as a bug (BUG_ON)
+ * @physical:    byte offset on the device
+ * @devid:       restrict the search to this device id; 0 matches any device
+ * @logical:     out, newly allocated array of matching logical addresses
+ *               (duplicates are suppressed); handed over to the caller
+ * @naddrs:      out, number of valid entries in @logical
+ * @stripe_len:  out, length covered by each returned address (a full data
+ *               stripe set for RAID5/6, one stripe otherwise)
+ *
+ * Returns 0 on success or -ENOMEM if the result array cannot be allocated,
+ * in which case none of the output parameters are touched.
+ */
+int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
+		     u64 chunk_start, u64 physical, u64 devid,
+		     u64 **logical, int *naddrs, int *stripe_len)
+{
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	u64 *buf;
+	u64 bytenr;
+	u64 length;
+	u64 stripe_nr;
+	u64 rmap_len;
+	int i, j, nr = 0;
+
+	ce = search_cache_extent(&map_tree->cache_tree, chunk_start);
+	BUG_ON(!ce);
+	map = container_of(ce, struct map_lookup, ce);
+
+	/* length: how many bytes of each device stripe belong to this chunk */
+	length = ce->size;
+	rmap_len = map->stripe_len;
+	if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+		length = ce->size / (map->num_stripes / map->sub_stripes);
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
+		length = ce->size / map->num_stripes;
+	else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+			      BTRFS_BLOCK_GROUP_RAID6)) {
+		length = ce->size / nr_data_stripes(map);
+		rmap_len = map->stripe_len * nr_data_stripes(map);
+	}
+
+	/* at most one logical address per stripe can be produced */
+	buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+	if (!buf)
+		return -ENOMEM;
+
+	for (i = 0; i < map->num_stripes; i++) {
+		if (devid && map->stripes[i].dev->devid != devid)
+			continue;
+		/* skip stripes whose device range does not contain @physical */
+		if (map->stripes[i].physical > physical ||
+		    map->stripes[i].physical + length <= physical)
+			continue;
+
+		stripe_nr = (physical - map->stripes[i].physical) /
+			    map->stripe_len;
+
+		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+			stripe_nr = (stripe_nr * map->num_stripes + i) /
+				    map->sub_stripes;
+		} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+			stripe_nr = stripe_nr * map->num_stripes + i;
+		} /* else if RAID[56], multiply by nr_data_stripes().
+		   * Alternatively, just use rmap_len below instead of
+		   * map->stripe_len */
+
+		bytenr = ce->start + stripe_nr * rmap_len;
+
+		/* mirrored stripes resolve to the same address: keep one */
+		for (j = 0; j < nr; j++) {
+			if (buf[j] == bytenr)
+				break;
+		}
+		if (j == nr)
+			buf[nr++] = bytenr;
+	}
+
+	*logical = buf;
+	*naddrs = nr;
+	*stripe_len = rmap_len;
+
+	return 0;
+}
+
+/*
+ * Ordering predicate for sort_parity_stripes(): non-zero when @a is greater
+ * than @b, i.e. when two adjacent raid_map entries are out of order.
+ */
+static inline int parity_smaller(u64 a, u64 b)
+{
+	if (a > b)
+		return 1;
+	return 0;
+}
+
+/*
+ * Bubble-sort @raid_map into ascending order and apply the same swaps to
+ * bbio->stripes, so both arrays stay index-aligned.  The parity/syndrome
+ * markers are the largest possible values and therefore end up last.
+ */
+static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map)
+{
+	struct btrfs_bio_stripe saved_stripe;
+	u64 saved_addr;
+	int pos;
+	int swapped;
+
+	do {
+		swapped = 0;
+		for (pos = 0; pos < bbio->num_stripes - 1; pos++) {
+			if (!parity_smaller(raid_map[pos], raid_map[pos + 1]))
+				continue;
+
+			saved_stripe = bbio->stripes[pos];
+			saved_addr = raid_map[pos];
+
+			bbio->stripes[pos] = bbio->stripes[pos + 1];
+			raid_map[pos] = raid_map[pos + 1];
+
+			bbio->stripes[pos + 1] = saved_stripe;
+			raid_map[pos + 1] = saved_addr;
+
+			swapped = 1;
+		}
+	} while (swapped);
+}
+
+/*
+ * Convenience form of __btrfs_map_block() for callers that do not need the
+ * chunk type reported back: the type output pointer is passed as NULL and
+ * every other argument is forwarded unchanged.
+ */
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+		    u64 logical, u64 *length,
+		    struct btrfs_multi_bio **multi_ret, int mirror_num,
+		    u64 **raid_map_ret)
+{
+	int ret;
+
+	ret = __btrfs_map_block(map_tree, rw, logical, length, NULL,
+				multi_ret, mirror_num, raid_map_ret);
+	return ret;
+}
+/*
+ * Map a logical address onto the device stripe(s) that back it.
+ *
+ * @map_tree:     chunk mapping cache to search
+ * @rw:           READ or WRITE; writes to mirrored profiles get every copy
+ * @logical:      logical byte address to translate
+ * @length:       out, number of bytes mappable in one go starting at
+ *                @logical.  On -ENOENT it is instead the distance to the
+ *                next mapped chunk, or (u64)-1 when there is none
+ * @type:         out, optional; receives the chunk type.  Only written on
+ *                the path that also fills @multi_ret
+ * @multi_ret:    out, optional; receives a stripe list allocated here.
+ *                When NULL only @length is computed
+ * @mirror_num:   0 lets the function pick a copy, N > 0 selects copy N; for
+ *                RAID5/6 values above 1 address the parity stripes
+ * @raid_map_ret: out, optional; for a RAID5/6 write or recovery it receives
+ *                an array (allocated here) holding the logical address of
+ *                every returned stripe, with parity stripes marked by
+ *                BTRFS_RAID5_P_STRIPE / BTRFS_RAID6_Q_STRIPE
+ *
+ * Returns 0 on success, -ENOENT when @logical is not inside a known chunk
+ * and -ENOMEM on allocation failure.
+ */
+int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+ u64 logical, u64 *length, u64 *type,
+ struct btrfs_multi_bio **multi_ret, int mirror_num,
+ u64 **raid_map_ret)
+{
+ struct cache_extent *ce;
+ struct map_lookup *map;
+ u64 offset;
+ u64 stripe_offset;
+ u64 stripe_nr;
+ u64 *raid_map = NULL;
+ int stripes_allocated = 8;
+ int stripes_required = 1;
+ int stripe_index;
+ int i;
+ struct btrfs_multi_bio *multi = NULL;
+
+ if (multi_ret && rw == READ) { /* start small for reads; grown below via "again" if needed */
+ stripes_allocated = 1;
+ }
+again:
+ ce = search_cache_extent(&map_tree->cache_tree, logical);
+ if (!ce) {
+ kfree(multi);
+ *length = (u64)-1;
+ return -ENOENT;
+ }
+ if (ce->start > logical) { /* nearest chunk starts after @logical: report the gap */
+ kfree(multi);
+ *length = ce->start - logical;
+ return -ENOENT;
+ }
+
+ if (multi_ret) {
+ multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
+ GFP_NOFS);
+ if (!multi)
+ return -ENOMEM;
+ }
+ map = container_of(ce, struct map_lookup, ce);
+ offset = logical - ce->start; /* byte offset of @logical inside the chunk */
+
+ if (rw == WRITE) {
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_DUP)) {
+ stripes_required = map->num_stripes;
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ stripes_required = map->sub_stripes;
+ }
+ }
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)
+ && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
+ /* RAID[56] write or recovery. Return all stripes */
+ stripes_required = map->num_stripes;
+
+ /* Only allocate the map if we've already got a large enough multi_ret */
+ if (stripes_allocated >= stripes_required) {
+ raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+ if (!raid_map) {
+ kfree(multi);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ /* if our multi bio struct is too small, back off and try again */
+ if (multi_ret && stripes_allocated < stripes_required) {
+ stripes_allocated = stripes_required;
+ kfree(multi);
+ multi = NULL;
+ goto again;
+ }
+ stripe_nr = offset;
+ /*
+ * stripe_nr counts the total number of stripes we have to stride
+ * to get to this block
+ */
+ stripe_nr = stripe_nr / map->stripe_len;
+
+ stripe_offset = stripe_nr * map->stripe_len;
+ BUG_ON(offset < stripe_offset);
+
+ /* stripe_offset is the offset of this block in its stripe*/
+ stripe_offset = offset - stripe_offset;
+
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP)) {
+ /* we limit the length of each bio to what fits in a stripe */
+ *length = min_t(u64, ce->size - offset,
+ map->stripe_len - stripe_offset);
+ } else {
+ *length = ce->size - offset;
+ }
+
+ if (!multi_ret) /* caller only wanted the mappable length */
+ goto out;
+
+ multi->num_stripes = 1;
+ stripe_index = 0;
+ if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+ if (rw == WRITE)
+ multi->num_stripes = map->num_stripes;
+ else if (mirror_num)
+ stripe_index = mirror_num - 1;
+ else
+ stripe_index = stripe_nr % map->num_stripes;
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ int factor = map->num_stripes / map->sub_stripes; /* number of mirror groups */
+
+ stripe_index = stripe_nr % factor;
+ stripe_index *= map->sub_stripes;
+
+ if (rw == WRITE)
+ multi->num_stripes = map->sub_stripes;
+ else if (mirror_num)
+ stripe_index += mirror_num - 1;
+
+ stripe_nr = stripe_nr / factor;
+ } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+ if (rw == WRITE)
+ multi->num_stripes = map->num_stripes;
+ else if (mirror_num)
+ stripe_index = mirror_num - 1;
+ } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+
+ if (raid_map) { /* full-stripe mode: every stripe is returned */
+ int rot;
+ u64 tmp;
+ u64 raid56_full_stripe_start;
+ u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len;
+
+ /*
+ * align the start of our data stripe in the logical
+ * address space
+ */
+ raid56_full_stripe_start = offset / full_stripe_len;
+ raid56_full_stripe_start *= full_stripe_len;
+
+ /* get the data stripe number */
+ stripe_nr = raid56_full_stripe_start / map->stripe_len;
+ stripe_nr = stripe_nr / nr_data_stripes(map);
+
+ /* Work out the disk rotation on this stripe-set */
+ rot = stripe_nr % map->num_stripes;
+
+ /* Fill in the logical address of each stripe */
+ tmp = stripe_nr * nr_data_stripes(map);
+
+ for (i = 0; i < nr_data_stripes(map); i++)
+ raid_map[(i+rot) % map->num_stripes] =
+ ce->start + (tmp + i) * map->stripe_len;
+
+ raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE; /* i == nr_data_stripes(map) here */
+ if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+ raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;
+
+ *length = map->stripe_len;
+ stripe_index = 0;
+ stripe_offset = 0;
+ multi->num_stripes = map->num_stripes;
+ } else {
+ stripe_index = stripe_nr % nr_data_stripes(map);
+ stripe_nr = stripe_nr / nr_data_stripes(map);
+
+ /*
+ * Mirror #0 or #1 means the original data block.
+ * Mirror #2 is RAID5 parity block.
+ * Mirror #3 is RAID6 Q block.
+ */
+ if (mirror_num > 1)
+ stripe_index = nr_data_stripes(map) + mirror_num - 2;
+
+ /* We distribute the parity blocks across stripes */
+ stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
+ }
+ } else {
+ /*
+ * after this do_div call, stripe_nr is the number of stripes
+ * on this device we have to walk to find the data, and
+ * stripe_index is the number of our device in the stripe array
+ */
+ stripe_index = stripe_nr % map->num_stripes;
+ stripe_nr = stripe_nr / map->num_stripes;
+ }
+ BUG_ON(stripe_index >= map->num_stripes);
+
+ for (i = 0; i < multi->num_stripes; i++) { /* emit consecutive map stripes starting at stripe_index */
+ multi->stripes[i].physical =
+ map->stripes[stripe_index].physical + stripe_offset +
+ stripe_nr * map->stripe_len;
+ multi->stripes[i].dev = map->stripes[stripe_index].dev;
+ stripe_index++;
+ }
+ *multi_ret = multi;
+
+ if (type)
+ *type = map->type;
+
+ if (raid_map) { /* reorder so the parity stripes come last, then hand the map out */
+ sort_parity_stripes(multi, raid_map);
+ *raid_map_ret = raid_map;
+ }
+out:
+ return 0;
+}
+
+/*
+ * Look a device up by @devid and @uuid, starting with the filesystem's own
+ * device set and then following its chain of seed device sets.
+ *
+ * A device set is searched when @fsid is NULL, when its fsid equals @fsid,
+ * or when fs_info->ignore_fsid_mismatch is set.  Returns the first match or
+ * NULL.
+ */
+struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
+				       u8 *uuid, u8 *fsid)
+{
+	struct btrfs_fs_devices *set;
+	struct btrfs_device *match;
+
+	for (set = root->fs_info->fs_devices; set; set = set->seed) {
+		/* skip sets that belong to a different filesystem id */
+		if (fsid &&
+		    memcmp(set->fsid, fsid, BTRFS_UUID_SIZE) &&
+		    !root->fs_info->ignore_fsid_mismatch)
+			continue;
+
+		match = __find_device(&set->devices, devid, uuid);
+		if (match)
+			return match;
+	}
+	return NULL;
+}
+
+/*
+ * Return the @instance-th (counting from 0) device in @fs_devices whose id
+ * equals @devid, or NULL when fewer such devices are on the list.
+ */
+struct btrfs_device *
+btrfs_find_device_by_devid(struct btrfs_fs_devices *fs_devices,
+			   u64 devid, int instance)
+{
+	struct list_head *head = &fs_devices->devices;
+	struct btrfs_device *dev;
+	int seen = 0;
+
+	list_for_each_entry(dev, head, dev_list) {
+		if (dev->devid != devid)
+			continue;
+		if (seen == instance)
+			return dev;
+		seen++;
+	}
+	return NULL;
+}
+
+/*
+ * Tell whether the chunk at @chunk_offset has a stripe on a device that is
+ * not writeable.
+ *
+ * Returns 1 if so and 0 otherwise.  While chunk recovery is in progress
+ * (fs_info->is_chunk_recover) the answer is always 0; outside of recovery a
+ * missing chunk mapping is treated as a bug.
+ */
+int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
+{
+	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	int idx;
+
+	ce = search_cache_extent(&map_tree->cache_tree, chunk_offset);
+
+	/*
+	 * During chunk recovering, we may fail to find block group's
+	 * corresponding chunk, we will rebuild it later
+	 */
+	if (root->fs_info->is_chunk_recover)
+		return 0;
+	BUG_ON(!ce);
+
+	map = container_of(ce, struct map_lookup, ce);
+	for (idx = 0; idx < map->num_stripes; idx++) {
+		if (!map->stripes[idx].dev->writeable)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a zero-filled placeholder for a device that a chunk refers to but
+ * that could not be found.  Only the device id is recorded and fd is set to
+ * -1.
+ *
+ * Returns the new device, or NULL if the allocation fails (the original code
+ * dereferenced the unchecked pointer).
+ */
+static struct btrfs_device *fill_missing_device(u64 devid)
+{
+	struct btrfs_device *device;
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	if (!device)
+		return NULL;
+
+	device->devid = devid;
+	device->fd = -1;
+	return device;
+}
+
+/*
+ * Validate one chunk item and add its logical->physical mapping to the
+ * filesystem's mapping tree.
+ *
+ * @root:  root whose fs_info owns the mapping tree and supplies sectorsize
+ * @key:   key of the chunk item; key->offset is the chunk's logical start
+ * @leaf:  extent buffer the chunk item is read from
+ * @chunk: location of the chunk item inside @leaf
+ * @slot:  slot of the item in @leaf, used to verify that the chunk item is
+ *         valid.  For a sys chunk stored in the superblock, pass -1.
+ *
+ * Stripes whose device cannot be found get a placeholder device and a
+ * warning is printed.  A chunk whose start is already covered by an existing
+ * mapping is silently accepted.
+ *
+ * Returns 0 on success, -EIO if the item fails validation and -ENOMEM on
+ * allocation failure.
+ */
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+			  struct extent_buffer *leaf,
+			  struct btrfs_chunk *chunk, int slot)
+{
+	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+	struct map_lookup *map;
+	struct cache_extent *ce;
+	u64 logical;
+	u64 length;
+	u64 stripe_len;
+	u64 devid;
+	u8 uuid[BTRFS_UUID_SIZE];
+	int num_stripes;
+	int ret;
+	int i;
+
+	logical = key->offset;
+	length = btrfs_chunk_length(leaf, chunk);
+	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	/* Validation check */
+	if (!num_stripes) {
+		error("invalid chunk num_stripes: %u", num_stripes);
+		return -EIO;
+	}
+	if (!IS_ALIGNED(logical, root->sectorsize)) {
+		error("invalid chunk logical %llu", logical);
+		return -EIO;
+	}
+	if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+		error("invalid chunk length %llu", length);
+		return -EIO;
+	}
+	if (!is_power_of_2(stripe_len)) {
+		error("invalid chunk stripe length: %llu", stripe_len);
+		return -EIO;
+	}
+	if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+	    btrfs_chunk_type(leaf, chunk)) {
+		error("unrecognized chunk type: %llu",
+		      ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+			BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+		      btrfs_chunk_type(leaf, chunk));
+		return -EIO;
+	}
+
+	ce = search_cache_extent(&map_tree->cache_tree, logical);
+
+	/* already mapped? */
+	if (ce && ce->start <= logical && ce->start + ce->size > logical) {
+		return 0;
+	}
+
+	map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map)
+		return -ENOMEM;
+
+	map->ce.start = logical;
+	map->ce.size = length;
+	map->num_stripes = num_stripes;
+	map->io_width = btrfs_chunk_io_width(leaf, chunk);
+	map->io_align = btrfs_chunk_io_align(leaf, chunk);
+	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
+	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	map->type = btrfs_chunk_type(leaf, chunk);
+	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+
+	/* Check on chunk item type */
+	if (map->type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+			  BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
+		fprintf(stderr, "Unknown chunk type bits: %llu\n",
+			map->type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+				      BTRFS_BLOCK_GROUP_PROFILE_MASK));
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * Btrfs_chunk contains at least one stripe, and for sys_chunk
+	 * it can't exceed the system chunk array size
+	 * For normal chunk, it should match its chunk item size.
+	 */
+	if (num_stripes < 1 ||
+	    (slot == -1 && sizeof(struct btrfs_stripe) * num_stripes >
+	     BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) ||
+	    (slot >= 0 && sizeof(struct btrfs_stripe) * (num_stripes - 1) >
+	     btrfs_item_size_nr(leaf, slot))) {
+		fprintf(stderr, "Invalid num_stripes: %u\n",
+			num_stripes);
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * Device number check against profile
+	 */
+	if ((map->type & BTRFS_BLOCK_GROUP_RAID10 && map->sub_stripes == 0) ||
+	    (map->type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+	    (map->type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+	    (map->type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+	    (map->type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+	    ((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+	     num_stripes != 1)) {
+		fprintf(stderr,
+			"Invalid num_stripes:sub_stripes %u:%u for profile %llu\n",
+			num_stripes, map->sub_stripes,
+			map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+		ret = -EIO;
+		goto out;
+	}
+
+	for (i = 0; i < num_stripes; i++) {
+		map->stripes[i].physical =
+			btrfs_stripe_offset_nr(leaf, chunk, i);
+		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+		read_extent_buffer(leaf, uuid, (unsigned long)
+				   btrfs_stripe_dev_uuid_nr(chunk, i),
+				   BTRFS_UUID_SIZE);
+		map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
+							NULL);
+		if (!map->stripes[i].dev) {
+			map->stripes[i].dev = fill_missing_device(devid);
+			if (!map->stripes[i].dev) {
+				/*
+				 * Placeholders created for earlier stripes
+				 * cannot be told apart from real devices
+				 * here, so they are not released.
+				 */
+				ret = -ENOMEM;
+				goto out;
+			}
+			printf("warning, device %llu is missing\n",
+			       (unsigned long long)devid);
+		}
+
+	}
+	ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
+	BUG_ON(ret);
+
+	return 0;
+out:
+	/* release with the allocator family that created the map */
+	kfree(map);
+	return ret;
+}
+
+/*
+ * Copy the fields of an on-disk device item (@dev_item inside @leaf) into
+ * the in-memory @device: id, sizes, type, I/O geometry and uuid.
+ * Always returns 0.
+ */
+static int fill_device_from_item(struct extent_buffer *leaf,
+				 struct btrfs_dev_item *dev_item,
+				 struct btrfs_device *device)
+{
+	unsigned long uuid_offset = (unsigned long)btrfs_device_uuid(dev_item);
+
+	/* identity */
+	device->devid = btrfs_device_id(leaf, dev_item);
+	device->type = btrfs_device_type(leaf, dev_item);
+	read_extent_buffer(leaf, device->uuid, uuid_offset, BTRFS_UUID_SIZE);
+
+	/* capacity */
+	device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+
+	/* I/O geometry */
+	device->io_align = btrfs_device_io_align(leaf, dev_item);
+	device->io_width = btrfs_device_io_width(leaf, dev_item);
+	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+
+	return 0;
+}
+
+/*
+ * Make sure the device set identified by @fsid is part of the filesystem's
+ * seed chain.
+ *
+ * If the chain already contains it nothing is done.  Otherwise the set is
+ * looked up with find_fsid() (an empty set is created and registered in
+ * fs_uuids when none is known), opened read-only, and inserted at the head
+ * of the seed chain.
+ *
+ * Returns 0 on success, -ENOMEM if the empty set cannot be allocated, or the
+ * error from btrfs_open_devices().
+ */
+static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
+{
+	struct btrfs_fs_devices *seed;
+	struct btrfs_fs_devices *fs_devices;
+	int ret;
+
+	for (seed = root->fs_info->fs_devices->seed; seed; seed = seed->seed) {
+		if (!memcmp(seed->fsid, fsid, BTRFS_UUID_SIZE))
+			return 0;
+	}
+
+	fs_devices = find_fsid(fsid);
+	if (!fs_devices) {
+		/* missing all seed devices */
+		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
+		if (!fs_devices)
+			return -ENOMEM;
+		INIT_LIST_HEAD(&fs_devices->devices);
+		list_add(&fs_devices->list, &fs_uuids);
+		memcpy(fs_devices->fsid, fsid, BTRFS_FSID_SIZE);
+	}
+
+	ret = btrfs_open_devices(fs_devices, O_RDONLY);
+	if (ret)
+		return ret;
+
+	/* link in front of the existing seed chain */
+	fs_devices->seed = root->fs_info->fs_devices->seed;
+	root->fs_info->fs_devices->seed = fs_devices;
+	return 0;
+}
+
+/*
+ * Process one device item from the chunk tree.
+ *
+ * When the item's fsid differs from the filesystem's own, the matching seed
+ * device set is opened first.  The in-memory device is then looked up; if it
+ * is not known yet a new one (fd = -1) is created and put on the
+ * filesystem's device list.  Finally the device is refreshed from the item.
+ *
+ * Returns 0 on success, -ENOMEM, or the error from open_seed_devices().
+ */
+static int read_one_dev(struct btrfs_root *root,
+			struct extent_buffer *leaf,
+			struct btrfs_dev_item *dev_item)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_device *device;
+	u8 dev_uuid[BTRFS_UUID_SIZE];
+	u8 fs_uuid[BTRFS_UUID_SIZE];
+	u64 devid = btrfs_device_id(leaf, dev_item);
+	int ret;
+
+	read_extent_buffer(leaf, dev_uuid,
+			   (unsigned long)btrfs_device_uuid(dev_item),
+			   BTRFS_UUID_SIZE);
+	read_extent_buffer(leaf, fs_uuid,
+			   (unsigned long)btrfs_device_fsid(dev_item),
+			   BTRFS_UUID_SIZE);
+
+	if (memcmp(fs_uuid, fs_info->fsid, BTRFS_UUID_SIZE) != 0) {
+		ret = open_seed_devices(root, fs_uuid);
+		if (ret)
+			return ret;
+	}
+
+	device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
+	if (device == NULL) {
+		printk("warning devid %llu not found already\n",
+		       (unsigned long long)devid);
+		device = kzalloc(sizeof(*device), GFP_NOFS);
+		if (device == NULL)
+			return -ENOMEM;
+		device->fd = -1;
+		list_add(&device->dev_list, &fs_info->fs_devices->devices);
+	}
+
+	fill_device_from_item(leaf, dev_item, device);
+	device->dev_root = fs_info->dev_root;
+	return 0;
+}
+/*
+ * Load the chunk mappings stored in the superblock's sys_chunk_array.
+ *
+ * The in-memory superblock copy is written into a temporary extent buffer so
+ * that the extent-buffer based chunk accessors can be used on it.  The array
+ * is then walked as a sequence of (disk key, chunk item) pairs; each chunk
+ * item is bounds-checked against the array size and handed to
+ * read_one_chunk() with slot -1.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary buffer cannot be obtained,
+ * -EIO for a truncated array, a zero stripe count or an unexpected key type,
+ * or the error returned by read_one_chunk().
+ */
+int btrfs_read_sys_array(struct btrfs_root *root)
+{
+ struct btrfs_super_block *super_copy = root->fs_info->super_copy;
+ struct extent_buffer *sb;
+ struct btrfs_disk_key *disk_key;
+ struct btrfs_chunk *chunk;
+ u8 *array_ptr; /* cursor into the in-memory superblock copy */
+ unsigned long sb_array_offset; /* same position as a byte offset into sb */
+ int ret = 0;
+ u32 num_stripes;
+ u32 array_size;
+ u32 len = 0;
+ u32 cur_offset; /* bytes of the array consumed so far */
+ struct btrfs_key key;
+
+ sb = btrfs_find_create_tree_block(root->fs_info,
+ BTRFS_SUPER_INFO_OFFSET,
+ BTRFS_SUPER_INFO_SIZE);
+ if (!sb)
+ return -ENOMEM;
+ btrfs_set_buffer_uptodate(sb);
+ write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy));
+ array_size = btrfs_super_sys_array_size(super_copy);
+
+ array_ptr = super_copy->sys_chunk_array;
+ sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
+ cur_offset = 0;
+
+ while (cur_offset < array_size) {
+ disk_key = (struct btrfs_disk_key *)array_ptr;
+ len = sizeof(*disk_key);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ btrfs_disk_key_to_cpu(&key, disk_key);
+
+ array_ptr += len;
+ sb_array_offset += len;
+ cur_offset += len;
+
+ if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+ chunk = (struct btrfs_chunk *)sb_array_offset; /* an offset into sb, not a dereferenceable pointer */
+ /*
+ * At least one btrfs_chunk with one stripe must be
+ * present, exact stripe count check comes afterwards
+ */
+ len = btrfs_chunk_item_size(1);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+ if (!num_stripes) {
+ printk(
+ "ERROR: invalid number of stripes %u in sys_array at offset %u\n",
+ num_stripes, cur_offset);
+ ret = -EIO;
+ break;
+ }
+
+ len = btrfs_chunk_item_size(num_stripes); /* real item size now that the stripe count is known */
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ ret = read_one_chunk(root, &key, sb, chunk, -1);
+ if (ret)
+ break;
+ } else {
+ printk(
+ "ERROR: unexpected item type %u in sys_array at offset %u\n",
+ (u32)key.type, cur_offset);
+ ret = -EIO;
+ break;
+ }
+ array_ptr += len;
+ sb_array_offset += len;
+ cur_offset += len;
+ }
+ free_extent_buffer(sb);
+ return ret;
+
+out_short_read: /* len holds the size of the element that did not fit */
+ printk("ERROR: sys_array too short to read %u bytes at offset %u\n",
+ len, cur_offset);
+ free_extent_buffer(sb);
+ return -EIO;
+}
+
+/*
+ * Walk the whole chunk tree and load every device item and chunk item into
+ * memory via read_one_dev() and read_one_chunk().  A failure in either
+ * helper is treated as a bug.
+ *
+ * Returns 0 on success, -ENOMEM if no path can be allocated, or a negative
+ * error from the tree search / leaf iteration.
+ */
+int btrfs_read_chunk_tree(struct btrfs_root *root)
+{
+	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
+	struct btrfs_dev_item *dev_item;
+	struct btrfs_chunk *chunk;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	int slot;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * Read all device items, and then all the chunk items. All
+	 * device items are found before any chunk item (their object id
+	 * is smaller than the lowest possible object id for a chunk
+	 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
+	 */
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = 0;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto error;
+
+	for (;;) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		/* past the last item of this leaf: move on or finish */
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(chunk_root, path);
+			if (ret < 0)
+				goto error;
+			if (ret > 0)
+				break;
+			continue;
+		}
+
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		switch (found_key.type) {
+		case BTRFS_DEV_ITEM_KEY:
+			dev_item = btrfs_item_ptr(leaf, slot,
+						  struct btrfs_dev_item);
+			ret = read_one_dev(chunk_root, leaf, dev_item);
+			BUG_ON(ret);
+			break;
+		case BTRFS_CHUNK_ITEM_KEY:
+			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+			ret = read_one_chunk(chunk_root, &found_key, leaf,
+					     chunk, slot);
+			BUG_ON(ret);
+			break;
+		default:
+			break;
+		}
+		path->slots[0]++;
+	}
+
+	ret = 0;
+error:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/* Accessor for the file-level fs_uuids list head. */
+struct list_head *btrfs_scanned_uuids(void)
+{
+	struct list_head *uuids = &fs_uuids;
+
+	return uuids;
+}
+
+/*
+ * Read-modify-write helper: fill @eb with its current contents (via
+ * read_whole_eb()), then overlay whatever part of @orig_eb overlaps the
+ * range of @eb.
+ *
+ * Returns 0 on success (including when the two ranges do not overlap) or
+ * the error from read_whole_eb().
+ */
+static int rmw_eb(struct btrfs_fs_info *info,
+		  struct extent_buffer *eb, struct extent_buffer *orig_eb)
+{
+	unsigned long src_off = 0;	/* offset into orig_eb->data */
+	unsigned long dst_off = 0;	/* offset into eb->data */
+	unsigned long nbytes = eb->len;
+	int ret;
+
+	ret = read_whole_eb(info, eb, 0);
+	if (ret)
+		return ret;
+
+	/* disjoint ranges: nothing to overlay */
+	if (eb->start >= orig_eb->start + orig_eb->len ||
+	    eb->start + eb->len <= orig_eb->start)
+		return 0;
+
+	/* whichever buffer starts later determines the non-zero offset */
+	if (eb->start > orig_eb->start)
+		src_off = eb->start - orig_eb->start;
+	else if (orig_eb->start > eb->start)
+		dst_off = orig_eb->start - eb->start;
+
+	/* clamp the copy to what is left in each buffer */
+	if (nbytes > orig_eb->len - src_off)
+		nbytes = orig_eb->len - src_off;
+	if (nbytes > eb->len - dst_off)
+		nbytes = eb->len - dst_off;
+
+	memcpy(eb->data + dst_off, orig_eb->data + src_off, nbytes);
+	return 0;
+}
+
+/*
+ * Build one stripe-sized extent buffer per data stripe of a RAID5/6 stripe
+ * set and store them in @ebs.
+ *
+ * @raid_map supplies the logical start of each stripe; processing stops at
+ * the first parity marker.  A stripe fully covered by @orig_eb is copied
+ * straight from it; otherwise the stripe is filled through rmw_eb().
+ * Allocation or read failures are treated as bugs.
+ */
+static void split_eb_for_raid56(struct btrfs_fs_info *info,
+				struct extent_buffer *orig_eb,
+				struct extent_buffer **ebs,
+				u64 stripe_len, u64 *raid_map,
+				int num_stripes)
+{
+	const u64 orig_start = orig_eb->start;
+	int idx;
+
+	for (idx = 0; idx < num_stripes; idx++) {
+		struct extent_buffer *piece;
+		int covered;
+		int ret;
+
+		if (raid_map[idx] >= BTRFS_RAID5_P_STRIPE)
+			break;
+
+		piece = calloc(1, sizeof(struct extent_buffer) + stripe_len);
+		if (!piece)
+			BUG();
+
+		piece->start = raid_map[idx];
+		piece->len = stripe_len;
+		piece->refs = 1;
+		piece->flags = 0;
+		piece->fd = -1;
+		piece->dev_bytenr = (u64)-1;
+
+		covered = orig_start <= piece->start &&
+			  orig_start + orig_eb->len >= piece->start + stripe_len;
+		if (covered) {
+			memcpy(piece->data,
+			       orig_eb->data + piece->start - orig_start,
+			       stripe_len);
+		} else {
+			ret = rmw_eb(info, piece, orig_eb);
+			BUG_ON(ret);
+		}
+		ebs[idx] = piece;
+	}
+}
+/*
+ * Write @eb to a RAID5/6 chunk together with freshly computed parity.
+ *
+ * @info:       filesystem info, passed on to split_eb_for_raid56()
+ * @eb:         extent buffer holding the data to write
+ * @multi:      stripe list for the full stripe set
+ * @stripe_len: length of one stripe in bytes
+ * @raid_map:   logical address of each stripe in @multi, with the parity
+ *              stripes marked by BTRFS_RAID5_P_STRIPE / BTRFS_RAID6_Q_STRIPE
+ *
+ * The parity buffers are stored in the last slot(s) of the buffer array, so
+ * this presumably relies on @raid_map being ordered with parity last --
+ * NOTE(review): confirm against callers.
+ *
+ * Always returns 0; allocation and write failures trigger BUG_ON().
+ */
+int write_raid56_with_parity(struct btrfs_fs_info *info,
+ struct extent_buffer *eb,
+ struct btrfs_multi_bio *multi,
+ u64 stripe_len, u64 *raid_map)
+{
+ struct extent_buffer **ebs, *p_eb = NULL, *q_eb = NULL;
+ int i;
+ int j;
+ int ret;
+ int alloc_size = eb->len;
+
+ ebs = kmalloc(sizeof(*ebs) * multi->num_stripes, GFP_NOFS);
+ BUG_ON(!ebs);
+
+ if (stripe_len > alloc_size) /* parity buffers get max(eb->len, stripe_len) data bytes */
+ alloc_size = stripe_len;
+
+ split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map,
+ multi->num_stripes);
+
+ for (i = 0; i < multi->num_stripes; i++) {
+ struct extent_buffer *new_eb;
+ if (raid_map[i] < BTRFS_RAID5_P_STRIPE) { /* data stripe: only attach its device target */
+ ebs[i]->dev_bytenr = multi->stripes[i].physical;
+ ebs[i]->fd = multi->stripes[i].dev->fd;
+ multi->stripes[i].dev->total_ios++;
+ BUG_ON(ebs[i]->start != raid_map[i]);
+ continue;
+ }
+ new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS); /* parity buffer; only dev_bytenr, fd and len are set below */
+ BUG_ON(!new_eb);
+ new_eb->dev_bytenr = multi->stripes[i].physical;
+ new_eb->fd = multi->stripes[i].dev->fd;
+ multi->stripes[i].dev->total_ios++;
+ new_eb->len = stripe_len;
+
+ if (raid_map[i] == BTRFS_RAID5_P_STRIPE)
+ p_eb = new_eb;
+ else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE)
+ q_eb = new_eb;
+ }
+ if (q_eb) { /* RAID6: P and Q both come from raid6_gen_syndrome() */
+ void **pointers;
+
+ pointers = kmalloc(sizeof(*pointers) * multi->num_stripes,
+ GFP_NOFS);
+ BUG_ON(!pointers);
+
+ ebs[multi->num_stripes - 2] = p_eb;
+ ebs[multi->num_stripes - 1] = q_eb;
+
+ for (i = 0; i < multi->num_stripes; i++)
+ pointers[i] = ebs[i]->data;
+
+ raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers);
+ kfree(pointers);
+ } else { /* RAID5: P is the XOR of all data stripes */
+ ebs[multi->num_stripes - 1] = p_eb;
+ memcpy(p_eb->data, ebs[0]->data, stripe_len);
+ for (j = 1; j < multi->num_stripes - 1; j++) {
+ for (i = 0; i < stripe_len; i += sizeof(unsigned long)) {
+ *(unsigned long *)(p_eb->data + i) ^=
+ *(unsigned long *)(ebs[j]->data + i);
+ }
+ }
+ }
+
+ for (i = 0; i < multi->num_stripes; i++) { /* write every stripe, then free the temporary buffers */
+ ret = write_extent_to_disk(ebs[i]);
+ BUG_ON(ret);
+ if (ebs[i] != eb)
+ kfree(ebs[i]);
+ }
+
+ kfree(ebs);
+
+ return 0;
+}
diff --git a/volumes.h b/volumes.h
new file mode 100644
index 00000000..c0007adc
--- /dev/null
+++ b/volumes.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_VOLUMES_H__
+#define __BTRFS_VOLUMES_H__
+
+#include "kerncompat.h"
+#include "ctree.h"
+
+#define BTRFS_STRIPE_LEN (64 * 1024)
+/*
+ * In-memory description of one device belonging to a filesystem, as used by
+ * the userspace tools.
+ */
+struct btrfs_device {
+ struct list_head dev_list; /* link in btrfs_fs_devices::devices */
+ struct btrfs_root *dev_root; /* set to fs_info->dev_root when the dev item is read */
+ struct btrfs_fs_devices *fs_devices;
+
+ u64 total_ios; /* incremented for every stripe queued by write_raid56_with_parity() */
+
+ int fd; /* -1 for devices created as placeholders (missing / not yet found) */
+
+ int writeable; /* zero makes btrfs_chunk_readonly() report chunks on this device as read-only */
+
+ char *name;
+
+ /* these are read off the super block, only in the progs */
+ char *label;
+ u64 total_devs;
+ u64 super_bytes_used;
+
+ u64 generation;
+
+ /* the internal btrfs device id */
+ u64 devid;
+
+ /* size of the device */
+ u64 total_bytes;
+
+ /* bytes used */
+ u64 bytes_used;
+
+ /* optimal io alignment for this device */
+ u32 io_align;
+
+ /* optimal io width for this device */
+ u32 io_width;
+
+ /* minimal io size for this device */
+ u32 sector_size;
+
+ /* type and info about this device */
+ u64 type;
+
+ /* physical drive uuid (or lvm uuid) */
+ u8 uuid[BTRFS_UUID_SIZE];
+};
+/*
+ * The set of devices that share one filesystem id.  Sets can be chained
+ * through ->seed; device lookup walks that chain.
+ */
+struct btrfs_fs_devices {
+ u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+
+ /* the device with this id has the most recent copy of the super */
+ u64 latest_devid;
+ u64 latest_trans;
+ u64 lowest_devid;
+ int latest_bdev;
+ int lowest_bdev;
+ struct list_head devices; /* list of struct btrfs_device, linked via dev_list */
+ struct list_head list; /* link in the file-level fs_uuids list */
+
+ int seeding;
+ struct btrfs_fs_devices *seed; /* next device set in the seed chain, or NULL */
+};
+/* One physical location of a stripe: a device plus a byte offset on it. */
+struct btrfs_bio_stripe {
+ struct btrfs_device *dev;
+ u64 physical; /* byte offset on dev */
+};
+/*
+ * Result of a logical->physical mapping: the list of stripes an I/O has to
+ * touch.  Allocate with btrfs_multi_bio_size(n) to make room for n stripes.
+ */
+struct btrfs_multi_bio {
+ int error;
+ int num_stripes; /* number of valid entries in stripes[] */
+ struct btrfs_bio_stripe stripes[];
+};
+/*
+ * Cached mapping of one chunk, stored in the mapping tree through the
+ * embedded cache extent.  Allocate with btrfs_map_lookup_size(n).
+ */
+struct map_lookup {
+ struct cache_extent ce; /* start/size hold the chunk's logical start and length */
+ u64 type; /* chunk type and profile bits (BTRFS_BLOCK_GROUP_*) */
+ int io_align;
+ int io_width;
+ int stripe_len;
+ int sector_size;
+ int num_stripes; /* number of entries in stripes[] */
+ int sub_stripes;
+ struct btrfs_bio_stripe stripes[];
+};
+/*
+ * Allocation sizes, in bytes, of a btrfs_multi_bio / map_lookup followed by
+ * n entries in its trailing stripes[] array.
+ */
+#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
+ (sizeof(struct btrfs_bio_stripe) * (n)))
+#define btrfs_map_lookup_size(n) (sizeof(struct map_lookup) + \
+ (sizeof(struct btrfs_bio_stripe) * (n)))
+
+/*
+ * Restriper's general type filter.  DATA, SYSTEM and METADATA each get their
+ * own bit; BTRFS_BALANCE_TYPE_MASK is the union of the three.
+ */
+#define BTRFS_BALANCE_DATA (1ULL << 0)
+#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
+#define BTRFS_BALANCE_METADATA (1ULL << 2)
+
+#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
+ BTRFS_BALANCE_SYSTEM | \
+ BTRFS_BALANCE_METADATA)
+
+#define BTRFS_BALANCE_FORCE (1ULL << 3) /* bits 3 and 4 lie outside BTRFS_BALANCE_TYPE_MASK */
+#define BTRFS_BALANCE_RESUME (1ULL << 4)
+
+/*
+ * Balance filters
+ */
+#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
+#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
+#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
+#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
+#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
+#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
+#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6)
+#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
+#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10) /* bit 10: bits 8 and 9 are taken by CONVERT/SOFT below */
+
+/*
+ * Profile changing flags. When SOFT is set we won't relocate chunk if
+ * it already has the target profile (even though it may be
+ * half-filled).
+ */
+#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
+#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
+/*
+ * Marker values stored in a raid_map in place of a logical address to flag
+ * the RAID5 parity (P) and RAID6 syndrome (Q) stripes.  They are the two
+ * largest u64 values, so an ascending sort of the map puts them last.
+ */
+#define BTRFS_RAID5_P_STRIPE ((u64)-2)
+#define BTRFS_RAID6_Q_STRIPE ((u64)-1)
+
+/*
+ * Check whether the byte range [start, start + len) crosses a
+ * BTRFS_STRIPE_LEN boundary, i.e. whether its first and last byte fall into
+ * different stripes.  Returns non-zero if it does.
+ *
+ * This check exists to make sure the kernel scrub code does not hit a BUG_ON
+ * with METADATA in a mixed block group.
+ */
+static inline int check_crossing_stripes(u64 start, u64 len)
+{
+	u64 first_stripe = start / BTRFS_STRIPE_LEN;
+	u64 last_stripe = (start + len - 1) / BTRFS_STRIPE_LEN;
+
+	return first_stripe != last_stripe;
+}
+/*
+ * Logical -> physical mapping.  btrfs_map_block() forwards to
+ * __btrfs_map_block() with a NULL type pointer.  btrfs_next_bg() iterates
+ * over the chunks whose type matches the given bits.
+ */
+int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+ u64 logical, u64 *length, u64 *type,
+ struct btrfs_multi_bio **multi_ret, int mirror_num,
+ u64 **raid_map);
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+ u64 logical, u64 *length,
+ struct btrfs_multi_bio **multi_ret, int mirror_num,
+ u64 **raid_map_ret);
+int btrfs_next_bg(struct btrfs_mapping_tree *map_tree, u64 *logical,
+ u64 *size, u64 type);
+/* btrfs_next_bg() restricted to chunks carrying the METADATA type bit. */
+static inline int btrfs_next_bg_metadata(struct btrfs_mapping_tree *map_tree,
+					 u64 *logical, u64 *size)
+{
+	const u64 wanted = BTRFS_BLOCK_GROUP_METADATA;
+
+	return btrfs_next_bg(map_tree, logical, size, wanted);
+}
+/* btrfs_next_bg() restricted to chunks carrying the SYSTEM type bit. */
+static inline int btrfs_next_bg_system(struct btrfs_mapping_tree *map_tree,
+				       u64 *logical, u64 *size)
+{
+	const u64 wanted = BTRFS_BLOCK_GROUP_SYSTEM;
+
+	return btrfs_next_bg(map_tree, logical, size, wanted);
+}
+/* Reverse mapping: physical device offset -> logical address(es). */
+int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
+		     u64 chunk_start, u64 physical, u64 devid,
+		     u64 **logical, int *naddrs, int *stripe_len);
+
+/* Loading of chunk and device information. */
+int btrfs_read_sys_array(struct btrfs_root *root);
+int btrfs_read_chunk_tree(struct btrfs_root *root);
+
+/* Chunk allocation. */
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *extent_root, u64 *start,
+		      u64 *num_bytes, u64 type);
+int btrfs_alloc_data_chunk(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *extent_root, u64 *start,
+			   u64 num_bytes, u64 type);
+
+/*
+ * Device handling.  btrfs_add_device() used to be declared twice in this
+ * list; the redundant declaration has been dropped.
+ */
+int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
+int btrfs_add_device(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_device *device);
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
+		       int flags);
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
+void btrfs_close_all_devices(void);
+int btrfs_update_device(struct btrfs_trans_handle *trans,
+			struct btrfs_device *device);
+int btrfs_scan_one_device(int fd, const char *path,
+			  struct btrfs_fs_devices **fs_devices_ret,
+			  u64 *total_devs, u64 super_offset, int super_recover);
+int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
+struct list_head *btrfs_scanned_uuids(void);
+int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct btrfs_key *key,
+			   struct btrfs_chunk *chunk, int item_size);
+int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
+struct btrfs_device *
+btrfs_find_device_by_devid(struct btrfs_fs_devices *fs_devices,
+			   u64 devid, int instance);
+struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
+				       u8 *uuid, u8 *fsid);
+
+/* RAID5/6 full-stripe write with parity generation. */
+int write_raid56_with_parity(struct btrfs_fs_info *info,
+			     struct extent_buffer *eb,
+			     struct btrfs_multi_bio *multi,
+			     u64 stripe_len, u64 *raid_map);
+#endif