diff options
author | Nicholas D Steeves <nsteeves@gmail.com> | 2016-04-23 00:41:30 +0100 |
---|---|---|
committer | Nicholas D Steeves <nsteeves@gmail.com> | 2016-04-23 00:41:30 +0100 |
commit | cec572daccafa1e912cbed363df6f84687778c6f (patch) | |
tree | 7d99ab9f73d25c1ed8eaf6393f6374edf5316b03 |
btrfs-progs (4.4.1-1.1) unstable; urgency=medium
* Non-maintainer upload.
* New upstream release.
* Rename package to btrfs-progs (Closes: #780081)
* Update standards version to 3.9.7 (no changes needed).
* debian/control: Add "Breaks" per Gianfranco Costamagna's suggestion
* Change lintian override to reflect package rename
* Switch from using postinst and postrm to using triggers
per Christian Seiler's recommendation.
# imported from the archive
256 files changed, 99051 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..a27cb0d9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,75 @@ +*.o +*.static.o +*.o.d +tags +.cc-defines.h +version.h +version +man/*.gz +Documentation/*.gz +Documentation/*.html +btrfs +btrfs.static +btrfs-debug-tree +btrfs-map-logical +btrfs-fragments +btrfsck +calc-size +ioctl-test +dir-test +send-test +quick-test +find-root +mkfs.btrfs +mkfs.btrfs.static +repair +restore +btrfs-convert +btrfs-find-root +btrfs-find-root.static +btrfs-image +btrfs-show-super +btrfs-zero-log +btrfs-corrupt-block +btrfs-select-super +btrfs-calc-size +btrfstune +libbtrfs.a +libbtrfs.so +libbtrfs.so.0 +libbtrfs.so.0.1 +library-test +library-test-static + +/tests/*-tests-results.txt +/tests/test.img + +aclocal.m4 +autom4te.cache +compile +config.cache +config.guess +config.h +config.h.in +config.h.in~ +config.log +config.rpath +config.status +config.sub +config/ltmain.sh +config/py-compile +config/test-driver +configure +cscope.out +depcomp +libtool +m4/*.m4 +Makefile +Documentation/Makefile +missing +mkinstalldirs +stamp-h +stamp-h.in +stamp-h1 +config/* + diff --git a/Android.mk b/Android.mk new file mode 100644 index 00000000..fe3209b6 --- /dev/null +++ b/Android.mk @@ -0,0 +1,111 @@ +LOCAL_PATH:= $(call my-dir) + +#include $(call all-subdir-makefiles) + +CFLAGS := -g -O1 -Wall -D_FORTIFY_SOURCE=2 -include config.h \ + -DBTRFS_FLAT_INCLUDES -D_XOPEN_SOURCE=700 -fno-strict-aliasing -fPIC + +LDFLAGS := -static -rdynamic + +LIBS := -luuid -lblkid -lz -llzo2 -L. -lpthread +LIBBTRFS_LIBS := $(LIBS) + +STATIC_CFLAGS := $(CFLAGS) -ffunction-sections -fdata-sections +STATIC_LDFLAGS := -static -Wl,--gc-sections +STATIC_LIBS := -luuid -lblkid -luuid -lz -llzo2 -L. 
-pthread + +btrfs_shared_libraries := libext2_uuid \ + libext2_blkid + +objects := ctree.c disk-io.c radix-tree.c extent-tree.c print-tree.c \ + root-tree.c dir-item.c file-item.c inode-item.c inode-map.c \ + extent-cache.c extent_io.c volumes.c utils.c repair.c \ + qgroup.c raid6.c free-space-cache.c list_sort.c props.c \ + ulist.c qgroup-verify.c backref.c string-table.c task-utils.c \ + inode.c file.c find-root.c +cmds_objects := cmds-subvolume.c cmds-filesystem.c cmds-device.c cmds-scrub.c \ + cmds-inspect.c cmds-balance.c cmds-send.c cmds-receive.c \ + cmds-quota.c cmds-qgroup.c cmds-replace.c cmds-check.c \ + cmds-restore.c cmds-rescue.c chunk-recover.c super-recover.c \ + cmds-property.c cmds-fi-usage.c +libbtrfs_objects := send-stream.c send-utils.c rbtree.c btrfs-list.c crc32c.c \ + uuid-tree.c utils-lib.c rbtree-utils.c +libbtrfs_headers := send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \ + crc32c.h list.h kerncompat.h radix-tree.h extent-cache.h \ + extent_io.h ioctl.h ctree.h btrfsck.h version.h +TESTS := fsck-tests.sh convert-tests.sh +blkid_objects := partition/ superblocks/ topology/ + + +# external/e2fsprogs/lib is needed for uuid/uuid.h +common_C_INCLUDES := $(LOCAL_PATH) external/e2fsprogs/lib/ external/lzo/include/ external/zlib/ + +#---------------------------------------------------------- +include $(CLEAR_VARS) +LOCAL_SRC_FILES := $(libbtrfs_objects) +LOCAL_CFLAGS := $(STATIC_CFLAGS) +LOCAL_MODULE := libbtrfs +LOCAL_C_INCLUDES := $(common_C_INCLUDES) +include $(BUILD_STATIC_LIBRARY) + +#---------------------------------------------------------- +include $(CLEAR_VARS) +LOCAL_MODULE := btrfs +#LOCAL_FORCE_STATIC_EXECUTABLE := true +LOCAL_SRC_FILES := \ + $(objects) \ + $(cmds_objects) \ + btrfs.c \ + help.c \ + +LOCAL_C_INCLUDES := $(common_C_INCLUDES) +LOCAL_CFLAGS := $(STATIC_CFLAGS) +#LOCAL_LDLIBS := $(LIBBTRFS_LIBS) +#LOCAL_LDFLAGS := $(STATIC_LDFLAGS) +LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries) +LOCAL_STATIC_LIBRARIES := 
libbtrfs liblzo-static libz +LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils + +LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES) +#LOCAL_MODULE_TAGS := optional +include $(BUILD_EXECUTABLE) + +#---------------------------------------------------------- +include $(CLEAR_VARS) +LOCAL_MODULE := mkfs.btrfs +LOCAL_SRC_FILES := \ + $(objects) \ + mkfs.c + +LOCAL_C_INCLUDES := $(common_C_INCLUDES) +LOCAL_CFLAGS := $(STATIC_CFLAGS) +#LOCAL_LDLIBS := $(LIBBTRFS_LIBS) +#LOCAL_LDFLAGS := $(STATIC_LDFLAGS) +LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries) +LOCAL_STATIC_LIBRARIES := libbtrfs liblzo-static +LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils + +LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES) +#LOCAL_MODULE_TAGS := optional +include $(BUILD_EXECUTABLE) + +#--------------------------------------------------------------- +include $(CLEAR_VARS) +LOCAL_MODULE := btrfstune +LOCAL_SRC_FILES := \ + $(objects) \ + btrfstune.c + +LOCAL_C_INCLUDES := $(common_C_INCLUDES) +LOCAL_CFLAGS := $(STATIC_CFLAGS) +LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries) +#LOCAL_LDLIBS := $(LIBBTRFS_LIBS) +#LOCAL_LDFLAGS := $(STATIC_LDFLAGS) +LOCAL_SHARED_LIBRARIES := $(btrfs_shared_libraries) +LOCAL_STATIC_LIBRARIES := libbtrfs liblzo-static +LOCAL_SYSTEM_SHARED_LIBRARIES := libc libcutils + +LOCAL_EXPORT_C_INCLUDES := $(common_C_INCLUDES) +LOCAL_MODULE_TAGS := optional +include $(BUILD_EXECUTABLE) +#-------------------------------------------------------------- diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..10828e06 --- /dev/null +++ b/COPYING @@ -0,0 +1,341 @@ + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. 
By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/Documentation/Makefile.in b/Documentation/Makefile.in new file mode 100644 index 00000000..f046abd5 --- /dev/null +++ b/Documentation/Makefile.in @@ -0,0 +1,133 @@ +# Guard against environment variables +MAN8_TXT = + +# Top level commands +MAN8_TXT += btrfs.asciidoc +MAN8_TXT += btrfs-convert.asciidoc +MAN8_TXT += btrfs-debug-tree.asciidoc +MAN8_TXT += btrfs-find-root.asciidoc +MAN8_TXT += btrfs-image.asciidoc +MAN8_TXT += btrfs-map-logical.asciidoc +MAN8_TXT += btrfs-show-super.asciidoc +MAN8_TXT += btrfs-select-super.asciidoc +MAN8_TXT += btrfstune.asciidoc +MAN8_TXT += fsck.btrfs.asciidoc +MAN8_TXT += mkfs.btrfs.asciidoc + +# Sub commands for btrfs +MAN8_TXT += btrfs-subvolume.asciidoc +MAN8_TXT += btrfs-filesystem.asciidoc +MAN8_TXT += btrfs-balance.asciidoc +MAN8_TXT += btrfs-device.asciidoc +MAN8_TXT += btrfs-scrub.asciidoc +MAN8_TXT += btrfs-check.asciidoc +MAN8_TXT += btrfs-rescue.asciidoc +MAN8_TXT += btrfs-inspect-internal.asciidoc +MAN8_TXT += btrfs-send.asciidoc +MAN8_TXT += btrfs-receive.asciidoc +MAN8_TXT += btrfs-quota.asciidoc 
+MAN8_TXT += btrfs-qgroup.asciidoc +MAN8_TXT += btrfs-replace.asciidoc +MAN8_TXT += btrfs-restore.asciidoc +MAN8_TXT += btrfs-property.asciidoc + +# Category 5 manual page +MAN5_TXT += btrfs-man5.asciidoc + +MAN_TXT = $(MAN8_TXT) $(MAN5_TXT) +MAN_XML = $(patsubst %.asciidoc,%.xml,$(MAN_TXT)) +MAN_HTML = $(patsubst %.asciidoc,%.html,$(MAN_TXT)) + +DOC_MAN5 = $(patsubst %.asciidoc,%.5,$(MAN5_TXT)) +GZ_MAN5 = $(patsubst %.asciidoc,%.5.gz,$(MAN5_TXT)) + +DOC_MAN8 = $(patsubst %.asciidoc,%.8,$(MAN8_TXT)) +GZ_MAN8 = $(patsubst %.asciidoc,%.8.gz,$(MAN8_TXT)) + +mandir ?= $(prefix)/share/man +man8dir = $(mandir)/man8 +man5dir = $(mandir)/man5 + +ASCIIDOC = @ASCIIDOC@ +ASCIIDOC_EXTRA = +MANPAGE_XSL = manpage-normal.xsl +XMLTO = @XMLTO@ +XMLTO_EXTRA = +XMLTO_EXTRA = -m manpage-bold-literal.xsl +GZIPCMD = @GZIP@ +INSTALL = @INSTALL@ +RM = @RM@ +RMDIR = @RMDIR@ +LN_S = @LN_S@ +MV = @MV@ +SED = @SED@ +BTRFS_VERSION = $(shell $(SED) -n 's/.*PACKAGE_VERSION "\(.*\)"/\1/p'\ + ../config.h) + +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifndef V + QUIET_RM = @ + QUIET_ASCIIDOC = @echo " [ASCII] $@"; + QUIET_XMLTO = @echo " [XMLTO] $@"; + QUIET_GZIP = @echo " [GZ] $@"; + QUIET_STDERR = 2> /dev/null + QUIET_SUBDIR0 = +@subdir= + QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + $(MAKE) $(PRINT_DIR) -C $$subdir + export V +endif +endif + +all: man +man: man5 man8 +man5: $(GZ_MAN5) +man8: $(GZ_MAN8) +html: $(MAN_HTML) + +install: install-man + +install-man: man + $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir) + $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) + $(INSTALL) -m 644 $(GZ_MAN5) $(DESTDIR)$(man5dir) + # the source file name of btrfs.5 clashes with section 8 page, but we + # want to keep the code generic + $(MV) $(DESTDIR)$(man5dir)/btrfs-man5.5.gz $(DESTDIR)$(man5dir)/btrfs.5.gz + $(INSTALL) -m 644 $(GZ_MAN8) $(DESTDIR)$(man8dir) + $(LN_S) -f btrfs-check.8.gz $(DESTDIR)$(man8dir)/btrfsck.8.gz + $(LN_S) -f btrfs-rescue.8.gz $(DESTDIR)$(man8dir)/btrfs-zero-log.8.gz + +uninstall: + cd 
$(DESTDIR)$(man8dir); rm -f btrfs-check.8.gz $(GZ_MAN8) + $(RMDIR) -p --ignore-fail-on-non-empty $(DESTDIR)$(man8dir) + +clean: + $(QUIET_RM)$(RM) -f *.xml *.xml+ *.5 *.5.gz *.8 *.8.gz *.html + +%.5.gz : %.5 + $(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@ + +%.8.gz : %.8 + $(QUIET_GZIP)$(GZIPCMD) -n -c $< > $@ + +%.5 : %.xml + $(QUIET_XMLTO)$(RM) -f $@ && \ + $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + +%.8 : %.xml + $(QUIET_XMLTO)$(RM) -f $@ && \ + $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + +%.xml : %.asciidoc asciidoc.conf + $(QUIET_ASCIIDOC)$(RM) -f $@+ $@ && \ + $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ + $(ASCIIDOC_EXTRA) -abtrfs_version=$(BTRFS_VERSION) \ + -o $@+ $< && \ + $(MV) $@+ $@ + +%.html : %.asciidoc asciidoc.conf + $(QUIET_ASCIIDOC)$(RM) -f $@+ $@ && \ + $(ASCIIDOC) -b html -d article -f asciidoc.conf \ + $(ASCIIDOC_EXTRA) -abtrfs_version=$(BTRFS_VERSION) \ + -o $@+ $< && \ + $(MV) $@+ $@ diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf new file mode 100644 index 00000000..1ea74591 --- /dev/null +++ b/Documentation/asciidoc.conf @@ -0,0 +1,48 @@ +## linkbtrfs: macro +# +# Usage: linkbtrfs:command[manpage-section] +# +# Note, {0} is the manpage section, while {target} is the command. +# +# Show Btrfslink as: <command>(<section>); if section is defined, else just show +# the command. 
+ +[macros] +(?su)[\\]?(?P<name>linkbtrfs):(?P<target>\S*?)\[(?P<attrlist>.*?)\]= + +[tags] +bracket-emphasis={1?[{1}]}<emphasis><|></emphasis> + +[quotes] +<|>=#bracket-emphasis + +[attributes] +asterisk=* +plus=+ +caret=^ +startsb=[ +endsb=] +backslash=\ +tilde=~ +apostrophe=' +backtick=` +litdd=-- + +ifdef::doctype-manpage[] +ifdef::backend-docbook[] +[header] +template::[header-declarations] +<refentry> +<refmeta> +<refentrytitle>{mantitle}</refentrytitle> +<manvolnum>{manvolnum}</manvolnum> +<refmiscinfo class="source">Btrfs</refmiscinfo> +<refmiscinfo class="version">{btrfs_version}</refmiscinfo> +<refmiscinfo class="manual">Btrfs Manual</refmiscinfo> +</refmeta> +<refnamediv> + <refname>{manname}</refname> + <refpurpose>{manpurpose}</refpurpose> +</refnamediv> +endif::backend-docbook[] +endif::doctype-manpage[] diff --git a/Documentation/btrfs-balance.asciidoc b/Documentation/btrfs-balance.asciidoc new file mode 100644 index 00000000..c8407419 --- /dev/null +++ b/Documentation/btrfs-balance.asciidoc @@ -0,0 +1,217 @@ +btrfs-balance(8) +================ + +NAME +---- +btrfs-balance - balance block groups on a btrfs filesystem + +SYNOPSIS +-------- +*btrfs balance* <subcommand> <args> + +DESCRIPTION +----------- +The primary purpose of the balance feature is to spread block groups across +all devices so they match constraints defined by the respective profiles. See +`mkfs.btrfs`(8) section 'PROFILES' for more details. +The scope of the balancing process can be further tuned by use of filters that +can select the block groups to process. Balance works only on a mounted +filesystem. + +The balance operation is cancellable by the user. The on-disk state of the +filesystem is always consistent so an unexpected interruption (eg. system crash, +reboot) does not corrupt the filesystem. The progress of the balance operation +is temporarily stored and will be resumed upon mount, unless the mount option +'skip_balance' is specified. 
+ +WARNING: running balance without filters will take a lot of time as it basically +rewrites the entire filesystem and needs to update all block pointers. + +The filters can be used to perform following actions: + +- convert block group profiles (filter 'convert') +- make block group usage more compact (filter 'usage') +- perform actions only on a given device (filters 'devid', 'drange') + +The filters can be applied to a combination of block group types (data, +metadata, system). Note that changing 'system' needs the force option. + +NOTE: the balance operation needs enough work space, ie. space that is +completely unused in the filesystem, otherwise this may lead to ENOSPC reports. +See the section 'ENOSPC' for more details. + +COMPATIBILITY +------------- + +NOTE: The balance subcommand also exists under the *btrfs filesystem* +namespace. This still works for backward compatibility but is deprecated and +should not be used anymore. + +NOTE: A short syntax *btrfs balance <path>* works due to backward compatibility +but is deprecated and should not be used anymore. Use *btrfs balance start* +command instead. + +SUBCOMMAND +---------- +*cancel* <path>:: +cancel running or paused balance + +*pause* <path>:: +pause running balance operation, this will store the state of the balance +progress and used filters to the filesystem + +*resume* <path>:: +resume interrupted balance + +*start* [options] <path>:: +start the balance operation according to the specified filters, no filters +will rewrite the entire filesystem. The process runs in the foreground. ++ +`Options` ++ +-d[<filters>]:::: +act on data block groups, see `FILTERS` section for details about 'filters' +-m[<filters>]:::: +act on metadata chunks, see `FILTERS` section for details about 'filters' +-s[<filters>]:::: +act on system chunks (requires '-f'), see `FILTERS` section for details about 'filters'. +-v:::: +be verbose and print balance filter arguments +-f:::: +force reducing of metadata integrity, eg. 
when going from 'raid1' to 'single' + +*status* [-v] <path>:: +Show status of running or paused balance. ++ +If '-v' option is given, output will be verbose. + +FILTERS +------- +From kernel 3.3 onwards, btrfs balance can limit its action to a subset of the +full filesystem, and can be used to change the replication configuration (e.g. +moving data from single to RAID1). This functionality is accessed through the +'-d', '-m' or '-s' options to btrfs balance start, which filter on data, +metadata and system blocks respectively. + +A filter has the following structure: 'type'[='params'][,'type'=...] + +The available types are: + +*profiles=<profiles>*:: +Balances only block groups with the given profiles. Parameters +are a list of profile names separated by "'|'" (pipe). + +*usage=<percent>*:: +*usage=<range>*:: +Balances only block groups with usage under the given percentage. The +value of 0 is allowed and will clean up completely unused block groups, this +should not require any new work space allocated. You may want to use 'usage=0' +in case balance is returning ENOSPC and your filesystem is not too full. ++ +The argument may be a single value or a range. The single value 'N' means 'at +most N percent used', equivalent to '..N' range syntax. Kernels prior to 4.4 +accept only the single value format. +The minimum range boundary is inclusive, maximum is exclusive. + +*devid=<id>*:: +Balances only block groups which have at least one chunk on the given +device. To list devices with ids use *btrfs fi show*. + +*drange=<range>*:: +Balance only block groups which overlap with the given byte range on any +device. Use in conjunction with 'devid' to filter on a specific device. The +parameter is a range specified as 'start..end'. + +*vrange=<range>*:: +Balance only block groups which overlap with the given byte range in the +filesystem's internal virtual address space. This is the address space that +most reports from btrfs in the kernel log use. 
The parameter is a range +specified as 'start..end'. + +*convert=<profile>*:: +Convert each selected block group to the given profile name identified by +parameters. ++ +NOTE: starting with kernel 4.5, the 'data' chunks can be converted to/from the +'DUP' profile on a single device. + +*limit=<number>*:: +*limit=<range>*:: +Process only given number of chunks, after all filters are applied. This can be +used to specifically target a chunk in connection with other filters ('drange', +'vrange') or just simply limit the amount of work done by a single balance run. ++ +The argument may be a single value or a range. The single value 'N' means 'at +most N chunks', equivalent to '..N' range syntax. Kernels prior to 4.4 accept +only the single value format. The range minimum and maximum are inclusive. + +*stripes=<range>*:: +Balance only block groups which have the given number of stripes. The parameter +is a range specified as 'start..end'. Makes sense for block group profiles that +utilize striping, ie. RAID0/10/5/6. The range minimum and maximum are +inclusive. + +*soft*:: +Takes no parameters. Only has meaning when converting between profiles. +When doing convert from one profile to another and soft mode is on, +chunks that already have the target profile are left untouched. +This is useful e.g. when half of the filesystem was converted earlier but got +cancelled. ++ +The soft mode switch is (like every other filter) per-type. +For example, this means that we can convert metadata chunks the "hard" way +while converting data chunks selectively with soft switch. + +Profile names, used in 'profiles' and 'convert' are one of: 'raid0', 'raid1', +'raid10', 'raid5', 'raid6', 'dup', 'single'. The mixed data/metadata profiles +can be converted in the same way, but conversion between mixed and non-mixed +is not implemented. For the constraints of the profiles please refer to `mkfs.btrfs`(8), +section 'PROFILES'. 
+ +ENOSPC +------ + +The way balance operates, it usually needs to temporarily create a new block +group and move the old data there. For that it needs work space, otherwise +it fails for ENOSPC reasons. +This is not the same ENOSPC as if the free space is exhausted. This refers to +the space on the level of block groups. + +The free work space can be calculated from the output of the *btrfs filesystem show* +command: + +------------------------------ + Label: 'BTRFS' uuid: 8a9d72cd-ead3-469d-b371-9c7203276265 + Total devices 2 FS bytes used 77.03GiB + devid 1 size 53.90GiB used 51.90GiB path /dev/sdc2 + devid 2 size 53.90GiB used 51.90GiB path /dev/sde1 +------------------------------ + +'size' - 'used' = 'free work space' + +'53.90GiB' - '51.90GiB' = '2.00GiB' + +An example of a filter that does not require workspace is 'usage=0'. This will +scan through all unused block groups of a given type and will reclaim the +space. After that it might be possible to run other filters. + +**CONVERSIONS ON MULTIPLE DEVICES** + +Conversion to profiles based on striping (RAID0, RAID5/6) require the work +space on each device. An interrupted balance may leave partially filled block +groups that might consume the work space. + +EXIT STATUS +----------- +*btrfs balance* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. 
+ +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-device`(8) diff --git a/Documentation/btrfs-check.asciidoc b/Documentation/btrfs-check.asciidoc new file mode 100644 index 00000000..327a45d6 --- /dev/null +++ b/Documentation/btrfs-check.asciidoc @@ -0,0 +1,60 @@ +btrfs-check(8) +============== + +NAME +---- +btrfs-check - check or repair an unmounted btrfs filesystem + +SYNOPSIS +-------- +*btrfs check* [options] <device> + +DESCRIPTION +----------- +*btrfs check* is used to check or repair an unmounted btrfs filesystem. + +NOTE: Since btrfs is under development, the *btrfs check* capabilities are +continuously enhanced. It's highly recommended to read the following btrfs +wiki before executing *btrfs check* with '--repair' option: + +https://btrfs.wiki.kernel.org/index.php/Btrfsck + +*btrfsck* is an alias of *btrfs check* command and is now deprecated. + +OPTIONS +------- +-s|--super <superblock>:: +use <superblock>th superblock copy, valid values are 0 up to 2 if the +respective superblock offset is within the filesystem +--repair:: +try to repair the filesystem +--init-csum-tree:: +create a new CRC tree and recalculate all checksums +--init-extent-tree:: +create a new extent tree +--check-data-csum:: +verify checksums of data blocks +-p|--progress:: +indicate progress at various checking phases +--qgroup-report:: +verify qgroup accounting and compare against filesystem accounting +--subvol-extents <subvolid>:: +show extent state for a subvolume +--tree-root <bytenr>:: +use the given bytenr for the tree root + +EXIT STATUS +----------- +*btrfs check* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. 
+ +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-scrub`(8), +`btrfs-rescue`(8) diff --git a/Documentation/btrfs-convert.asciidoc b/Documentation/btrfs-convert.asciidoc new file mode 100644 index 00000000..ca3417f4 --- /dev/null +++ b/Documentation/btrfs-convert.asciidoc @@ -0,0 +1,98 @@ +btrfs-convert(8) +================ + +NAME +---- +btrfs-convert - convert from ext2/3/4 filesystem to btrfs + +SYNOPSIS +-------- +*btrfs-convert* [options] <device> + +DESCRIPTION +----------- +*btrfs-convert* is used to convert existing ext2/3/4 filesystem image to a +btrfs filesystem in-place. The original filesystem image is accessible +subvolume named 'ext2_saved' as file 'image'. + +WARNING: If you are going to perform rollback to ext2/3/4, you should not +execute *btrfs balance* command on the converted filesystem. This will change +the extent layout and make *btrfs-convert* unable to rollback. + +The conversion utilizes free space of the original filesystem. The exact +estimate of the required space cannot be foretold. The final btrfs metadata +might occupy several gigabytes on a hundreds-gigabyte filesystem. + +If you decide not to rollback anymore, it is recommended to perform a few more +steps to transform the btrfs filesystem to a more compact layout. The +conversion inherits the original data block fragmentation and the metadata +blocks are bound to the original free space layout. + +**REMOVE THE ORIGINAL FILESYSTEM METADATA** + +By removing the 'ext2_saved' subvolume, all metadata of the original filesystem +will be removed: + + # btrfs subvolume delete /mnt/ext2_saved + +At this point it's not possible to do rollback. The filesystem is usable but may +be impacted by the fragmentation. + +**MAKE FILE DATA MORE CONTIGUOUS** + +An optional but recommended step is to run defragmentation on the entire +filesystem. This will attempt to make file extents more contiguous. 
+ + # btrfs filesystem defrag -v -r -f -t 32M /mnt/btrfs + +Verbose recursive defragmentation ('-v', '-r'), flush data per-file ('-f') with target +extent size 32M ('-t'). + +**ATTEMPT TO MAKE BTRFS METADATA MORE COMPACT** + +Optional but recommended step. + +The metadata block groups after conversion may be smaller than the default size +(256MiB or 1GiB). Running a balance will attempt to merge the block groups. +This depends on the free space layout (and fragmentation) and may fail. This is +a soft error leaving the filesystem usable but the block group layout may +remain unchanged. + +Note that balance operation takes a lot of time. + + # btrfs balance start -m /mnt/btrfs + +OPTIONS +------- +-d|--no-datasum:: +disable data checksum calculations and set NODATASUM file flag, this can speed +up the conversion +-i|--no-xattr:: +ignore xattrs and ACLs of files +-n|--no-inline:: +disable inlining of small files to metadata blocks, this will decrease the metadata +consumption and may help to convert a filesystem with low free space +-N|--nodesize <SIZE>:: +set filesystem nodesize, the tree block size in which btrfs stores its metadata. +The default value is 16KB (16384) or the page size, whichever is bigger. +Must be a multiple of the sectorsize, but not larger than 65536. See +`mkfs.btrfs`(8) for more details. +-r|--rollback:: +rollback to the original ext2/3/4 filesystem if possible +-l|--label <LABEL>:: +set filesystem label during conversion +-L|--copy-label:: +use label from the converted filesystem +-p|--progress:: +show progress of conversion, on by default +--no-progress:: +disable detailed progress and show only the main phases of conversion + +EXIT STATUS +----------- +*btrfs-convert* will return 0 if no error happened. +If any problems happened, 1 will be returned. 
+ +SEE ALSO +-------- +`mkfs.btrfs`(8) diff --git a/Documentation/btrfs-debug-tree.asciidoc b/Documentation/btrfs-debug-tree.asciidoc new file mode 100644 index 00000000..23fc1156 --- /dev/null +++ b/Documentation/btrfs-debug-tree.asciidoc @@ -0,0 +1,38 @@ +btrfs-debug-tree(8) +=================== + +NAME +---- +btrfs-debug-tree - dump btrfs filesystem metadata into stdout + +SYNOPSIS +-------- +*btrfs-debug-tree* [options] <device> + +DESCRIPTION +----------- +*btrfs-debug-tree* is used to dump the whole tree of the given device. + +This is maybe useful for analyzing filesystem state or inconsistence and has +a positive educational effect on understanding the internal structure. +<device> is the device file where the filesystem is stored. + +OPTIONS +------- +-e:: +Print detailed extents info. +-d:: +Print info of btrfs device and root tree dirs only. +-r:: +Print info of roots only. +-b <block_num>:: +Print info of the specified block only. + +EXIT STATUS +----------- +*btrfs-debug-tree* will return 0 if no error happened. +If any problems happened, 1 will be returned. + +SEE ALSO +-------- +`mkfs.btrfs`(8) diff --git a/Documentation/btrfs-device.asciidoc b/Documentation/btrfs-device.asciidoc new file mode 100644 index 00000000..2827598a --- /dev/null +++ b/Documentation/btrfs-device.asciidoc @@ -0,0 +1,145 @@ +btrfs-device(8) +=============== + +NAME +---- +btrfs-device - control btrfs devices + +SYNOPSIS +-------- +*btrfs device* <subcommand> <args> + +DESCRIPTION +----------- +*btrfs device* is used to control the btrfs devices, since btrfs can be used +across several devices, *btrfs device* is used for multiple device management. + +DEVICE MANAGEMENT +----------------- +Btrfs filesystem is capable to manage multiple devices. + +Btrfs filesystem uses different profiles to manage different RAID level, and +use balance to rebuild chunks, also devices can be added/removed/replace +online. 
+ +Profile:: +Btrfs filesystem uses data/metadata profiles to manage allocation/duplication +mechanism. + +Profiles like RAID level can be assigned to data and metadata separately. ++ +See `mkfs.btrfs`(8) for more details. + +RAID level:: +Btrfs filesystem supports most of the standard RAID level: 0/1/5/6/10. + +RAID levels can be assigned at mkfs time or online. ++ +See `mkfs.btrfs`(8) for mkfs time RAID level assign and `btrfs-balance`(8) for +online RAID level assign. ++ +NOTE: Since btrfs is under heavy development especially the RAID5/6 support, +it is *highly* recommended to read the following btrfs wiki page to get more +updated details on RAID5/6: + +https://btrfs.wiki.kernel.org/index.php/RAID56 + +Balance:: +`btrfs-balance`(8) subcommand can be used to balance or rebuild chunks to the +desired profile. ++ +Due to the fact that balance can rebuild/recover chunks according to its RAID +duplication if possible, so when using RAID1/5/6/10 with some devices failed +and you just added a new device to btrfs using `btrfs-device`(8), you should +run `btrfs-balance`(8) to rebuild the chunks. ++ +See `btrfs-balance`(8) for more details. + +Device add/remove/replace:: +Device can be added/removed using `btrfs-device`(8) subcommand and replaced +using `btrfs-replace`(8). ++ +When device is removed or replaced, btrfs will do the chunk rebuild if needed. ++ +See `btrfs-replace`(8) man page for more details on device replace. + +SUBCOMMAND +---------- +*add* [-Kf] <dev> [<dev>...] <path>:: +Add device(s) to the filesystem identified by <path>. ++ +If applicable, a whole device discard (TRIM) operation is performed. ++ +`Options` ++ +-K|--nodiscard:::: +do not perform discard by default +-f|--force:::: +force overwrite of existing filesystem on the given disk(s) + +*remove* <dev> [<dev>...] <path>:: +Remove device(s) from a filesystem identified by <path>. + +*delete* <dev> [<dev>...] 
<path>:: +Alias of remove kept for backwards compatibility + +*ready* <device>:: +Check device to see if it has all of its devices in cache for mounting. + +*scan* [(--all-devices|-d)|<device> [<device>...]]:: +Scan devices for a btrfs filesystem. ++ +If one or more devices are passed, these are scanned for a btrfs filesystem. +If no devices are passed, btrfs uses block devices containing btrfs +filesystem as listed by blkid. +Finally, if '--all-devices' or '-d' is passed, all the devices under /dev are +scanned. + +*stats* [-z] <path>|<device>:: +Read and print the device IO stats for all mounted devices of the filesystem +identified by <path> or for a single <device>. ++ +`Options` ++ +-z:::: +Reset stats to zero after reading them. + +*usage* [options] <path> [<path>...]:: +Show detailed information about internal allocations in devices. ++ +`Options` ++ +-b|--raw:::: +raw numbers in bytes, without the 'B' suffix +-h|--human-readable:::: +print human friendly numbers, base 1024, this is the default +-H:::: +print human friendly numbers, base 1000 +--iec:::: +select the 1024 base for the following options, according to the IEC standard +--si:::: +select the 1000 base for the following options, according to the SI standard +-k|--kbytes:::: +show sizes in KiB, or kB with --si +-m|--mbytes:::: +show sizes in MiB, or MB with --si +-g|--gbytes:::: +show sizes in GiB, or GB with --si +-t|--tbytes:::: +show sizes in TiB, or TB with --si + +If conflicting options are passed, the last one takes precedence. + +EXIT STATUS +----------- +*btrfs device* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. 
+ +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-replace`(8), +`btrfs-balance`(8) diff --git a/Documentation/btrfs-filesystem.asciidoc b/Documentation/btrfs-filesystem.asciidoc new file mode 100644 index 00000000..26126175 --- /dev/null +++ b/Documentation/btrfs-filesystem.asciidoc @@ -0,0 +1,338 @@ +btrfs-filesystem(8) +=================== + +NAME +---- +btrfs-filesystem - command group of btrfs that usually work on the whole filesystem + +SYNOPSIS +-------- +*btrfs filesystem* <subcommand> <args> + +DESCRIPTION +----------- +*btrfs filesystem* is used to do the whole filesystem level tasks, including +all the regular filesystem operations like resizing, space stats, label +setting/getting, and defragmentation. + +SUBCOMMAND +---------- +*df* [options] <path>:: +Show a terse summary information about allocation of block group types of a given +mount point. The original purpose of this command was a debugging helper. The +output needs to be further interpreted and is not suitable for quick overview. ++ +-- +An example with description: + +* device size: '1.9TiB', one device, no RAID +* filesystem size: '1.9TiB' +* created with: 'mkfs.btrfs -d single -m single' +-- ++ +------------------------------ +$ btrfs filesystem df /path +Data, single: total=1.15TiB, used=1.13TiB +System, single: total=32.00MiB, used=144.00KiB +Metadata, single: total=12.00GiB, used=6.45GiB +GlobalReserve, single: total=512.00MiB, used=0.00B +------------------------------ ++ +-- +* 'Data', 'System' and 'Metadata' are separeate block group types. +'GlobalReserve' is an artificial and internal emergency space, see below. +* 'single' -- the allocation profile, defined at mkfs time +* 'total' -- sum of space reserved for +all allocation profiles of the given type, ie. all Data/single. Note that it's +not total size of filesystem. +* 'used' -- sum of used space of the above, ie. file extents, metadata blocks +-- ++ +'GlobalReserve' is an artificial and internal emergency space. It is used eg. 
+when the filesystem is full. Its 'total' size is dynamic based on the +filesystem size, usually not larger than 512MiB, 'used' may fluctuate. ++ +The global block reserve is accounted within Metadata. In case the filesystem +metadata are exhausted, 'GlobalReserve/total + Metadata/used = Metadata/total'. ++ +`Options` ++ +-b|--raw:::: +raw numbers in bytes, without the 'B' suffix +-h|--human-readable:::: +print human friendly numbers, base 1024, this is the default +-H:::: +print human friendly numbers, base 1000 +--iec:::: +select the 1024 base for the following options, according to the IEC standard +--si:::: +select the 1000 base for the following options, according to the SI standard +-k|--kbytes:::: +show sizes in KiB, or kB with --si +-m|--mbytes:::: +show sizes in MiB, or MB with --si +-g|--gbytes:::: +show sizes in GiB, or GB with --si +-t|--tbytes:::: +show sizes in TiB, or TB with --si ++ +If conflicting options are passed, the last one takes precedence. + +*defragment* [options] <file>|<dir> [<file>|<dir>...]:: +Defragment file data on a mounted filesytem. ++ +If '-r' is passed, files in dir will be defragmented recursively. +The start position and the number of bytes to defragment can be specified by +start and len using '-s' and '-l' options below. +Extents bigger than value given by '-t' will be skipped, otherwise this value +is used as a target extent size, but is only advisory and may not be reached +if the free space is too fragmented. +Use 0 to take the kernel default, which is 256kB but may change in the future. +You can also turn on compression in defragment operations. ++ +WARNING: Defragmenting with Linux kernel versions < 3.9 or ≥ 3.14-rc2 as well as +with Linux stable kernel versions ≥ 3.10.31, ≥ 3.12.12 or ≥ 3.13.4 will break up +the ref-links of COW data (for example files copied with `cp --reflink`, +snapshots or de-duplicated data). +This may cause considerable increase of space usage depending on the broken up +ref-links. 
++ +`Options` ++ +-v:::: +be verbose, print file names as they're submitted for defragmentation +-c[<algo>]:::: +compress file contents while defragmenting. Optional argument selects the compression +algorithm, 'zlib' (default) or 'lzo'. Currently it's not possible to select no +compression. See also section 'EXAMPLES'. +-r:::: +defragment files recursively in given directories +-f:::: +flush data for each file before going to the next file. This will limit the amount +of dirty data to current file, otherwise the amount cumulates from several files +and may increase system load. +-s <start>[kKmMgGtTpPeE]:::: +defragmentation will start from the given offset, default is beginning of a file +-l <len>[kKmMgGtTpPeE]:::: +defragment only up to 'len' bytes, default is the file size +-t <size>[kKmMgGtTpPeE]:::: +target extent size, do not touch extents bigger than 'size' ++ +For 'start', 'len', 'size' it is possible to append +units designator: \'K', \'M', \'G', \'T', \'P', or \'E', which represent +KiB, MiB, GiB, TiB, PiB, or EiB, respectively (case does not matter). ++ +NOTE: Directory arguments without '-r' do not defragment files recursively but will +defragment certain internal trees (extent tree and the subvolume tree). This has been +confusing and could be removed in the future. + +*label* [<dev>|<mountpoint>] [<newlabel>]:: +Show or update the label of a filesystem. This works on a mounted filesystem or +a filesystem image. ++ +The 'newlabel' argument is optional. Current label is printed if the the argument +is omitted. ++ +NOTE: the maximum allowable length shall be less than 256 chars and must not contain +a newline. The trailing newline is stripped automatically. + +// Some wording are extracted by the resize2fs man page +*resize* [<devid>:][+/-]<size>[kKmMgGtTpPeE]|[<devid>:]max <path>:: +Resize a mounted filesystem identified by 'path'. A particular device +can be resized by specifying a 'devid'. 
++ +WARNING: If 'path' is a file containing a BTRFS image then resize does not work +as expected and does not resize the image. This would resize the underlying +filesystem instead. ++ +The 'devid' can be found in the output of *btrfs filesystem show* and +defaults to 1 if not specified. +The 'size' parameter specifies the new size of the filesystem. +If the prefix '+' or '-' is present the size is increased or decreased +by the quantity 'size'. +If no units are specified, bytes are assumed for 'size'. +Optionally, the size parameter may be suffixed by one of the following +units designators: \'K', \'M', \'G', \'T', \'P', or \'E', which represent +KiB, MiB, GiB, TiB, PiB, or EiB, respectively (case does not matter). ++ +If 'max' is passed, the filesystem will occupy all available space on the +device respecting 'devid' (remember, devid 1 by default). ++ +The resize command does not manipulate the size of underlying +partition. If you wish to enlarge/reduce a filesystem, you must make sure you +can expand the partition before enlarging the filesystem and shrink the +partition after reducing the size of the filesystem. This can done using +`fdisk`(8) or `parted`(8) to delete the existing partition and recreate +it with the new desired size. When recreating the partition make sure to use +the same starting partition offset as before. ++ +Growing is usually instant as it only updates the size. However, shrinking could +take a long time if there are data in the device area that's beyond the new +end. Relocation of the data takes time. ++ +See also section 'EXAMPLES'. + +*show* [options] [<path>|<uuid>|<device>|<label>]:: +Show the btrfs filesystem with some additional info about devices and space +allocation. ++ +If no option none of 'path'/'uuid'/'device'/'label' is passed, information +about all the BTRFS filesystems is shown, both mounted and unmounted. 
++ +`Options` ++ +-m|--mounted:::: +probe kernel for mounted BTRFS filesystems +-d|--all-devices:::: +scan all devices under /dev, otherwise the devices list is extracted from the +/proc/partitions file. This is a fallback option if there's no device node +manager (like udev) available in the system. +--raw:::: +raw numbers in bytes, without the 'B' suffix +--human-readable:::: +print human friendly numbers, base 1024, this is the default +--iec:::: +select the 1024 base for the following options, according to the IEC standard +--si:::: +select the 1000 base for the following options, according to the SI standard +--kbytes:::: +show sizes in KiB, or kB with --si +--mbytes:::: +show sizes in MiB, or MB with --si +--gbytes:::: +show sizes in GiB, or GB with --si +--tbytes:::: +show sizes in TiB, or TB with --si + +*sync* <path>:: +Force a sync of the filesystem at 'path'. This is done via a special ioctl and +will also trigger cleaning of deleted subvolumes. Besides that it's equivalent +to the `sync`(1) command. + +*usage* [options] <path> [<path>...]:: +Show detailed information about internal filesystem usage. This is supposed to +replace the *btrfs filesystem df* command in the long run. ++ +The level of detail can differ if the command is run under a regular or the +root user (due to use of restricted ioctl). 
For both there's a summary section +with information about space usage: ++ +------------------------- +$ btrfs fi usage /path +WARNING: cannot read detailed chunk info, RAID5/6 numbers will be incorrect, run as root +Overall: + Device size: 1.82TiB + Device allocated: 1.17TiB + Device unallocated: 669.99GiB + Device missing: 0.00B + Used: 1.14TiB + Free (estimated): 692.57GiB (min: 692.57GiB) + Data ratio: 1.00 + Metadata ratio: 1.00 + Global reserve: 512.00MiB (used: 0.00B) +------------------------- ++ +The root user will also see stats broken down by block group types: ++ +------------------------- +Data,single: Size:1.15TiB, Used:1.13TiB + /dev/sdb 1.15TiB + +Metadata,single: Size:12.00GiB, Used:6.45GiB + /dev/sdb 12.00GiB + +System,single: Size:32.00MiB, Used:144.00KiB + /dev/sdb 32.00MiB + +Unallocated: + /dev/sdb 669.99GiB +------------------------- ++ +`Options` ++ +-b|--raw:::: +raw numbers in bytes, without the 'B' suffix +-h|--human-readable:::: +print human friendly numbers, base 1024, this is the default +-H:::: +print human friendly numbers, base 1000 +--iec:::: +select the 1024 base for the following options, according to the IEC standard +--si:::: +select the 1000 base for the following options, according to the SI standard +-k|--kbytes:::: +show sizes in KiB, or kB with --si +-m|--mbytes:::: +show sizes in MiB, or MB with --si +-g|--gbytes:::: +show sizes in GiB, or GB with --si +-t|--tbytes:::: +show sizes in TiB, or TB with --si +-T:::: +show data in tabular format ++ +If conflicting options are passed, the last one takes precedence. + +EXAMPLES +-------- + +*$ btrfs filesystem defrag -v -r dir/* + +Recursively defragment files under 'dir/', print files as they are processed. +The file names will be printed in batches, similarly the amount of data triggered +by defragmentation will be proportional to last N printed files. 
The system dirty +memory throttling will slow down the defragmentation but there can still be a lot +of IO load and the system may stall for a moment. + +*$ btrfs filesystem defrag -v -r -f dir/* + +Recursively defragment files under 'dir/', be verbose and wait until all blocks +are flushed before processing next file. You can note slower progress of the +output and lower IO load (proportional to currently defragmented file). + +*$ btrfs filesystem defrag -v -r -f -clzo dir/* + +Recursively defragment files under 'dir/', be verbose, wait until all blocks are +flushed and force file compression. + +*$ btrfs filesystem defrag -v -r -t 64M dir/* + +Recursively defragment files under 'dir/', be verbose and try to merge extents +to be about 64MiB. As stated above, the success rate depends on actual free +space fragmentation and the final result is not guaranteed to meet the target +even if run repeatedly. + +*$ btrfs filesystem resize -1G /path* + +*$ btrfs filesystem resize 1:-1G /path* + +Shrink size of the filesystem's device id 1 by 1GiB. The first syntax expects a +device with id 1 to exist, otherwise fails. The second is equivalent and more +explicit. For a single-device filesystem it's typically not necessary to +specify the devid though. + +*$ btrfs filesystem resize max /path* + +*$ btrfs filesystem resize 1:max /path* + +Let's assume that devid 1 exists, the filesystem does not occupy the whole block +device, eg. it has been enlarged and we want to grow the filesystem. Simply using +'max' as size we will achieve that. + +NOTE: There are two ways to minimize the filesystem on a given device. The +*btrfs inspect-internal min-dev-size* command, or iteratively shrink in steps. + +EXIT STATUS +----------- +*btrfs filesystem* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. 
+ +SEE ALSO +-------- +`mkfs.btrfs`(8), diff --git a/Documentation/btrfs-find-root.asciidoc b/Documentation/btrfs-find-root.asciidoc new file mode 100644 index 00000000..e04cd3e8 --- /dev/null +++ b/Documentation/btrfs-find-root.asciidoc @@ -0,0 +1,35 @@ +btrfs-find-root(8) +================== + +NAME +---- +btrfs-find-root - filter to find btrfs root + +SYNOPSIS +-------- +*btrfs-find-root* [options] <dev> + +DESCRIPTION +----------- +*btrfs-find-root* is used to find the satisfied root, you can filter by +root tree's objectid, generation, level. + +OPTIONS +------- +-a:: +Search through all the metadata extents, even the root is already found. +-g <generation>:: +Filter root tree by it's original transaction id, tree root's generation in default. +-o <objectid>:: +Filter root tree by it's objectid,tree root's objectid in default. +-l <level>:: +Filter root tree by B-+ tree's level, level 0 in default. + +EXIT STATUS +----------- +*btrfs-find-root* will return 0 if no error happened. +If any problems happened, 1 will be returned. + +SEE ALSO +-------- +`mkfs.btrfs`(8) diff --git a/Documentation/btrfs-image.asciidoc b/Documentation/btrfs-image.asciidoc new file mode 100644 index 00000000..38265171 --- /dev/null +++ b/Documentation/btrfs-image.asciidoc @@ -0,0 +1,66 @@ +btrfs-image(8) +============== + +NAME +---- +btrfs-image - create/restore an image of the filesystem + +SYNOPSIS +-------- +*btrfs-image* [options] <source> <target> + +DESCRIPTION +----------- +*btrfs-image* is used to create an image of a btrfs filesystem. +All data will be zeroed, but metadata and the like is preserved. +Mainly used for debugging purposes. + +In the dump mode, source is the btrfs device/file and target is the output +file (use '-' for stdout). + +In the restore mode (option -r), source is the dumped image and target is the btrfs device/file. + + +OPTIONS +------- +-r:: +Restore metadump image. 
By default, this fixes super's chunk tree, by +using 1 stripe pointing to primary device, so that file system can be +restored by running tree log replay if possible. To restore without +changing number of stripes in chunk tree check -o option. + +-c <value>:: +Compression level (0 ~ 9). + +-t <value>:: +Number of threads (1 ~ 32) to be used to process the image dump or restore. + +-o:: +Use the old restore method, this does not fixup the chunk tree so the restored +file system will not be able to be mounted. + +-s:: +Sanitize the file names when generating the image. One -s means just +generate random garbage, which means that the directory indexes won't match up +since the hashes won't match with the garbage filenames. Using -ss will +calculate a collision for the filename so that the hashes match, and if it +can't calculate a collision then it will just generate garbage. The collision +calculator is very time and CPU intensive so only use it if you are having +problems with your file system tree and need to have it mostly working. + +-w:: +Walk all the trees manually and copy any blocks that are referenced. Use this +option if your extent tree is corrupted to make sure that all of the metadata is +captured. + +-m:: +Restore for multiple devices, more than 1 device should be provided. + +EXIT STATUS +----------- +*btrfs-image* will return 0 if no error happened. +If any problems happened, 1 will be returned. 
+ +SEE ALSO +-------- +`mkfs.btrfs`(8) diff --git a/Documentation/btrfs-inspect-internal.asciidoc b/Documentation/btrfs-inspect-internal.asciidoc new file mode 100644 index 00000000..1c7c3611 --- /dev/null +++ b/Documentation/btrfs-inspect-internal.asciidoc @@ -0,0 +1,84 @@ +btrfs-inspect-internal(8) +========================= + +NAME +---- +btrfs-inspect-internal - query various internal information + +SYNOPSIS +-------- +*btrfs inspect-internal* <subcommand> <args> + +DESCRIPTION +----------- + +This command group provides an interface to query internal information. The +functionality ranges from a simple UI to an ioctl or a more complex query that +assembles the result from several internal structures. The latter usually +requires calls to privileged ioctls. + +SUBCOMMAND +---------- +*inode-resolve* [-v] <ino> <path>:: +(needs root privileges) ++ +resolve paths to all files with given inode number 'ino' in a given subvolume +at 'path', ie. all hardlinks ++ +`Options` ++ +-v:::: +verbose mode, print count of returned paths and ioctl() return value + +*logical-resolve* [-Pv] [-s <bufsize>] <logical> <path>:: +(needs root privileges) ++ +resolve paths to all files at given 'logical' address in the linear filesystem space ++ +`Options` ++ +-P:::: +skip the path resolving and print the inodes instead +-v:::: +verbose mode, print count of returned paths and all ioctl() return values +-s <bufsize>:::: +set internal buffer for storing the file names to 'bufsize', default is 4096, maximum 64k + +*min-dev-size* [options] <path>:: +(needs root privileges) ++ +return the minimum size the device can be shrunk to, without performing any +resize operation, this may be useful before executing the actual resize operation ++ +`Options` ++ +--id <id>:::: +specify the device 'id' to query, default is 1 if this option is not used + +*rootid* <path>:: +for a given file or directory, return the containing tree root id, for a +subvolume itself return it's own tree id (ie. 
subvol id) ++ +NOTE: The result is undefined for the so-called empty subvolumes (identified by +inode number 2), but such subvolume does not contain any files anyway + +*subvolid-resolve* <subvolid> <path>:: +(needs root privileges) ++ +resolve the absolute path of the subvolume id 'subvolid' + +EXIT STATUS +----------- +*btrfs inspect-internal* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-debug-tree`(8) diff --git a/Documentation/btrfs-man5.asciidoc b/Documentation/btrfs-man5.asciidoc new file mode 100644 index 00000000..d4323917 --- /dev/null +++ b/Documentation/btrfs-man5.asciidoc @@ -0,0 +1,406 @@ +btrfs-man5(5) +============== + +NAME +---- +btrfs-man5 - topics about the BTRFS filesystem (mount options, supported file attributes and other) + +DESCRIPTION +----------- +This document describes topics related to BTRFS that are not specific to the +tools. Currently covers: + +1. mount options + +2. file attributes + +MOUNT OPTIONS +------------- + +This section describes mount options specific to BTRFS. For the generic mount +options please refer to `mount`(8) manpage. The options are sorted alphabetically +(discarding the 'no' prefix). + +*acl*:: +*noacl*:: +(default: on) ++ +Enable/disable support for Posix Access Control Lists (ACLs). See the +`acl`(5) manual page for more information about ACLs. + +*alloc_start='bytes'*:: +(default: 1M, minimum: 1M) ++ +Debugging option to force all block allocations above a certain +byte threshold on each block device. The value is specified in +bytes, optionally with a K, M, or G suffix (case insensitive). ++ +This option was used for testing and has no practical use, it's slated to be +removed in the future.
+ +*autodefrag*:: +*noautodefrag*:: +(since: 3.0, default: off) ++ +Enable automatic file defragmentation. +When enabled, small random writes into files (in a range of tens of kilobytes, +currently it's 64K) are detected and queued up for the defragmentation process. +Not well suited for large database workloads. ++ +The read latency may increase due to reading the adjacent blocks that make up the +range for defragmentation, successive writes will merge the blocks in the new +location. ++ +WARNING: Defragmenting with Linux kernel versions < 3.9 or ≥ 3.14-rc2 as +well as with Linux stable kernel versions ≥ 3.10.31, ≥ 3.12.12 or +≥ 3.13.4 will break up the ref-links of CoW data (for example files +copied with `cp --reflink`, snapshots or de-duplicated data). +This may cause considerable increase of space usage depending on the +broken up ref-links. + +*barrier*:: +*nobarrier*:: +(default: on) ++ +Ensure that all IO write operations make it through the device cache and are stored +permanently when the filesystem is at its consistency checkpoint. This +typically means that a flush command is sent to the device that will +synchronize all pending data and ordinary metadata blocks, then writes the +superblock and issues another flush. ++ +The write flushes incur a slight hit and also prevent the IO block +scheduler from reordering requests in a more effective way. Disabling barriers gets +rid of that penalty but will most certainly lead to a corrupted filesystem in +case of a crash or power loss. The ordinary metadata blocks could be yet +unwritten at the time the new superblock is stored permanently, expecting that +the block pointers to metadata were stored permanently before. ++ +On a device with a volatile battery-backed write-back cache, the 'nobarrier' +option will not lead to filesystem corruption as the pending blocks are +supposed to make it to the permanent storage.
+ +*check_int*:: +*check_int_data*:: +*check_int_print_mask='value'*:: +(since: 3.0, default: off) ++ +These debugging options control the behavior of the integrity checking +module (the BTRFS_FS_CHECK_INTEGRITY config option required). + ++ +`check_int` enables the integrity checker module, which examines all +block write requests to ensure on-disk consistency, at a large +memory and CPU cost. + ++ +`check_int_data` includes extent data in the integrity checks, and +implies the check_int option. + ++ +`check_int_print_mask` takes a bitmask of BTRFSIC_PRINT_MASK_* values +as defined in 'fs/btrfs/check-integrity.c', to control the integrity +checker module behavior. + ++ +See comments at the top of 'fs/btrfs/check-integrity.c' +for more info. + +*clear_cache*:: +Force clearing and rebuilding of the disk space cache if something +has gone wrong. See also: 'space_cache'. + +*commit='seconds'*:: +(since: 3.12, default: 30) ++ +Set the interval of periodic commit. Higher +values defer data being synced to permanent storage with obvious +consequences when the system crashes. The upper bound is not forced, +but a warning is printed if it's more than 300 seconds (5 minutes). + +*compress*:: +*compress='type'*:: +*compress-force*:: +*compress-force='type'*:: +(default: off) ++ +Control BTRFS file data compression. Type may be specified as 'zlib', +'lzo' or 'no' (for no compression, used for remounting). If no type +is specified, 'zlib' is used. If compress-force is specified, +all files will be compressed, whether or not they compress well. ++ +NOTE: If compression is enabled, 'nodatacow' and 'nodatasum' are disabled. + +*datacow*:: +*nodatacow*:: +(default: on) ++ +Enable data copy-on-write for newly created files. +'Nodatacow' implies 'nodatasum', and disables 'compression'. All files created +under 'nodatacow' are also set the NOCOW file attribute (see `chattr`(1)). + +*datasum*:: +*nodatasum*:: +(default: on) ++ +Enable data checksumming for newly created files. 
+'Datasum' implies 'datacow', ie. the normal mode of operation. All files created +under 'nodatasum' inherit the "no checksums" property, however there's no +corresponding file attribute (see `chattr`(1)). + +*degraded*:: +(default: off) ++ +Allow mounts with fewer devices than the raid profile constraints +require. A read-write mount (or remount) may fail with too many devices +missing, for example if a stripe member is completely missing from RAID0. + +*device='devicepath'*:: +Specify a path to a device that will be scanned for BTRFS filesystem during +mount. This is usually done automatically by a device manager (like udev) or +using the *btrfs device scan* command (eg. run from the initial ramdisk). In +cases where this is not possible the 'device' mount option can help. ++ +NOTE: booting eg. a RAID1 system may fail even if all filesystem's 'device' +paths are provided as the actual device nodes may not be discovered by the +system at that point. + +*discard*:: +*nodiscard*:: +(default: off) ++ +Enable discarding of freed file blocks using TRIM operation. This is useful +for SSD devices, thinly provisioned LUNs or virtual machine images where the +backing device understands the operation. Depending on support of the +underlying device, the operation may severely hurt performance in case the TRIM +operation is synchronous (eg. with SATA devices up to revision 3.0). ++ +If discarding is not necessary to be done at the block freeing time, there's +*fstrim* tool that lets the filesystem discard all free blocks in a batch, +possibly not much interfering with other operations. + +*enospc_debug*:: +*noenospc_debug*:: +(default: off) ++ +Enable verbose output for some ENOSPC conditions. It's safe to use but can +be noisy if the system reaches a near-full state. + +*fatal_errors='action'*:: +(since: 3.4, default: bug) ++ +Action to take when encountering a fatal error.
++ +*bug*:::: +'BUG()' on a fatal error, the system will stay in the crashed state and may be +still partially usable, but reboot is required for full operation ++ +*panic*:::: +'panic()' on a fatal error, depending on other system configuration, this may +be followed by a reboot. Please refer to the documentation of kernel boot +parameters, eg. 'panic', 'oops' or 'crashkernel'. + +*flushoncommit*:: +*noflushoncommit*:: +(default: on) ++ +This option forces any data dirtied by a write in a prior transaction to commit +as part of the current commit. This makes the committed state a fully +consistent view of the file system from the application's perspective (i.e., it +includes all completed file system operations). This was previously the +behavior only when a snapshot was created. ++ +Disabling flushing may improve performance but is not crash-safe. + +*fragment='type'*:: +(depends on compile-time option BTRFS_DEBUG, since: 4.4, default: off) ++ +A debugging helper to intentionally fragment given 'type' of block groups. The +type can be 'data', 'metadata' or 'all'. This mount option should not be used +outside of debugging environments and is not recognized if the kernel config +option 'BTRFS_DEBUG' is not enabled. + +*inode_cache*:: +*noinode_cache*:: +(since: 3.0, default: off) ++ +Enable free inode number caching. Not recommended to use unless files on your +filesystem get assigned inode numbers that are approaching 2^64^. Normally, new +files in each subvolume get assigned incrementally (plus one from the last +time) and are not reused. The mount option turns on caching of the existing +inode numbers and reuse of inode numbers of deleted files. ++ +This option may slow down your system at first run, or after mounting without +the option. ++ +NOTE: Defaults to off due to a potential overflow problem when the free space +checksums don't fit inside a single page. 
+ +*max_inline='bytes'*:: +(default: min(8192, page size) ) ++ +Specify the maximum amount of space, in bytes, that can be inlined in +a metadata B-tree leaf. The value is specified in bytes, optionally +with a K suffix (case insensitive). In practice, this value +is limited by the filesystem block size (named 'sectorsize' at mkfs time), +and memory page size of the system. In case of sectorsize limit, there's +some space unavailable due to leaf headers. For example, with a 4k sectorsize, max +inline data is ~3900 bytes. ++ +Inlining can be completely turned off specifying 0. This will increase data +block slack if file sizes are much smaller than block size but will reduce +metadata consumption in return. + +*metadata_ratio='value'*:: +(default: 0, internal logic) ++ +Specifies that 1 metadata chunk should be allocated after every 'value' data +chunks. Default behaviour depends on internal logic, some percent of unused +metadata space is attempted to be maintained but is not always possible if +there's no space left for chunk allocation. The option could be useful to +override the internal logic in favor of the metadata allocation if the expected +workload is supposed to be metadata intense (snapshots, reflinks, xattrs, +inlined files). + +*recovery*:: +(since: 3.2, default: off) ++ +Enable autorecovery attempts if a bad tree root is found at mount time. +Currently this scans a backup list of several previous tree roots and tries to +use the first readable. This can be used with read-only mounts as well. + +*rescan_uuid_tree*:: +(since: 3.12, default: off) ++ +Force check and rebuild procedure of the UUID tree. This should not +normally be needed. + +*skip_balance*:: +(since: 3.3, default: off) ++ +Skip automatic resume of interrupted balance operation after mount. +May be resumed with *btrfs balance resume* or the paused state can be removed +by *btrfs balance cancel*.
+ +*space_cache*:: +*nospace_cache*:: +('nospace_cache' since: 3.2, default: on) ++ +Disable freespace cache loading without clearing the cache and the free space +cache will not be used during the mount. This affects performance as searching +for new free blocks could take longer. On the other hand, managing the space +cache consumes some resources. + +*ssd*:: +*nossd*:: +*ssd_spread*:: +(default: SSD autodetected) ++ +Options to control SSD allocation schemes. By default, BTRFS will +enable or disable SSD allocation heuristics depending on whether a +rotational or nonrotational disk is in use. The 'ssd' and 'nossd' options +can override this autodetection. ++ +The 'ssd_spread' mount option attempts to allocate into bigger and aligned +chunks of unused space, and may perform better on low-end SSDs. 'ssd_spread' +implies 'ssd', enabling all other SSD heuristics as well. + +*subvol='path'*:: +Mount subvolume from 'path' rather than the toplevel subvolume. The +'path' is absolute (ie. starts at the toplevel subvolume). +This mount option overrides the default subvolume set for the given filesystem. + +*subvolid='subvolid'*:: +Mount subvolume specified by a 'subvolid' number rather than the toplevel +subvolume. You can use *btrfs subvolume list* to see subvolume ID numbers. +This mount option overrides the default subvolume set for the given filesystem. + +*subvolrootid='objectid'*:: +(irrelevant since: 3.2, formally deprecated since: 3.10) ++ +A workaround option from times (pre 3.2) when it was not possible to mount a +subvolume that did not reside directly under the toplevel subvolume. + +*thread_pool='number'*:: +(default: min(NRCPUS + 2, 8) ) ++ +The number of worker threads to allocate. NRCPUS is number of on-line CPUs +detected at the time of mount. 
Small number leads to less parallelism in +processing data and metadata, higher numbers could lead to a performance hit due to +increased locking contention, cache-line bouncing or costly data transfers +between local CPU memories. + +*treelog*:: +*notreelog*:: +(default: on) ++ +Enable the tree logging used for 'fsync' and 'O_SYNC' writes. The tree log +stores changes without the need of a full filesystem sync. The log operations +are flushed at sync and transaction commit. If the system crashes between two +such syncs, the pending tree log operations are replayed during mount. ++ +WARNING: currently, the tree log is replayed even with a read-only mount! ++ +The tree log could contain new files/directories, these would not exist on +a mounted filesystem if the log is not replayed. + +*user_subvol_rm_allowed*:: +(default: off) ++ +Allow subvolumes to be deleted by their respective owner. Otherwise, only the +root user can do that. + +FILE ATTRIBUTES +--------------- +The btrfs filesystem supports setting the following file attributes using the +`chattr`(1) utility: + +*a*:: +'append only', new writes are always written at the end of the file + +*A*:: +'no atime updates' + +*c*:: +'compress data', all data written after this attribute is set will be compressed. +Please note that compression is also affected by the mount options or the parent +directory attributes. ++ +When set on a directory, all newly created files will inherit this attribute. + +*C*:: +'no copy-on-write', file modifications are done in-place ++ +When set on a directory, all newly created files will inherit this attribute. ++ +NOTE: due to implementation limitations, this flag can be set/unset only on +empty files.
+ +*d*:: +'no dump', makes sense with 3rd party tools like `dump`(8), on BTRFS the +attribute can be set/unset but no other special handling is done + +*D*:: +'synchronous directory updates', for more details search `open`(2) for 'O_SYNC' +and 'O_DSYNC' + +*i*:: +'immutable', no file data and metadata changes allowed even to the root user as +long as this attribute is set (obviously the exception is unsetting the attribute) + +*S*:: +'synchronous updates', for more details search `open`(2) for 'O_SYNC' and +'O_DSYNC' + +*X*:: +'no compression', permanently turn off compression on the given file, other +compression mount options will not affect that ++ +When set on a directory, all newly created files will inherit this attribute. + +No other attributes are supported. For the complete list please refer to the +`chattr`(1) manual page. + +SEE ALSO +-------- +`acl`(5), +`btrfs`(8), +`chattr`(1), +`fstrim`(8), +`mkfs.btrfs`(8), +`mount`(8) diff --git a/Documentation/btrfs-map-logical.asciidoc b/Documentation/btrfs-map-logical.asciidoc new file mode 100644 index 00000000..a3d110cb --- /dev/null +++ b/Documentation/btrfs-map-logical.asciidoc @@ -0,0 +1,37 @@ +btrfs-map-logical(8) +==================== + +NAME +---- +btrfs-map-logical - map btrfs logical extent to physical extent + +SYNOPSIS +-------- +*btrfs-map-logical* <options> <device> + +DESCRIPTION +----------- +*btrfs-map-logical* can be used to find out what the physical offsets are +on the mirrors, the result is dumped into stdout in default. + +Mainly used for debug purpose. + +OPTIONS +------- +-l|--logical <logical_num>:: +Logical extent to map. +-c|--copy <copy>:: +Copy of the extent to read(usually 1 or 2). +-o|--output <filename>:: +Output file to hold the extent. +-b|--bytes <bytes>:: +Number of bytes to read. + +EXIT STATUS +----------- +*btrfs-map-logical* will return 0 if no error happened. +If any problems happened, 1 will be returned.
+ +SEE ALSO +-------- +`mkfs.btrfs`(8) diff --git a/Documentation/btrfs-property.asciidoc b/Documentation/btrfs-property.asciidoc new file mode 100644 index 00000000..8b9b7f03 --- /dev/null +++ b/Documentation/btrfs-property.asciidoc @@ -0,0 +1,73 @@ +btrfs-property(8) +================= + +NAME +---- +btrfs-property - get/set/list properties for given btrfs object. + +SYNOPSIS +-------- +*btrfs property* <subcommand> <args> + +DESCRIPTION +----------- +*btrfs property* is used to get/set/list property for given btrfs object. +See the description of *get* subcommand for more information about +both btrfs object and property. + +*btrfs property* provides a unified and user-friendly method to tune different +btrfs properties instead of using the traditional method like `chattr`(1) or +`lsattr`(1). + +SUBCOMMAND +---------- +*get* [-t <type>] <object> [<name>]:: +Gets a property from a btrfs object. ++ +A btrfs object, which is set by <object>, can be a btrfs filesystem +itself, a btrfs subvolume, an inode (file or directory) inside btrfs, +or a device on which a btrfs exists. ++ +The '-t <type>' option can be used to explicitly +specify what type of object you meant. This is only needed when a +property could be set for more than one object type. ++ +Possible types are 's[ubvol]', 'f[ilesystem]', 'i[node]' and 'd[evice]'. ++ +Set the name of property by '<name>'. If no '<name>' is specified, +all properties for the given object are printed. '<name>' is one of +the following: + +ro:::: +read-only flag of subvolume: true or false +label:::: +label of device +compression:::: +compression setting for an inode: lzo, zlib, or "" (empty string) + +*list* [-t <type>] <object>:: +Lists available properties with their descriptions for the given object. ++ +See the description of *get* subcommand for the meaning of each option. + +*set* [-t <type>] <object> <name> <value>:: +Sets a property on a btrfs object.
++ +See the description of *get* subcommand for the meaning of each option. + +EXIT STATUS +----------- +*btrfs property* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`lsattr`(1), +`chattr`(1) diff --git a/Documentation/btrfs-qgroup.asciidoc b/Documentation/btrfs-qgroup.asciidoc new file mode 100644 index 00000000..57cf012d --- /dev/null +++ b/Documentation/btrfs-qgroup.asciidoc @@ -0,0 +1,145 @@ +btrfs-qgroup(8) +=============== + +NAME +---- +btrfs-qgroup - control the quota group of a btrfs filesystem + +SYNOPSIS +-------- +*btrfs qgroup* <subcommand> <args> + +DESCRIPTION +----------- +*btrfs qgroup* is used to control quota group (qgroup) of a btrfs filesystem. + +NOTE: To use qgroup you need to enable quota first using *btrfs quota enable* +command. + +WARNING: Qgroup is not stable yet and will impact performance in current mainline +kernel (v3.14 so far). + +QGROUP +------ +Quota groups or qgroup in btrfs make a tree hierarchy, the leaf qgroups are +attached to subvolumes. The size limits are set per qgroup and apply when any +limit is reached in the tree that contains a given subvolume. + +The limits are separated between shared and exclusive and reflect the extent +ownership. For example a fresh snapshot shares almost all the blocks with the +original subvolume, new writes to either subvolume will raise towards the +exclusive limit. + +The qgroup identifiers conform to 'level/id' where level 0 is reserved to the +qgroups associated with subvolumes. Such qgroups are created automatically. + +The qgroup hierarchy is built by commands *create* and *assign*. + +NOTE: If the qgroup of a subvolume is destroyed, quota about the subvolume +will not be functional until qgroup '0/<subvolume id>' is created again.
+ +SUBCOMMAND +---------- +*assign* [options] <src> <dst> <path>:: +Assign qgroup <src> as the child qgroup of <dst> in the btrfs filesystem +identified by <path>. ++ +`Options` ++ +--rescan:::: +Automatically schedule quota rescan if the new qgroup assignment leads to +quota inconsistency. +--no-rescan:::: +Explicitly ask not to do a rescan. + +*create* <qgroupid> <path>:: +Create a subvolume quota group. ++ +For the '0/<subvolume id>' qgroup, a qgroup can be created even before the +subvolume is created. + +*destroy* <qgroupid> <path>:: +Destroy a qgroup. ++ +If a qgroup is not isolated, which means it is a parent or child qgroup, it +can't be destroyed. + +*limit* [options] <size>|none [<qgroupid>] <path>:: +Limit the size of a qgroup to <size> or no limit in the btrfs filesystem +identified by <path>. ++ +If <qgroupid> is not given, qgroup of the subvolume identified by <path> +is used if possible. ++ +`Options` ++ +-c:::: +limit amount of data after compression. This is the default, it is currently not +possible to turn off this option. ++ +-e:::: +limit space exclusively assigned to this qgroup. + +*remove* <src> <dst> <path>:: +Remove the relationship between child qgroup <src> and parent qgroup <dst> in +the btrfs filesystem identified by <path>. + +*show* [options] <path>:: +Show all qgroups in the btrfs filesystem identified by <path>. ++ +`Options` ++ +-p:::: +print parent qgroup id. +-c:::: +print child qgroup id. +-r:::: +print limit of referenced size of qgroup. +-e:::: +print limit of exclusive size of qgroup. +-F:::: +list all qgroups which impact the given path(include ancestral qgroups) +-f:::: +list all qgroups which impact the given path(exclude ancestral qgroups) +--raw:::: +raw numbers in bytes, without the 'B' suffix. +--human-readable:::: +print human friendly numbers, base 1024, this is the default +--iec:::: +select the 1024 base for the following options, according to the IEC standard.
+--si:::: +select the 1000 base for the following options, according to the SI standard. +--kbytes:::: +show sizes in KiB, or kB with --si. +--mbytes:::: +show sizes in MiB, or MB with --si. +--gbytes:::: +show sizes in GiB, or GB with --si. +--tbytes:::: +show sizes in TiB, or TB with --si. +--sort=[\+/-]<attr>[,[+/-]<attr>]...:::: +list qgroups in order of <attr>. ++ +<attr> can be one or more of qgroupid,rfer,excl,max_rfer,max_excl. ++ +Prefix \'+' means ascending order and \'-' means descending order of <attr>. +If no prefix is given, use ascending order by default. ++ +If multiple <attr>s is given, use comma to separate. + +EXIT STATUS +----------- +*btrfs qgroup* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-subvolume`(8), +`btrfs-quota`(8), diff --git a/Documentation/btrfs-quota.asciidoc b/Documentation/btrfs-quota.asciidoc new file mode 100644 index 00000000..00e09c61 --- /dev/null +++ b/Documentation/btrfs-quota.asciidoc @@ -0,0 +1,56 @@ +btrfs-quota(8) +============== + +NAME +---- +btrfs-quota - control the quota of a btrfs filesystem + +SYNOPSIS +-------- +*btrfs quota* <subcommand> <args> + +DESCRIPTION +----------- +*btrfs quota* is used to enable/disable or rescan subvolume quota of a btrfs +filesystem. + +For setting quota or other quota operations on a btrfs filesystem, please see +`btrfs-qgroup`(8) for details. + +WARNING: Quota and qgroup in btrfs filesystem is not stable and impacts +performance in mainline kernel yet(v3.14 so far). + +SUBCOMMAND +---------- +*disable* <path>:: +Disable subvolume quota support for a filesystem. + +*enable* <path>:: +Enable subvolume quota support for a filesystem. + +*rescan* [-s] <path>:: +Trash all qgroup numbers and scan the metadata again with the current config. 
++ +`Options` ++ +-s:::: +show status of a running rescan operation. +-w:::: +wait for rescan operation to finish(can be already in progress). + +EXIT STATUS +----------- +*btrfs quota* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-subvolume`(8), +`btrfs-qgroup`(8) diff --git a/Documentation/btrfs-receive.asciidoc b/Documentation/btrfs-receive.asciidoc new file mode 100644 index 00000000..84b85c1c --- /dev/null +++ b/Documentation/btrfs-receive.asciidoc @@ -0,0 +1,67 @@ +btrfs-receive(8) +================ + +NAME +---- +btrfs-receive - receive subvolumes from stdin/file. + +SYNOPSIS +-------- +*btrfs receive* [options] <mount> + +DESCRIPTION +----------- +Receives one or more subvolumes that were previously +sent with *btrfs send*. The received subvolumes are stored +into <mount>. + +*btrfs receive* will fail with the following case: + +1. a receiving subvolume already exists. + +2. a previously received subvolume was changed after it was received. + +3. default subvolume is changed or you don't mount btrfs filesystem with +fs tree. + +After receiving a subvolume, it is immediately set to read only. + +`Options` + +-v:: +Enable verbose debug output. Each occurrence of this option increases the +verbose level more. +-f <infile>:: +By default, btrfs receive uses stdin to receive the subvolumes. +Use this option to specify a file to use instead. +-C|--chroot:: +Confine the process to <mount> using chroot. +-e:: +Terminate after receiving an <end cmd> in the data stream. +Without this option, the receiver terminates only if an error is recognized +or on EOF. +--max-errors <N>:: +Terminate as soon as N errors happened while processing commands from the send +stream. Default value is 1. A value of 0 means no limit. 
+-m:: +The root mount point of the destination fs. ++ +By default the mountpoint is searched in /proc/self/mounts. +If you do not have /proc, eg. in a chroot environment, use this option to tell +us where this filesystem is mounted. + +EXIT STATUS +----------- +*btrfs receive* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-send`(8) diff --git a/Documentation/btrfs-replace.asciidoc b/Documentation/btrfs-replace.asciidoc new file mode 100644 index 00000000..5a14a40a --- /dev/null +++ b/Documentation/btrfs-replace.asciidoc @@ -0,0 +1,76 @@ +btrfs-replace(8) +=============== + +NAME +---- +btrfs-replace - replace devices managed by btrfs with other device. + +SYNOPSIS +-------- +*btrfs replace* <subcommand> <args> + +DESCRIPTION +----------- +*btrfs replace* is used to replace btrfs managed devices with other device. + +SUBCOMMAND +---------- +*cancel* <mount_point>:: +Cancel a running device replace operation. + +*start* [-Bfr] <srcdev>|<devid> <targetdev> <path>:: +Replace device of a btrfs filesystem. ++ +On a live filesystem, duplicate the data to the target device which +is currently stored on the source device. +If the source device is not available anymore, or if the -r option is set, +the data is built only using the RAID redundancy mechanisms. +After completion of the operation, the source device is removed from the +filesystem. +If the <srcdev> is a numerical value, it is assumed to be the device id +of the filesystem which is mounted at <path>, otherwise it is +the path to the source device. If the source device is disconnected +from the system, you have to use the devid parameter format. +The <targetdev> needs to be same size or larger than the <srcdev>.
++ +`Options` ++ +-r:::: +only read from <srcdev> if no other zero-defect mirror exists. +(enable this if your drive has lots of read errors, the access would be very +slow) +-f:::: +force using and overwriting <targetdev> even if it looks like +containing a valid btrfs filesystem. ++ +A valid filesystem is assumed if a btrfs superblock is found which contains a +correct checksum. Devices which are currently mounted are +never allowed to be used as the <targetdev>. ++ +-B:::: +no background replace. + +*status* [-1] <mount_point>:: +Print status and progress information of a running device replace operation. ++ +`Options` ++ +-1:::: +print once instead of print continuously until the replace +operation finishes (or is canceled) + +EXIT STATUS +----------- +*btrfs replace* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-device`(8), diff --git a/Documentation/btrfs-rescue.asciidoc b/Documentation/btrfs-rescue.asciidoc new file mode 100644 index 00000000..42aca645 --- /dev/null +++ b/Documentation/btrfs-rescue.asciidoc @@ -0,0 +1,90 @@ +btrfs-rescue(8) +============== + +NAME +---- +btrfs-rescue - Recover a damaged btrfs filesystem + +SYNOPSIS +-------- +*btrfs rescue* <subcommand> <args> + +DESCRIPTION +----------- +*btrfs rescue* is used to try to recover a damaged btrfs filesystem. + +SUBCOMMAND +---------- +*chunk-recover* [options] <device>:: +Recover the chunk tree by scanning the devices ++ +`Options` ++ +-y:::: +assume an answer of 'yes' to all questions. +-v:::: +verbose mode. +-h:::: +help. + +NOTE: Since *chunk-recover* will scan the whole device, it will be *VERY* slow +especially executed on a large device. + +*super-recover* [options] <device>:: +Recover bad superblocks from good copies. 
++ +`Options` ++ +-y:::: +assume an answer of 'yes' to all questions. +-v:::: +verbose mode. + +*zero-log* <device>:: +clear the filesystem log tree + +This command will clear the filesystem log tree. This may fix a specific +set of problem when the filesystem mount fails due to the log replay. See below +for sample stacktraces that may show up in system log. + +The common case where this happens has been fixed a long time ago, +so it is unlikely that you will see this particular problem, but the utility is +kept around. + +NOTE: clearing the log may lead to loss of changes that were made since the +last transaction commit. This may be up to 30 seconds (default commit period) +or less if the commit was implied by other filesystem activity. + +One can determine whether *zero-log* is needed according to the kernel +backtrace: +---- +? replay_one_dir_item+0xb5/0xb5 [btrfs] +? walk_log_tree+0x9c/0x19d [btrfs] +? btrfs_read_fs_root_no_radix+0x169/0x1a1 [btrfs] +? btrfs_recover_log_trees+0x195/0x29c [btrfs] +? replay_one_dir_item+0xb5/0xb5 [btrfs] +? btree_read_extent_buffer_pages+0x76/0xbc [btrfs] +? open_ctree+0xff6/0x132c [btrfs] +---- + +If the errors are like above, then *zero-log* should be used to clear +the log and the filesystem may be mounted normally again. The keywords to look +for are 'open_ctree' which says that it's during mount and function names +that contain 'replay', 'recover' or 'log_tree'. + +EXIT STATUS +----------- +*btrfs rescue* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. 
+ +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-scrub`(8), +`btrfs-check`(8) diff --git a/Documentation/btrfs-restore.asciidoc b/Documentation/btrfs-restore.asciidoc new file mode 100644 index 00000000..ec3a08bc --- /dev/null +++ b/Documentation/btrfs-restore.asciidoc @@ -0,0 +1,91 @@ +btrfs-restore(8) +================ + +NAME +---- +btrfs-restore - try to restore files from a damaged btrfs filesystem image + +SYNOPSIS +-------- +*btrfs restore* [options] <device> <path> | -l <device> + +DESCRIPTION +----------- +*btrfs restore* is used to try to salvage files from a damaged filesystem and +restore them into <path> or just list the tree roots. + +Since current `btrfs-check`(8) or `btrfs-rescue`(8) only has very limited usage, +*btrfs restore* is normally a better choice. + +NOTE: It is recommended to read the following btrfs wiki page if your data is +not salvaged with default option: + +https://btrfs.wiki.kernel.org/index.php/Restore + +OPTIONS +------- +-s|--snapshots:: +get snapshots, btrfs restore skips snapshots in default. + +-x|--xattr:: +get extended attributes. + +-m|--metadata:: +restore owner, mode and times. + +-S|--symlinks:: +restore symbolic links as well as normal files. + +-v|--verbose:: +verbose. + +-i|--ignore-errors:: +ignore errors. + +-o|--overwrite:: +overwrite directories/files in <path>. + +-t <bytenr>:: +use <bytenr> to read root tree. + +-f <bytenr>:: +only restore files that are under specified root whose root bytenr is <bytenr>. + +-u|--super <mirror>:: +use given superblock mirror identified by <mirror>, it can be 0,1,2. + +-r|--root <rootid>:: +only restore files that are under specified root whose objectid is <rootid>. + +-d:: +find dir. + +-l|--list-roots:: +list tree roots. + +-D|--dry-run:: +dry run (only list files that would be recovered). 
+ +--path-regex <regex>:: +restore only filenames matching regex, you have to use following syntax (possibly quoted): ++ ++^/(|home(|/username(|/Desktop(|/.*))))$+ + +-c:: +ignore case (--path-regex only). + +EXIT STATUS +----------- +*btrfs restore* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-rescue`(8), +`btrfs-check`(8) diff --git a/Documentation/btrfs-scrub.asciidoc b/Documentation/btrfs-scrub.asciidoc new file mode 100644 index 00000000..7750868d --- /dev/null +++ b/Documentation/btrfs-scrub.asciidoc @@ -0,0 +1,94 @@ +btrfs-scrub(8) +============== + +NAME +---- +btrfs-scrub - scrub btrfs filesystem + +SYNOPSIS +-------- +*btrfs scrub* <subcommand> <args> + +DESCRIPTION +----------- +*btrfs scrub* is used to scrub a btrfs filesystem, which will read all data +from all disks and verify checksums. + +SUBCOMMAND +---------- +*cancel* <path>|<device>:: +If a scrub is running on the filesystem identified by <path>, cancel it. ++ +Progress is saved in the scrub progress file and scrubbing can be resumed later +using the scrub resume command. +If a <device> is given, the corresponding filesystem is found and +scrub cancel behaves as if it was called on that filesystem. + +*resume* [-BdqrR] [-c <ioprio_class> -n <ioprio_classdata>] <path>|<device>:: +Resume a canceled or interrupted scrub cycle on the filesystem identified by +<path> or on a given <device>. ++ +Does not start a new scrub if the last scrub finished successfully. ++ +`Options` ++ +see *scrub start*. + +*start* [-BdqrRf] [-c <ioprio_class> -n <ioprio_classdata>] <path>|<device>:: +Start a scrub on all devices of the filesystem identified by <path> or on +a single <device>. If a scrub is already running, the new one fails. 
++ +Without options, scrub is started as a background process. +Progress can be obtained with the *scrub status* command. Scrubbing +involves reading all data from all disks and verifying checksums. Errors are +corrected along the way if possible. ++ +The default IO priority of scrub is the idle class. The priority can be +configured similar to the `ionice`(1) syntax using '-c' and '-n' options. ++ +`Options` ++ +-B:::: +Do not background and print scrub statistics when finished. +-d:::: +Print separate statistics for each device of the filesystem (-B only). +-q:::: +Quiet. Omit error messages and statistics. +-r:::: +Read only mode. Do not attempt to correct anything. +-R:::: +Raw print mode. Print full data instead of summary. +-c <ioprio_class>:::: +Set IO priority class (see `ionice`(1) manpage). +-n <ioprio_classdata>:::: +Set IO priority classdata (see `ionice`(1) manpage). +-f:::: +Force starting new scrub even if a scrub is already running. +This is useful when scrub stat record file is damaged. + +*status* [-d] <path>|<device>:: +Show status of a running scrub for the filesystem identified by <path> or +for the specified <device>. ++ +If no scrub is running, show statistics of the last finished or canceled scrub +for that filesystem or device. ++ +`Options` ++ +-d:::: +Print separate statistics for each device of the filesystem. + +EXIT STATUS +----------- +*btrfs scrub* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. 
+ +SEE ALSO +-------- +`mkfs.btrfs`(8), diff --git a/Documentation/btrfs-select-super.asciidoc b/Documentation/btrfs-select-super.asciidoc new file mode 100644 index 00000000..a8d7ef00 --- /dev/null +++ b/Documentation/btrfs-select-super.asciidoc @@ -0,0 +1,29 @@ +btrfs-select-super(8) +===================== + +NAME +---- +btrfs-select-super - overwrite superblock with a backup + +SYNOPSIS +-------- +*btrfs-select-super* -s number dev + +DESCRIPTION +----------- +*btrfs-select-super* destructively overwrites all copies of the superblock +with a specified copy. This helps with certain cases of damage, especially +when barriers were disabled during a power failure. You can find a valid +copy of the superblock with *btrfs check -s*. + +The filesystem specified by `dev` must not be mounted. + +OPTIONS +------- +-s|--super <superblock>:: +use <superblock>th superblock copy, valid values are 0 up to 2 if the +respective superblock offset is within the filesystem + +SEE ALSO +-------- +`btrfs-check`(8) diff --git a/Documentation/btrfs-send.asciidoc b/Documentation/btrfs-send.asciidoc new file mode 100644 index 00000000..e05342ff --- /dev/null +++ b/Documentation/btrfs-send.asciidoc @@ -0,0 +1,63 @@ +btrfs-send(8) +============= + +NAME +---- +btrfs-send - send data of subvolume(s) to stdout/file. + +SYNOPSIS +-------- +*btrfs send* [-ve] [-p <parent>] [-c <clone-src>] [-f <outfile>] <subvol> [<subvol>...] + +DESCRIPTION +----------- +Sends the subvolume(s) specified by <subvol> to stdout. +<subvol> should be read-only here. + +By default, this will send the whole subvolume. To do an incremental +send, use '-p <parent>'. + +If you want to allow btrfs to clone from any additional local snapshots, +use '-c <clone-src>' (multiple times where applicable). + +You must not specify clone sources unless you guarantee that these snapshots +are exactly in the same state on both sides, the sender and the receiver. 
+ +It is allowed to omit the '-p <parent>' option when '-c <clone-src>' options +are given, in which case *btrfs send* will determine a suitable parent among the +clone sources itself. + +`Options` + +-v:: +Enable verbose debug output. Each occurrence of this option increases the +verbose level more. +-e:: +If sending multiple subvols at once, use the new format and omit the <end cmd> between the subvols. +-p <parent>:: +Send an incremental stream from <parent> to <subvol>. +-c <clone-src>:: +Use this snapshot as a clone source for an incremental send (multiple allowed). +-f <outfile>:: +Output is normally written to stdout. To write to a file, use this option. +An alternative would be to use pipes. +--no-data:: +Send in NO_FILE_DATA mode. The output stream does not contain any file +data and thus cannot be used to transfer changes. This mode is faster and +useful to show the differences in metadata. + +EXIT STATUS +----------- +*btrfs send* returns a zero exit status if it succeeds. Non zero is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-receive`(8) diff --git a/Documentation/btrfs-show-super.asciidoc b/Documentation/btrfs-show-super.asciidoc new file mode 100644 index 00000000..8866c940 --- /dev/null +++ b/Documentation/btrfs-show-super.asciidoc @@ -0,0 +1,54 @@ +btrfs-show-super(8) +==================== + +NAME +---- +btrfs-show-super - show btrfs superblock information stored in devices + +SYNOPSIS +-------- +*btrfs-show-super* [options] <dev> [<dev>...] + +DESCRIPTION +----------- +*btrfs-show-super* is used to print the information of superblock, +you can specify which mirror to print out. + +By default, every device's first superblock will be printed out. + +Mainly used for debug purpose. + +OPTIONS +------- +-f:: +Print full superblock information. 
++ +Including the system chunk array and backup roots. + +-a:: +Print information of all superblocks. ++ +If this option is given, '-i' option will be ignored. + +-i <super_mirror>:: +Specify which mirror to print out. ++ +<super_mirror> is between 0 and 2. +If several '-i <super_mirror>' are given, only the last one is valid. + +-F:: +Attempt to print the superblock even if no superblock magic is found. May end +badly. + +-s <bytenr>:: +specify offset to a superblock in a non-standard location at 'bytenr', useful +for debugging (disables the '-f' option) + +EXIT STATUS +----------- +*btrfs-show-super* will return 0 if no error happened. +If any problems happened, 1 will be returned. + +SEE ALSO +-------- +`mkfs.btrfs`(8) diff --git a/Documentation/btrfs-subvolume.asciidoc b/Documentation/btrfs-subvolume.asciidoc new file mode 100644 index 00000000..96cfe4ac --- /dev/null +++ b/Documentation/btrfs-subvolume.asciidoc @@ -0,0 +1,182 @@ +btrfs-subvolume(8) +================== + +NAME +---- +btrfs-subvolume - control btrfs subvolume(s) + +SYNOPSIS +-------- +*btrfs subvolume* <subcommand> [<args>] + +DESCRIPTION +----------- +*btrfs subvolume* is used to control the filesystem to create/delete/list/show +subvolumes and snapshots. + +SUBVOLUME AND SNAPSHOT +---------------------- +A subvolume in btrfs is not like an LVM logical volume, which is quite +independent from each other, a btrfs subvolume has its hierarchy and relations +between other subvolumes. + +A subvolume in btrfs can be accessed in two ways. + +1. From the parent subvolume + +When accessing from the parent subvolume, the subvolume can be used just +like a directory. It can have child subvolumes and its own files/directories. + +2. Separate mounted filesystem + +When `mount`(8) using 'subvol' or 'subvolid' mount option, one can access +files/directories/subvolumes inside it, but nothing in parent subvolumes. 
+ +Also every btrfs filesystem has a default subvolume as its initial top-level +subvolume, whose subvolume id is 5. (0 is also acceptable as an alias.) + +A btrfs snapshot is much like a subvolume, but shares its data (and metadata) +with other subvolume/snapshot. Due to the capabilities of COW, modifications +inside a snapshot will only show in a snapshot but not in its source subvolume. + +Although in btrfs, subvolumes/snapshots are treated as directories, only +subvolume/snapshot can be the source of a snapshot, snapshot can not be made +from normal directories. + +SUBCOMMAND +----------- +*create* [-i <qgroupid>] [<dest>]<name>:: +Create a subvolume <name> in <dest>. ++ +If <dest> is not given, subvolume <name> will be created in the current +directory. ++ +`Options` ++ +-i <qgroupid>:::: +Add the newly created subvolume to a qgroup. This option can be given multiple +times. + +*delete* [options] <subvolume> [<subvolume>...]:: +Delete the subvolume(s) from the filesystem. ++ +If <subvolume> is not a subvolume, btrfs returns an error but continues if +there are more arguments to process. ++ +The corresponding directory is removed instantly but the data blocks are +removed later. The deletion does not involve full commit by default due to +performance reasons (as a consequence, the subvolume may appear again after a +crash). Use one of the '--commit' options to wait until the operation is safely +stored on the media. ++ +`Options` ++ +-c|--commit-after:::: +wait for transaction commit at the end of the operation ++ +-C|--commit-each:::: +wait for transaction commit after deleting each subvolume + +*find-new* <subvolume> <last_gen>:: +List the recently modified files in a subvolume, after <last_gen> ID. + +*get-default* <path>:: +Get the default subvolume of the filesystem <path>. ++ +The output format is similar to *subvolume list* command. 
+ +*list* [options] [-G [\+|-]<value>] [-C [+|-]<value>] [--sort=rootid,gen,ogen,path] <path>:: +List the subvolumes present in the filesystem <path>. ++ +For every subvolume the following information is shown by default. + +ID <ID> top level <ID> path <path> + +where path is the relative path of the subvolume to the top level subvolume. +The subvolume's ID may be used by the subvolume set-default command, +or at mount time via the subvolid= option. +If `-p` is given, then parent <ID> is added to the output between ID +and top level. The parent's ID may be used at mount time via the +`subvolrootid=` option. ++ +`Options` ++ +-p:::: +print parent ID. +-a:::: +print all the subvolumes in the filesystem and distinguish between +absolute and relative path with respect to the given <path>. +-c:::: +print the ogeneration of the subvolume, aliases: ogen or origin generation. +-g:::: +print the generation of the subvolume. +-o:::: +print only subvolumes below specified <path>. +-u:::: +print the UUID of the subvolume. +-q:::: +print the parent uuid of subvolumes (and snapshots). +-R:::: +print the UUID of the sent subvolume, where the subvolume is the result of a receive operation +-t:::: +print the result as a table. +-s:::: +only snapshot subvolumes in the filesystem will be listed. +-r:::: +only readonly subvolumes in the filesystem will be listed. +-G [+|-]<value>:::: +list subvolumes in the filesystem whose generation is +>=, \<= or = value. \'\+' means >= value, \'-' means \<= value. If there is +neither \'+' nor \'-', it means = value. +-C [+|-]<value>:::: +list subvolumes in the filesystem whose ogeneration is +>=, \<= or = value. The usage is the same as for the '-G' option. +--sort=rootid,gen,ogen,path:::: +list subvolumes in order by specified items. +you can add \'\+' or \'-' in front of each item, \'+' means ascending, +\'-' means descending. The default is ascending. 
++ +for --sort you can combine some items together by \',', just like +--sort=+ogen,-gen,path,rootid. + +*set-default* <id> <path>:: +Set the subvolume of the filesystem <path> which is mounted as +default. ++ +The subvolume is identified by <id>, which is returned by the *subvolume list* +command. + +*show* <path>:: +Show information of a given subvolume in the <path>. + +*snapshot* [-r] <source> <dest>|[<dest>/]<name>:: +Create a writable/readonly snapshot of the subvolume <source> with the +name <name> in the <dest> directory. ++ +If only <dest> is given, the subvolume will be named the basename of <source>. +If <source> is not a subvolume, btrfs returns an error. +If '-r' is given, the snapshot will be readonly. + +*sync* <path> [subvolid...]:: +Wait until given subvolume(s) are completely removed from the filesystem +after deletion. If no subvolume id is given, wait until all current deletion +requests are completed, but do not wait for subvolumes deleted meanwhile. +The status of subvolume ids is checked periodically. ++ +`Options` ++ +-s <N>:::: +sleep N seconds between checks (default: 1) + +EXIT STATUS +----------- +*btrfs subvolume* returns a zero exit status if it succeeds. A non-zero value is +returned in case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), +`btrfs-quota`(8), +`btrfs-qgroup`(8), diff --git a/Documentation/btrfs.asciidoc b/Documentation/btrfs.asciidoc new file mode 100644 index 00000000..abf1ff89 --- /dev/null +++ b/Documentation/btrfs.asciidoc @@ -0,0 +1,127 @@ +btrfs(8) +======== + +NAME +---- +btrfs - control a btrfs filesystem + +SYNOPSIS +-------- +*btrfs* <command> [<args>] + +DESCRIPTION +----------- +The *btrfs* utility is a toolbox for managing btrfs filesystems. There are +command groups to work with subvolumes, devices, for whole filesystem or other +specific actions. 
See section *COMMANDS*. + +COMMAND SYNTAX +-------------- + +Any command name can be shortened as far as it stays unambiguous, +however it is recommended to use full command names in scripts. +All command groups have their manual page named *btrfs-<group>*. + +For example: it is possible to run *btrfs sub snaps* instead of +*btrfs subvolume snapshot*. +But *btrfs file s* is not allowed, because *file s* may be interpreted +both as *filesystem show* and as *filesystem sync*. + +If the command name is ambiguous, the list of conflicting options is +printed. + +For an overview of a given command use 'btrfs command --help' +or 'btrfs [command...] --help --full' to print all available options. + +COMMANDS +-------- +*balance*:: + Balance btrfs filesystem chunks across single or several devices. + + See `btrfs-balance`(8) for details. + +*check*:: + Do off-line check on a btrfs filesystem. + + See `btrfs-check`(8) for details. + +*device*:: + Manage devices managed by btrfs, including add/delete/scan and so + on. + + See `btrfs-device`(8) for details. + +*filesystem*:: + Manage a btrfs filesystem, including label setting/sync and so on. + + See `btrfs-filesystem`(8) for details. + +*inspect-internal*:: + Debug tools for developers/hackers. + + See `btrfs-inspect-internal`(8) for details. + +*property*:: + Get/set a property from/to a btrfs object. + + See `btrfs-property`(8) for details. + +*qgroup*:: + Manage quota group(qgroup) for btrfs filesystem. + + See `btrfs-qgroup`(8) for details. + +*quota*:: + Manage quota on btrfs filesystem like enabling/rescan etc. + + See `btrfs-quota`(8) and `btrfs-qgroup`(8) for details. + +*receive*:: + Receive subvolume data from stdin/file for restore etc. + + See `btrfs-receive`(8) for details. + +*replace*:: + Replace btrfs devices. + + See `btrfs-replace`(8) for details. + +*rescue*:: + Try to rescue damaged btrfs filesystem. + + See `btrfs-rescue`(8) for details. 
+ +*restore*:: + Try to restore files from a damaged btrfs filesystem. + + See `btrfs-restore`(8) for details. + +*scrub*:: + Scrub a btrfs filesystem. + + See `btrfs-scrub`(8) for details. + +*send*:: + Send subvolume data to stdout/file for backup and etc. + + See `btrfs-send`(8) for details. + +*subvolume*:: + Create/delete/list/manage btrfs subvolume. + + See `btrfs-subvolume`(8) for details. + +EXIT STATUS +----------- +*btrfs* returns a zero exit status if it succeeds. Non zero is returned in +case of failure. + +AVAILABILITY +------------ +*btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. + +SEE ALSO +-------- +`mkfs.btrfs`(8), `ionice`(1), +`btrfs-balance`(8), +`btrfs-check`(8), +`btrfs-device`(8), +`btrfs-filesystem`(8), +`btrfs-inspect-internal`(8), +`btrfs-property`(8), +`btrfs-qgroup`(8), +`btrfs-quota`(8), +`btrfs-receive`(8), +`btrfs-replace`(8), +`btrfs-rescue`(8), +`btrfs-restore`(8), +`btrfs-scrub`(8), +`btrfs-send`(8), +`btrfs-subvolume`(8), diff --git a/Documentation/btrfstune.asciidoc b/Documentation/btrfstune.asciidoc new file mode 100644 index 00000000..f5cf15e7 --- /dev/null +++ b/Documentation/btrfstune.asciidoc @@ -0,0 +1,75 @@ +btrfstune(8) +============ + +NAME +---- +btrfstune - tune various filesystem parameters + +SYNOPSIS +-------- +*btrfstune* [options] <dev> [<dev>...] + +DESCRIPTION +----------- +*btrfstune* can be used to enable, disable or set various filesystem +parameters. The filesystem must be unmounted. + +The common usecase is to enable features that were not enabled at mkfs time. +Please make sure that you have kernel support for the features. You can find a +complete list of features and kernel version of their introduction at +https://btrfs.wiki.kernel.org/index.php/Changelog#By_feature . + +OPTIONS +------- +-S <0|1>:: +Enable seeding on a given device. Value 1 will enable seeding, 0 will disable it. 
+ +A seeding filesystem is forced to be mounted read-only. A new device can be added +to the filesystem and will capture all writes keeping the seeding device intact. +-r:: +Enable extended inode refs (hardlink limit per file in a directory is 65536), +enabled by mkfs feature 'extref'. Since kernel 3.7. +-x:: +Enable skinny metadata extent refs (more efficient representation of extents), +enabled by mkfs feature 'skinny-metadata'. Since kernel 3.10. +-n:: +Enable no-holes feature (more efficient representation of file holes), enabled +by mkfs feature 'no-holes'. Since kernel 3.14. +-f:: +Allow dangerous changes, e.g. clear the seeding flag or change fsid. Make sure +that you are aware of the dangers. +-u:: +Change fsid to a randomly generated UUID or continue previous fsid change +operation in case it was interrupted. +-U <UUID>:: +Change fsid to 'UUID'. ++ +The 'UUID' should be a 36 bytes string in `printf`(3) format +'"%08x-%04x-%04x-%04x-%012x"'. +If there is a previous unfinished fsid change, it will continue only if the +'UUID' matches the unfinished one or if you use the option '-u'. + +WARNING: Cancelling or interrupting a UUID change operation will make the +filesystem temporarily unmountable. To fix it, rerun 'btrfstune -u' to restore +the UUID and let it complete. + +WARNING: Clearing the seeding flag on a device may be dangerous. +If a previously-seeding device is changed, all filesystems that used that +device will become unmountable. Setting the seeding flag back will not fix +that. + +A valid usecase is 'seeding device as a base image'. Clear the seeding +flag, update the filesystem and make it seeding again, provided that it's ok +to throw away all filesystems built on top of the previous base. + +EXIT STATUS +----------- +*btrfstune* returns 0 if no error happened, 1 otherwise. + +COMPATIBILITY NOTE +------------------ +This tool exists for historical reasons but is still in use today. 
The +functionality is about to be merged to the main tool someday and *btrfstune* +will become deprecated and removed afterwards. + +SEE ALSO +-------- +`mkfs.btrfs`(8) diff --git a/Documentation/fsck.btrfs.asciidoc b/Documentation/fsck.btrfs.asciidoc new file mode 100644 index 00000000..0bad075b --- /dev/null +++ b/Documentation/fsck.btrfs.asciidoc @@ -0,0 +1,51 @@ +fsck.btrfs(8) +============= + +NAME +---- +fsck.btrfs - do nothing, successfully + +SYNOPSIS +-------- +*fsck.btrfs* [-aApy] [<device>...] + +DESCRIPTION +----------- +*fsck.btrfs* is a type of utility that should exist for any filesystem and is +called during system setup when the corresponding `/etc/fstab` entries +contain a non-zero value for `fs_passno`, see `fstab`(5) for more. + +Traditional filesystems need to run their respective fsck utility in case the +filesystem was not unmounted cleanly and the log needs to be replayed before +mount. This is not needed for BTRFS. You should set fs_passno to 0. + +If you wish to check the consistency of a BTRFS filesystem or repair a damaged +filesystem, see `btrfs-check`(8). By default the filesystem +consistency is checked, the repair mode is enabled via '--repair' option (use +with care!). + +OPTIONS +------- +The options are all the same and detect if *fsck.btrfs* is executed in +non-interactive mode and exits with success, +otherwise prints a message about btrfs check. + +EXIT STATUS +----------- +There are two possible exit codes returned: + +0:: +No error + +8:: +Operational error, eg. 
device does not exist + +FILES +----- +`/etc/fstab` + +SEE ALSO +-------- +`btrfs`(8), +`fsck`(8), +`fstab`(5), diff --git a/Documentation/manpage-base.xsl b/Documentation/manpage-base.xsl new file mode 100644 index 00000000..a264fa61 --- /dev/null +++ b/Documentation/manpage-base.xsl @@ -0,0 +1,35 @@ +<!-- manpage-base.xsl: + special formatting for manpages rendered from asciidoc+docbook --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- these params silence some output from xmlto --> +<xsl:param name="man.output.quietly" select="1"/> +<xsl:param name="refentry.meta.get.quietly" select="1"/> + +<!-- convert asciidoc callouts to man page format; + git.docbook.backslash and git.docbook.dot params + must be supplied by another XSL file or other means --> +<xsl:template match="co"> + <xsl:value-of select="concat( + $git.docbook.backslash,'fB(', + substring-after(@id,'-'),')', + $git.docbook.backslash,'fR')"/> +</xsl:template> +<xsl:template match="calloutlist"> + <xsl:value-of select="$git.docbook.dot"/> + <xsl:text>sp </xsl:text> + <xsl:apply-templates/> + <xsl:text> </xsl:text> +</xsl:template> +<xsl:template match="callout"> + <xsl:value-of select="concat( + $git.docbook.backslash,'fB', + substring-after(@arearefs,'-'), + '. 
',$git.docbook.backslash,'fR')"/> + <xsl:apply-templates/> + <xsl:value-of select="$git.docbook.dot"/> + <xsl:text>br </xsl:text> +</xsl:template> + +</xsl:stylesheet> diff --git a/Documentation/manpage-bold-literal.xsl b/Documentation/manpage-bold-literal.xsl new file mode 100644 index 00000000..608eb5df --- /dev/null +++ b/Documentation/manpage-bold-literal.xsl @@ -0,0 +1,17 @@ +<!-- manpage-bold-literal.xsl: + special formatting for manpages rendered from asciidoc+docbook --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- render literal text as bold (instead of plain or monospace); + this makes literal text easier to distinguish in manpages + viewed on a tty --> +<xsl:template match="literal"> + <xsl:value-of select="$git.docbook.backslash"/> + <xsl:text>fB</xsl:text> + <xsl:apply-templates/> + <xsl:value-of select="$git.docbook.backslash"/> + <xsl:text>fR</xsl:text> +</xsl:template> + +</xsl:stylesheet> diff --git a/Documentation/manpage-normal.xsl b/Documentation/manpage-normal.xsl new file mode 100644 index 00000000..a48f5b11 --- /dev/null +++ b/Documentation/manpage-normal.xsl @@ -0,0 +1,13 @@ +<!-- manpage-normal.xsl: + special settings for manpages rendered from asciidoc+docbook + handles anything we want to keep away from docbook-xsl 1.72.0 --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<xsl:import href="manpage-base.xsl"/> + +<!-- these are the normal values for the roff control characters --> +<xsl:param name="git.docbook.backslash">\</xsl:param> +<xsl:param name="git.docbook.dot" >.</xsl:param> + +</xsl:stylesheet> diff --git a/Documentation/mkfs.btrfs.asciidoc b/Documentation/mkfs.btrfs.asciidoc new file mode 100644 index 00000000..6a492658 --- /dev/null +++ b/Documentation/mkfs.btrfs.asciidoc @@ -0,0 +1,341 @@ +mkfs.btrfs(8) +============= + +NAME +---- +mkfs.btrfs - create a btrfs filesystem + +SYNOPSIS +-------- +*mkfs.btrfs* +$$[-A|--alloc-start <alloc-start>]$$ 
+$$[-b|--byte-count <byte-count>]$$ +$$[-d|--data <data-profile>]$$ +$$[-m|--metadata <metadata profile>]$$ +$$[-M|--mixed]$$ +$$[-l|--leafsize <leafsize>]$$ +$$[-n|--nodesize <nodesize>]$$ +$$[-s|--sectorsize <sectorsize>]$$ +$$[-L|--label <label>]$$ +$$[-K|--nodiscard]$$ +$$[-r|--rootdir <rootdir>]$$ +$$[-O|--features <feature1>[,<feature2>...]]$$ +$$[-U|--uuid <UUID>]$$ +$$[-f|--force]$$ +$$[-q|--quiet]$$ +$$[--help]$$ +$$[-V|--version]$$ +$$<device> [<device>...]$$ + +DESCRIPTION +----------- +*mkfs.btrfs* is used to create the btrfs filesystem on a single or multiple +devices. <device> is typically a block device but can be a file-backed image +as well. Multiple devices are grouped by UUID of the filesystem. + +Before mounting such filesystem, the kernel module must know all the devices +either via preceding execution of *btrfs device scan* or using the *device* +mount option. See section *MULTIPLE DEVICES* for more details. + +OPTIONS +------- +*-A|--alloc-start <offset>*:: +(An option to help debugging chunk allocator.) +Specify the (physical) offset from the start of the device at which allocations +start. The default value is zero. + +*-b|--byte-count <size>*:: +Specify the size of the filesystem. If this option is not used, +mkfs.btrfs uses the entire device space for the filesystem. + +*-d|--data <profile>*:: +Specify the profile for the data block groups. Valid values are 'raid0', +'raid1', 'raid5', 'raid6', 'raid10' or 'single' or dup (case does not matter). ++ +See 'DUP PROFILES ON A SINGLE DEVICE' for more. + +*-m|--metadata <profile>*:: +Specify the profile for the metadata block groups. +Valid values are 'raid0', 'raid1', 'raid5', 'raid6', 'raid10', 'single' or +'dup', (case does not matter). ++ +A single device filesystem will default to 'DUP', unless a SSD is detected. Then +it will default to 'single'. The detection is based on the value of +`/sys/block/DEV/queue/rotational`, where 'DEV' is the short name of the device. 
++ +Note that the rotational status can be arbitrarily set by the underlying block +device driver and may not reflect the true status (network block device, memory-backed +SCSI devices etc). Use the options '--data/--metadata' to avoid confusion. ++ +See 'DUP PROFILES ON A SINGLE DEVICE' for more details. + +*-M|--mixed*:: +Normally the data and metadata block groups are isolated. The 'mixed' mode +will remove the isolation and store both types in the same block group type. +This helps to utilize the free space regardless of the purpose and is suitable +for small devices. The separate allocation of block groups leads to a situation +where the space is reserved for the other block group type, is not available for +allocation and can lead to ENOSPC state. ++ +The recommended size for the mixed mode is for filesystems less than 1GiB. The +soft recommendation is to use it for filesystems smaller than 5GiB. The mixed +mode may lead to degraded performance on larger filesystems, but is otherwise +usable, even on multiple devices. ++ +The 'nodesize' and 'sectorsize' must be equal, and the block group types must +match. ++ +NOTE: versions up to 4.2.x forced the mixed mode for devices smaller than 1GiB. +This has been removed in 4.3+ as it caused some usability issues. + +*-l|--leafsize <size>*:: +Alias for --nodesize. Deprecated. + +*-n|--nodesize <size>*:: +Specify the nodesize, the tree block size in which btrfs stores metadata. The +default value is 16KiB (16384) or the page size, whichever is bigger. Must be a +multiple of the sectorsize, but not larger than 64KiB (65536). Leafsize always +equals nodesize and the options are aliases. ++ +Smaller node size increases fragmentation but leads to taller b-trees which in +turn leads to lower locking contention. Higher node sizes give better packing +and less fragmentation at the cost of more expensive memory operations while +updating the metadata blocks. ++ +NOTE: versions up to 3.11 set the nodesize to 4k. 
+ +*-s|--sectorsize <size>*:: +Specify the sectorsize, the minimum data block allocation unit. ++ +The default value is the page size and is autodetected. If the sectorsize +differs from the page size, the created filesystem may not be mountable by the +kernel. Therefore it is not recommended to use this option unless you are going +to mount it on a system with the appropriate page size. + +*-L|--label <string>*:: +Specify a label for the filesystem. The 'string' should be less than 256 +bytes and must not contain newline characters. + +*-K|--nodiscard*:: +Do not perform whole device TRIM operation on devices that are capable of that. + +*-r|--rootdir <rootdir>*:: +Populate the toplevel subvolume with files from 'rootdir'. This does not +require root permissions and does not mount the filesystem. + +*-O|--features <feature1>[,<feature2>...]*:: +A list of filesystem features turned on at mkfs time. Not all features are +supported by old kernels. To disable a feature, prefix it with '^'. ++ +See section *FILESYSTEM FEATURES* for more details. To see all available +features that mkfs.btrfs supports run: ++ ++mkfs.btrfs -O list-all+ + +*-f|--force*:: +Forcibly overwrite the block devices when an existing filesystem is detected. +By default, mkfs.btrfs will utilize 'libblkid' to check for any known +filesystem on the devices. Alternatively you can use the `wipefs` utility +to clear the devices. + +*-q|--quiet*:: +Print only error or warning messages. Options --features or --help are unaffected. + +*-U|--uuid <UUID>*:: +Create the filesystem with the given 'UUID'. The UUID must not exist on any +filesystem currently present. + +*-V|--version*:: +Print the *mkfs.btrfs* version and exit. + +*--help*:: +Print help. + +SIZE UNITS +---------- +The default unit is 'byte'. All size parameters accept suffixes in the 1024 +base. The recognized suffixes are: 'k', 'm', 'g', 't', 'p', 'e', both uppercase +and lowercase. 
+ +MULTIPLE DEVICES +---------------- + +Before mounting a multiple device filesystem, the kernel module must know the +association of the block devices that are attached to the filesystem UUID. + +There is typically no action needed from the user. On a system that utilizes a +udev-like daemon, any new block device is automatically registered. The rules +call *btrfs device scan*. + +The same command can be used to trigger the device scanning if the btrfs kernel +module is reloaded (naturally all previous information about the device +registration is lost). + +Another possibility is to use the mount option *device* to specify the list of +devices to scan at the time of mount. + + # mount -o device=/dev/sdb,device=/dev/sdc /dev/sda /mnt + +NOTE: this means only scanning; if the devices do not exist in the system, +mount will fail anyway. This can happen on systems without initramfs/initrd and +root partition created with RAID1/10/5/6 profiles. The mount action can happen +before all block devices are discovered. The waiting is usually done on the +initramfs/initrd systems. 
+ +FILESYSTEM FEATURES +------------------- + +*mixed-bg*:: +mixed data and metadata block groups, also set by option '--mixed' + +*extref*:: +(default since btrfs-progs 3.12, kernel support since 3.7) ++ +increased hardlink limit per file in a directory to 65536, older kernels +supported a varying number of hardlinks depending on the sum of all file name +sizes that can be stored into one metadata block + +*raid56*:: +extended format for RAID5/6, also enabled if raid5 or raid6 block groups +are selected + +*skinny-metadata*:: +(default since btrfs-progs 3.18, kernel support since 3.10) ++ +reduced-size metadata for extent references, saves a few percent of metadata + +*no-holes*:: +improved representation of file extents where holes are not explicitly +stored as an extent, saves a few percent of metadata if sparse files are used + +BLOCK GROUPS, CHUNKS, RAID +-------------------------- + +The highlevel organizational units of a filesystem are block groups of three types: +data, metadata and system. + +*DATA*:: +store data blocks and nothing else + +*METADATA*:: +store internal metadata in b-trees, can store file data if they fit into the +inline limit + +*SYSTEM*:: +store structures that describe the mapping between the physical devices and the +linear logical space representing the filesystem + +Other terms commonly used: + +*block group*:: +*chunk*:: +a logical range of space of a given profile, stores data, metadata or both; +sometimes the terms are used interchangeably ++ +A typical size of metadata block group is 256MiB (filesystem smaller than +50GiB) and 1GiB (larger than 50GiB), for data it's 1GiB. The system block group +size is a few megabytes. 
+ +*RAID*:: +a block group profile type that utilizes RAID-like features on multiple +devices: striping, mirroring, parity + +*profile*:: +when used in connection with block groups refers to the allocation strategy +and constraints, see the section 'PROFILES' for more details + +PROFILES +-------- + +There are the following block group types available: + +[ cols="^,^,^,^,^",width="60%" ] +|============================================================= +.2+^.<h| Profile 3+^.^h| Redundancy .2+^.<h| Min/max devices + ^.^h| Copies ^.^h| Parity ^.<h| Striping +| single | 1 | | | 1/any +| DUP | 2 / 1 device | | | 1/1 ^(see note)^ +| RAID0 | | | 1 to N | 2/any +| RAID1 | 2 | | | 2/any +| RAID10 | 2 | | 1 to N | 4/any +| RAID5 | 1 | 1 | 2 to N - 1 | 2/any +| RAID6 | 1 | 2 | 3 to N - 2 | 3/any +|============================================================= + +'Note:' DUP may exist on more than 1 device if it starts on a single device and +another one is added, but *mkfs.btrfs* will not let you create DUP on multiple +devices. + +DUP PROFILES ON A SINGLE DEVICE +------------------------------- + +The mkfs utility will let the user create a filesystem with profiles that write +the logical blocks to 2 physical locations. Whether there are really 2 +physical copies highly depends on the underlying device type. + +For example, an SSD drive can remap the blocks internally to a single copy thus +deduplicating them. This negates the purpose of increased redundancy and just +wastes space. + +The duplicated data/metadata may still be useful to statistically improve the +chances on a device that might perform some internal optimizations. The actual +details are not usually disclosed by vendors. As another example, the widely +used USB flash or SD cards use a translation layer. The data lifetime may +be affected by frequent plugging. The memory cells could get damaged, hopefully +not destroying both copies of particular data. 
+ +The traditional rotational hard drives usually fail at the sector level. + +In any case, a device that starts to misbehave and repairs from the DUP copy +should be replaced! *DUP is not backup*. + +KNOWN ISSUES +------------ + +**SMALL FILESYSTEMS AND LARGE NODESIZE** + +The combination of small filesystem size and large nodesize is not recommended +in general and can lead to various ENOSPC-related issues during mount time or runtime. + +Since mixed block group creation is optional, we allow small +filesystem instances with differing values for 'sectorsize' and 'nodesize' +to be created and could end up in the following situation: + + # mkfs.btrfs -f -n 65536 /dev/loop0 + btrfs-progs v3.19-rc2-405-g976307c + See http://btrfs.wiki.kernel.org for more information. + + Performing full device TRIM (512.00MiB) ... + Label: (null) + UUID: 49fab72e-0c8b-466b-a3ca-d1bfe56475f0 + Node size: 65536 + Sector size: 4096 + Filesystem size: 512.00MiB + Block group profiles: + Data: single 8.00MiB + Metadata: DUP 40.00MiB + System: DUP 12.00MiB + SSD detected: no + Incompat features: extref, skinny-metadata + Number of devices: 1 + Devices: + ID SIZE PATH + 1 512.00MiB /dev/loop0 + + # mount /dev/loop0 /mnt/ + mount: mount /dev/loop0 on /mnt failed: No space left on device + +The ENOSPC occurs during the creation of the UUID tree. This is caused +by large metadata blocks and space reservation strategy that allocates more +than can fit into the filesystem. + + +AVAILABILITY +------------ +*mkfs.btrfs* is part of btrfs-progs. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. 
+ +SEE ALSO +-------- +`btrfs`(8), `wipefs`(8) diff --git a/INSTALL b/INSTALL new file mode 100644 index 00000000..85a839f1 --- /dev/null +++ b/INSTALL @@ -0,0 +1,79 @@ +Installation instructions +========================= + +The Btrfs utility programs require the following libraries/tools to build: + +- libuuid - provided by util-linux, e2fsprogs/e2fslibs or libuuid +- libblkid - block device id library +- liblzo2 - LZO data compression library +- zlib - ZLIB data compression library + +For the btrfs-convert utility: + +- e2fsprogs - ext2/ext3/ext4 file system libraries, or called e2fslibs + +Generating documentation: + +- asciidoc - text document format tool +- xmlto - text document format tool + +XATTR library should be provided by the standard C library or by + +- libattr - extended attribute library + +Please note that the package names may differ according to the distribution. +See https://btrfs.wiki.kernel.org/index.php/Btrfs_source_repositories#Dependencies . + + +Building from sources +--------------------- + +To build from git sources you need to generate the configure script using the +autotools: + + $ ./autogen.sh + +To build from the released tarballs: + + $ ./configure + $ make + $ make install + +You may disable building some parts like documentation, btrfs-convert or +backtrace support. See ./configure --help for more. + +Specific CFLAGS or LDFLAGS should be set like + + $ CFLAGS=... LDFLAGS=... ./configure --prefix=/usr + +and not as arguments to make. You can specify additional flags to build via +variables EXTRA_CFLAGS and EXTRA_LDFLAGS that get appended to the predefined +values of the respective variables. + + $ make EXTRA_CFLAGS=-ggdb3 + +The build utilizes autotools, dependencies for generating the configure +scripts are: + +* autoconf, autoheader +* automake, aclocal +* pkg-config + + +Statically built binaries +------------------------- + +The makefiles are ready to let you build static binaries of the utilities.
This +may be handy in rescue environments. Your system has to provide static version +of the libraries. + +$ make static +$ make btrfs.static +$ make btrfs-convert.static + +The resulting static binaries have the '.static' suffix, the intermediate object +files do not conflict with the normal (dynamic) build. + + +References: +* https://btrfs.wiki.kernel.org diff --git a/Makefile.extrawarn b/Makefile.extrawarn new file mode 100644 index 00000000..1f4bda94 --- /dev/null +++ b/Makefile.extrawarn @@ -0,0 +1,90 @@ +# From linux.git/scripts/Kbuild.include +# +# try-run +# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) +# Exit code chooses option. "$$TMP" is can be used as temporary file and +# is automatically cleaned up. +try-run = $(shell set -e; \ + TMP="$(TMPOUT).$$$$.tmp"; \ + TMPO="$(TMPOUT).$$$$.o"; \ + if ($(1)) >/dev/null 2>&1; \ + then echo "$(2)"; \ + else echo "$(3)"; \ + fi; \ + rm -f "$$TMP" "$$TMPO") + + # cc-option + # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) + + cc-option = $(call try-run,\ + $(CC) $(CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) + +# From linux.git/scripts/Makefile.extrawarn +# ========================================================================== +# +# make W=... settings +# +# W=1 - warnings that may be relevant and does not occur too often +# W=2 - warnings that occur quite often but may still be relevant +# W=3 - the more obscure warnings, can most likely be ignored +# +# $(call cc-option, -W...) handles gcc -W.. 
options which +# are not supported by all versions of the compiler +# ========================================================================== + +ifeq ("$(origin W)", "command line") + export BUILD_ENABLE_EXTRA_GCC_CHECKS := $(W) +endif + +ifdef BUILD_ENABLE_EXTRA_GCC_CHECKS +warning- := $(empty) + +warning-1 := -Wextra -Wunused -Wno-unused-parameter +warning-1 += -Wmissing-declarations +warning-1 += -Wmissing-format-attribute +warning-1 += $(call cc-option, -Wmissing-prototypes) +warning-1 += -Wold-style-definition +warning-1 += $(call cc-option, -Wmissing-include-dirs) +warning-1 += $(call cc-option, -Wunused-but-set-variable) +warning-1 += $(call cc-disable-warning, missing-field-initializers) + +warning-2 := -Waggregate-return +warning-2 += -Wcast-align +warning-2 += -Wdisabled-optimization +warning-2 += -Wnested-externs +warning-2 += -Wshadow +warning-2 += $(call cc-option, -Wlogical-op) +warning-2 += $(call cc-option, -Wmissing-field-initializers) + +warning-3 := -Wbad-function-cast +warning-3 += -Wcast-qual +warning-3 += -Wconversion +warning-3 += -Wpacked +warning-3 += -Wpadded +warning-3 += -Wpointer-arith +warning-3 += -Wredundant-decls +warning-3 += -Wswitch-default +warning-3 += $(call cc-option, -Wpacked-bitfield-compat) +warning-3 += $(call cc-option, -Wvla) + +warning := $(warning-$(findstring 1, $(BUILD_ENABLE_EXTRA_GCC_CHECKS))) +warning += $(warning-$(findstring 2, $(BUILD_ENABLE_EXTRA_GCC_CHECKS))) +warning += $(warning-$(findstring 3, $(BUILD_ENABLE_EXTRA_GCC_CHECKS))) + +ifeq ("$(strip $(warning))","") + $(error W=$(BUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown) +endif + +EXTRAWARN_CFLAGS += $(warning) +else + +ifeq ($(COMPILER),clang) +EXTRAWARN_CFLAGS += $(call cc-disable-warning, initializer-overrides) +EXTRAWARN_CFLAGS += $(call cc-disable-warning, unused-value) +EXTRAWARN_CFLAGS += $(call cc-disable-warning, format) +EXTRAWARN_CFLAGS += $(call cc-disable-warning, unknown-warning-option) +EXTRAWARN_CFLAGS += $(call cc-disable-warning, 
sign-compare) +EXTRAWARN_CFLAGS += $(call cc-disable-warning, format-zero-length) +EXTRAWARN_CFLAGS += $(call cc-disable-warning, uninitialized) +endif +endif diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 00000000..91847896 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,399 @@ +# btrfs-progs +# +# Basic build targets: +# all all main tools +# static build static bnaries, requires static version of the libraries +# test run the full testsuite +# install install to default location (/usr/local) +# clean clean built binaries (not the documentation) +# +# Tuning by variables (environment or make arguments): +# V=1 verbose, print command lines (default: quiet) +# C=1 run checker before compilation (default checker: sparse) +# W=123 build with warnings (default: off) +# EXTRA_CFLAGS additional compiler flags +# EXTRA_LDFLAGS additional linker flags +# +# Static checkers: +# CHECKER static checker binary to be called (default: sparse) +# CHECKER_FLAGS flags to pass to CHECKER, can override CFLAGS +# + +# Export all variables to sub-makes by default +export + +include Makefile.extrawarn + +CC = @CC@ +LN_S = @LN_S@ +AR = @AR@ +RM = @RM@ +RMDIR = @RMDIR@ +INSTALL = @INSTALL@ +DISABLE_DOCUMENTATION = @DISABLE_DOCUMENTATION@ +DISABLE_BTRFSCONVERT = @DISABLE_BTRFSCONVERT@ + +EXTRA_CFLAGS := +EXTRA_LDFLAGS := + +# Common build flags +CFLAGS = @CFLAGS@ \ + -include config.h \ + -DBTRFS_FLAT_INCLUDES \ + -D_XOPEN_SOURCE=700 \ + -fno-strict-aliasing \ + -fPIC $(EXTRAWARN_CFLAGS) $(EXTRA_CFLAGS) + +LDFLAGS = @LDFLAGS@ \ + -rdynamic $(EXTRA_LDFLAGS) + +LIBS = @UUID_LIBS@ @BLKID_LIBS@ @ZLIB_LIBS@ @LZO2_LIBS@ -L. -pthread +LIBBTRFS_LIBS = $(LIBS) + +# Static compilation flags +STATIC_CFLAGS = $(CFLAGS) -ffunction-sections -fdata-sections +STATIC_LDFLAGS = -static -Wl,--gc-sections +STATIC_LIBS = @UUID_LIBS_STATIC@ @BLKID_LIBS_STATIC@ \ + @ZLIB_LIBS_STATIC@ @LZO2_LIBS_STATIC@ -L. 
-pthread + +# don't use FORTIFY with sparse because glibc with FORTIFY can +# generate so many sparse errors that sparse stops parsing, +# which masks real errors that we want to see. +CHECKER := sparse +check_defs := .cc-defines.h +CHECKER_FLAGS := -include $(check_defs) -D__CHECKER__ \ + -D__CHECK_ENDIAN__ -Wbitwise -Wuninitialized -Wshadow -Wundef \ + -U_FORTIFY_SOURCE + +objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ + root-tree.o dir-item.o file-item.o inode-item.o inode-map.o \ + extent-cache.o extent_io.o volumes.o utils.o repair.o \ + qgroup.o raid6.o free-space-cache.o list_sort.o props.o \ + ulist.o qgroup-verify.o backref.o string-table.o task-utils.o \ + inode.o file.o find-root.o free-space-tree.o help.o +cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \ + cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \ + cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o \ + cmds-restore.o cmds-rescue.o chunk-recover.o super-recover.o \ + cmds-property.o cmds-fi-usage.o +libbtrfs_objects = send-stream.o send-utils.o rbtree.o btrfs-list.o crc32c.o \ + uuid-tree.o utils-lib.o rbtree-utils.o +libbtrfs_headers = send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \ + crc32c.h list.h kerncompat.h radix-tree.h extent-cache.h \ + extent_io.h ioctl.h ctree.h btrfsck.h version.h +TESTS = fsck-tests.sh convert-tests.sh + +prefix ?= @prefix@ +exec_prefix = @exec_prefix@ +bindir = @bindir@ +libdir ?= @libdir@ +incdir = @includedir@/btrfs + +ifeq ("$(origin V)", "command line") + BUILD_VERBOSE = $(V) +endif +ifndef BUILD_VERBOSE + BUILD_VERBOSE = 0 +endif + +ifeq ($(BUILD_VERBOSE),1) + Q = +else + Q = @ +endif + +MAKEOPTS = --no-print-directory Q=$(Q) + +# build all by default +progs = $(progs_install) btrfsck btrfs-corrupt-block btrfs-calc-size + +# install only selected +progs_install = btrfs mkfs.btrfs btrfs-debug-tree \ + btrfs-map-logical btrfs-image btrfs-zero-log \ + btrfs-find-root btrfstune 
btrfs-show-super \ + btrfs-select-super + +progs_extra = btrfs-fragments + +progs_static = $(foreach p,$(progs),$(p).static) + +ifneq ($(DISABLE_BTRFSCONVERT),1) +progs_install += btrfs-convert +endif + +# external libs required by various binaries; for btrfs-foo, +# specify btrfs_foo_libs = <list of libs>; see $($(subst...)) rules below +btrfs_convert_libs = @EXT2FS_LIBS@ @COM_ERR_LIBS@ +btrfs_fragments_libs = -lgd -lpng -ljpeg -lfreetype + +SUBDIRS = +BUILDDIRS = $(patsubst %,build-%,$(SUBDIRS)) +INSTALLDIRS = $(patsubst %,install-%,$(SUBDIRS)) +CLEANDIRS = $(patsubst %,clean-%,$(SUBDIRS)) + +ifneq ($(DISABLE_DOCUMENTATION),1) +BUILDDIRS += build-Documentation +INSTALLDIRS += install-Documentation +endif + +.PHONY: $(SUBDIRS) +.PHONY: $(BUILDDIRS) +.PHONY: $(INSTALLDIRS) +.PHONY: $(TESTDIRS) +.PHONY: $(CLEANDIRS) +.PHONY: all install clean + +# Create all the static targets +static_objects = $(patsubst %.o, %.static.o, $(objects)) +static_cmds_objects = $(patsubst %.o, %.static.o, $(cmds_objects)) +static_libbtrfs_objects = $(patsubst %.o, %.static.o, $(libbtrfs_objects)) + +libs_shared = libbtrfs.so.0.1 +libs_static = libbtrfs.a +libs = $(libs_shared) $(libs_static) +lib_links = libbtrfs.so.0 libbtrfs.so +headers = $(libbtrfs_headers) + +# make C=1 to enable sparse +ifdef C + # We're trying to use sparse against glibc headers which go wild + # trying to use internal compiler macros to test features. We + # copy gcc's and give them to sparse. But not __SIZE_TYPE__ + # 'cause sparse defines that one. 
+ # + dummy := $(shell $(CC) -dM -E -x c - < /dev/null | \ + grep -v __SIZE_TYPE__ > $(check_defs)) + check = $(CHECKER) + check_echo = echo +else + check = true + check_echo = true +endif + +%.o.d: %.c + $(Q)$(CC) -MM -MG -MF $@ -MT $(@:.o.d=.o) -MT $(@:.o.d=.static.o) -MT $@ $(CFLAGS) $< + +.c.o: + @$(check_echo) " [SP] $<" + $(Q)$(check) $(CFLAGS) $(CHECKER_FLAGS) $< + @echo " [CC] $@" + $(Q)$(CC) $(CFLAGS) -c $< + +%.static.o: %.c + @echo " [CC] $@" + $(Q)$(CC) $(STATIC_CFLAGS) -c $< -o $@ + +all: $(progs) $(BUILDDIRS) +$(SUBDIRS): $(BUILDDIRS) +$(BUILDDIRS): + @echo "Making all in $(patsubst build-%,%,$@)" + $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst build-%,%,$@) + +test-convert: btrfs btrfs-convert + @echo " [TEST] convert-tests.sh" + $(Q)bash tests/convert-tests.sh + +test-fsck: btrfs btrfs-image btrfs-corrupt-block btrfs-debug-tree mkfs.btrfs + @echo " [TEST] fsck-tests.sh" + $(Q)bash tests/fsck-tests.sh + +test-misc: btrfs btrfs-image btrfs-corrupt-block btrfs-debug-tree mkfs.btrfs btrfstune + @echo " [TEST] misc-tests.sh" + $(Q)bash tests/misc-tests.sh + +test-mkfs: btrfs mkfs.btrfs + @echo " [TEST] mkfs-tests.sh" + $(Q)bash tests/mkfs-tests.sh + +test-fuzz: btrfs + @echo " [TEST] fuzz-tests.sh" + $(Q)bash tests/fuzz-tests.sh + +test-clean: + @echo "Cleaning tests" + $(Q)bash tests/clean-tests.sh + +test: test-fsck test-mkfs test-convert test-misc test-fuzz + +# +# NOTE: For static compiles, you need to have all the required libs +# static equivalent available +# +static: $(progs_static) + +version.h: version.sh version.h.in configure.ac + @echo " [SH] $@" + $(Q)bash ./config.status --silent $@ + +$(libs_shared): $(libbtrfs_objects) $(lib_links) send.h + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) $(libbtrfs_objects) $(LDFLAGS) $(LIBBTRFS_LIBS) \ + -shared -Wl,-soname,libbtrfs.so.0 -o libbtrfs.so.0.1 + +$(libs_static): $(libbtrfs_objects) + @echo " [AR] $@" + $(Q)$(AR) cr libbtrfs.a $(libbtrfs_objects) + +$(lib_links): + @echo " [LN] $@" + $(Q)$(LN_S) -f 
libbtrfs.so.0.1 $@ + +# keep intermediate files from the below implicit rules around +.PRECIOUS: $(addsuffix .o,$(progs)) + +# Make any btrfs-foo out of btrfs-foo.o, with appropriate libs. +# The $($(subst...)) bits below takes the btrfs_*_libs definitions above and +# turns them into a list of libraries to link against if they exist +# +# For static variants, use an extra $(subst) to get rid of the ".static" +# from the target name before translating to list of libs + +btrfs-%.static: $(static_objects) btrfs-%.static.o $(static_libbtrfs_objects) + @echo " [LD] $@" + $(Q)$(CC) $(STATIC_CFLAGS) -o $@ $@.o $(static_objects) \ + $(static_libbtrfs_objects) $(STATIC_LDFLAGS) \ + $($(subst -,_,$(subst .static,,$@)-libs)) $(STATIC_LIBS) + +btrfs-%: $(objects) $(libs_static) btrfs-%.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o $@ $(objects) $@.o $(libs_static) \ + $(LDFLAGS) $(LIBS) $($(subst -,_,$@-libs)) + +btrfs: $(objects) btrfs.o $(cmds_objects) $(libs_static) + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o btrfs btrfs.o $(cmds_objects) \ + $(objects) $(libs_static) $(LDFLAGS) $(LIBS) + +btrfs.static: $(static_objects) btrfs.static.o $(static_cmds_objects) $(static_libbtrfs_objects) + @echo " [LD] $@" + $(Q)$(CC) $(STATIC_CFLAGS) -o btrfs.static btrfs.static.o $(static_cmds_objects) \ + $(static_objects) $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS) + +# For backward compatibility, 'btrfs' changes behaviour to fsck if it's named 'btrfsck' +btrfsck: btrfs + @echo " [LN] $@" + $(Q)$(LN_S) -f btrfs btrfsck + +btrfsck.static: btrfs.static + @echo " [LN] $@" + $(Q)$(LN_S) -f $^ $@ + +mkfs.btrfs: $(objects) $(libs_static) mkfs.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o mkfs.btrfs $(objects) $(libs_static) mkfs.o $(LDFLAGS) $(LIBS) + +mkfs.btrfs.static: $(static_objects) mkfs.static.o $(static_libbtrfs_objects) + @echo " [LD] $@" + $(Q)$(CC) $(STATIC_CFLAGS) -o mkfs.btrfs.static mkfs.static.o $(static_objects) \ + $(static_libbtrfs_objects) $(STATIC_LDFLAGS) 
$(STATIC_LIBS) + +btrfstune: $(objects) $(libs_static) btrfstune.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o btrfstune $(objects) btrfstune.o $(libs_static) $(LDFLAGS) $(LIBS) + +btrfstune.static: $(static_objects) btrfstune.static.o $(static_libbtrfs_objects) + @echo " [LD] $@" + $(Q)$(CC) $(STATIC_CFLAGS) -o $@ btrfstune.static.o $(static_objects) \ + $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS) + +dir-test: $(objects) $(libs) dir-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o dir-test $(objects) $(libs) dir-test.o $(LDFLAGS) $(LIBS) + +quick-test: $(objects) $(libs) quick-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o quick-test $(objects) $(libs) quick-test.o $(LDFLAGS) $(LIBS) + +ioctl-test: $(objects) $(libs) ioctl-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o ioctl-test $(objects) $(libs) ioctl-test.o $(LDFLAGS) $(LIBS) + +send-test: $(objects) $(libs) send-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o send-test $(objects) $(libs) send-test.o $(LDFLAGS) $(LIBS) + +library-test: $(libs_shared) library-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o library-test library-test.o $(LDFLAGS) -lbtrfs + +library-test.static: $(libs_static) library-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o library-test-static library-test.o $(LDFLAGS) $(libs_static) + +test-build: test-build-pre test-build-real + +test-build-pre: + $(MAKE) clean-all + ./autogen.sh + ./configure + +test-build-real: + $(MAKE) library-test + -$(MAKE) library-test.static + $(MAKE) -j 8 all + -$(MAKE) -j 8 static + $(MAKE) -j 8 $(progs_extra) + +manpages: + $(Q)$(MAKE) $(MAKEOPTS) -C Documentation + + +clean-all: clean clean-doc clean-gen + +clean: $(CLEANDIRS) + @echo "Cleaning" + $(Q)$(RM) -f $(progs) cscope.out *.o *.o.d \ + dir-test ioctl-test quick-test send-test library-test library-test-static \ + btrfs.static mkfs.btrfs.static \ + $(check_defs) \ + $(libs) $(lib_links) \ + $(progs_static) $(progs_extra) + +clean-doc: + @echo "Cleaning Documentation" + 
$(Q)$(MAKE) $(MAKEOPTS) -C Documentation clean + +clean-gen: + @echo "Cleaning Generated Files" + $(Q)$(RM) -rf version.h config.status config.cache connfig.log \ + configure.lineno config.status.lineno Makefile \ + Documentation/Makefile \ + config.log config.h config.h.in~ aclocal.m4 \ + configure autom4te.cache/ config/ + +$(CLEANDIRS): + @echo "Cleaning $(patsubst clean-%,%,$@)" + $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst clean-%,%,$@) clean + +install: $(libs) $(progs_install) $(INSTALLDIRS) + $(INSTALL) -m755 -d $(DESTDIR)$(bindir) + $(INSTALL) $(progs_install) $(DESTDIR)$(bindir) + $(INSTALL) fsck.btrfs $(DESTDIR)$(bindir) + # btrfsck is a link to btrfs in the src tree, make it so for installed file as well + $(LN_S) -f btrfs $(DESTDIR)$(bindir)/btrfsck + $(INSTALL) -m755 -d $(DESTDIR)$(libdir) + $(INSTALL) $(libs) $(DESTDIR)$(libdir) + cp -a $(lib_links) $(DESTDIR)$(libdir) + $(INSTALL) -m755 -d $(DESTDIR)$(incdir) + $(INSTALL) -m644 $(headers) $(DESTDIR)$(incdir) + +install-static: $(progs_static) $(INSTALLDIRS) + $(INSTALL) -m755 -d $(DESTDIR)$(bindir) + $(INSTALL) $(progs_static) $(DESTDIR)$(bindir) + # btrfsck is a link to btrfs in the src tree, make it so for installed file as well + $(LN_S) -f btrfs.static $(DESTDIR)$(bindir)/btrfsck.static + +$(INSTALLDIRS): + @echo "Making install in $(patsubst install-%,%,$@)" + $(Q)$(MAKE) $(MAKEOPTS) -C $(patsubst install-%,%,$@) install + +uninstall: + $(Q)$(MAKE) $(MAKEOPTS) -C Documentation uninstall + cd $(DESTDIR)$(incdir); $(RM) -f $(headers) + $(RMDIR) -p --ignore-fail-on-non-empty $(DESTDIR)$(incdir) + cd $(DESTDIR)$(libdir); $(RM) -f $(lib_links) $(libs) + cd $(DESTDIR)$(bindir); $(RM) -f btrfsck fsck.btrfs $(progs_install) + +ifneq ($(MAKECMDGOALS),clean) +-include $(objects:.o=.o.d) $(cmds_objects:.o=.o.d) $(subst .btrfs,, $(filter-out btrfsck.o.d, $(progs:=.o.d))) +endif diff --git a/README.md b/README.md new file mode 100644 index 00000000..2f9d4e7e --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ 
+Btrfs-progs +=========== + +Userspace utilities to manage btrfs filesystems. +License: GPLv2. + +Btrfs is a copy on write (COW) filesystem for Linux aimed at implementing +advanced features while focusing on fault tolerance, repair and easy +administration. + + +This repository hosts the following utilities: + +* **btrfs** — the main administration tool ([manual page](https://btrfs.wiki.kernel.org/index.php/Manpage/btrfs)) +* **mkfs.btrfs** — utility to create the filesystem ([manual page](https://btrfs.wiki.kernel.org/index.php/Manpage/mkfs.btrfs)) + +See INSTALL for build instructions. + +Release cycle +------------- + +The major version releases are time-based and follow the cycle of the linux +kernel releases. The cycle usually takes 2 months. Minor version releases may +happen in the meantime if there are queued bug fixes or minor useful +improvements. + +Development +----------- + +The patch submissions, development or general discussions take place at +*linux-btrfs@vger.kernel.org* mailing list, subscription not required. + +References +---------- + +* [Wiki with more information](https://btrfs.wiki.kernel.org) +* [Btrfs-progs changelogs](https://btrfs.wiki.kernel.org/index.php/Changelog#By_version_.28btrfs-progs.29) +* [wiki/FAQ](https://btrfs.wiki.kernel.org/index.php/FAQ) +* [wiki/Getting started](https://btrfs.wiki.kernel.org/index.php/Getting_started) +* [wiki/TODO](https://btrfs.wiki.kernel.org/index.php/Project_ideas#Userspace_tools_projects) +* [wiki/Developer's FAQ](https://btrfs.wiki.kernel.org/index.php/Developer's_FAQ) diff --git a/androidcompat.h b/androidcompat.h new file mode 100644 index 00000000..eec76dad --- /dev/null +++ b/androidcompat.h @@ -0,0 +1,28 @@ +/* + * Compatibility layer for Android. + * + * Stub calls or alternate functions for pthreads. 
+ */ + +#ifndef __ANDROID_H__ +#define __ANDROID_H__ + +#ifdef ANDROID + +#define pthread_setcanceltype(type, oldtype) (0) +#define pthread_setcancelstate(state, oldstate) (0) + +#define pthread_cancel(ret) pthread_kill((ret), SIGUSR1) + +typedef struct blkid_struct_probe *blkid_probe; + +#include <dirent.h> +#define direct dirent + +#else /* !ANDROID */ + +#include <sys/dir.h> + +#endif /* !ANDROID */ + +#endif /* __ANDROID_H__ */ diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 00000000..96698502 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,99 @@ +#!/bin/sh + +# +# Helps generate autoconf stuff, when code is checked out from SCM. +# +# Copyright (C) 2006-2014 - Karel Zak <kzak@redhat.com> +# + +srcdir=`dirname $0` +test -z "$srcdir" && srcdir=. + +THEDIR=`pwd` +cd $srcdir +DIE=0 + +test -f btrfs.c || { + echo + echo "You must run this script in the top-level btrfs-progs directory" + echo + DIE=1 +} + +(autoconf --version) < /dev/null > /dev/null 2>&1 || { + echo + echo "You must have autoconf installed to generate btrfs-progs build system." + echo + DIE=1 +} +(autoheader --version) < /dev/null > /dev/null 2>&1 || { + echo + echo "You must have autoheader installed to generate btrfs-progs build system." + echo "The autoheader command is part of the GNU autoconf package." + echo + DIE=1 +} + +(automake --version) < /dev/null > /dev/null 2>&1 || { + echo + echo "You must have automake installed to generate btrfs-progs build system." + echo + DIE=1 +} + +(pkg-config --version) < /dev/null > /dev/null 2>&1 || { + echo + echo "You must have pkg-config installed to use btrfs-progs build system." + echo "The pkg-config utility was not found in the standard location, set" + echo "the PKG_CONFIG/PKG_CONFIG_PATH/PKG_CONFIG_LIBDIR variables at the" + echo "configure time." 
+ echo +} + +if test "$DIE" -eq 1; then + exit 1 +fi + +echo +echo "Generate build-system by:" +echo " aclocal: $(aclocal --version | head -1)" +echo " autoconf: $(autoconf --version | head -1)" +echo " autoheader: $(autoheader --version | head -1)" +echo " automake: $(automake --version | head -1)" + +chmod +x version.sh +rm -rf autom4te.cache + +aclocal $AL_OPTS +autoconf $AC_OPTS +autoheader $AH_OPTS + +# it's better to use helper files from automake installation than +# maintain copies in git tree +find_autofile() { + if [ -f "$1" ]; then + return + fi + for HELPER_DIR in $(automake --print-libdir 2>/dev/null) \ + /usr/share/libtool \ + /usr/share/automake-* ; do + f="$HELPER_DIR/$1" + if [ -f "$f" ]; then + cp "$f" config/ + return + fi + done + echo "Cannot find "$1" in known locations" + exit 1 +} + +mkdir -p config/ +find_autofile config.guess +find_autofile config.sub +find_autofile install-sh + +cd $THEDIR + +echo +echo "Now type '$srcdir/configure' and 'make' to compile." +echo diff --git a/backref.c b/backref.c new file mode 100644 index 00000000..8f41f829 --- /dev/null +++ b/backref.c @@ -0,0 +1,1651 @@ +/* + * Copyright (C) 2011 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include "kerncompat.h" +#include "ctree.h" +#include "disk-io.h" +#include "backref.h" +#include "ulist.h" +#include "transaction.h" + +#define pr_debug(...) do { } while (0) + +struct extent_inode_elem { + u64 inum; + u64 offset; + struct extent_inode_elem *next; +}; + +static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, + struct btrfs_file_extent_item *fi, + u64 extent_item_pos, + struct extent_inode_elem **eie) +{ + u64 offset = 0; + struct extent_inode_elem *e; + + if (!btrfs_file_extent_compression(eb, fi) && + !btrfs_file_extent_encryption(eb, fi) && + !btrfs_file_extent_other_encoding(eb, fi)) { + u64 data_offset; + u64 data_len; + + data_offset = btrfs_file_extent_offset(eb, fi); + data_len = btrfs_file_extent_num_bytes(eb, fi); + + if (extent_item_pos < data_offset || + extent_item_pos >= data_offset + data_len) + return 1; + offset = extent_item_pos - data_offset; + } + + e = kmalloc(sizeof(*e), GFP_NOFS); + if (!e) + return -ENOMEM; + + e->next = *eie; + e->inum = key->objectid; + e->offset = key->offset + offset; + *eie = e; + + return 0; +} + +static void free_inode_elem_list(struct extent_inode_elem *eie) +{ + struct extent_inode_elem *eie_next; + + for (; eie; eie = eie_next) { + eie_next = eie->next; + kfree(eie); + } +} + +static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, + u64 extent_item_pos, + struct extent_inode_elem **eie) +{ + u64 disk_byte; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + int slot; + int nritems; + int extent_type; + int ret; + + /* + * from the shared data ref, we only have the leaf but we need + * the key. thus, we must look into all items and see that we + * find one (some) with a reference to our extent item. 
+ */ + nritems = btrfs_header_nritems(eb); + for (slot = 0; slot < nritems; ++slot) { + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(eb, fi); + if (extent_type == BTRFS_FILE_EXTENT_INLINE) + continue; + /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ + disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); + if (disk_byte != wanted_disk_byte) + continue; + + ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie); + if (ret < 0) + return ret; + } + + return 0; +} + +/* + * this structure records all encountered refs on the way up to the root + */ +struct __prelim_ref { + struct list_head list; + u64 root_id; + struct btrfs_key key_for_search; + int level; + int count; + struct extent_inode_elem *inode_list; + u64 parent; + u64 wanted_disk_byte; +}; + +/* + * the rules for all callers of this function are: + * - obtaining the parent is the goal + * - if you add a key, you must know that it is a correct key + * - if you cannot add the parent or a correct key, then we will look into the + * block later to set a correct key + * + * delayed refs + * ============ + * backref type | shared | indirect | shared | indirect + * information | tree | tree | data | data + * --------------------+--------+----------+--------+---------- + * parent logical | y | - | - | - + * key to resolve | - | y | y | y + * tree block logical | - | - | - | - + * root for resolving | y | y | y | y + * + * - column 1: we've the parent -> done + * - column 2, 3, 4: we use the key to find the parent + * + * on disk refs (inline or keyed) + * ============================== + * backref type | shared | indirect | shared | indirect + * information | tree | tree | data | data + * --------------------+--------+----------+--------+---------- + * parent logical | y | - | y | - + * key to resolve | - | - | - | y + * tree block logical | y | y | 
y | y + * root for resolving | - | y | y | y + * + * - column 1, 3: we've the parent -> done + * - column 2: we take the first key from the block to find the parent + * (see __add_missing_keys) + * - column 4: we use the key to find the parent + * + * additional information that's available but not required to find the parent + * block might help in merging entries to gain some speed. + */ + +static int __add_prelim_ref(struct list_head *head, u64 root_id, + struct btrfs_key *key, int level, + u64 parent, u64 wanted_disk_byte, int count, + gfp_t gfp_mask) +{ + struct __prelim_ref *ref; + + if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID) + return 0; + + ref = kmalloc(sizeof(*ref), gfp_mask); + if (!ref) + return -ENOMEM; + + ref->root_id = root_id; + if (key) + ref->key_for_search = *key; + else + memset(&ref->key_for_search, 0, sizeof(ref->key_for_search)); + + ref->inode_list = NULL; + ref->level = level; + ref->count = count; + ref->parent = parent; + ref->wanted_disk_byte = wanted_disk_byte; + list_add_tail(&ref->list, head); + + return 0; +} + +static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, + struct ulist *parents, struct __prelim_ref *ref, + int level, u64 time_seq, const u64 *extent_item_pos, + u64 total_refs) +{ + int ret = 0; + int slot; + struct extent_buffer *eb; + struct btrfs_key key; + struct btrfs_key *key_for_search = &ref->key_for_search; + struct btrfs_file_extent_item *fi; + struct extent_inode_elem *eie = NULL, *old = NULL; + u64 disk_byte; + u64 wanted_disk_byte = ref->wanted_disk_byte; + u64 count = 0; + + if (level != 0) { + eb = path->nodes[level]; + ret = ulist_add(parents, eb->start, 0, GFP_NOFS); + if (ret < 0) + return ret; + return 0; + } + + /* + * We normally enter this function with the path already pointing to + * the first item to check. But sometimes, we may enter it with + * slot==nritems. In that case, go to the next leaf before we continue. 
+ */ + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) + ret = btrfs_next_leaf(root, path); + + while (!ret && count < total_refs) { + eb = path->nodes[0]; + slot = path->slots[0]; + + btrfs_item_key_to_cpu(eb, &key, slot); + + if (key.objectid != key_for_search->objectid || + key.type != BTRFS_EXTENT_DATA_KEY) + break; + + fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); + + if (disk_byte == wanted_disk_byte) { + eie = NULL; + old = NULL; + count++; + if (extent_item_pos) { + ret = check_extent_in_eb(&key, eb, fi, + *extent_item_pos, + &eie); + if (ret < 0) + break; + } + if (ret > 0) + goto next; + ret = ulist_add_merge_ptr(parents, eb->start, + eie, (void **)&old, GFP_NOFS); + if (ret < 0) + break; + if (!ret && extent_item_pos) { + while (old->next) + old = old->next; + old->next = eie; + } + eie = NULL; + } +next: + ret = btrfs_next_item(root, path); + } + + if (ret > 0) + ret = 0; + else if (ret < 0) + free_inode_elem_list(eie); + return ret; +} + +/* + * resolve an indirect backref in the form (root_id, key, level) + * to a logical address + */ +static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, u64 time_seq, + struct __prelim_ref *ref, + struct ulist *parents, + const u64 *extent_item_pos, u64 total_refs) +{ + struct btrfs_root *root; + struct btrfs_key root_key; + struct extent_buffer *eb; + int ret = 0; + int root_level; + int level = ref->level; + + root_key.objectid = ref->root_id; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + + root = btrfs_read_fs_root(fs_info, &root_key); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } + + root_level = btrfs_root_level(&root->root_item); + + if (root_level + 1 == level) + goto out; + + path->lowest_level = level; + ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, 0, 0); + + pr_debug("search slot in root %llu (level %d, ref count %d) returned " + "%d 
for key (%llu %u %llu)\n", + ref->root_id, level, ref->count, ret, + ref->key_for_search.objectid, ref->key_for_search.type, + ref->key_for_search.offset); + if (ret < 0) + goto out; + + eb = path->nodes[level]; + while (!eb) { + if (!level) { + ret = 1; + WARN_ON(1); + goto out; + } + level--; + eb = path->nodes[level]; + } + + ret = add_all_parents(root, path, parents, ref, level, time_seq, + extent_item_pos, total_refs); +out: + path->lowest_level = 0; + btrfs_release_path(path); + return ret; +} + +/* + * resolve all indirect backrefs from the list + */ +static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, u64 time_seq, + struct list_head *head, + const u64 *extent_item_pos, u64 total_refs) +{ + int err; + int ret = 0; + struct __prelim_ref *ref; + struct __prelim_ref *ref_safe; + struct __prelim_ref *new_ref; + struct ulist *parents; + struct ulist_node *node; + struct ulist_iterator uiter; + + parents = ulist_alloc(GFP_NOFS); + if (!parents) + return -ENOMEM; + + /* + * _safe allows us to insert directly after the current item without + * iterating over the newly inserted items. + * we're also allowed to re-assign ref during iteration. + */ + list_for_each_entry_safe(ref, ref_safe, head, list) { + if (ref->parent) /* already direct */ + continue; + if (ref->count == 0) + continue; + err = __resolve_indirect_ref(fs_info, path, time_seq, ref, + parents, extent_item_pos, + total_refs); + /* + * we can only tolerate ENOENT,otherwise,we should catch error + * and return directly. + */ + if (err == -ENOENT) { + continue; + } else if (err) { + ret = err; + goto out; + } + + /* we put the first parent into the ref at hand */ + ULIST_ITER_INIT(&uiter); + node = ulist_next(parents, &uiter); + ref->parent = node ? node->val : 0; + ref->inode_list = node ? 
+ (struct extent_inode_elem *)(uintptr_t)node->aux : NULL; + + /* additional parents require new refs being added here */ + while ((node = ulist_next(parents, &uiter))) { + new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); + if (!new_ref) { + ret = -ENOMEM; + goto out; + } + memcpy(new_ref, ref, sizeof(*ref)); + new_ref->parent = node->val; + new_ref->inode_list = (struct extent_inode_elem *) + (uintptr_t)node->aux; + list_add(&new_ref->list, &ref->list); + } + ulist_reinit(parents); + } +out: + ulist_free(parents); + return ret; +} + +static inline int ref_for_same_block(struct __prelim_ref *ref1, + struct __prelim_ref *ref2) +{ + if (ref1->level != ref2->level) + return 0; + if (ref1->root_id != ref2->root_id) + return 0; + if (ref1->key_for_search.type != ref2->key_for_search.type) + return 0; + if (ref1->key_for_search.objectid != ref2->key_for_search.objectid) + return 0; + if (ref1->key_for_search.offset != ref2->key_for_search.offset) + return 0; + if (ref1->parent != ref2->parent) + return 0; + + return 1; +} + +/* + * read tree blocks and add keys where required. 
+ */ +static int __add_missing_keys(struct btrfs_fs_info *fs_info, + struct list_head *head) +{ + struct list_head *pos; + struct extent_buffer *eb; + + list_for_each(pos, head) { + struct __prelim_ref *ref; + ref = list_entry(pos, struct __prelim_ref, list); + + if (ref->parent) + continue; + if (ref->key_for_search.type) + continue; + BUG_ON(!ref->wanted_disk_byte); + eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, + fs_info->tree_root->leafsize, 0); + if (!extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } + if (btrfs_header_level(eb) == 0) + btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0); + else + btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0); + free_extent_buffer(eb); + } + return 0; +} + +/* + * merge two lists of backrefs and adjust counts accordingly + * + * mode = 1: merge identical keys, if key is set + * FIXME: if we add more keys in __add_prelim_ref, we can merge more here. + * additionally, we could even add a key range for the blocks we + * looked into to merge even more (-> replace unresolved refs by those + * having a parent). 
+ * mode = 2: merge identical parents + */ +static void __merge_refs(struct list_head *head, int mode) +{ + struct list_head *pos1; + + list_for_each(pos1, head) { + struct list_head *n2; + struct list_head *pos2; + struct __prelim_ref *ref1; + + ref1 = list_entry(pos1, struct __prelim_ref, list); + + for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; + pos2 = n2, n2 = pos2->next) { + struct __prelim_ref *ref2; + struct __prelim_ref *xchg; + struct extent_inode_elem *eie; + + ref2 = list_entry(pos2, struct __prelim_ref, list); + + if (mode == 1) { + if (!ref_for_same_block(ref1, ref2)) + continue; + if (!ref1->parent && ref2->parent) { + xchg = ref1; + ref1 = ref2; + ref2 = xchg; + } + } else { + if (ref1->parent != ref2->parent) + continue; + } + + eie = ref1->inode_list; + while (eie && eie->next) + eie = eie->next; + if (eie) + eie->next = ref2->inode_list; + else + ref1->inode_list = ref2->inode_list; + ref1->count += ref2->count; + + list_del(&ref2->list); + kfree(ref2); + } + + } +} + +/* + * add all inline backrefs for bytenr to the list + */ +static int __add_inline_refs(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, u64 bytenr, + int *info_level, struct list_head *prefs, + u64 *total_refs) +{ + int ret = 0; + int slot; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_key found_key; + unsigned long ptr; + unsigned long end; + struct btrfs_extent_item *ei; + u64 flags; + u64 item_size; + + /* + * enumerate all inline refs + */ + leaf = path->nodes[0]; + slot = path->slots[0]; + + item_size = btrfs_item_size_nr(leaf, slot); + BUG_ON(item_size < sizeof(*ei)); + + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + *total_refs += btrfs_extent_refs(leaf, ei); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + ptr = (unsigned long)(ei + 1); + end = (unsigned long)ei + item_size; + + if (found_key.type == BTRFS_EXTENT_ITEM_KEY && + flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + struct 
btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)ptr; + *info_level = btrfs_tree_block_level(leaf, info); + ptr += sizeof(struct btrfs_tree_block_info); + BUG_ON(ptr > end); + } else if (found_key.type == BTRFS_METADATA_ITEM_KEY) { + *info_level = found_key.offset; + } else { + BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); + } + + while (ptr < end) { + struct btrfs_extent_inline_ref *iref; + u64 offset; + int type; + + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + switch (type) { + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = __add_prelim_ref(prefs, 0, NULL, + *info_level + 1, offset, + bytenr, 1, GFP_NOFS); + break; + case BTRFS_SHARED_DATA_REF_KEY: { + struct btrfs_shared_data_ref *sdref; + int count; + + sdref = (struct btrfs_shared_data_ref *)(iref + 1); + count = btrfs_shared_data_ref_count(leaf, sdref); + ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, + bytenr, count, GFP_NOFS); + break; + } + case BTRFS_TREE_BLOCK_REF_KEY: + ret = __add_prelim_ref(prefs, offset, NULL, + *info_level + 1, 0, + bytenr, 1, GFP_NOFS); + break; + case BTRFS_EXTENT_DATA_REF_KEY: { + struct btrfs_extent_data_ref *dref; + int count; + u64 root; + + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + count = btrfs_extent_data_ref_count(leaf, dref); + key.objectid = btrfs_extent_data_ref_objectid(leaf, + dref); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = btrfs_extent_data_ref_offset(leaf, dref); + root = btrfs_extent_data_ref_root(leaf, dref); + ret = __add_prelim_ref(prefs, root, &key, 0, 0, + bytenr, count, GFP_NOFS); + break; + } + default: + WARN_ON(1); + } + if (ret) + return ret; + ptr += btrfs_extent_inline_ref_size(type); + } + + return 0; +} + +/* + * add all non-inline backrefs for bytenr to the list + */ +static int __add_keyed_refs(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, u64 bytenr, + int info_level, struct list_head 
*prefs) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + int ret; + int slot; + struct extent_buffer *leaf; + struct btrfs_key key; + + while (1) { + ret = btrfs_next_item(extent_root, path); + if (ret < 0) + break; + if (ret) { + ret = 0; + break; + } + + slot = path->slots[0]; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + + if (key.objectid != bytenr) + break; + if (key.type < BTRFS_TREE_BLOCK_REF_KEY) + continue; + if (key.type > BTRFS_SHARED_DATA_REF_KEY) + break; + + switch (key.type) { + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = __add_prelim_ref(prefs, 0, NULL, + info_level + 1, key.offset, + bytenr, 1, GFP_NOFS); + break; + case BTRFS_SHARED_DATA_REF_KEY: { + struct btrfs_shared_data_ref *sdref; + int count; + + sdref = btrfs_item_ptr(leaf, slot, + struct btrfs_shared_data_ref); + count = btrfs_shared_data_ref_count(leaf, sdref); + ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, + bytenr, count, GFP_NOFS); + break; + } + case BTRFS_TREE_BLOCK_REF_KEY: + ret = __add_prelim_ref(prefs, key.offset, NULL, + info_level + 1, 0, + bytenr, 1, GFP_NOFS); + break; + case BTRFS_EXTENT_DATA_REF_KEY: { + struct btrfs_extent_data_ref *dref; + int count; + u64 root; + + dref = btrfs_item_ptr(leaf, slot, + struct btrfs_extent_data_ref); + count = btrfs_extent_data_ref_count(leaf, dref); + key.objectid = btrfs_extent_data_ref_objectid(leaf, + dref); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = btrfs_extent_data_ref_offset(leaf, dref); + root = btrfs_extent_data_ref_root(leaf, dref); + ret = __add_prelim_ref(prefs, root, &key, 0, 0, + bytenr, count, GFP_NOFS); + break; + } + default: + WARN_ON(1); + } + if (ret) + return ret; + + } + + return ret; +} + +/* + * this adds all existing backrefs (inline backrefs, backrefs and delayed + * refs) for the given bytenr to the refs list, merges duplicates and resolves + * indirect refs to their parent bytenr. 
+ * When roots are found, they're added to the roots list + * + * FIXME some caching might speed things up + */ +static int find_parent_nodes(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist *refs, + struct ulist *roots, const u64 *extent_item_pos) +{ + struct btrfs_key key; + struct btrfs_path *path; + int info_level = 0; + int ret; + struct list_head prefs; + struct __prelim_ref *ref; + struct extent_inode_elem *eie = NULL; + u64 total_refs = 0; + + INIT_LIST_HEAD(&prefs); + + key.objectid = bytenr; + key.offset = (u64)-1; + if (btrfs_fs_incompat(fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + if (path->slots[0]) { + struct extent_buffer *leaf; + int slot; + + path->slots[0]--; + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid == bytenr && + (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY)) { + ret = __add_inline_refs(fs_info, path, bytenr, + &info_level, &prefs, + &total_refs); + if (ret) + goto out; + ret = __add_keyed_refs(fs_info, path, bytenr, + info_level, &prefs); + if (ret) + goto out; + } + } + btrfs_release_path(path); + + ret = __add_missing_keys(fs_info, &prefs); + if (ret) + goto out; + + __merge_refs(&prefs, 1); + + ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, + extent_item_pos, total_refs); + if (ret) + goto out; + + __merge_refs(&prefs, 2); + + while (!list_empty(&prefs)) { + ref = list_first_entry(&prefs, struct __prelim_ref, list); + WARN_ON(ref->count < 0); + if (roots && ref->count && ref->root_id && ref->parent == 0) { + /* no parent == root of tree */ + ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); + if 
(ret < 0) + goto out; + } + if (ref->count && ref->parent) { + if (extent_item_pos && !ref->inode_list && + ref->level == 0) { + u32 bsz; + struct extent_buffer *eb; + bsz = btrfs_level_size(fs_info->extent_root, + ref->level); + eb = read_tree_block(fs_info->extent_root, + ref->parent, bsz, 0); + if (!extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + ret = -EIO; + goto out; + } + ret = find_extent_in_eb(eb, bytenr, + *extent_item_pos, &eie); + free_extent_buffer(eb); + if (ret < 0) + goto out; + ref->inode_list = eie; + } + ret = ulist_add_merge_ptr(refs, ref->parent, + ref->inode_list, + (void **)&eie, GFP_NOFS); + if (ret < 0) + goto out; + if (!ret && extent_item_pos) { + /* + * we've recorded that parent, so we must extend + * its inode list here + */ + BUG_ON(!eie); + while (eie->next) + eie = eie->next; + eie->next = ref->inode_list; + } + eie = NULL; + } + list_del(&ref->list); + kfree(ref); + } + +out: + btrfs_free_path(path); + while (!list_empty(&prefs)) { + ref = list_first_entry(&prefs, struct __prelim_ref, list); + list_del(&ref->list); + kfree(ref); + } + if (ret < 0) + free_inode_elem_list(eie); + return ret; +} + +static void free_leaf_list(struct ulist *blocks) +{ + struct ulist_node *node = NULL; + struct extent_inode_elem *eie; + struct ulist_iterator uiter; + + ULIST_ITER_INIT(&uiter); + while ((node = ulist_next(blocks, &uiter))) { + if (!node->aux) + continue; + eie = (struct extent_inode_elem *)(uintptr_t)node->aux; + free_inode_elem_list(eie); + node->aux = 0; + } + + ulist_free(blocks); +} + +/* + * Finds all leafs with a reference to the specified combination of bytenr and + * offset. key_list_head will point to a list of corresponding keys (caller must + * free each list element). The leafs will be stored in the leafs ulist, which + * must be freed with ulist_free. 
+ * + * returns 0 on success, <0 on error + */ +static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist **leafs, + const u64 *extent_item_pos) +{ + int ret; + + *leafs = ulist_alloc(GFP_NOFS); + if (!*leafs) + return -ENOMEM; + + ret = find_parent_nodes(trans, fs_info, bytenr, + time_seq, *leafs, NULL, extent_item_pos); + if (ret < 0 && ret != -ENOENT) { + free_leaf_list(*leafs); + return ret; + } + + return 0; +} + +/* + * walk all backrefs for a given extent to find all roots that reference this + * extent. Walking a backref means finding all extents that reference this + * extent and in turn walk the backrefs of those, too. Naturally this is a + * recursive process, but here it is implemented in an iterative fashion: We + * find all referencing extents for the extent in question and put them on a + * list. In turn, we find all referencing extents for those, further appending + * to the list. The way we iterate the list allows adding more elements after + * the current while iterating. The process stops when we reach the end of the + * list. Found roots are added to the roots list. + * + * returns 0 on success, < 0 on error. 
+ */ +static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist **roots) +{ + struct ulist *tmp; + struct ulist_node *node = NULL; + struct ulist_iterator uiter; + int ret; + + tmp = ulist_alloc(GFP_NOFS); + if (!tmp) + return -ENOMEM; + *roots = ulist_alloc(GFP_NOFS); + if (!*roots) { + ulist_free(tmp); + return -ENOMEM; + } + + ULIST_ITER_INIT(&uiter); + while (1) { + ret = find_parent_nodes(trans, fs_info, bytenr, + time_seq, tmp, *roots, NULL); + if (ret < 0 && ret != -ENOENT) { + ulist_free(tmp); + ulist_free(*roots); + return ret; + } + node = ulist_next(tmp, &uiter); + if (!node) + break; + bytenr = node->val; + cond_resched(); + } + + ulist_free(tmp); + return 0; +} + +int btrfs_find_all_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist **roots) +{ + return __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots); +} + +/* + * this makes the path point to (inum INODE_ITEM ioff) + */ +int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, + struct btrfs_path *path) +{ + struct btrfs_key key; + return btrfs_find_item(fs_root, path, inum, ioff, + BTRFS_INODE_ITEM_KEY, &key); +} + +static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, + struct btrfs_path *path, + struct btrfs_key *found_key) +{ + return btrfs_find_item(fs_root, path, inum, ioff, + BTRFS_INODE_REF_KEY, found_key); +} + +int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, + u64 start_off, struct btrfs_path *path, + struct btrfs_inode_extref **ret_extref, + u64 *found_off) +{ + int ret, slot; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_inode_extref *extref; + struct extent_buffer *leaf; + unsigned long ptr; + + key.objectid = inode_objectid; + btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); + key.offset = start_off; + + ret = btrfs_search_slot(NULL, root, &key, path, 
0, 0); + if (ret < 0) + return ret; + + while (1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + /* + * If the item at offset is not found, + * btrfs_search_slot will point us to the slot + * where it should be inserted. In our case + * that will be the slot directly before the + * next INODE_REF_KEY_V2 item. In the case + * that we're pointing to the last slot in a + * leaf, we must move one leaf over. + */ + ret = btrfs_next_leaf(root, path); + if (ret) { + if (ret >= 1) + ret = -ENOENT; + break; + } + continue; + } + + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + /* + * Check that we're still looking at an extended ref key for + * this particular objectid. If we have different + * objectid or type then there are no more to be found + * in the tree and we can exit. + */ + ret = -ENOENT; + if (found_key.objectid != inode_objectid) + break; + if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY) + break; + + ret = 0; + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + extref = (struct btrfs_inode_extref *)ptr; + *ret_extref = extref; + if (found_off) + *found_off = found_key.offset; + break; + } + + return ret; +} + +/* + * this iterates to turn a name (from iref/extref) into a full filesystem path. + * Elements of the path are separated by '/' and the path is guaranteed to be + * 0-terminated. the path is only given within the current file system. + * Therefore, it never starts with a '/'. the caller is responsible to provide + * "size" bytes in "dest". the dest buffer will be filled backwards. finally, + * the start point of the resulting string is returned. this pointer is within + * dest, normally. + * in case the path buffer would overflow, the pointer is decremented further + * as if output was written to the buffer, though no more output is actually + * generated. that way, the caller can determine how much space would be + * required for the path to fit into the buffer. 
in that case, the returned
 * value will be smaller than dest. callers must check this!
 */
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
			u32 name_len, unsigned long name_off,
			struct extent_buffer *eb_in, u64 parent,
			char *dest, u32 size)
{
	struct extent_buffer *eb = eb_in;
	struct btrfs_inode_ref *iref;
	struct btrfs_key found_key;
	s64 bytes_left = ((s64)size) - 1;
	u64 next_inum;
	int slot;
	int ret;

	/* the buffer is filled from its end, starting with the terminator */
	if (bytes_left >= 0)
		dest[bytes_left] = '\0';

	for (;;) {
		/* keep counting even when the name does not fit any more */
		bytes_left -= name_len;
		if (bytes_left >= 0)
			read_extent_buffer(eb, dest + bytes_left,
					   name_off, name_len);
		if (eb != eb_in)
			free_extent_buffer(eb);

		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
		if (ret > 0)
			ret = -ENOENT;
		if (ret)
			break;

		next_inum = found_key.offset;

		/* regular exit ahead */
		if (parent == next_inum)
			break;

		slot = path->slots[0];
		eb = path->nodes[0];
		/* make sure we can use eb after releasing the path */
		if (eb != eb_in)
			eb->refs++;
		btrfs_release_path(path);

		/* the name is stored directly behind the inode ref */
		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
		name_len = btrfs_inode_ref_name_len(eb, iref);
		name_off = (unsigned long)(iref + 1);

		parent = next_inum;
		--bytes_left;
		if (bytes_left >= 0)
			dest[bytes_left] = '/';
	}

	btrfs_release_path(path);

	if (ret)
		return ERR_PTR(ret);

	return dest + bytes_left;
}

/*
 * this makes the path point to (logical EXTENT_ITEM *)
 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
 * tree blocks and <0 on error.
+ */ +int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, + struct btrfs_path *path, struct btrfs_key *found_key, + u64 *flags_ret) +{ + int ret; + u64 flags; + u64 size = 0; + u32 item_size; + struct extent_buffer *eb; + struct btrfs_extent_item *ei; + struct btrfs_key key; + + if (btrfs_fs_incompat(fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + key.objectid = logical; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); + if (ret < 0) + return ret; + + ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0); + if (ret) { + if (ret > 0) + ret = -ENOENT; + return ret; + } + btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); + if (found_key->type == BTRFS_METADATA_ITEM_KEY) + size = fs_info->extent_root->leafsize; + else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) + size = found_key->offset; + + if (found_key->objectid > logical || + found_key->objectid + size <= logical) { + pr_debug("logical %llu is not within any extent\n", logical); + return -ENOENT; + } + + eb = path->nodes[0]; + item_size = btrfs_item_size_nr(eb, path->slots[0]); + BUG_ON(item_size < sizeof(*ei)); + + ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); + flags = btrfs_extent_flags(eb, ei); + + pr_debug("logical %llu is at position %llu within the extent (%llu " + "EXTENT_ITEM %llu) flags %#llx size %u\n", + logical, logical - found_key->objectid, found_key->objectid, + found_key->offset, flags, item_size); + + if (flags_ret) { + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) + *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK; + else if (flags & BTRFS_EXTENT_FLAG_DATA) + *flags_ret = BTRFS_EXTENT_FLAG_DATA; + else + BUG_ON(1); + return 0; + } else { + WARN_ON(1); + return -EIO; + } +} + +/* + * helper function to iterate extent inline refs. ptr must point to a 0 value + * for the first call and may be modified. 
it is used to track state.
 * if more refs exist, 0 is returned and the next call to
 * __get_extent_inline_ref must pass the modified ptr parameter to get the
 * next ref. after the last ref was processed, 1 is returned.
 * returns <0 on error
 */
static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
				   struct btrfs_key *key,
				   struct btrfs_extent_item *ei, u32 item_size,
				   struct btrfs_extent_inline_ref **out_eiref,
				   int *out_type)
{
	unsigned long end = (unsigned long)ei + item_size;

	if (!*ptr) {
		/* first call: locate the first inline ref behind the item */
		u64 flags = btrfs_extent_flags(eb, ei);

		if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
			*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
		} else if (key->type == BTRFS_METADATA_ITEM_KEY) {
			/* a skinny metadata extent */
			*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
		} else {
			struct btrfs_tree_block_info *info;

			/* the refs start behind the tree block info */
			WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
			info = (struct btrfs_tree_block_info *)(ei + 1);
			*out_eiref =
				(struct btrfs_extent_inline_ref *)(info + 1);
		}
		*ptr = (unsigned long)*out_eiref;
		/* the item has no inline refs at all */
		if ((unsigned long)(*ptr) >= end)
			return -ENOENT;
	}

	*out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
	*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);

	/* advance the cursor past the ref just reported */
	*ptr += btrfs_extent_inline_ref_size(*out_type);
	WARN_ON(*ptr > end);
	if (*ptr == end)
		return 1; /* last */

	return 0;
}

/*
 * reads the tree block backref for an extent. tree level and root are returned
 * through out_level and out_root. ptr must point to a 0 value for the first
 * call and may be modified (see __get_extent_inline_ref comment).
 * returns 0 if data was provided, 1 if there was no more data to provide or
 * <0 on error.
+ */ +int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, + struct btrfs_key *key, struct btrfs_extent_item *ei, + u32 item_size, u64 *out_root, u8 *out_level) +{ + int ret; + int type; + struct btrfs_tree_block_info *info; + struct btrfs_extent_inline_ref *eiref; + + if (*ptr == (unsigned long)-1) + return 1; + + while (1) { + ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size, + &eiref, &type); + if (ret < 0) + return ret; + + if (type == BTRFS_TREE_BLOCK_REF_KEY || + type == BTRFS_SHARED_BLOCK_REF_KEY) + break; + + if (ret == 1) + return 1; + } + + /* we can treat both ref types equally here */ + info = (struct btrfs_tree_block_info *)(ei + 1); + *out_root = btrfs_extent_inline_ref_offset(eb, eiref); + *out_level = btrfs_tree_block_level(eb, info); + + if (ret == 1) + *ptr = (unsigned long)-1; + + return 0; +} + +static int iterate_leaf_refs(struct extent_inode_elem *inode_list, + u64 root, u64 extent_item_objectid, + iterate_extent_inodes_t *iterate, void *ctx) +{ + struct extent_inode_elem *eie; + int ret = 0; + + for (eie = inode_list; eie; eie = eie->next) { + pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " + "root %llu\n", extent_item_objectid, + eie->inum, eie->offset, root); + ret = iterate(eie->inum, eie->offset, root, ctx); + if (ret) { + pr_debug("stopping iteration for %llu due to ret=%d\n", + extent_item_objectid, ret); + break; + } + } + + return ret; +} + +/* + * calls iterate() for every inode that references the extent identified by + * the given parameters. + * when the iterator function returns a non-zero value, iteration stops. 
+ */ +int iterate_extent_inodes(struct btrfs_fs_info *fs_info, + u64 extent_item_objectid, u64 extent_item_pos, + int search_commit_root, + iterate_extent_inodes_t *iterate, void *ctx) +{ + int ret; + struct btrfs_trans_handle *trans = NULL; + struct ulist *refs = NULL; + struct ulist *roots = NULL; + struct ulist_node *ref_node = NULL; + struct ulist_node *root_node = NULL; + struct ulist_iterator ref_uiter; + struct ulist_iterator root_uiter; + + pr_debug("resolving all inodes for extent %llu\n", + extent_item_objectid); + + ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, + 0, &refs, &extent_item_pos); + if (ret) + goto out; + + ULIST_ITER_INIT(&ref_uiter); + while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { + ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val, + 0, &roots); + if (ret) + break; + ULIST_ITER_INIT(&root_uiter); + while (!ret && (root_node = ulist_next(roots, &root_uiter))) { + pr_debug("root %llu references leaf %llu, data list " + "%#llx\n", root_node->val, ref_node->val, + ref_node->aux); + ret = iterate_leaf_refs((struct extent_inode_elem *) + (uintptr_t)ref_node->aux, + root_node->val, + extent_item_objectid, + iterate, ctx); + } + ulist_free(roots); + } + + free_leaf_list(refs); +out: + return ret; +} + +int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, + struct btrfs_path *path, + iterate_extent_inodes_t *iterate, void *ctx) +{ + int ret; + u64 extent_item_pos; + u64 flags = 0; + struct btrfs_key found_key; + int search_commit_root = 0; + + ret = extent_from_logical(fs_info, logical, path, &found_key, &flags); + btrfs_release_path(path); + if (ret < 0) + return ret; + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) + return -EINVAL; + + extent_item_pos = logical - found_key.objectid; + ret = iterate_extent_inodes(fs_info, found_key.objectid, + extent_item_pos, search_commit_root, + iterate, ctx); + + return ret; +} + +typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long 
name_off, + struct extent_buffer *eb, void *ctx); + +static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, + struct btrfs_path *path, + iterate_irefs_t *iterate, void *ctx) +{ + int ret = 0; + int slot; + u32 cur; + u32 len; + u32 name_len; + u64 parent = 0; + int found = 0; + struct extent_buffer *eb; + struct btrfs_item *item; + struct btrfs_inode_ref *iref; + struct btrfs_key found_key; + + while (!ret) { + ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, + &found_key); + if (ret < 0) + break; + if (ret) { + ret = found ? 0 : -ENOENT; + break; + } + ++found; + + parent = found_key.offset; + slot = path->slots[0]; + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); + btrfs_release_path(path); + + item = btrfs_item_nr(slot); + iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); + + for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { + name_len = btrfs_inode_ref_name_len(eb, iref); + /* path must be released before calling iterate()! */ + pr_debug("following ref at offset %u for inode %llu in " + "tree %llu\n", cur, found_key.objectid, + fs_root->objectid); + ret = iterate(parent, name_len, + (unsigned long)(iref + 1), eb, ctx); + if (ret) + break; + len = sizeof(*iref) + name_len; + iref = (struct btrfs_inode_ref *)((char *)iref + len); + } + free_extent_buffer(eb); + } + + btrfs_release_path(path); + + return ret; +} + +static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, + struct btrfs_path *path, + iterate_irefs_t *iterate, void *ctx) +{ + int ret; + int slot; + u64 offset = 0; + u64 parent; + int found = 0; + struct extent_buffer *eb; + struct btrfs_inode_extref *extref; + struct extent_buffer *leaf; + u32 item_size; + u32 cur_offset; + unsigned long ptr; + + while (1) { + ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref, + &offset); + if (ret < 0) + break; + if (ret) { + ret = found ? 
0 : -ENOENT; + break; + } + ++found; + + slot = path->slots[0]; + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); + + btrfs_release_path(path); + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, slot); + ptr = btrfs_item_ptr_offset(leaf, slot); + cur_offset = 0; + + while (cur_offset < item_size) { + u32 name_len; + + extref = (struct btrfs_inode_extref *)(ptr + cur_offset); + parent = btrfs_inode_extref_parent(eb, extref); + name_len = btrfs_inode_extref_name_len(eb, extref); + ret = iterate(parent, name_len, + (unsigned long)&extref->name, eb, ctx); + if (ret) + break; + + cur_offset += btrfs_inode_extref_name_len(leaf, extref); + cur_offset += sizeof(*extref); + } + free_extent_buffer(eb); + + offset++; + } + + btrfs_release_path(path); + + return ret; +} + +static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, + struct btrfs_path *path, iterate_irefs_t *iterate, + void *ctx) +{ + int ret; + int found_refs = 0; + + ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx); + if (!ret) + ++found_refs; + else if (ret != -ENOENT) + return ret; + + ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx); + if (ret == -ENOENT && found_refs) + return 0; + + return ret; +} + +/* + * returns 0 if the path could be dumped (probably truncated) + * returns <0 in case of an error + */ +static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, + struct extent_buffer *eb, void *ctx) +{ + struct inode_fs_paths *ipath = ctx; + char *fspath; + char *fspath_min; + int i = ipath->fspath->elem_cnt; + const int s_ptr = sizeof(char *); + u32 bytes_left; + + bytes_left = ipath->fspath->bytes_left > s_ptr ? 
+ ipath->fspath->bytes_left - s_ptr : 0; + + fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; + fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len, + name_off, eb, inum, fspath_min, bytes_left); + if (IS_ERR(fspath)) + return PTR_ERR(fspath); + + if (fspath > fspath_min) { + ipath->fspath->val[i] = (u64)(unsigned long)fspath; + ++ipath->fspath->elem_cnt; + ipath->fspath->bytes_left = fspath - fspath_min; + } else { + ++ipath->fspath->elem_missed; + ipath->fspath->bytes_missing += fspath_min - fspath; + ipath->fspath->bytes_left = 0; + } + + return 0; +} + +/* + * this dumps all file system paths to the inode into the ipath struct, provided + * is has been created large enough. each path is zero-terminated and accessed + * from ipath->fspath->val[i]. + * when it returns, there are ipath->fspath->elem_cnt number of paths available + * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the + * number of missed paths in recored in ipath->fspath->elem_missed, otherwise, + * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would + * have been needed to return all paths. + */ +int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) +{ + return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path, + inode_to_path, ipath); +} + +struct btrfs_data_container *init_data_container(u32 total_bytes) +{ + struct btrfs_data_container *data; + size_t alloc_bytes; + + alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); + data = vmalloc(alloc_bytes); + if (!data) + return ERR_PTR(-ENOMEM); + + if (total_bytes >= sizeof(*data)) { + data->bytes_left = total_bytes - sizeof(*data); + data->bytes_missing = 0; + } else { + data->bytes_missing = sizeof(*data) - total_bytes; + data->bytes_left = 0; + } + + data->elem_cnt = 0; + data->elem_missed = 0; + + return data; +} + +/* + * allocates space to return multiple file system paths for an inode. 
+ * total_bytes to allocate are passed, note that space usable for actual path + * information will be total_bytes - sizeof(struct inode_fs_paths). + * the returned pointer must be freed with free_ipath() in the end. + */ +struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, + struct btrfs_path *path) +{ + struct inode_fs_paths *ifp; + struct btrfs_data_container *fspath; + + fspath = init_data_container(total_bytes); + if (IS_ERR(fspath)) + return (void *)fspath; + + ifp = kmalloc(sizeof(*ifp), GFP_NOFS); + if (!ifp) { + kfree(fspath); + return ERR_PTR(-ENOMEM); + } + + ifp->btrfs_path = path; + ifp->fspath = fspath; + ifp->fs_root = fs_root; + + return ifp; +} + +void free_ipath(struct inode_fs_paths *ipath) +{ + if (!ipath) + return; + vfree(ipath->fspath); + kfree(ipath); +} diff --git a/backref.h b/backref.h new file mode 100644 index 00000000..3d2ed458 --- /dev/null +++ b/backref.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2011 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
 */

#ifndef __BTRFS_BACKREF_H__
#define __BTRFS_BACKREF_H__

#include "ulist.h"
#include "extent_io.h"

/*
 * State for resolving an inode number to file system paths: the root and
 * path used for the lookups plus the container the results are written to.
 */
struct inode_fs_paths {
	struct btrfs_path		*btrfs_path;
	struct btrfs_root		*fs_root;
	struct btrfs_data_container	*fspath;
};

/* callback type for the extent -> inode iterators; non-zero stops iteration */
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
		void *ctx);

int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
			struct btrfs_path *path);

/*
 * find the extent item covering logical; the key is returned in found_key
 * and the extent kind (tree block or data) through flags
 */
int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
			struct btrfs_path *path, struct btrfs_key *found_key,
			u64 *flags);

/*
 * iterate the tree block backrefs of an extent item; *ptr must be 0 on the
 * first call. returns 0 with data, 1 when done, <0 on error
 */
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
			    struct btrfs_key *key, struct btrfs_extent_item *ei,
			    u32 item_size, u64 *out_root, u8 *out_level);

/* call iterate() for every inode referencing the given extent */
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
				u64 extent_item_objectid,
				u64 extent_offset, int search_commit_root,
				iterate_extent_inodes_t *iterate, void *ctx);

/* like iterate_extent_inodes(), starting from a logical address */
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
				struct btrfs_path *path,
				iterate_extent_inodes_t *iterate, void *ctx);

/* fill ipath->fspath with all paths leading to inode inum */
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);

int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 time_seq, struct ulist **roots);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
			u32 name_len, unsigned long name_off,
			struct extent_buffer *eb_in, u64 parent,
			char *dest, u32 size);

/* allocation helpers; an ipath must be released with free_ipath() */
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
					struct btrfs_path *path);
void free_ipath(struct inode_fs_paths *ipath);

int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
			  u64 start_off, struct btrfs_path *path,
			  struct btrfs_inode_extref **ret_extref,
			  u64 *found_off);
#endif
@@ -0,0 +1,159 @@
#!/usr/bin/env python
# Copyright (C) 2007 Oracle.  All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License v2 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 021110-1307, USA.
#
import sys, os, stat, fcntl
from optparse import OptionParser

def copylink(srcname, dst, filename, statinfo, force_name):
    """Recreate the symlink srcname inside dst unless the name already exists.

    The link is named force_name when given, otherwise filename.
    """
    dstname = os.path.join(dst, force_name or filename)
    if not os.path.exists(dstname):
        link_target = os.readlink(srcname)
        os.symlink(link_target, dstname)

def copydev(srcname, dst, filename, statinfo, force_name):
    """Recreate a block/character device node inside dst unless it exists.

    Type bits, permission bits and device number come from statinfo.
    """
    devbits = statinfo.st_mode & (stat.S_IFBLK | stat.S_IFCHR)
    mode = stat.S_IMODE(statinfo.st_mode) | devbits
    dstname = os.path.join(dst, force_name or filename)
    if not os.path.exists(dstname):
        os.mknod(dstname, mode, statinfo.st_rdev)

def copyfile(srcname, dst, filename, statinfo, force_name):
    """Copy one directory entry srcname into directory dst.

    Symlinks and device nodes are recreated, other non-regular files are
    skipped.  Regular files are hard linked (options.link), cloned through
    the ioctl below, or copied byte by byte when cloning is disabled
    (options.copy) or fails.  Mode and ownership are taken from statinfo.
    """
    dstname = os.path.join(dst, force_name or filename)

    st_mode = statinfo.st_mode
    if stat.S_ISLNK(st_mode):
        # use our own arguments rather than the main loop's 'part' global,
        # which is unset (or stale) when a single file is copied
        copylink(srcname, dst, filename, statinfo, force_name)
        return
    elif stat.S_ISBLK(st_mode) or stat.S_ISCHR(st_mode):
        copydev(srcname, dst, filename, statinfo, force_name)
        return
    elif not stat.S_ISREG(st_mode):
        return

    # best effort: the destination usually does not exist yet
    try:
        os.unlink(dstname)
    except OSError:
        pass

    if options.link:
        os.link(srcname, dstname)
        return

    dstf = file(dstname, 'w')
    try:
        srcf = file(srcname, 'r')
        try:
            ret = 1

            try:
                if not options.copy:
                    # 1074041865 == 0x40049409 == _IOW(0x94, 9, int),
                    # the btrfs clone ioctl
                    ret = fcntl.ioctl(dstf.fileno(), 1074041865,
                                      srcf.fileno())
            except EnvironmentError:
                # cloning not possible, fall back to a plain copy
                pass

            if ret != 0:
                while True:
                    buf = srcf.read(256 * 1024)
                    if not buf:
                        break
                    dstf.write(buf)
        finally:
            srcf.close()
    finally:
        # flush and close before touching mode and ownership
        dstf.close()

    os.chmod(dstname, stat.S_IMODE(statinfo.st_mode))
    os.chown(dstname, statinfo.st_uid, statinfo.st_gid)


usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
parser.add_option("-l", "--link", help="Create hard links", default=False,
                  action="store_true")
parser.add_option("-c", "--copy", help="Copy file bytes (don't cow)",
                  default=False, action="store_true")

(options,args) = parser.parse_args()

if len(args) < 2:
    sys.stderr.write("source or destination not specified\n")
    sys.exit(1)

if options.link and options.copy:
    sys.stderr.write("Both -l and -c specified, using copy mode\n")
    options.link = False


# every argument but the last is a source, the last one is the destination
total_args = len(args)
src_args = total_args - 1
orig_dst = args[-1]

if src_args > 1:
    # several sources can only be copied into a directory
    if not os.path.exists(orig_dst):
        os.makedirs(orig_dst)
    if not os.path.isdir(orig_dst):
        sys.stderr.write("Destination %s is not a directory\n" % orig_dst)
        sys.exit(1)

for srci in xrange(0, src_args):
    src = args[srci]
    if os.path.isfile(src):
        statinfo = os.lstat(src)
        force_name = None
        if src_args == 1:
            if not os.path.isdir(orig_dst):
                # single file to a non-directory target: copy under that name
                force_name = os.path.basename(orig_dst)
                orig_dst = os.path.dirname(orig_dst) or '.'
        copyfile(src, orig_dst, os.path.basename(src), statinfo, force_name)
        continue

    # src is not a regular file: copy it as a directory tree
    if src_args > 1 or os.path.exists(orig_dst):
        dst = os.path.join(orig_dst, os.path.basename(src))
    else:
        dst = orig_dst

    if not os.path.exists(dst):
        os.makedirs(dst)
        statinfo = os.stat(src)
        os.chmod(dst, stat.S_IMODE(statinfo.st_mode))
        os.chown(dst, statinfo.st_uid, statinfo.st_gid)

    iter = os.walk(src, topdown=True)

    for (dirpath, dirnames, filenames) in iter:
        # recreate sub-directories (or symlinks found among them) first
        for x in dirnames:
            srcname = os.path.join(dirpath, x)
            statinfo = os.lstat(srcname)

            # path of the entry relative to the source root
            part = os.path.relpath(srcname, src)

            if stat.S_ISLNK(statinfo.st_mode):
                copylink(srcname, dst, part, statinfo, None)
                continue

            dst_dir = os.path.join(dst, part)
            if not os.path.exists(dst_dir):
                os.makedirs(dst_dir)

            os.chmod(dst_dir, stat.S_IMODE(statinfo.st_mode))
            os.chown(dst_dir, statinfo.st_uid, statinfo.st_gid)

        # then copy every non-directory entry of this directory
        for f in filenames:
            srcname = os.path.join(dirpath, f)
            part = os.path.relpath(srcname, src)

            statinfo = os.lstat(srcname)
            copyfile(srcname, dst, part, statinfo, None)


diff --git a/bitops.h b/bitops.h
new file mode 100644
index 00000000..5b35f9fc
--- /dev/null
+++ b/bitops.h
@@ -0,0 +1,224 @@
#ifndef _PERF_LINUX_BITOPS_H_
#define _PERF_LINUX_BITOPS_H_

#include <linux/kernel.h>

/* integer division of n by d, rounded up */
#ifndef DIV_ROUND_UP
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#endif

/* number of long/u64/u32 words needed to hold nr bits */
#define BITS_PER_BYTE		8
#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
#define BITS_TO_U64(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
#define BITS_TO_U32(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))

/* iterate bit over the index of every set bit in the first size bits of addr */
#define for_each_set_bit(bit, addr, size) \
	for ((bit) = find_first_bit((addr), (size));		\
	     (bit) < (size);					\
	     (bit) = find_next_bit((addr), (size), (bit) + 1))

/* same as for_each_set_bit() but use bit as value to start with */
#define for_each_set_bit_from(bit, addr, size) \
	for ((bit) = find_next_bit((addr), (size), (bit));	\
	     (bit) < (size);					\
	     (bit) = find_next_bit((addr), (size), (bit) + 1))

/* set bit nr in the bitmap at addr (plain read-modify-write) */
static inline void set_bit(int nr, unsigned long *addr)
{
	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

/* clear bit nr in the bitmap at addr (plain read-modify-write) */
static inline void clear_bit(int nr, unsigned long *addr)
{
	addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
}

/**
 * hweightN - returns the hamming weight of a N-bit word
 * @x: the word to weigh
 *
 * The Hamming Weight of a number is the total number of bits set in it.
 */

static inline unsigned int hweight32(unsigned int w)
{
	/* sum bits in pairs, then nibbles, bytes and finally the whole word */
	unsigned int res = w - ((w >> 1) & 0x55555555);
	res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
	res = (res + (res >> 4)) & 0x0F0F0F0F;
	res = res + (res >> 8);
	return (res + (res >> 16)) & 0x000000FF;
}

static inline unsigned long hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
	/* weigh both halves with the 32-bit variant */
	return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
#elif BITS_PER_LONG == 64
	/* same folding scheme as hweight32(), on 64 bits */
	__u64 res = w - ((w >> 1) & 0x5555555555555555ul);
	res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
	res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
	res = res + (res >> 8);
	res = res + (res >> 16);
	return (res + (res >> 32)) & 0x00000000000000FFul;
#endif
}

/* hamming weight of an unsigned long, whatever its width */
static inline unsigned long hweight_long(unsigned long w)
{
	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}

/* index of the long word that holds bit nr */
#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)

/**
 * __ffs - find first bit in word.
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static __always_inline unsigned long __ffs(unsigned long word)
{
	int num = 0;

	/* binary search: skip the low half whenever it is all zero */
#if BITS_PER_LONG == 64
	if ((word & 0xffffffff) == 0) {
		num += 32;
		word >>= 32;
	}
#endif
	if ((word & 0xffff) == 0) {
		num += 16;
		word >>= 16;
	}
	if ((word & 0xff) == 0) {
		num += 8;
		word >>= 8;
	}
	if ((word & 0xf) == 0) {
		num += 4;
		word >>= 4;
	}
	if ((word & 0x3) == 0) {
		num += 2;
		word >>= 2;
	}
	if ((word & 0x1) == 0)
		num += 1;
	return num;
}

/* find first zero bit in a word; undefined if the word has no zero bit */
#define ffz(x) __ffs(~(x))

/*
 * Find the first set bit in a memory region.
 * Returns the bit index, or size if no bit is set in the first size bits.
 */
static inline unsigned long
find_first_bit(const unsigned long *addr, unsigned long size)
{
	const unsigned long *p = addr;
	unsigned long result = 0;
	unsigned long tmp;

	/* scan whole words first */
	while (size & ~(BITS_PER_LONG-1)) {
		if ((tmp = *(p++)))
			goto found;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;

	/* trailing partial word: ignore the bits beyond size */
	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
	if (tmp == 0UL)		/* Are any bits set? */
		return result + size;	/* Nope. */
found:
	return result + __ffs(tmp);
}

/*
 * Find the next set bit in a memory region.
 * The search starts at bit offset; returns the bit index, or size if no set
 * bit is found (including when offset >= size).
 */
static inline unsigned long
find_next_bit(const unsigned long *addr, unsigned long size,
		unsigned long offset)
{
	const unsigned long *p = addr + BITOP_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG-1);
	unsigned long tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset %= BITS_PER_LONG;
	if (offset) {
		/* partial first word: mask off the bits below offset */
		tmp = *(p++);
		tmp &= (~0UL << offset);
		if (size < BITS_PER_LONG)
			goto found_first;
		if (tmp)
			goto found_middle;
		size -= BITS_PER_LONG;
		result += BITS_PER_LONG;
	}
	/* whole words */
	while (size & ~(BITS_PER_LONG-1)) {
		if ((tmp = *(p++)))
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = *p;

found_first:
	/* last (partial) word: ignore the bits beyond size */
	tmp &= (~0UL >> (BITS_PER_LONG - size));
	if (tmp == 0UL)		/* Are any bits set? */
		return result + size;	/* Nope. */
found_middle:
	return result + __ffs(tmp);
}

/*
 * This implementation of find_{first,next}_zero_bit was stolen from
 * Linus' asm-alpha/bitops.h.
 *
 * Returns the index of the next zero bit at or after offset, or size if
 * there is none.
 */
static inline unsigned long
find_next_zero_bit(const unsigned long *addr, unsigned long size,
		   unsigned long offset)
{
	const unsigned long *p = addr + BITOP_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG-1);
	unsigned long tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset %= BITS_PER_LONG;
	if (offset) {
		/* partial first word: treat the bits below offset as set */
		tmp = *(p++);
		tmp |= ~0UL >> (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			goto found_first;
		if (~tmp)
			goto found_middle;
		size -= BITS_PER_LONG;
		result += BITS_PER_LONG;
	}
	/* whole words */
	while (size & ~(BITS_PER_LONG-1)) {
		if (~(tmp = *(p++)))
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = *p;

found_first:
	/* last (partial) word: treat the bits beyond size as set */
	tmp |= ~0UL << size;
	if (tmp == ~0UL)	/* Are any bits zero? */
		return result + size;	/* Nope. */
found_middle:
	return result + ffz(tmp);
}
#endif
diff --git a/btrfs-calc-size.c b/btrfs-calc-size.c
new file mode 100644
index 00000000..e3f02d87
--- /dev/null
+++ b/btrfs-calc-size.c
@@ -0,0 +1,506 @@
/*
 * Copyright (C) 2011 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <zlib.h>
#include "kerncompat.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "list.h"
#include "volumes.h"
#include "utils.h"

/* command line switches: -v increments verbose, -b sets no_pretty */
static int verbose = 0;
static int no_pretty = 0;

/* one histogram bucket: how often a seek of exactly this distance occurred */
struct seek {
	u64 distance;
	u64 count;
	struct rb_node n;
};

/* statistics gathered while walking one tree */
struct root_stats {
	u64 total_nodes;
	u64 total_leaves;
	u64 total_bytes;
	u64 total_inline;
	u64 total_seeks;
	u64 forward_seeks;
	u64 backward_seeks;
	u64 total_seek_len;
	u64 max_seek_len;
	u64 total_clusters;
	u64 total_cluster_size;
	u64 min_cluster_size;
	u64 max_cluster_size;
	u64 lowest_bytenr;
	u64 highest_bytenr;
	struct rb_root seek_root;	/* struct seek entries keyed by distance */
	int total_levels;
};

/*
 * Account one seek of length dist in the rb-tree at root: bump the counter of
 * an existing entry or insert a new one with count 1.
 * Returns 0 on success, -ENOMEM if a new entry cannot be allocated.
 */
static int add_seek(struct rb_root *root, u64 dist)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct seek *seek = NULL;

	/* descend to the matching entry or to the insertion point */
	while (*p) {
		parent = *p;
		seek = rb_entry(parent, struct seek, n);

		if (dist < seek->distance) {
			p = &(*p)->rb_left;
		} else if (dist > seek->distance) {
			p = &(*p)->rb_right;
		} else {
			seek->count++;
			return 0;
		}
	}

	seek = malloc(sizeof(struct seek));
	if (!seek)
		return -ENOMEM;
	seek->distance = dist;
	seek->count = 1;
	rb_link_node(&seek->n, parent, p);
	rb_insert_color(&seek->n, root);
	return 0;
}

/*
 * Account the leaf at path->nodes[0] in stat. When find_inline is set, the
 * lengths of all inline file extents in the leaf are added to
 * stat->total_inline. Always returns 0.
 */
static int walk_leaf(struct btrfs_root *root, struct btrfs_path *path,
		     struct root_stats *stat, int find_inline)
{
	struct extent_buffer *b = path->nodes[0];
	struct btrfs_file_extent_item *fi;
	struct btrfs_key found_key;
	int i;

	stat->total_bytes += root->leafsize;
	stat->total_leaves++;

	if (!find_inline)
		return 0;

	for (i = 0; i < btrfs_header_nritems(b); i++) {
		btrfs_item_key_to_cpu(b, &found_key, i);
		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		fi =
btrfs_item_ptr(b, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(b, fi) == BTRFS_FILE_EXTENT_INLINE) + stat->total_inline += + btrfs_file_extent_inline_item_len(b, + btrfs_item_nr(i)); + } + + return 0; +} + +static u64 calc_distance(u64 block1, u64 block2) +{ + if (block1 < block2) + return block2 - block1; + return block1 - block2; +} + +static int walk_nodes(struct btrfs_root *root, struct btrfs_path *path, + struct root_stats *stat, int level, int find_inline) +{ + struct extent_buffer *b = path->nodes[level]; + u64 last_block; + u64 cluster_size = root->leafsize; + int i; + int ret = 0; + + stat->total_bytes += root->nodesize; + stat->total_nodes++; + + last_block = btrfs_header_bytenr(b); + for (i = 0; i < btrfs_header_nritems(b); i++) { + struct extent_buffer *tmp = NULL; + u64 cur_blocknr = btrfs_node_blockptr(b, i); + + path->slots[level] = i; + if ((level - 1) > 0 || find_inline) { + tmp = read_tree_block(root, cur_blocknr, + btrfs_level_size(root, level - 1), + btrfs_node_ptr_generation(b, i)); + if (!extent_buffer_uptodate(tmp)) { + fprintf(stderr, "Failed to read blocknr %Lu\n", + btrfs_node_blockptr(b, i)); + continue; + } + path->nodes[level - 1] = tmp; + } + if (level - 1) + ret = walk_nodes(root, path, stat, level - 1, + find_inline); + else + ret = walk_leaf(root, path, stat, find_inline); + if (last_block + root->leafsize != cur_blocknr) { + u64 distance = calc_distance(last_block + + root->leafsize, + cur_blocknr); + stat->total_seeks++; + stat->total_seek_len += distance; + if (stat->max_seek_len < distance) + stat->max_seek_len = distance; + if (add_seek(&stat->seek_root, distance)) { + fprintf(stderr, "Error adding new seek\n"); + ret = -ENOMEM; + break; + } + + if (last_block < cur_blocknr) + stat->forward_seeks++; + else + stat->backward_seeks++; + if (cluster_size != root->leafsize) { + stat->total_cluster_size += cluster_size; + stat->total_clusters++; + if (cluster_size < stat->min_cluster_size) + stat->min_cluster_size 
= cluster_size; + if (cluster_size > stat->max_cluster_size) + stat->max_cluster_size = cluster_size; + } + cluster_size = root->leafsize; + } else { + cluster_size += root->leafsize; + } + last_block = cur_blocknr; + if (cur_blocknr < stat->lowest_bytenr) + stat->lowest_bytenr = cur_blocknr; + if (cur_blocknr > stat->highest_bytenr) + stat->highest_bytenr = cur_blocknr; + free_extent_buffer(tmp); + if (ret) { + fprintf(stderr, "Error walking down path\n"); + break; + } + } + + return ret; +} + +static void print_seek_histogram(struct root_stats *stat) +{ + struct rb_node *n = rb_first(&stat->seek_root); + struct seek *seek; + u64 tick_interval; + u64 group_start = 0; + u64 group_count = 0; + u64 group_end = 0; + u64 i; + u64 max_seek = stat->max_seek_len; + int digits = 1; + + if (stat->total_seeks < 20) + return; + + while ((max_seek /= 10)) + digits++; + + /* Make a tick count as 5% of the total seeks */ + tick_interval = stat->total_seeks / 20; + printf("\tSeek histogram\n"); + for (; n; n = rb_next(n)) { + u64 ticks, gticks = 0; + + seek = rb_entry(n, struct seek, n); + ticks = seek->count / tick_interval; + if (group_count) + gticks = group_count / tick_interval; + + if (ticks <= 2 && gticks <= 2) { + if (group_count == 0) + group_start = seek->distance; + group_end = seek->distance; + group_count += seek->count; + continue; + } + + if (group_count) { + + gticks = group_count / tick_interval; + printf("\t\t%*Lu - %*Lu: %*Lu ", digits, group_start, + digits, group_end, digits, group_count); + if (gticks) { + for (i = 0; i < gticks; i++) + printf("#"); + printf("\n"); + } else { + printf("|\n"); + } + group_count = 0; + } + + if (ticks <= 2) + continue; + + printf("\t\t%*Lu - %*Lu: %*Lu ", digits, seek->distance, + digits, seek->distance, digits, seek->count); + for (i = 0; i < ticks; i++) + printf("#"); + printf("\n"); + } + if (group_count) { + u64 gticks; + + gticks = group_count / tick_interval; + printf("\t\t%*Lu - %*Lu: %*Lu ", digits, group_start, + 
		       digits, group_end, digits, group_count);
		if (gticks) {
			for (i = 0; i < gticks; i++)
				printf("#");
			printf("\n");
		} else {
			printf("|\n");
		}
		group_count = 0;
	}
}

/*
 * Store x - y in result. Note that y is modified while the microsecond
 * carry is normalised.
 */
static void timeval_subtract(struct timeval *result,struct timeval *x,
			     struct timeval *y)
{
	/* borrow whole seconds so that x->tv_usec - y->tv_usec is not negative */
	if (x->tv_usec < y->tv_usec) {
		int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
		y->tv_usec -= 1000000 * nsec;
		y->tv_sec += nsec;
	}

	/* carry whole seconds out of an oversized microsecond difference */
	if (x->tv_usec - y->tv_usec > 1000000) {
		int nsec = (x->tv_usec - y->tv_usec) / 1000000;
		y->tv_usec += 1000000 * nsec;
		y->tv_sec -= nsec;
	}

	result->tv_sec = x->tv_sec - y->tv_sec;
	result->tv_usec = x->tv_usec - y->tv_usec;
}

/*
 * Read the tree identified by key, walk it and print size, seek and cluster
 * statistics. find_inline additionally accounts inline file extent data.
 * Returns 0 on success, 1 if the root cannot be read or no path can be
 * allocated, otherwise the error of the failed walk.
 */
static int calc_root_size(struct btrfs_root *tree_root, struct btrfs_key *key,
			  int find_inline)
{
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct rb_node *n;
	struct timeval start, end, diff = {0};
	struct root_stats stat;
	int level;
	int ret = 0;
	int size_fail = 0;

	root = btrfs_read_fs_root(tree_root->fs_info, key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Failed to read root %Lu\n", key->objectid);
		return 1;
	}

	path = btrfs_alloc_path();
	if (!path) {
		fprintf(stderr, "Could not allocate path\n");
		return 1;
	}

	memset(&stat, 0, sizeof(stat));
	level = btrfs_header_level(root->node);
	stat.lowest_bytenr = btrfs_header_bytenr(root->node);
	stat.highest_bytenr = stat.lowest_bytenr;
	/* (u64)-1 marks "no cluster seen yet", see out_print */
	stat.min_cluster_size = (u64)-1;
	stat.max_cluster_size = root->leafsize;
	path->nodes[level] = root->node;
	if (gettimeofday(&start, NULL)) {
		fprintf(stderr, "Error getting time: %d\n", errno);
		goto out;
	}
	/* the root is a single leaf: nothing to time, diff stays zero */
	if (!level) {
		ret = walk_leaf(root, path, &stat, find_inline);
		if (ret)
			goto out;
		goto out_print;
	}

	ret = walk_nodes(root, path, &stat, level, find_inline);
	if (ret)
		goto out;
	if (gettimeofday(&end, NULL)) {
		fprintf(stderr, "Error getting time: %d\n", errno);
		goto out;
	}
	timeval_subtract(&diff, &end, &start);
out_print:
	/* no cluster was closed: avoid dividing by zero below */
	if (stat.min_cluster_size == (u64)-1) {
		stat.min_cluster_size = 0;
		stat.total_clusters = 1;
	}

	/* raw byte counts (-b) or human readable sizes */
	if (no_pretty || size_fail) {
		printf("\tTotal size: %Lu\n", stat.total_bytes);
		printf("\t\tInline data: %Lu\n", stat.total_inline);
		printf("\tTotal seeks: %Lu\n", stat.total_seeks);
		printf("\t\tForward seeks: %Lu\n", stat.forward_seeks);
		printf("\t\tBackward seeks: %Lu\n", stat.backward_seeks);
		printf("\t\tAvg seek len: %llu\n", stat.total_seeks ?
			stat.total_seek_len / stat.total_seeks : 0);
		print_seek_histogram(&stat);
		printf("\tTotal clusters: %Lu\n", stat.total_clusters);
		printf("\t\tAvg cluster size: %Lu\n", stat.total_cluster_size /
		       stat.total_clusters);
		printf("\t\tMin cluster size: %Lu\n", stat.min_cluster_size);
		printf("\t\tMax cluster size: %Lu\n", stat.max_cluster_size);
		printf("\tTotal disk spread: %Lu\n", stat.highest_bytenr -
		       stat.lowest_bytenr);
		printf("\tTotal read time: %d s %d us\n", (int)diff.tv_sec,
		       (int)diff.tv_usec);
		printf("\tLevels: %d\n", level + 1);
	} else {
		printf("\tTotal size: %s\n", pretty_size(stat.total_bytes));
		printf("\t\tInline data: %s\n", pretty_size(stat.total_inline));
		printf("\tTotal seeks: %Lu\n", stat.total_seeks);
		printf("\t\tForward seeks: %Lu\n", stat.forward_seeks);
		printf("\t\tBackward seeks: %Lu\n", stat.backward_seeks);
		printf("\t\tAvg seek len: %s\n", stat.total_seeks ?
+ pretty_size(stat.total_seek_len / stat.total_seeks) : + pretty_size(0)); + print_seek_histogram(&stat); + printf("\tTotal clusters: %Lu\n", stat.total_clusters); + printf("\t\tAvg cluster size: %s\n", + pretty_size((stat.total_cluster_size / + stat.total_clusters))); + printf("\t\tMin cluster size: %s\n", + pretty_size(stat.min_cluster_size)); + printf("\t\tMax cluster size: %s\n", + pretty_size(stat.max_cluster_size)); + printf("\tTotal disk spread: %s\n", + pretty_size(stat.highest_bytenr - + stat.lowest_bytenr)); + printf("\tTotal read time: %d s %d us\n", (int)diff.tv_sec, + (int)diff.tv_usec); + printf("\tLevels: %d\n", level + 1); + } +out: + while ((n = rb_first(&stat.seek_root)) != NULL) { + struct seek *seek = rb_entry(n, struct seek, n); + rb_erase(n, &stat.seek_root); + free(seek); + } + + /* + * We only use path to save node data in iterating, + * without holding eb's ref_cnt in path. + * Don't use btrfs_free_path() here, it will free these + * eb again, and cause many problems, as negative ref_cnt + * or invalid memory access. + */ + free(path); + return ret; +} + +static void usage(void) +{ + fprintf(stderr, "Usage: calc-size [-v] [-b] <device>\n"); +} + +int main(int argc, char **argv) +{ + struct btrfs_key key; + struct btrfs_root *root; + int opt; + int ret = 0; + + while ((opt = getopt(argc, argv, "vb")) != -1) { + switch (opt) { + case 'v': + verbose++; + break; + case 'b': + no_pretty = 1; + break; + default: + usage(); + exit(1); + } + } + + set_argv0(argv); + argc = argc - optind; + if (check_argc_min(argc, 1)) { + usage(); + exit(1); + } + + /* + if ((ret = check_mounted(argv[optind])) < 0) { + fprintf(stderr, "Could not check mount status: %d\n", ret); + if (ret == -EACCES) + fprintf(stderr, "Maybe you need to run as root?\n"); + return ret; + } else if (ret) { + fprintf(stderr, "%s is currently mounted. 
Aborting.\n", + argv[optind]); + return -EBUSY; + } + */ + + root = open_ctree(argv[optind], 0, 0); + if (!root) { + fprintf(stderr, "Couldn't open ctree\n"); + exit(1); + } + + printf("Calculating size of root tree\n"); + key.objectid = BTRFS_ROOT_TREE_OBJECTID; + ret = calc_root_size(root, &key, 0); + if (ret) + goto out; + + printf("Calculating size of extent tree\n"); + key.objectid = BTRFS_EXTENT_TREE_OBJECTID; + ret = calc_root_size(root, &key, 0); + if (ret) + goto out; + + printf("Calculating size of csum tree\n"); + key.objectid = BTRFS_CSUM_TREE_OBJECTID; + ret = calc_root_size(root, &key, 0); + if (ret) + goto out; + + key.objectid = BTRFS_FS_TREE_OBJECTID; + key.offset = (u64)-1; + printf("Calculatin' size of fs tree\n"); + ret = calc_root_size(root, &key, 1); + if (ret) + goto out; +out: + close_ctree(root); + btrfs_close_all_devices(); + return ret; +} diff --git a/btrfs-completion b/btrfs-completion new file mode 100644 index 00000000..a34191bd --- /dev/null +++ b/btrfs-completion @@ -0,0 +1,152 @@ +# original by Alfredo Esteban <aedelatorre at xxxxxxxxx> +# (http://www.spinics.net/lists/linux-btrfs/msg15899.html) +# edited by Joseph Wang <joequant at gmail.com> +# (http://lists.alioth.debian.org/pipermail/bash-completion-devel/2013-June/004868.html) +# edited by John C F <john.ch.fr at gmail.com> on 2015-02-02 + +_btrfs_devs() +{ + local DEVS + DEVS=''; while read dev; do DEVS+="$dev "; done < <(lsblk -pnro name) + COMPREPLY+=( $( compgen -W "$DEVS" -- "$cur" ) ) +} + +_btrfs_mnts() +{ + local MNTS + MNTS='' + while read mnt; do MNTS+="$mnt " + done < <(mount | awk '{print $3}') + COMPREPLY+=( $( compgen -W "$MNTS" -- "$cur" ) ) +} + +_btrfs() +{ + local cur prev words cword + _init_completion || return + + COMPREPLY=() + + local cmd=${words[1]} + + commands='subvolume filesystem balance device scrub check rescue restore inspect-internal property send receive quota qgroup replace help version' + commands_subvolume='create delete list snapshot 
find-new get-default set-default show sync' + commands_filesystem='defragment sync resize show df label usage' + commands_balance='start pause cancel resume status' + commands_device='scan add delete remove ready stats usage' + commands_scrub='start cancel resume status' + commands_rescue='chunk-recover super-recover' + commands_inspect_internal='inode-resolve logical-resolve subvolid-resolve rootid min-dev-size' + commands_property='get set list' + commands_quota='enable disable rescan' + commands_qgroup='assign remove create destroy show limit' + commands_replace='start status cancel' + + if [[ "$cur" == -* && $cword -le 3 && "$cmd" != "help" ]]; then + COMPREPLY=( $( compgen -W '--help' -- "$cur" ) ) + return 0 + fi + + if [[ $cword -eq 1 ]]; then + COMPREPLY=( $( compgen -W "$commands" -- "$cur" ) ) + return 0 + elif [[ $cword -eq 2 ]]; then + case $cmd in + subvolume) + opts="$commands_subvolume" + ;; + filesystem) + opts="$commands_filesystem" + ;; + balance) + opts="$commands_balance" + ;; + device) + opts="$commands_device" + ;; + scrub) + opts="$commands_scrub" + ;; + check) + _btrfs_devs + return 0 + ;; + rescue) + opts="$commands_rescue" + ;; + restore) + _btrfs_devs + return 0 + ;; + inspect-internal) + opts="$commands_inspect_internal" + ;; + property) + opts="$commands_property" + ;; + send|receive) + _filedir -d + return 0 + ;; + quota) + opts="$commands_quota" + ;; + qgroup) + opts="$commands_qgroup" + ;; + replace) + opts="$commands_replace" + ;; + help) + opts="--full" + ;; + version) + return 0 + ;; + esac + COMPREPLY=( $( compgen -W "$opts" -- "$cur" ) ) + return 0 + elif [[ $cword -eq 3 ]]; then + case $cmd in + filesystem) + case $prev in + defragment) + _filedir + return 0 + ;; + label) + _btrfs_mnts + _btrfs_devs + return 0 + ;; + esac + ;; + device|rescue) + _btrfs_devs + return 0 + ;; + replace) + case $prev in + status|cancel) + _btrfs_mnts + return 0 + ;; + start) + _btrfs_devs + return 0 + ;; + esac + ;; + esac + fi + + if [[ "$cmd" == 
"receive" && "$prev" == "-f" ]]; then + _filedir + return 0 + fi + + _filedir -d + return 0 +} + +complete -F _btrfs btrfs diff --git a/btrfs-convert.c b/btrfs-convert.c new file mode 100644 index 00000000..4baa68ec --- /dev/null +++ b/btrfs-convert.c @@ -0,0 +1,3206 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include "kerncompat.h" + +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <uuid/uuid.h> +#include <linux/limits.h> +#include <getopt.h> + +#include "ctree.h" +#include "disk-io.h" +#include "volumes.h" +#include "transaction.h" +#include "crc32c.h" +#include "utils.h" +#include "task-utils.h" +#include <ext2fs/ext2_fs.h> +#include <ext2fs/ext2fs.h> +#include <ext2fs/ext2_ext_attr.h> + +#define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO) +#define CONV_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID + +struct task_ctx { + uint32_t max_copy_inodes; + uint32_t cur_copy_inodes; + struct task_info *info; +}; + +static void *print_copied_inodes(void *p) +{ + struct task_ctx *priv = p; + const char work_indicator[] = { '.', 'o', 'O', 'o' }; + uint32_t count = 0; + + task_period_start(priv->info, 1000 /* 1s */); + while (1) { + count++; + 
printf("copy inodes [%c] [%10d/%10d]\r", + work_indicator[count % 4], priv->cur_copy_inodes, + priv->max_copy_inodes); + fflush(stdout); + task_period_wait(priv->info); + } + + return NULL; +} + +static int after_copied_inodes(void *p) +{ + printf("\n"); + fflush(stdout); + + return 0; +} + +struct btrfs_convert_context; +struct btrfs_convert_operations { + const char *name; + int (*open_fs)(struct btrfs_convert_context *cctx, const char *devname); + int (*alloc_block)(struct btrfs_convert_context *cctx, u64 goal, + u64 *block_ret); + int (*alloc_block_range)(struct btrfs_convert_context *cctx, u64 goal, + int num, u64 *block_ret); + int (*test_block)(struct btrfs_convert_context *cctx, u64 block); + void (*free_block)(struct btrfs_convert_context *cctx, u64 block); + void (*free_block_range)(struct btrfs_convert_context *cctx, u64 block, + int num); + int (*copy_inodes)(struct btrfs_convert_context *cctx, + struct btrfs_root *root, int datacsum, + int packing, int noxattr, struct task_ctx *p); + void (*close_fs)(struct btrfs_convert_context *cctx); +}; + +struct btrfs_convert_context { + u32 blocksize; + u32 first_data_block; + u32 block_count; + u32 inodes_count; + u32 free_inodes_count; + u64 total_bytes; + char *volume_name; + const struct btrfs_convert_operations *convert_ops; + + /* The accurate used space of old filesystem */ + struct cache_tree used; + + /* Batched ranges which must be covered by data chunks */ + struct cache_tree data_chunks; + + /* Free space which is not covered by data_chunks */ + struct cache_tree free; + + void *fs_data; +}; + +static void init_convert_context(struct btrfs_convert_context *cctx) +{ + cache_tree_init(&cctx->used); + cache_tree_init(&cctx->data_chunks); + cache_tree_init(&cctx->free); +} + +static void clean_convert_context(struct btrfs_convert_context *cctx) +{ + free_extent_cache_tree(&cctx->used); + free_extent_cache_tree(&cctx->data_chunks); + free_extent_cache_tree(&cctx->free); +} + +static inline int 
convert_alloc_block(struct btrfs_convert_context *cctx, + u64 goal, u64 *ret) +{ + return cctx->convert_ops->alloc_block(cctx, goal, ret); +} + +static inline int convert_alloc_block_range(struct btrfs_convert_context *cctx, + u64 goal, int num, u64 *ret) +{ + return cctx->convert_ops->alloc_block_range(cctx, goal, num, ret); +} + +static inline int convert_test_block(struct btrfs_convert_context *cctx, + u64 block) +{ + return cctx->convert_ops->test_block(cctx, block); +} + +static inline void convert_free_block(struct btrfs_convert_context *cctx, + u64 block) +{ + cctx->convert_ops->free_block(cctx, block); +} + +static inline void convert_free_block_range(struct btrfs_convert_context *cctx, + u64 block, int num) +{ + cctx->convert_ops->free_block_range(cctx, block, num); +} + +static inline int copy_inodes(struct btrfs_convert_context *cctx, + struct btrfs_root *root, int datacsum, + int packing, int noxattr, struct task_ctx *p) +{ + return cctx->convert_ops->copy_inodes(cctx, root, datacsum, packing, + noxattr, p); +} + +static inline void convert_close_fs(struct btrfs_convert_context *cctx) +{ + cctx->convert_ops->close_fs(cctx); +} + +/* + * Open Ext2fs in readonly mode, read block allocation bitmap and + * inode bitmap into memory. + */ +static int ext2_open_fs(struct btrfs_convert_context *cctx, const char *name) +{ + errcode_t ret; + ext2_filsys ext2_fs; + ext2_ino_t ino; + ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs); + if (ret) { + fprintf(stderr, "ext2fs_open: %s\n", error_message(ret)); + goto fail; + } + ret = ext2fs_read_inode_bitmap(ext2_fs); + if (ret) { + fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n", + error_message(ret)); + goto fail; + } + ret = ext2fs_read_block_bitmap(ext2_fs); + if (ret) { + fprintf(stderr, "ext2fs_read_block_bitmap: %s\n", + error_message(ret)); + goto fail; + } + /* + * search each block group for a free inode. this set up + * uninit block/inode bitmaps appropriately. 
+ */ + ino = 1; + while (ino <= ext2_fs->super->s_inodes_count) { + ext2_ino_t foo; + ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo); + ino += EXT2_INODES_PER_GROUP(ext2_fs->super); + } + + if (!(ext2_fs->super->s_feature_incompat & + EXT2_FEATURE_INCOMPAT_FILETYPE)) { + fprintf(stderr, "filetype feature is missing\n"); + goto fail; + } + + cctx->fs_data = ext2_fs; + cctx->blocksize = ext2_fs->blocksize; + cctx->block_count = ext2_fs->super->s_blocks_count; + cctx->total_bytes = ext2_fs->blocksize * ext2_fs->super->s_blocks_count; + cctx->volume_name = strndup(ext2_fs->super->s_volume_name, 16); + cctx->first_data_block = ext2_fs->super->s_first_data_block; + cctx->inodes_count = ext2_fs->super->s_inodes_count; + cctx->free_inodes_count = ext2_fs->super->s_free_inodes_count; + return 0; +fail: + return -1; +} + +static void ext2_close_fs(struct btrfs_convert_context *cctx) +{ + if (cctx->volume_name) { + free(cctx->volume_name); + cctx->volume_name = NULL; + } + ext2fs_close(cctx->fs_data); +} + +static int ext2_alloc_block(struct btrfs_convert_context *cctx, + u64 goal, u64 *block_ret) +{ + ext2_filsys fs = cctx->fs_data; + blk_t block; + + if (!ext2fs_new_block(fs, goal, NULL, &block)) { + ext2fs_fast_mark_block_bitmap(fs->block_map, block); + *block_ret = block; + return 0; + } + return -ENOSPC; +} + +static int ext2_alloc_block_range(struct btrfs_convert_context *cctx, u64 goal, + int num, u64 *block_ret) +{ + ext2_filsys fs = cctx->fs_data; + blk_t block; + ext2fs_block_bitmap bitmap = fs->block_map; + blk_t start = ext2fs_get_block_bitmap_start(bitmap); + blk_t end = ext2fs_get_block_bitmap_end(bitmap); + + for (block = max_t(u64, goal, start); block + num < end; block++) { + if (ext2fs_fast_test_block_bitmap_range(bitmap, block, num)) { + ext2fs_fast_mark_block_bitmap_range(bitmap, block, + num); + *block_ret = block; + return 0; + } + } + return -ENOSPC; +} + +static void ext2_free_block(struct btrfs_convert_context *cctx, u64 block) +{ + ext2_filsys fs = 
cctx->fs_data; + + BUG_ON(block != (blk_t)block); + ext2fs_fast_unmark_block_bitmap(fs->block_map, block); +} + +static void ext2_free_block_range(struct btrfs_convert_context *cctx, u64 block, int num) +{ + ext2_filsys fs = cctx->fs_data; + + BUG_ON(block != (blk_t)block); + ext2fs_fast_unmark_block_bitmap_range(fs->block_map, block, num); +} + +static int cache_free_extents(struct btrfs_root *root, + struct btrfs_convert_context *cctx) + +{ + int i, ret = 0; + blk_t block; + u64 bytenr; + u64 blocksize = cctx->blocksize; + + block = cctx->first_data_block; + for (; block < cctx->block_count; block++) { + if (convert_test_block(cctx, block)) + continue; + bytenr = block * blocksize; + ret = set_extent_dirty(&root->fs_info->free_space_cache, + bytenr, bytenr + blocksize - 1, 0); + BUG_ON(ret); + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1); + if (bytenr >= blocksize * cctx->block_count) + break; + clear_extent_dirty(&root->fs_info->free_space_cache, bytenr, + bytenr + BTRFS_STRIPE_LEN - 1, 0); + } + + clear_extent_dirty(&root->fs_info->free_space_cache, + 0, BTRFS_SUPER_INFO_OFFSET - 1, 0); + + return 0; +} + +static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes, + u64 hint_byte, struct btrfs_key *ins, + int metadata) +{ + u64 start; + u64 end; + u64 last = hint_byte; + int ret; + int wrapped = 0; + struct btrfs_block_group_cache *cache; + + while(1) { + ret = find_first_extent_bit(&root->fs_info->free_space_cache, + last, &start, &end, EXTENT_DIRTY); + if (ret) { + if (wrapped++ == 0) { + last = 0; + continue; + } else { + goto fail; + } + } + + start = max(last, start); + last = end + 1; + if (last - start < num_bytes) + continue; + + last = start + num_bytes; + if (test_range_bit(&root->fs_info->pinned_extents, + start, last - 1, EXTENT_DIRTY, 0)) + continue; + + cache = btrfs_lookup_block_group(root->fs_info, start); + BUG_ON(!cache); + if (cache->flags & 
BTRFS_BLOCK_GROUP_SYSTEM || + last > cache->key.objectid + cache->key.offset) { + last = cache->key.objectid + cache->key.offset; + continue; + } + + if (metadata) { + BUG_ON(num_bytes != root->nodesize); + if (check_crossing_stripes(start, num_bytes)) { + last = round_down(start + num_bytes, + BTRFS_STRIPE_LEN); + continue; + } + } + clear_extent_dirty(&root->fs_info->free_space_cache, + start, start + num_bytes - 1, 0); + + ins->objectid = start; + ins->offset = num_bytes; + ins->type = BTRFS_EXTENT_ITEM_KEY; + return 0; + } +fail: + fprintf(stderr, "not enough free space\n"); + return -ENOSPC; +} + +static int intersect_with_sb(u64 bytenr, u64 num_bytes) +{ + int i; + u64 offset; + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + offset = btrfs_sb_offset(i); + offset &= ~((u64)BTRFS_STRIPE_LEN - 1); + + if (bytenr < offset + BTRFS_STRIPE_LEN && + bytenr + num_bytes > offset) + return 1; + } + return 0; +} + +static int custom_free_extent(struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ + return intersect_with_sb(bytenr, num_bytes); +} + +static struct btrfs_extent_ops extent_ops = { + .alloc_extent = custom_alloc_extent, + .free_extent = custom_free_extent, +}; + +static int convert_insert_dirent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, size_t name_len, + u64 dir, u64 objectid, + u8 file_type, u64 index_cnt, + struct btrfs_inode_item *inode) +{ + int ret; + u64 inode_size; + struct btrfs_key location = { + .objectid = objectid, + .offset = 0, + .type = BTRFS_INODE_ITEM_KEY, + }; + + ret = btrfs_insert_dir_item(trans, root, name, name_len, + dir, &location, file_type, index_cnt); + if (ret) + return ret; + ret = btrfs_insert_inode_ref(trans, root, name, name_len, + objectid, dir, index_cnt); + if (ret) + return ret; + inode_size = btrfs_stack_inode_size(inode) + name_len * 2; + btrfs_set_stack_inode_size(inode, inode_size); + + return 0; +} + +struct dir_iterate_data { + struct btrfs_trans_handle *trans; + struct 
btrfs_root *root; + struct btrfs_inode_item *inode; + u64 objectid; + u64 index_cnt; + u64 parent; + int errcode; +}; + +static u8 filetype_conversion_table[EXT2_FT_MAX] = { + [EXT2_FT_UNKNOWN] = BTRFS_FT_UNKNOWN, + [EXT2_FT_REG_FILE] = BTRFS_FT_REG_FILE, + [EXT2_FT_DIR] = BTRFS_FT_DIR, + [EXT2_FT_CHRDEV] = BTRFS_FT_CHRDEV, + [EXT2_FT_BLKDEV] = BTRFS_FT_BLKDEV, + [EXT2_FT_FIFO] = BTRFS_FT_FIFO, + [EXT2_FT_SOCK] = BTRFS_FT_SOCK, + [EXT2_FT_SYMLINK] = BTRFS_FT_SYMLINK, +}; + +static int dir_iterate_proc(ext2_ino_t dir, int entry, + struct ext2_dir_entry *dirent, + int offset, int blocksize, + char *buf,void *priv_data) +{ + int ret; + int file_type; + u64 objectid; + char dotdot[] = ".."; + struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data; + int name_len; + + name_len = dirent->name_len & 0xFF; + + objectid = dirent->inode + INO_OFFSET; + if (!strncmp(dirent->name, dotdot, name_len)) { + if (name_len == 2) { + BUG_ON(idata->parent != 0); + idata->parent = objectid; + } + return 0; + } + if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO) + return 0; + + file_type = dirent->name_len >> 8; + BUG_ON(file_type > EXT2_FT_SYMLINK); + + ret = convert_insert_dirent(idata->trans, idata->root, dirent->name, + name_len, idata->objectid, objectid, + filetype_conversion_table[file_type], + idata->index_cnt, idata->inode); + if (ret < 0) { + idata->errcode = ret; + return BLOCK_ABORT; + } + + idata->index_cnt++; + return 0; +} + +static int create_dir_entries(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *btrfs_inode, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino) +{ + int ret; + errcode_t err; + struct dir_iterate_data data = { + .trans = trans, + .root = root, + .inode = btrfs_inode, + .objectid = objectid, + .index_cnt = 2, + .parent = 0, + .errcode = 0, + }; + + err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL, + dir_iterate_proc, &data); + if (err) + goto error; + ret = data.errcode; + if (ret == 0 && 
data.parent == objectid) { + ret = btrfs_insert_inode_ref(trans, root, "..", 2, + objectid, objectid, 0); + } + return ret; +error: + fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err)); + return -1; +} + +static int read_disk_extent(struct btrfs_root *root, u64 bytenr, + u32 num_bytes, char *buffer) +{ + int ret; + struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices; + + ret = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr); + if (ret != num_bytes) + goto fail; + ret = 0; +fail: + if (ret > 0) + ret = -1; + return ret; +} + +static int csum_disk_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 disk_bytenr, u64 num_bytes) +{ + u32 blocksize = root->sectorsize; + u64 offset; + char *buffer; + int ret = 0; + + buffer = malloc(blocksize); + if (!buffer) + return -ENOMEM; + for (offset = 0; offset < num_bytes; offset += blocksize) { + ret = read_disk_extent(root, disk_bytenr + offset, + blocksize, buffer); + if (ret) + break; + ret = btrfs_csum_file_block(trans, + root->fs_info->csum_root, + disk_bytenr + num_bytes, + disk_bytenr + offset, + buffer, blocksize); + if (ret) + break; + } + free(buffer); + return ret; +} + +struct blk_iterate_data { + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + struct btrfs_inode_item *inode; + u64 objectid; + u64 first_block; + u64 disk_block; + u64 num_blocks; + u64 boundary; + int checksum; + int errcode; +}; + +static void init_blk_iterate_data(struct blk_iterate_data *data, + struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_inode_item *inode, + u64 objectid, int checksum) +{ + data->trans = trans; + data->root = root; + data->inode = inode; + data->objectid = objectid; + data->first_block = 0; + data->disk_block = 0; + data->num_blocks = 0; + data->boundary = (u64)-1; + data->checksum = checksum; + data->errcode = 0; +} + +static int record_file_blocks(struct blk_iterate_data *data, + u64 file_block, u64 disk_block, u64 num_blocks) +{ + 
int ret; + struct btrfs_root *root = data->root; + u64 file_pos = file_block * root->sectorsize; + u64 disk_bytenr = disk_block * root->sectorsize; + u64 num_bytes = num_blocks * root->sectorsize; + ret = btrfs_record_file_extent(data->trans, data->root, + data->objectid, data->inode, file_pos, + disk_bytenr, num_bytes); + + if (ret || !data->checksum || disk_bytenr == 0) + return ret; + + return csum_disk_extent(data->trans, data->root, disk_bytenr, + num_bytes); +} + +static int block_iterate_proc(u64 disk_block, u64 file_block, + struct blk_iterate_data *idata) +{ + int ret = 0; + int sb_region; + int do_barrier; + struct btrfs_root *root = idata->root; + struct btrfs_block_group_cache *cache; + u64 bytenr = disk_block * root->sectorsize; + + sb_region = intersect_with_sb(bytenr, root->sectorsize); + do_barrier = sb_region || disk_block >= idata->boundary; + if ((idata->num_blocks > 0 && do_barrier) || + (file_block > idata->first_block + idata->num_blocks) || + (disk_block != idata->disk_block + idata->num_blocks)) { + if (idata->num_blocks > 0) { + ret = record_file_blocks(idata, idata->first_block, + idata->disk_block, + idata->num_blocks); + if (ret) + goto fail; + idata->first_block += idata->num_blocks; + idata->num_blocks = 0; + } + if (file_block > idata->first_block) { + ret = record_file_blocks(idata, idata->first_block, + 0, file_block - idata->first_block); + if (ret) + goto fail; + } + + if (sb_region) { + bytenr += BTRFS_STRIPE_LEN - 1; + bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1); + } else { + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + bytenr = cache->key.objectid + cache->key.offset; + } + + idata->first_block = file_block; + idata->disk_block = disk_block; + idata->boundary = bytenr / root->sectorsize; + } + idata->num_blocks++; +fail: + return ret; +} + +static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr, + e2_blkcnt_t blockcnt, blk_t ref_block, + int ref_offset, void *priv_data) +{ + int ret; + struct 
blk_iterate_data *idata; + idata = (struct blk_iterate_data *)priv_data; + ret = block_iterate_proc(*blocknr, blockcnt, idata); + if (ret) { + idata->errcode = ret; + return BLOCK_ABORT; + } + return 0; +} + +/* + * traverse file's data blocks, record these data blocks as file extents. + */ +static int create_file_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *btrfs_inode, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino, + int datacsum, int packing) +{ + int ret; + char *buffer = NULL; + errcode_t err; + u32 last_block; + u32 sectorsize = root->sectorsize; + u64 inode_size = btrfs_stack_inode_size(btrfs_inode); + struct blk_iterate_data data; + + init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid, + datacsum); + + err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY, + NULL, __block_iterate_proc, &data); + if (err) + goto error; + ret = data.errcode; + if (ret) + goto fail; + if (packing && data.first_block == 0 && data.num_blocks > 0 && + inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) { + u64 num_bytes = data.num_blocks * sectorsize; + u64 disk_bytenr = data.disk_block * sectorsize; + u64 nbytes; + + buffer = malloc(num_bytes); + if (!buffer) + return -ENOMEM; + ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer); + if (ret) + goto fail; + if (num_bytes > inode_size) + num_bytes = inode_size; + ret = btrfs_insert_inline_extent(trans, root, objectid, + 0, buffer, num_bytes); + if (ret) + goto fail; + nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes; + btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes); + } else if (data.num_blocks > 0) { + ret = record_file_blocks(&data, data.first_block, + data.disk_block, data.num_blocks); + if (ret) + goto fail; + } + data.first_block += data.num_blocks; + last_block = (inode_size + sectorsize - 1) / sectorsize; + if (last_block > data.first_block) { + ret = record_file_blocks(&data, data.first_block, 0, + last_block - 
data.first_block); + } +fail: + free(buffer); + return ret; +error: + fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err)); + return -1; +} + +static int create_symbol_link(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *btrfs_inode, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino, + struct ext2_inode *ext2_inode) +{ + int ret; + char *pathname; + u64 inode_size = btrfs_stack_inode_size(btrfs_inode); + if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) { + btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1); + ret = create_file_extents(trans, root, objectid, btrfs_inode, + ext2_fs, ext2_ino, 1, 1); + btrfs_set_stack_inode_size(btrfs_inode, inode_size); + return ret; + } + + pathname = (char *)&(ext2_inode->i_block[0]); + BUG_ON(pathname[inode_size] != 0); + ret = btrfs_insert_inline_extent(trans, root, objectid, 0, + pathname, inode_size + 1); + btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1); + return ret; +} + +/* + * Following xattr/acl related codes are based on codes in + * fs/ext3/xattr.c and fs/ext3/acl.c + */ +#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr)) +#define EXT2_XATTR_BFIRST(ptr) \ + ((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1)) +#define EXT2_XATTR_IHDR(inode) \ + ((struct ext2_ext_attr_header *) ((void *)(inode) + \ + EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize)) +#define EXT2_XATTR_IFIRST(inode) \ + ((struct ext2_ext_attr_entry *) ((void *)EXT2_XATTR_IHDR(inode) + \ + sizeof(EXT2_XATTR_IHDR(inode)->h_magic))) + +static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry, + const void *end) +{ + struct ext2_ext_attr_entry *next; + + while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { + next = EXT2_EXT_ATTR_NEXT(entry); + if ((void *)next >= end) + return -EIO; + entry = next; + } + return 0; +} + +static int ext2_xattr_check_block(const char *buf, size_t size) +{ + int error; + struct ext2_ext_attr_header *header = 
EXT2_XATTR_BHDR(buf); + + if (header->h_magic != EXT2_EXT_ATTR_MAGIC || + header->h_blocks != 1) + return -EIO; + error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size); + return error; +} + +static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry, + size_t size) +{ + size_t value_size = entry->e_value_size; + + if (entry->e_value_block != 0 || value_size > size || + entry->e_value_offs + value_size > size) + return -EIO; + return 0; +} + +#define EXT2_ACL_VERSION 0x0001 + +/* 23.2.5 acl_tag_t values */ + +#define ACL_UNDEFINED_TAG (0x00) +#define ACL_USER_OBJ (0x01) +#define ACL_USER (0x02) +#define ACL_GROUP_OBJ (0x04) +#define ACL_GROUP (0x08) +#define ACL_MASK (0x10) +#define ACL_OTHER (0x20) + +/* 23.2.7 ACL qualifier constants */ + +#define ACL_UNDEFINED_ID ((id_t)-1) + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} ext2_acl_entry; + +typedef struct { + __le16 e_tag; + __le16 e_perm; +} ext2_acl_entry_short; + +typedef struct { + __le32 a_version; +} ext2_acl_header; + +static inline int ext2_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(ext2_acl_header); + s = size - 4 * sizeof(ext2_acl_entry_short); + if (s < 0) { + if (size % sizeof(ext2_acl_entry_short)) + return -1; + return size / sizeof(ext2_acl_entry_short); + } else { + if (s % sizeof(ext2_acl_entry)) + return -1; + return s / sizeof(ext2_acl_entry) + 4; + } +} + +#define ACL_EA_VERSION 0x0002 + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} acl_ea_entry; + +typedef struct { + __le32 a_version; + acl_ea_entry a_entries[0]; +} acl_ea_header; + +static inline size_t acl_ea_size(int count) +{ + return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry); +} + +static int ext2_acl_to_xattr(void *dst, const void *src, + size_t dst_size, size_t src_size) +{ + int i, count; + const void *end = src + src_size; + acl_ea_header *ext_acl = (acl_ea_header *)dst; + acl_ea_entry *dst_entry = ext_acl->a_entries; + ext2_acl_entry 
*src_entry; + + if (src_size < sizeof(ext2_acl_header)) + goto fail; + if (((ext2_acl_header *)src)->a_version != + cpu_to_le32(EXT2_ACL_VERSION)) + goto fail; + src += sizeof(ext2_acl_header); + count = ext2_acl_count(src_size); + if (count <= 0) + goto fail; + + BUG_ON(dst_size < acl_ea_size(count)); + ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION); + for (i = 0; i < count; i++, dst_entry++) { + src_entry = (ext2_acl_entry *)src; + if (src + sizeof(ext2_acl_entry_short) > end) + goto fail; + dst_entry->e_tag = src_entry->e_tag; + dst_entry->e_perm = src_entry->e_perm; + switch (le16_to_cpu(src_entry->e_tag)) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + src += sizeof(ext2_acl_entry_short); + dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); + break; + case ACL_USER: + case ACL_GROUP: + src += sizeof(ext2_acl_entry); + if (src > end) + goto fail; + dst_entry->e_id = src_entry->e_id; + break; + default: + goto fail; + } + } + if (src != end) + goto fail; + return 0; +fail: + return -EINVAL; +} + +static char *xattr_prefix_table[] = { + [1] = "user.", + [2] = "system.posix_acl_access", + [3] = "system.posix_acl_default", + [4] = "trusted.", + [6] = "security.", +}; + +static int copy_single_xattr(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct ext2_ext_attr_entry *entry, + const void *data, u32 datalen) +{ + int ret = 0; + int name_len; + int name_index; + void *databuf = NULL; + char namebuf[XATTR_NAME_MAX + 1]; + + name_index = entry->e_name_index; + if (name_index >= ARRAY_SIZE(xattr_prefix_table) || + xattr_prefix_table[name_index] == NULL) + return -EOPNOTSUPP; + name_len = strlen(xattr_prefix_table[name_index]) + + entry->e_name_len; + if (name_len >= sizeof(namebuf)) + return -ERANGE; + + if (name_index == 2 || name_index == 3) { + size_t bufsize = acl_ea_size(ext2_acl_count(datalen)); + databuf = malloc(bufsize); + if (!databuf) + return -ENOMEM; + ret = ext2_acl_to_xattr(databuf, data, 
bufsize, datalen); + if (ret) + goto out; + data = databuf; + datalen = bufsize; + } + strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX); + strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len); + if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) - + sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) { + fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n", + objectid - INO_OFFSET, name_len, namebuf); + goto out; + } + ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len, + data, datalen, objectid); +out: + free(databuf); + return ret; +} + +static int copy_extended_attrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *btrfs_inode, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino) +{ + int ret = 0; + int inline_ea = 0; + errcode_t err; + u32 datalen; + u32 block_size = ext2_fs->blocksize; + u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super); + struct ext2_inode_large *ext2_inode; + struct ext2_ext_attr_entry *entry; + void *data; + char *buffer = NULL; + char inode_buf[EXT2_GOOD_OLD_INODE_SIZE]; + + if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) { + ext2_inode = (struct ext2_inode_large *)inode_buf; + } else { + ext2_inode = (struct ext2_inode_large *)malloc(inode_size); + if (!ext2_inode) + return -ENOMEM; + } + err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode, + inode_size); + if (err) { + fprintf(stderr, "ext2fs_read_inode_full: %s\n", + error_message(err)); + ret = -1; + goto out; + } + + if (ext2_ino > ext2_fs->super->s_first_ino && + inode_size > EXT2_GOOD_OLD_INODE_SIZE) { + if (EXT2_GOOD_OLD_INODE_SIZE + + ext2_inode->i_extra_isize > inode_size) { + ret = -EIO; + goto out; + } + if (ext2_inode->i_extra_isize != 0 && + EXT2_XATTR_IHDR(ext2_inode)->h_magic == + EXT2_EXT_ATTR_MAGIC) { + inline_ea = 1; + } + } + if (inline_ea) { + int total; + void *end = (void *)ext2_inode + inode_size; + entry = EXT2_XATTR_IFIRST(ext2_inode); + total = end - (void 
*)entry; + ret = ext2_xattr_check_names(entry, end); + if (ret) + goto out; + while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { + ret = ext2_xattr_check_entry(entry, total); + if (ret) + goto out; + data = (void *)EXT2_XATTR_IFIRST(ext2_inode) + + entry->e_value_offs; + datalen = entry->e_value_size; + ret = copy_single_xattr(trans, root, objectid, + entry, data, datalen); + if (ret) + goto out; + entry = EXT2_EXT_ATTR_NEXT(entry); + } + } + + if (ext2_inode->i_file_acl == 0) + goto out; + + buffer = malloc(block_size); + if (!buffer) { + ret = -ENOMEM; + goto out; + } + err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer); + if (err) { + fprintf(stderr, "ext2fs_read_ext_attr: %s\n", + error_message(err)); + ret = -1; + goto out; + } + ret = ext2_xattr_check_block(buffer, block_size); + if (ret) + goto out; + + entry = EXT2_XATTR_BFIRST(buffer); + while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { + ret = ext2_xattr_check_entry(entry, block_size); + if (ret) + goto out; + data = buffer + entry->e_value_offs; + datalen = entry->e_value_size; + ret = copy_single_xattr(trans, root, objectid, + entry, data, datalen); + if (ret) + goto out; + entry = EXT2_EXT_ATTR_NEXT(entry); + } +out: + free(buffer); + if ((void *)ext2_inode != inode_buf) + free(ext2_inode); + return ret; +} +#define MINORBITS 20 +#define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi)) + +static inline dev_t old_decode_dev(u16 val) +{ + return MKDEV((val >> 8) & 255, val & 255); +} + +static inline dev_t new_decode_dev(u32 dev) +{ + unsigned major = (dev & 0xfff00) >> 8; + unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); + return MKDEV(major, minor); +} + +static int copy_inode_item(struct btrfs_inode_item *dst, + struct ext2_inode *src, u32 blocksize) +{ + btrfs_set_stack_inode_generation(dst, 1); + btrfs_set_stack_inode_sequence(dst, 0); + btrfs_set_stack_inode_transid(dst, 1); + btrfs_set_stack_inode_size(dst, src->i_size); + btrfs_set_stack_inode_nbytes(dst, 0); + 
btrfs_set_stack_inode_block_group(dst, 0); + btrfs_set_stack_inode_nlink(dst, src->i_links_count); + btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16)); + btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16)); + btrfs_set_stack_inode_mode(dst, src->i_mode); + btrfs_set_stack_inode_rdev(dst, 0); + btrfs_set_stack_inode_flags(dst, 0); + btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime); + btrfs_set_stack_timespec_nsec(&dst->atime, 0); + btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime); + btrfs_set_stack_timespec_nsec(&dst->ctime, 0); + btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime); + btrfs_set_stack_timespec_nsec(&dst->mtime, 0); + btrfs_set_stack_timespec_sec(&dst->otime, 0); + btrfs_set_stack_timespec_nsec(&dst->otime, 0); + + if (S_ISDIR(src->i_mode)) { + btrfs_set_stack_inode_size(dst, 0); + btrfs_set_stack_inode_nlink(dst, 1); + } + if (S_ISREG(src->i_mode)) { + btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 | + (u64)src->i_size); + } + if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) && + !S_ISLNK(src->i_mode)) { + if (src->i_block[0]) { + btrfs_set_stack_inode_rdev(dst, + old_decode_dev(src->i_block[0])); + } else { + btrfs_set_stack_inode_rdev(dst, + new_decode_dev(src->i_block[1])); + } + } + memset(&dst->reserved, 0, sizeof(dst->reserved)); + + return 0; +} + +/* + * copy a single inode. do all the required works, such as cloning + * inode item, creating file extents and creating directory entries. 
+ */ +static int copy_single_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino, + struct ext2_inode *ext2_inode, + int datacsum, int packing, int noxattr) +{ + int ret; + struct btrfs_inode_item btrfs_inode; + + if (ext2_inode->i_links_count == 0) + return 0; + + copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize); + if (!datacsum && S_ISREG(ext2_inode->i_mode)) { + u32 flags = btrfs_stack_inode_flags(&btrfs_inode) | + BTRFS_INODE_NODATASUM; + btrfs_set_stack_inode_flags(&btrfs_inode, flags); + } + + switch (ext2_inode->i_mode & S_IFMT) { + case S_IFREG: + ret = create_file_extents(trans, root, objectid, &btrfs_inode, + ext2_fs, ext2_ino, datacsum, packing); + break; + case S_IFDIR: + ret = create_dir_entries(trans, root, objectid, &btrfs_inode, + ext2_fs, ext2_ino); + break; + case S_IFLNK: + ret = create_symbol_link(trans, root, objectid, &btrfs_inode, + ext2_fs, ext2_ino, ext2_inode); + break; + default: + ret = 0; + break; + } + if (ret) + return ret; + + if (!noxattr) { + ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode, + ext2_fs, ext2_ino); + if (ret) + return ret; + } + return btrfs_insert_inode(trans, root, objectid, &btrfs_inode); +} + +static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr, + u64 src_bytenr, u32 num_bytes) +{ + int ret; + char *buffer; + struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices; + + buffer = malloc(num_bytes); + if (!buffer) + return -ENOMEM; + ret = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr); + if (ret != num_bytes) + goto fail; + ret = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr); + if (ret != num_bytes) + goto fail; + ret = 0; +fail: + free(buffer); + if (ret > 0) + ret = -1; + return ret; +} +/* + * scan ext2's inode bitmap and copy all used inodes. 
+ */ +static int ext2_copy_inodes(struct btrfs_convert_context *cctx, + struct btrfs_root *root, + int datacsum, int packing, int noxattr, struct task_ctx *p) +{ + ext2_filsys ext2_fs = cctx->fs_data; + int ret; + errcode_t err; + ext2_inode_scan ext2_scan; + struct ext2_inode ext2_inode; + ext2_ino_t ext2_ino; + u64 objectid; + struct btrfs_trans_handle *trans; + + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; + err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan); + if (err) { + fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err)); + return -1; + } + while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino, + &ext2_inode))) { + /* no more inodes */ + if (ext2_ino == 0) + break; + /* skip special inode in ext2fs */ + if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO && + ext2_ino != EXT2_ROOT_INO) + continue; + objectid = ext2_ino + INO_OFFSET; + ret = copy_single_inode(trans, root, + objectid, ext2_fs, ext2_ino, + &ext2_inode, datacsum, packing, + noxattr); + p->cur_copy_inodes++; + if (ret) + return ret; + if (trans->blocks_used >= 4096) { + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + } + } + if (err) { + fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err)); + return -1; + } + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + ext2fs_close_inode_scan(ext2_scan); + + return ret; +} + +static int ext2_test_block(struct btrfs_convert_context *cctx, u64 block) +{ + ext2_filsys ext2_fs = cctx->fs_data; + + BUG_ON(block != (u32)block); + return ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block); +} + +/* + * Construct a range of ext2fs image file. + * scan block allocation bitmap, find all blocks used by the ext2fs + * in this range and create file extents that point to these blocks. 
+ * + * Note: Before calling the function, no file extent points to blocks + * in this range + */ +static int create_image_file_range(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode, + u64 start_byte, u64 end_byte, + struct btrfs_convert_context *cctx, int datacsum) +{ + u32 blocksize = cctx->blocksize; + u32 block = start_byte / blocksize; + u32 last_block = (end_byte + blocksize - 1) / blocksize; + int ret = 0; + struct blk_iterate_data data; + + init_blk_iterate_data(&data, trans, root, inode, objectid, datacsum); + data.first_block = block; + + for (; start_byte < end_byte; block++, start_byte += blocksize) { + if (!convert_test_block(cctx, block)) + continue; + ret = block_iterate_proc(block, block, &data); + if (ret < 0) + goto fail; + } + if (data.num_blocks > 0) { + ret = record_file_blocks(&data, data.first_block, + data.disk_block, data.num_blocks); + if (ret) + goto fail; + data.first_block += data.num_blocks; + } + if (last_block > data.first_block) { + ret = record_file_blocks(&data, data.first_block, 0, + last_block - data.first_block); + if (ret) + goto fail; + } +fail: + return ret; +} +/* + * Create the fs image file. 
+ */ +static int create_image(struct btrfs_convert_context *cctx, + struct btrfs_root *root, const char *name, int datacsum) +{ + int ret; + struct btrfs_key key; + struct btrfs_key location; + struct btrfs_path path; + struct btrfs_inode_item btrfs_inode; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_trans_handle *trans; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + u64 bytenr; + u64 num_bytes; + u64 objectid; + u64 last_byte; + u64 first_free; + u64 total_bytes; + u64 flags = BTRFS_INODE_READONLY; + u32 sectorsize = root->sectorsize; + + total_bytes = btrfs_super_total_bytes(fs_info->super_copy); + first_free = BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1; + first_free &= ~((u64)sectorsize - 1); + if (!datacsum) + flags |= BTRFS_INODE_NODATASUM; + + memset(&btrfs_inode, 0, sizeof(btrfs_inode)); + btrfs_set_stack_inode_generation(&btrfs_inode, 1); + btrfs_set_stack_inode_size(&btrfs_inode, total_bytes); + btrfs_set_stack_inode_nlink(&btrfs_inode, 1); + btrfs_set_stack_inode_nbytes(&btrfs_inode, 0); + btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400); + btrfs_set_stack_inode_flags(&btrfs_inode, flags); + btrfs_init_path(&path); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + objectid = btrfs_root_dirid(&root->root_item); + ret = btrfs_find_free_objectid(trans, root, objectid, &objectid); + if (ret) + goto fail; + + /* + * copy blocks covered by extent #0 to new positions. extent #0 is + * special, we can't rely on relocate_extents_range to relocate it. 
+ */ + for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) { + ret = custom_alloc_extent(root, sectorsize, 0, &key, 0); + if (ret) + goto fail; + ret = copy_disk_extent(root, key.objectid, last_byte, + sectorsize); + if (ret) + goto fail; + ret = btrfs_record_file_extent(trans, root, objectid, + &btrfs_inode, last_byte, + key.objectid, sectorsize); + if (ret) + goto fail; + if (datacsum) { + ret = csum_disk_extent(trans, root, key.objectid, + sectorsize); + if (ret) + goto fail; + } + } + + while(1) { + key.objectid = last_byte; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(trans, fs_info->extent_root, + &key, &path, 0, 0); + if (ret < 0) + goto fail; +next: + leaf = path.nodes[0]; + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(extent_root, &path); + if (ret < 0) + goto fail; + if (ret > 0) + break; + leaf = path.nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (last_byte > key.objectid || + key.type != BTRFS_EXTENT_ITEM_KEY) { + path.slots[0]++; + goto next; + } + + bytenr = key.objectid; + num_bytes = key.offset; + ei = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_extent_item); + if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) { + path.slots[0]++; + goto next; + } + + BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) + + btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)); + + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + key.type = btrfs_extent_inline_ref_type(leaf, iref); + BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY); + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + if (btrfs_extent_data_ref_root(leaf, dref) != + BTRFS_FS_TREE_OBJECTID) { + path.slots[0]++; + goto next; + } + + if (bytenr > last_byte) { + ret = create_image_file_range(trans, root, objectid, + &btrfs_inode, last_byte, + bytenr, cctx, + datacsum); + if (ret) + goto fail; + } + ret = btrfs_record_file_extent(trans, root, 
objectid, + &btrfs_inode, bytenr, bytenr, + num_bytes); + if (ret) + goto fail; + last_byte = bytenr + num_bytes; + btrfs_release_path(&path); + + if (trans->blocks_used >= 4096) { + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + } + } + btrfs_release_path(&path); + if (total_bytes > last_byte) { + ret = create_image_file_range(trans, root, objectid, + &btrfs_inode, last_byte, + total_bytes, cctx, + datacsum); + if (ret) + goto fail; + } + + ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode); + if (ret) + goto fail; + + location.objectid = objectid; + location.offset = 0; + btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_dir_item(trans, root, name, strlen(name), + btrfs_root_dirid(&root->root_item), + &location, BTRFS_FT_REG_FILE, objectid); + if (ret) + goto fail; + ret = btrfs_insert_inode_ref(trans, root, name, strlen(name), + objectid, + btrfs_root_dirid(&root->root_item), + objectid); + if (ret) + goto fail; + location.objectid = btrfs_root_dirid(&root->root_item); + location.offset = 0; + btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY); + ret = btrfs_lookup_inode(trans, root, &path, &location, 1); + if (ret) + goto fail; + leaf = path.nodes[0]; + inode_item = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_inode_item); + btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 + + btrfs_inode_size(leaf, inode_item)); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(&path); + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); +fail: + btrfs_release_path(&path); + return ret; +} + +static struct btrfs_root * link_subvol(struct btrfs_root *root, + const char *base, u64 root_objectid) +{ + struct btrfs_trans_handle *trans; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *new_root = NULL; + struct btrfs_path *path; + struct btrfs_inode_item *inode_item; + 
struct extent_buffer *leaf; + struct btrfs_key key; + u64 dirid = btrfs_root_dirid(&root->root_item); + u64 index = 2; + char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */ + int len; + int i; + int ret; + + len = strlen(base); + if (len == 0 || len > BTRFS_NAME_LEN) + return NULL; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = dirid; + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + BUG_ON(ret <= 0); + + if (path->slots[0] > 0) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY) + index = key.offset + 1; + } + btrfs_release_path(path); + + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + key.objectid = dirid; + key.offset = 0; + key.type = BTRFS_INODE_ITEM_KEY; + + ret = btrfs_lookup_inode(trans, root, path, &key, 1); + BUG_ON(ret); + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + + key.objectid = root_objectid; + key.offset = (u64)-1; + key.type = BTRFS_ROOT_ITEM_KEY; + + memcpy(buf, base, len); + for (i = 0; i < 1024; i++) { + ret = btrfs_insert_dir_item(trans, root, buf, len, + dirid, &key, BTRFS_FT_DIR, index); + if (ret != -EEXIST) + break; + len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i); + if (len < 1 || len > BTRFS_NAME_LEN) { + ret = -EINVAL; + break; + } + } + if (ret) + goto fail; + + btrfs_set_inode_size(leaf, inode_item, len * 2 + + btrfs_inode_size(leaf, inode_item)); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); + + /* add the backref first */ + ret = btrfs_add_root_ref(trans, tree_root, root_objectid, + BTRFS_ROOT_BACKREF_KEY, + root->root_key.objectid, + dirid, index, buf, len); + BUG_ON(ret); + + /* now add the forward ref */ + ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid, + BTRFS_ROOT_REF_KEY, root_objectid, + dirid, index, buf, len); + + ret = 
btrfs_commit_transaction(trans, root); + BUG_ON(ret); + + new_root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(new_root)) + new_root = NULL; +fail: + btrfs_free_path(path); + return new_root; +} + +static int create_chunk_mapping(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *chunk_root = info->chunk_root; + struct btrfs_root *extent_root = info->extent_root; + struct btrfs_device *device; + struct btrfs_block_group_cache *cache; + struct btrfs_dev_extent *extent; + struct extent_buffer *leaf; + struct btrfs_chunk chunk; + struct btrfs_key key; + struct btrfs_path path; + u64 cur_start; + u64 total_bytes; + u64 chunk_objectid; + int ret; + + btrfs_init_path(&path); + + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); + chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + + BUG_ON(list_empty(&info->fs_devices->devices)); + device = list_entry(info->fs_devices->devices.next, + struct btrfs_device, dev_list); + BUG_ON(device->devid != info->fs_devices->latest_devid); + + /* delete device extent created by make_btrfs */ + key.objectid = device->devid; + key.offset = 0; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1); + if (ret < 0) + goto err; + + BUG_ON(ret > 0); + ret = btrfs_del_item(trans, device->dev_root, &path); + if (ret) + goto err; + btrfs_release_path(&path); + + /* delete chunk item created by make_btrfs */ + key.objectid = chunk_objectid; + key.offset = 0; + key.type = BTRFS_CHUNK_ITEM_KEY; + ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1); + if (ret < 0) + goto err; + + BUG_ON(ret > 0); + ret = btrfs_del_item(trans, chunk_root, &path); + if (ret) + goto err; + btrfs_release_path(&path); + + /* for each block group, create device extent and chunk item */ + cur_start = 0; + while (cur_start < total_bytes) { + cache = btrfs_lookup_block_group(root->fs_info, cur_start); + 
BUG_ON(!cache); + + /* insert device extent */ + key.objectid = device->devid; + key.offset = cache->key.objectid; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_insert_empty_item(trans, device->dev_root, &path, + &key, sizeof(*extent)); + if (ret) + goto err; + + leaf = path.nodes[0]; + extent = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_dev_extent); + + btrfs_set_dev_extent_chunk_tree(leaf, extent, + chunk_root->root_key.objectid); + btrfs_set_dev_extent_chunk_objectid(leaf, extent, + chunk_objectid); + btrfs_set_dev_extent_chunk_offset(leaf, extent, + cache->key.objectid); + btrfs_set_dev_extent_length(leaf, extent, cache->key.offset); + write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), + BTRFS_UUID_SIZE); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(&path); + + /* insert chunk item */ + btrfs_set_stack_chunk_length(&chunk, cache->key.offset); + btrfs_set_stack_chunk_owner(&chunk, + extent_root->root_key.objectid); + btrfs_set_stack_chunk_stripe_len(&chunk, BTRFS_STRIPE_LEN); + btrfs_set_stack_chunk_type(&chunk, cache->flags); + btrfs_set_stack_chunk_io_align(&chunk, device->io_align); + btrfs_set_stack_chunk_io_width(&chunk, device->io_width); + btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size); + btrfs_set_stack_chunk_num_stripes(&chunk, 1); + btrfs_set_stack_chunk_sub_stripes(&chunk, 0); + btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid); + btrfs_set_stack_stripe_offset(&chunk.stripe, + cache->key.objectid); + memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE); + + key.objectid = chunk_objectid; + key.offset = cache->key.objectid; + key.type = BTRFS_CHUNK_ITEM_KEY; + + ret = btrfs_insert_item(trans, chunk_root, &key, &chunk, + btrfs_chunk_item_size(1)); + if (ret) + goto err; + + cur_start = cache->key.objectid + cache->key.offset; + } + + device->bytes_used = total_bytes; + ret = btrfs_update_device(trans, device); +err: + 
btrfs_release_path(&path); + return ret; +} + +static int create_subvol(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 root_objectid) +{ + struct extent_buffer *tmp; + struct btrfs_root *new_root; + struct btrfs_key key; + struct btrfs_root_item root_item; + int ret; + + ret = btrfs_copy_root(trans, root, root->node, &tmp, + root_objectid); + BUG_ON(ret); + + memcpy(&root_item, &root->root_item, sizeof(root_item)); + btrfs_set_root_bytenr(&root_item, tmp->start); + btrfs_set_root_level(&root_item, btrfs_header_level(tmp)); + btrfs_set_root_generation(&root_item, trans->transid); + free_extent_buffer(tmp); + + key.objectid = root_objectid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = trans->transid; + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &key, &root_item); + + key.offset = (u64)-1; + new_root = btrfs_read_fs_root(root->fs_info, &key); + BUG_ON(!new_root || IS_ERR(new_root)); + + ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID); + BUG_ON(ret); + + return 0; +} + +static int init_btrfs(struct btrfs_root *root) +{ + int ret; + struct btrfs_key location; + struct btrfs_trans_handle *trans; + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_buffer *tmp; + + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + ret = btrfs_make_block_groups(trans, root); + if (ret) + goto err; + ret = btrfs_fix_block_accounting(trans, root); + if (ret) + goto err; + ret = create_chunk_mapping(trans, root); + if (ret) + goto err; + ret = btrfs_make_root_dir(trans, fs_info->tree_root, + BTRFS_ROOT_TREE_DIR_OBJECTID); + if (ret) + goto err; + memcpy(&location, &root->root_key, sizeof(location)); + location.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7, + btrfs_super_root_dir(fs_info->super_copy), + &location, BTRFS_FT_DIR, 0); + if (ret) + goto err; + ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7, + location.objectid, + 
btrfs_super_root_dir(fs_info->super_copy), 0); + if (ret) + goto err; + btrfs_set_root_dirid(&fs_info->fs_root->root_item, + BTRFS_FIRST_FREE_OBJECTID); + + /* subvol for fs image file */ + ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID); + BUG_ON(ret); + /* subvol for data relocation */ + ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID); + BUG_ON(ret); + + extent_buffer_get(fs_info->csum_root->node); + ret = __btrfs_cow_block(trans, fs_info->csum_root, + fs_info->csum_root->node, NULL, 0, &tmp, 0, 0); + BUG_ON(ret); + free_extent_buffer(tmp); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); +err: + return ret; +} + +/* + * Migrate super block to its default position and zero 0 ~ 16k + */ +static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize) +{ + int ret; + struct extent_buffer *buf; + struct btrfs_super_block *super; + u32 len; + u32 bytenr; + + BUG_ON(sectorsize < sizeof(*super)); + buf = malloc(sizeof(*buf) + sectorsize); + if (!buf) + return -ENOMEM; + + buf->len = sectorsize; + ret = pread(fd, buf->data, sectorsize, old_bytenr); + if (ret != sectorsize) + goto fail; + + super = (struct btrfs_super_block *)buf->data; + BUG_ON(btrfs_super_bytenr(super) != old_bytenr); + btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET); + + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET); + if (ret != sectorsize) + goto fail; + + ret = fsync(fd); + if (ret) + goto fail; + + memset(buf->data, 0, sectorsize); + for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) { + len = BTRFS_SUPER_INFO_OFFSET - bytenr; + if (len > sectorsize) + len = sectorsize; + ret = pwrite(fd, buf->data, len, bytenr); + if (ret != len) { + fprintf(stderr, "unable to zero fill device\n"); + break; + } + bytenr += len; + } + ret = 0; + fsync(fd); +fail: + free(buf); + if (ret > 0) + ret = -1; + return ret; +} + +static int prepare_system_chunk_sb(struct btrfs_super_block 
*super) +{ + struct btrfs_chunk *chunk; + struct btrfs_disk_key *key; + u32 sectorsize = btrfs_super_sectorsize(super); + + key = (struct btrfs_disk_key *)(super->sys_chunk_array); + chunk = (struct btrfs_chunk *)(super->sys_chunk_array + + sizeof(struct btrfs_disk_key)); + + btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY); + btrfs_set_disk_key_offset(key, 0); + + btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super)); + btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN); + btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM); + btrfs_set_stack_chunk_io_align(chunk, sectorsize); + btrfs_set_stack_chunk_io_width(chunk, sectorsize); + btrfs_set_stack_chunk_sector_size(chunk, sectorsize); + btrfs_set_stack_chunk_num_stripes(chunk, 1); + btrfs_set_stack_chunk_sub_stripes(chunk, 0); + chunk->stripe.devid = super->dev_item.devid; + btrfs_set_stack_stripe_offset(&chunk->stripe, 0); + memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE); + btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk)); + return 0; +} + +static int prepare_system_chunk(int fd, u64 sb_bytenr) +{ + int ret; + struct extent_buffer *buf; + struct btrfs_super_block *super; + + BUG_ON(BTRFS_SUPER_INFO_SIZE < sizeof(*super)); + buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE); + if (!buf) + return -ENOMEM; + + buf->len = BTRFS_SUPER_INFO_SIZE; + ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr); + if (ret != BTRFS_SUPER_INFO_SIZE) + goto fail; + + super = (struct btrfs_super_block *)buf->data; + BUG_ON(btrfs_super_bytenr(super) != sb_bytenr); + BUG_ON(btrfs_super_num_devices(super) != 1); + + ret = prepare_system_chunk_sb(super); + if (ret) + goto fail; + + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr); + if (ret != 
BTRFS_SUPER_INFO_SIZE) + goto fail; + + ret = 0; +fail: + free(buf); + if (ret > 0) + ret = -1; + return ret; +} + +static int relocate_one_reference(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 extent_start, u64 extent_size, + struct btrfs_key *extent_key, + struct extent_io_tree *reloc_tree) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_path path; + struct btrfs_inode_item inode; + struct blk_iterate_data data; + u64 bytenr; + u64 num_bytes; + u64 cur_offset; + u64 new_pos; + u64 nbytes; + u64 sector_end; + u32 sectorsize = root->sectorsize; + unsigned long ptr; + int datacsum; + int fd; + int ret; + + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1); + if (ret) + goto fail; + + leaf = path.nodes[0]; + fi = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_file_extent_item); + BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0); + if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) || + extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) { + ret = 1; + goto fail; + } + + bytenr = extent_start + btrfs_file_extent_offset(leaf, fi); + num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + + ret = btrfs_del_item(trans, root, &path); + if (ret) + goto fail; + + ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0, + root->root_key.objectid, + extent_key->objectid, extent_key->offset); + if (ret) + goto fail; + + btrfs_release_path(&path); + + key.objectid = extent_key->objectid; + key.offset = 0; + key.type = BTRFS_INODE_ITEM_KEY; + ret = btrfs_lookup_inode(trans, root, &path, &key, 0); + if (ret) + goto fail; + + leaf = path.nodes[0]; + ptr = btrfs_item_ptr_offset(leaf, path.slots[0]); + read_extent_buffer(leaf, &inode, ptr, sizeof(inode)); + btrfs_release_path(&path); + + BUG_ON(num_bytes & (sectorsize - 1)); + nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes; + btrfs_set_stack_inode_nbytes(&inode, nbytes); + 
datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM); + + init_blk_iterate_data(&data, trans, root, &inode, extent_key->objectid, + datacsum); + data.first_block = extent_key->offset; + + cur_offset = extent_key->offset; + while (num_bytes > 0) { + sector_end = bytenr + sectorsize - 1; + if (test_range_bit(reloc_tree, bytenr, sector_end, + EXTENT_LOCKED, 1)) { + ret = get_state_private(reloc_tree, bytenr, &new_pos); + BUG_ON(ret); + } else { + ret = custom_alloc_extent(root, sectorsize, 0, &key, 0); + if (ret) + goto fail; + new_pos = key.objectid; + + if (cur_offset == extent_key->offset) { + fd = root->fs_info->fs_devices->latest_bdev; + readahead(fd, bytenr, num_bytes); + } + ret = copy_disk_extent(root, new_pos, bytenr, + sectorsize); + if (ret) + goto fail; + ret = set_extent_bits(reloc_tree, bytenr, sector_end, + EXTENT_LOCKED, GFP_NOFS); + BUG_ON(ret); + ret = set_state_private(reloc_tree, bytenr, new_pos); + BUG_ON(ret); + } + + ret = block_iterate_proc(new_pos / sectorsize, + cur_offset / sectorsize, &data); + if (ret < 0) + goto fail; + + cur_offset += sectorsize; + bytenr += sectorsize; + num_bytes -= sectorsize; + } + + if (data.num_blocks > 0) { + ret = record_file_blocks(&data, data.first_block, + data.disk_block, data.num_blocks); + if (ret) + goto fail; + } + + key.objectid = extent_key->objectid; + key.offset = 0; + key.type = BTRFS_INODE_ITEM_KEY; + ret = btrfs_lookup_inode(trans, root, &path, &key, 1); + if (ret) + goto fail; + + leaf = path.nodes[0]; + ptr = btrfs_item_ptr_offset(leaf, path.slots[0]); + write_extent_buffer(leaf, &inode, ptr, sizeof(inode)); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(&path); + +fail: + btrfs_release_path(&path); + return ret; +} + +static int relocate_extents_range(struct btrfs_root *fs_root, + struct btrfs_root *image_root, + u64 start_byte, u64 end_byte) +{ + struct btrfs_fs_info *info = fs_root->fs_info; + struct btrfs_root *extent_root = info->extent_root; + struct btrfs_root 
*cur_root = NULL; + struct btrfs_trans_handle *trans; + struct btrfs_extent_data_ref *dref; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_item *ei; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_key extent_key; + struct btrfs_path path; + struct extent_io_tree reloc_tree; + unsigned long ptr; + unsigned long end; + u64 cur_byte; + u64 num_bytes; + u64 ref_root; + u64 num_extents; + int pass = 0; + int ret; + + btrfs_init_path(&path); + extent_io_tree_init(&reloc_tree); + + key.objectid = start_byte; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) + goto fail; + if (ret > 0) { + ret = btrfs_previous_item(extent_root, &path, 0, + BTRFS_EXTENT_ITEM_KEY); + if (ret < 0) + goto fail; + if (ret == 0) { + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.objectid + key.offset > start_byte) + start_byte = key.objectid; + } + } + btrfs_release_path(&path); +again: + cur_root = (pass % 2 == 0) ? 
image_root : fs_root; + num_extents = 0; + + trans = btrfs_start_transaction(cur_root, 1); + BUG_ON(!trans); + + cur_byte = start_byte; + while (1) { + key.objectid = cur_byte; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_search_slot(trans, extent_root, + &key, &path, 0, 0); + if (ret < 0) + goto fail; +next: + leaf = path.nodes[0]; + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(extent_root, &path); + if (ret < 0) + goto fail; + if (ret > 0) + break; + leaf = path.nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.objectid < cur_byte || + key.type != BTRFS_EXTENT_ITEM_KEY) { + path.slots[0]++; + goto next; + } + if (key.objectid >= end_byte) + break; + + num_extents++; + + cur_byte = key.objectid; + num_bytes = key.offset; + ei = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_extent_item); + BUG_ON(!(btrfs_extent_flags(leaf, ei) & + BTRFS_EXTENT_FLAG_DATA)); + + ptr = btrfs_item_ptr_offset(leaf, path.slots[0]); + end = ptr + btrfs_item_size_nr(leaf, path.slots[0]); + + ptr += sizeof(struct btrfs_extent_item); + + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + key.type = btrfs_extent_inline_ref_type(leaf, iref); + BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY); + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + ref_root = btrfs_extent_data_ref_root(leaf, dref); + extent_key.objectid = + btrfs_extent_data_ref_objectid(leaf, dref); + extent_key.offset = + btrfs_extent_data_ref_offset(leaf, dref); + extent_key.type = BTRFS_EXTENT_DATA_KEY; + BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1); + + if (ref_root == cur_root->root_key.objectid) + break; + + ptr += btrfs_extent_inline_ref_size(key.type); + } + + if (ptr >= end) { + path.slots[0]++; + goto next; + } + + ret = relocate_one_reference(trans, cur_root, cur_byte, + num_bytes, &extent_key, + &reloc_tree); + if (ret < 0) + goto fail; + + cur_byte += num_bytes; + btrfs_release_path(&path); + + if 
(trans->blocks_used >= 4096) { + ret = btrfs_commit_transaction(trans, cur_root); + BUG_ON(ret); + trans = btrfs_start_transaction(cur_root, 1); + BUG_ON(!trans); + } + } + btrfs_release_path(&path); + + ret = btrfs_commit_transaction(trans, cur_root); + BUG_ON(ret); + + if (num_extents > 0 && pass++ < 16) + goto again; + + ret = (num_extents > 0) ? -1 : 0; +fail: + btrfs_release_path(&path); + extent_io_tree_cleanup(&reloc_tree); + return ret; +} + +/* + * relocate data in system chunk + */ +static int cleanup_sys_chunk(struct btrfs_root *fs_root, + struct btrfs_root *image_root) +{ + struct btrfs_block_group_cache *cache; + int i, ret = 0; + u64 offset = 0; + u64 end_byte; + + while(1) { + cache = btrfs_lookup_block_group(fs_root->fs_info, offset); + if (!cache) + break; + + end_byte = cache->key.objectid + cache->key.offset; + if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = relocate_extents_range(fs_root, image_root, + cache->key.objectid, + end_byte); + if (ret) + goto fail; + } + offset = end_byte; + } + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + offset = btrfs_sb_offset(i); + offset &= ~((u64)BTRFS_STRIPE_LEN - 1); + + ret = relocate_extents_range(fs_root, image_root, + offset, offset + BTRFS_STRIPE_LEN); + if (ret) + goto fail; + } + ret = 0; +fail: + return ret; +} + +static int fixup_chunk_mapping(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *chunk_root = info->chunk_root; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_path path; + struct btrfs_chunk chunk; + unsigned long ptr; + u32 size; + u64 type; + int ret; + + btrfs_init_path(&path); + + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + /* + * recow the whole chunk tree. this will move all chunk tree blocks + * into system block group. 
+ */ + memset(&key, 0, sizeof(key)); + while (1) { + ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1); + if (ret < 0) + goto err; + + ret = btrfs_next_leaf(chunk_root, &path); + if (ret < 0) + goto err; + if (ret > 0) + break; + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + btrfs_release_path(&path); + } + btrfs_release_path(&path); + + /* fixup the system chunk array in super block */ + btrfs_set_super_sys_array_size(info->super_copy, 0); + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.offset = 0; + key.type = BTRFS_CHUNK_ITEM_KEY; + + ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0); + if (ret < 0) + goto err; + BUG_ON(ret != 0); + while(1) { + leaf = path.nodes[0]; + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(chunk_root, &path); + if (ret < 0) + goto err; + if (ret > 0) + break; + leaf = path.nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.type != BTRFS_CHUNK_ITEM_KEY) + goto next; + + ptr = btrfs_item_ptr_offset(leaf, path.slots[0]); + size = btrfs_item_size_nr(leaf, path.slots[0]); + BUG_ON(size != sizeof(chunk)); + read_extent_buffer(leaf, &chunk, ptr, size); + type = btrfs_stack_chunk_type(&chunk); + + if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) + goto next; + + ret = btrfs_add_system_chunk(trans, chunk_root, &key, + &chunk, size); + if (ret) + goto err; +next: + path.slots[0]++; + } + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); +err: + btrfs_release_path(&path); + return ret; +} + +static const struct btrfs_convert_operations ext2_convert_ops = { + .name = "ext2", + .open_fs = ext2_open_fs, + .alloc_block = ext2_alloc_block, + .alloc_block_range = ext2_alloc_block_range, + .copy_inodes = ext2_copy_inodes, + .test_block = ext2_test_block, + .free_block = ext2_free_block, + .free_block_range = ext2_free_block_range, + .close_fs = ext2_close_fs, +}; + +static const struct btrfs_convert_operations *convert_operations[] = { + 
&ext2_convert_ops, +}; + +static int convert_open_fs(const char *devname, + struct btrfs_convert_context *cctx) +{ + int i; + + memset(cctx, 0, sizeof(*cctx)); + + for (i = 0; i < ARRAY_SIZE(convert_operations); i++) { + int ret = convert_operations[i]->open_fs(cctx, devname); + + if (ret == 0) { + cctx->convert_ops = convert_operations[i]; + return ret; + } + } + + fprintf(stderr, "No file system found to convert.\n"); + return -1; +} + +static int do_convert(const char *devname, int datacsum, int packing, int noxattr, + u32 nodesize, int copylabel, const char *fslabel, int progress, + u64 features) +{ + int i, ret, blocks_per_node; + int fd = -1; + int is_btrfs = 0; + u32 blocksize; + u64 blocks[7]; + u64 total_bytes; + u64 super_bytenr; + struct btrfs_root *root; + struct btrfs_root *image_root; + struct btrfs_convert_context cctx; + char *subvol_name = NULL; + struct task_ctx ctx; + char features_buf[64]; + struct btrfs_mkfs_config mkfs_cfg; + + init_convert_context(&cctx); + ret = convert_open_fs(devname, &cctx); + if (ret) + goto fail; + + blocksize = cctx.blocksize; + total_bytes = (u64)blocksize * (u64)cctx.block_count; + if (blocksize < 4096) { + fprintf(stderr, "block size is too small\n"); + goto fail; + } + if (btrfs_check_nodesize(nodesize, blocksize, features)) + goto fail; + blocks_per_node = nodesize / blocksize; + ret = -blocks_per_node; + for (i = 0; i < 7; i++) { + if (nodesize == blocksize) + ret = convert_alloc_block(&cctx, 0, blocks + i); + else + ret = convert_alloc_block_range(&cctx, + ret + blocks_per_node, blocks_per_node, + blocks + i); + if (ret) { + fprintf(stderr, "not enough free space\n"); + goto fail; + } + blocks[i] *= blocksize; + } + super_bytenr = blocks[0]; + fd = open(devname, O_RDWR); + if (fd < 0) { + fprintf(stderr, "unable to open %s\n", devname); + goto fail; + } + btrfs_parse_features_to_string(features_buf, features); + if (features == BTRFS_MKFS_DEFAULT_FEATURES) + strcat(features_buf, " (default)"); + + printf("create 
btrfs filesystem:\n"); + printf("\tblocksize: %u\n", blocksize); + printf("\tnodesize: %u\n", nodesize); + printf("\tfeatures: %s\n", features_buf); + + mkfs_cfg.label = cctx.volume_name; + mkfs_cfg.fs_uuid = NULL; + memcpy(mkfs_cfg.blocks, blocks, sizeof(blocks)); + mkfs_cfg.num_bytes = total_bytes; + mkfs_cfg.nodesize = nodesize; + mkfs_cfg.sectorsize = blocksize; + mkfs_cfg.stripesize = blocksize; + mkfs_cfg.features = features; + + ret = make_btrfs(fd, &mkfs_cfg); + if (ret) { + fprintf(stderr, "unable to create initial ctree: %s\n", + strerror(-ret)); + goto fail; + } + /* create a system chunk that maps the whole device */ + ret = prepare_system_chunk(fd, super_bytenr); + if (ret) { + fprintf(stderr, "unable to update system chunk\n"); + goto fail; + } + root = open_ctree_fd(fd, devname, super_bytenr, OPEN_CTREE_WRITES); + if (!root) { + fprintf(stderr, "unable to open ctree\n"); + goto fail; + } + ret = cache_free_extents(root, &cctx); + if (ret) { + fprintf(stderr, "error during cache_free_extents %d\n", ret); + goto fail; + } + root->fs_info->extent_ops = &extent_ops; + /* recover block allocation bitmap */ + for (i = 0; i < 7; i++) { + blocks[i] /= blocksize; + if (nodesize == blocksize) + convert_free_block(&cctx, blocks[i]); + else + convert_free_block_range(&cctx, blocks[i], + blocks_per_node); + } + ret = init_btrfs(root); + if (ret) { + fprintf(stderr, "unable to setup the root tree\n"); + goto fail; + } + printf("creating btrfs metadata.\n"); + ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count); + ctx.cur_copy_inodes = 0; + + if (progress) { + ctx.info = task_init(print_copied_inodes, after_copied_inodes, &ctx); + task_start(ctx.info); + } + ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx); + if (ret) { + fprintf(stderr, "error during copy_inodes %d\n", ret); + goto fail; + } + if (progress) { + task_stop(ctx.info); + task_deinit(ctx.info); + } + + printf("creating %s image file.\n", cctx.convert_ops->name); + ret = 
asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name); + if (ret < 0) { + fprintf(stderr, "error allocating subvolume name: %s_saved\n", + cctx.convert_ops->name); + goto fail; + } + + image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID); + + free(subvol_name); + + if (!image_root) { + fprintf(stderr, "unable to create subvol\n"); + goto fail; + } + ret = create_image(&cctx, image_root, "image", datacsum); + if (ret) { + fprintf(stderr, "error during create_image %d\n", ret); + goto fail; + } + memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE); + if (copylabel == 1) { + strncpy(root->fs_info->super_copy->label, + cctx.volume_name, BTRFS_LABEL_SIZE); + fprintf(stderr, "copy label '%s'\n", + root->fs_info->super_copy->label); + } else if (copylabel == -1) { + strcpy(root->fs_info->super_copy->label, fslabel); + fprintf(stderr, "set label to '%s'\n", fslabel); + } + + printf("cleaning up system chunk.\n"); + ret = cleanup_sys_chunk(root, image_root); + if (ret) { + fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret); + goto fail; + } + ret = close_ctree(root); + if (ret) { + fprintf(stderr, "error during close_ctree %d\n", ret); + goto fail; + } + convert_close_fs(&cctx); + clean_convert_context(&cctx); + + /* + * If this step succeed, we get a mountable btrfs. Otherwise + * the source fs is left unchanged. + */ + ret = migrate_super_block(fd, super_bytenr, blocksize); + if (ret) { + fprintf(stderr, "unable to migrate super block\n"); + goto fail; + } + is_btrfs = 1; + + root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES); + if (!root) { + fprintf(stderr, "unable to open ctree\n"); + goto fail; + } + /* move chunk tree into system chunk. 
*/ + ret = fixup_chunk_mapping(root); + if (ret) { + fprintf(stderr, "error during fixup_chunk_tree\n"); + goto fail; + } + ret = close_ctree(root); + close(fd); + + printf("conversion complete.\n"); + return 0; +fail: + clean_convert_context(&cctx); + if (fd != -1) + close(fd); + if (is_btrfs) + fprintf(stderr, + "WARNING: an error occured during chunk mapping fixup, filesystem mountable but not finalized\n"); + else + fprintf(stderr, "conversion aborted\n"); + return -1; +} + +static int may_rollback(struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_multi_bio *multi = NULL; + u64 bytenr; + u64 length; + u64 physical; + u64 total_bytes; + int num_stripes; + int ret; + + if (btrfs_super_num_devices(info->super_copy) != 1) + goto fail; + + bytenr = BTRFS_SUPER_INFO_OFFSET; + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); + + while (1) { + ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr, + &length, &multi, 0, NULL); + if (ret) { + if (ret == -ENOENT) { + /* removed block group at the tail */ + if (length == (u64)-1) + break; + + /* removed block group in the middle */ + goto next; + } + goto fail; + } + + num_stripes = multi->num_stripes; + physical = multi->stripes[0].physical; + kfree(multi); + + if (num_stripes != 1 || physical != bytenr) + goto fail; +next: + bytenr += length; + if (bytenr >= total_bytes) + break; + } + return 0; +fail: + return -1; +} + +static int do_rollback(const char *devname) +{ + int fd = -1; + int ret; + int i; + struct btrfs_root *root; + struct btrfs_root *image_root; + struct btrfs_root *chunk_root; + struct btrfs_dir_item *dir; + struct btrfs_inode_item *inode; + struct btrfs_file_extent_item *fi; + struct btrfs_trans_handle *trans; + struct extent_buffer *leaf; + struct btrfs_block_group_cache *cache1; + struct btrfs_block_group_cache *cache2; + struct btrfs_key key; + struct btrfs_path path; + struct extent_io_tree io_tree; + char *buf = NULL; + char *name; + u64 
bytenr; + u64 num_bytes; + u64 root_dir; + u64 objectid; + u64 offset; + u64 start; + u64 end; + u64 sb_bytenr; + u64 first_free; + u64 total_bytes; + u32 sectorsize; + + extent_io_tree_init(&io_tree); + + fd = open(devname, O_RDWR); + if (fd < 0) { + fprintf(stderr, "unable to open %s\n", devname); + goto fail; + } + root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES); + if (!root) { + fprintf(stderr, "unable to open ctree\n"); + goto fail; + } + ret = may_rollback(root); + if (ret < 0) { + fprintf(stderr, "unable to do rollback\n"); + goto fail; + } + + sectorsize = root->sectorsize; + buf = malloc(sectorsize); + if (!buf) { + fprintf(stderr, "unable to allocate memory\n"); + goto fail; + } + + btrfs_init_path(&path); + + key.objectid = CONV_IMAGE_SUBVOL_OBJECTID; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = BTRFS_FS_TREE_OBJECTID; + ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0, + 0); + btrfs_release_path(&path); + if (ret > 0) { + fprintf(stderr, + "ERROR: unable to convert ext2 image subvolume, is it deleted?\n"); + goto fail; + } else if (ret < 0) { + fprintf(stderr, + "ERROR: unable to open ext2_subvol, id=%llu: %s\n", + (unsigned long long)key.objectid, strerror(-ret)); + goto fail; + } + + key.objectid = CONV_IMAGE_SUBVOL_OBJECTID; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + image_root = btrfs_read_fs_root(root->fs_info, &key); + if (!image_root || IS_ERR(image_root)) { + fprintf(stderr, "unable to open subvol %llu\n", + (unsigned long long)key.objectid); + goto fail; + } + + name = "image"; + root_dir = btrfs_root_dirid(&root->root_item); + dir = btrfs_lookup_dir_item(NULL, image_root, &path, + root_dir, name, strlen(name), 0); + if (!dir || IS_ERR(dir)) { + fprintf(stderr, "unable to find file %s\n", name); + goto fail; + } + leaf = path.nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, dir, &key); + btrfs_release_path(&path); + + objectid = key.objectid; + + ret = btrfs_lookup_inode(NULL, image_root, &path, 
&key, 0); + if (ret) { + fprintf(stderr, "unable to find inode item\n"); + goto fail; + } + leaf = path.nodes[0]; + inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item); + total_bytes = btrfs_inode_size(leaf, inode); + btrfs_release_path(&path); + + key.objectid = objectid; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0); + if (ret != 0) { + fprintf(stderr, "unable to find first file extent\n"); + btrfs_release_path(&path); + goto fail; + } + + /* build mapping tree for the relocated blocks */ + for (offset = 0; offset < total_bytes; ) { + leaf = path.nodes[0]; + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, &path); + if (ret != 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.objectid != objectid || key.offset != offset || + btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + break; + + fi = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) + break; + if (btrfs_file_extent_compression(leaf, fi) || + btrfs_file_extent_encryption(leaf, fi) || + btrfs_file_extent_other_encoding(leaf, fi)) + break; + + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + /* skip holes and direct mapped extents */ + if (bytenr == 0 || bytenr == offset) + goto next_extent; + + bytenr += btrfs_file_extent_offset(leaf, fi); + num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + + cache1 = btrfs_lookup_block_group(root->fs_info, offset); + cache2 = btrfs_lookup_block_group(root->fs_info, + offset + num_bytes - 1); + if (!cache1 || cache1 != cache2 || + (!(cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) && + !intersect_with_sb(offset, num_bytes))) + break; + + set_extent_bits(&io_tree, offset, offset + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&io_tree, offset, bytenr); +next_extent: + offset += 
btrfs_file_extent_num_bytes(leaf, fi); + path.slots[0]++; + } + btrfs_release_path(&path); + + if (offset < total_bytes) { + fprintf(stderr, "unable to build extent mapping\n"); + goto fail; + } + + first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1; + first_free &= ~((u64)sectorsize - 1); + /* backup for extent #0 should exist */ + if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) { + fprintf(stderr, "no backup for the first extent\n"); + goto fail; + } + /* force no allocation from system block group */ + root->fs_info->system_allocs = -1; + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + /* + * recow the whole chunk tree, this will remove all chunk tree blocks + * from system block group + */ + chunk_root = root->fs_info->chunk_root; + memset(&key, 0, sizeof(key)); + while (1) { + ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1); + if (ret < 0) + break; + + ret = btrfs_next_leaf(chunk_root, &path); + if (ret) + break; + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + btrfs_release_path(&path); + } + btrfs_release_path(&path); + + offset = 0; + num_bytes = 0; + while(1) { + cache1 = btrfs_lookup_block_group(root->fs_info, offset); + if (!cache1) + break; + + if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) + num_bytes += btrfs_block_group_used(&cache1->item); + + offset = cache1->key.objectid + cache1->key.offset; + } + /* only extent #0 left in system block group? 
*/ + if (num_bytes > first_free) { + fprintf(stderr, "unable to empty system block group\n"); + goto fail; + } + /* create a system chunk that maps the whole device */ + ret = prepare_system_chunk_sb(root->fs_info->super_copy); + if (ret) { + fprintf(stderr, "unable to update system chunk\n"); + goto fail; + } + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + + ret = close_ctree(root); + if (ret) { + fprintf(stderr, "error during close_ctree %d\n", ret); + goto fail; + } + + /* zero btrfs super block mirrors */ + memset(buf, 0, sectorsize); + for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr >= total_bytes) + break; + ret = pwrite(fd, buf, sectorsize, bytenr); + if (ret != sectorsize) { + fprintf(stderr, + "error during zeroing supreblock %d: %d\n", + i, ret); + goto fail; + } + } + + sb_bytenr = (u64)-1; + /* copy all relocated blocks back */ + while(1) { + ret = find_first_extent_bit(&io_tree, 0, &start, &end, + EXTENT_LOCKED); + if (ret) + break; + + ret = get_state_private(&io_tree, start, &bytenr); + BUG_ON(ret); + + clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED, + GFP_NOFS); + + while (start <= end) { + if (start == BTRFS_SUPER_INFO_OFFSET) { + sb_bytenr = bytenr; + goto next_sector; + } + ret = pread(fd, buf, sectorsize, bytenr); + if (ret < 0) { + fprintf(stderr, "error during pread %d\n", ret); + goto fail; + } + BUG_ON(ret != sectorsize); + ret = pwrite(fd, buf, sectorsize, start); + if (ret < 0) { + fprintf(stderr, "error during pwrite %d\n", ret); + goto fail; + } + BUG_ON(ret != sectorsize); +next_sector: + start += sectorsize; + bytenr += sectorsize; + } + } + + ret = fsync(fd); + if (ret) { + fprintf(stderr, "error during fsync %d\n", ret); + goto fail; + } + /* + * finally, overwrite btrfs super block. 
+ */ + ret = pread(fd, buf, sectorsize, sb_bytenr); + if (ret < 0) { + fprintf(stderr, "error during pread %d\n", ret); + goto fail; + } + BUG_ON(ret != sectorsize); + ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET); + if (ret < 0) { + fprintf(stderr, "error during pwrite %d\n", ret); + goto fail; + } + BUG_ON(ret != sectorsize); + ret = fsync(fd); + if (ret) { + fprintf(stderr, "error during fsync %d\n", ret); + goto fail; + } + + close(fd); + free(buf); + extent_io_tree_cleanup(&io_tree); + printf("rollback complete.\n"); + return 0; + +fail: + if (fd != -1) + close(fd); + free(buf); + fprintf(stderr, "rollback aborted.\n"); + return -1; +} + +static void print_usage(void) +{ + printf("usage: btrfs-convert [options] device\n"); + printf("options:\n"); + printf("\t-d|--no-datasum disable data checksum, sets NODATASUM\n"); + printf("\t-i|--no-xattr ignore xattrs and ACLs\n"); + printf("\t-n|--no-inline disable inlining of small files to metadata\n"); + printf("\t-N|--nodesize SIZE set filesystem metadata nodesize\n"); + printf("\t-r|--rollback roll back to the original filesystem\n"); + printf("\t-l|--label LABEL set filesystem label\n"); + printf("\t-L|--copy-label use label from converted filesystem\n"); + printf("\t-p|--progress show converting progress (default)\n"); + printf("\t-O|--features LIST comma separated list of filesystem features\n"); + printf("\t--no-progress show only overview, not the detailed progress\n"); +} + +int main(int argc, char *argv[]) +{ + int ret; + int packing = 1; + int noxattr = 0; + int datacsum = 1; + u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE), + BTRFS_MKFS_DEFAULT_NODE_SIZE); + int rollback = 0; + int copylabel = 0; + int usage_error = 0; + int progress = 1; + char *file; + char fslabel[BTRFS_LABEL_SIZE]; + u64 features = BTRFS_MKFS_DEFAULT_FEATURES; + + while(1) { + enum { GETOPT_VAL_NO_PROGRESS = 256 }; + static const struct option long_options[] = { + { "no-progress", no_argument, NULL, + GETOPT_VAL_NO_PROGRESS 
}, + { "no-datasum", no_argument, NULL, 'd' }, + { "no-inline", no_argument, NULL, 'n' }, + { "no-xattr", no_argument, NULL, 'i' }, + { "rollback", no_argument, NULL, 'r' }, + { "features", required_argument, NULL, 'O' }, + { "progress", no_argument, NULL, 'p' }, + { "label", required_argument, NULL, 'l' }, + { "copy-label", no_argument, NULL, 'L' }, + { "nodesize", required_argument, NULL, 'N' }, + { "help", no_argument, NULL, GETOPT_VAL_HELP}, + { NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL); + + if (c < 0) + break; + switch(c) { + case 'd': + datacsum = 0; + break; + case 'i': + noxattr = 1; + break; + case 'n': + packing = 0; + break; + case 'N': + nodesize = parse_size(optarg); + break; + case 'r': + rollback = 1; + break; + case 'l': + copylabel = -1; + if (strlen(optarg) >= BTRFS_LABEL_SIZE) { + fprintf(stderr, + "WARNING: label too long, trimmed to %d bytes\n", + BTRFS_LABEL_SIZE - 1); + } + strncpy(fslabel, optarg, BTRFS_LABEL_SIZE - 1); + fslabel[BTRFS_LABEL_SIZE - 1] = 0; + break; + case 'L': + copylabel = 1; + break; + case 'p': + progress = 1; + break; + case 'O': { + char *orig = strdup(optarg); + char *tmp = orig; + + tmp = btrfs_parse_fs_features(tmp, &features); + if (tmp) { + fprintf(stderr, + "Unrecognized filesystem feature '%s'\n", + tmp); + free(orig); + exit(1); + } + free(orig); + if (features & BTRFS_FEATURE_LIST_ALL) { + btrfs_list_all_fs_features( + ~BTRFS_CONVERT_ALLOWED_FEATURES); + exit(0); + } + if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) { + char buf[64]; + + btrfs_parse_features_to_string(buf, + features & ~BTRFS_CONVERT_ALLOWED_FEATURES); + fprintf(stderr, + "ERROR: features not allowed for convert: %s\n", + buf); + exit(1); + } + + break; + } + case GETOPT_VAL_NO_PROGRESS: + progress = 0; + break; + case GETOPT_VAL_HELP: + default: + print_usage(); + return c != GETOPT_VAL_HELP; + } + } + argc = argc - optind; + set_argv0(argv); + if (check_argc_exact(argc, 1)) { + 
print_usage(); + return 1; + } + + if (rollback && (!datacsum || noxattr || !packing)) { + fprintf(stderr, + "Usage error: -d, -i, -n options do not apply to rollback\n"); + usage_error++; + } + + if (usage_error) { + print_usage(); + return 1; + } + + file = argv[optind]; + ret = check_mounted(file); + if (ret < 0) { + fprintf(stderr, "Could not check mount status: %s\n", + strerror(-ret)); + return 1; + } else if (ret) { + fprintf(stderr, "%s is mounted\n", file); + return 1; + } + + if (rollback) { + ret = do_rollback(file); + } else { + ret = do_convert(file, datacsum, packing, noxattr, nodesize, + copylabel, fslabel, progress, features); + } + if (ret) + return 1; + return 0; +} diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c new file mode 100644 index 00000000..be5cd7ea --- /dev/null +++ b/btrfs-corrupt-block.c @@ -0,0 +1,1310 @@ +/* + * Copyright (C) 2009 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <limits.h> + +#include "kerncompat.h" +#include "ctree.h" +#include "volumes.h" +#include "disk-io.h" +#include "print-tree.h" +#include "transaction.h" +#include "list.h" +#include "utils.h" + +#define FIELD_BUF_LEN 80 + +static struct extent_buffer *debug_corrupt_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize, u64 copy) +{ + int ret; + struct extent_buffer *eb; + u64 length; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + int num_copies; + int mirror_num = 1; + + eb = btrfs_find_create_tree_block(root->fs_info, bytenr, blocksize); + if (!eb) + return NULL; + + length = blocksize; + while (1) { + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, + eb->start, &length, &multi, + mirror_num, NULL); + BUG_ON(ret); + device = multi->stripes[0].dev; + eb->fd = device->fd; + device->total_ios++; + eb->dev_bytenr = multi->stripes[0].physical; + + fprintf(stdout, + "mirror %d logical %llu physical %llu device %s\n", + mirror_num, (unsigned long long)bytenr, + (unsigned long long)eb->dev_bytenr, device->name); + kfree(multi); + + if (!copy || mirror_num == copy) { + ret = read_extent_from_disk(eb, 0, eb->len); + printf("corrupting %llu copy %d\n", eb->start, + mirror_num); + memset(eb->data, 0, eb->len); + write_extent_to_disk(eb); + fsync(eb->fd); + } + + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, + eb->start, eb->len); + if (num_copies == 1) + break; + + mirror_num++; + if (mirror_num > num_copies) + break; + } + return eb; +} + +static void print_usage(int ret) +{ + fprintf(stderr, "usage: btrfs-corrupt-block [options] device\n"); + fprintf(stderr, "\t-l Logical extent to be corrupted\n"); + fprintf(stderr, "\t-c Copy of the extent to be corrupted" + " (usually 1 or 2, default: 0)\n"); + fprintf(stderr, "\t-b Number of bytes to be corrupted\n"); + fprintf(stderr, "\t-e Extent to be 
corrupted\n"); + fprintf(stderr, "\t-E The whole extent tree to be corrupted\n"); + fprintf(stderr, "\t-u Given chunk item to be corrupted\n"); + fprintf(stderr, "\t-U The whole chunk tree to be corrupted\n"); + fprintf(stderr, "\t-i The inode item to corrupt (must also specify " + "the field to corrupt)\n"); + fprintf(stderr, "\t-x The file extent item to corrupt (must also " + "specify -i for the inode and -f for the field to corrupt)\n"); + fprintf(stderr, "\t-m The metadata block to corrupt (must also " + "specify -f for the field to corrupt)\n"); + fprintf(stderr, "\t-K The key to corrupt in the format " + "<num>,<num>,<num> (must also specify -f for the field)\n"); + fprintf(stderr, "\t-f The field in the item to corrupt\n"); + fprintf(stderr, "\t-I An item to corrupt (must also specify the field " + "to corrupt and a root+key for the item)\n"); + fprintf(stderr, "\t-D Corrupt a dir item, must specify key and field\n"); + fprintf(stderr, "\t-d Delete this item (must specify -K)\n"); + fprintf(stderr, "\t-r Operate on this root (only works with -d)\n"); + fprintf(stderr, "\t-C Delete a csum for the specified bytenr. 
When " + "used with -b it'll delete that many bytes, otherwise it's " + "just sectorsize\n"); + exit(ret); +} + +static void corrupt_keys(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + int slot; + int bad_slot; + int nr; + struct btrfs_disk_key bad_key;; + + nr = btrfs_header_nritems(eb); + if (nr == 0) + return; + + slot = rand() % nr; + bad_slot = rand() % nr; + + if (bad_slot == slot) + return; + + fprintf(stderr, + "corrupting keys in block %llu slot %d swapping with %d\n", + (unsigned long long)eb->start, slot, bad_slot); + + if (btrfs_header_level(eb) == 0) { + btrfs_item_key(eb, &bad_key, bad_slot); + btrfs_set_item_key(eb, &bad_key, slot); + } else { + btrfs_node_key(eb, &bad_key, bad_slot); + btrfs_set_node_key(eb, &bad_key, slot); + } + btrfs_mark_buffer_dirty(eb); + if (!trans) { + u16 csum_size = + btrfs_super_csum_size(root->fs_info->super_copy); + csum_tree_block_size(eb, csum_size, 0); + write_extent_to_disk(eb); + } +} + + +static int corrupt_keys_in_block(struct btrfs_root *root, u64 bytenr) +{ + struct extent_buffer *eb; + + eb = read_tree_block(root, bytenr, root->leafsize, 0); + if (!extent_buffer_uptodate(eb)) + return -EIO;; + + corrupt_keys(NULL, root, eb); + free_extent_buffer(eb); + return 0; +} + +static int corrupt_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 copy) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + u32 item_size; + unsigned long ptr; + struct btrfs_path *path; + int ret; + int slot; + int should_del = rand() % 3; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = bytenr; + key.type = (u8)-1; + key.offset = (u64)-1; + + while(1) { + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, path, -1, 1); + if (ret < 0) + break; + + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + ret = 0; + } + leaf = path->nodes[0]; + slot = path->slots[0]; + 
btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != bytenr) + break; + + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_TREE_BLOCK_REF_KEY && + key.type != BTRFS_EXTENT_DATA_REF_KEY && + key.type != BTRFS_EXTENT_REF_V0_KEY && + key.type != BTRFS_SHARED_BLOCK_REF_KEY && + key.type != BTRFS_SHARED_DATA_REF_KEY) + goto next; + + if (should_del) { + fprintf(stderr, + "deleting extent record: key %llu %u %llu\n", + key.objectid, key.type, key.offset); + + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + /* make sure this extent doesn't get + * reused for other purposes */ + btrfs_pin_extent(root->fs_info, + key.objectid, key.offset); + } + + btrfs_del_item(trans, root, path); + } else { + fprintf(stderr, + "corrupting extent record: key %llu %u %llu\n", + key.objectid, key.type, key.offset); + ptr = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(leaf, slot); + memset_extent_buffer(leaf, 0, ptr, item_size); + btrfs_mark_buffer_dirty(leaf); + } +next: + btrfs_release_path(path); + + if (key.offset > 0) + key.offset--; + if (key.offset == 0) + break; + } + + btrfs_free_path(path); + return 0; +} + +static void btrfs_corrupt_extent_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + u32 nr = btrfs_header_nritems(eb); + u32 victim = rand() % nr; + u64 objectid; + struct btrfs_key key; + + btrfs_item_key_to_cpu(eb, &key, victim); + objectid = key.objectid; + corrupt_extent(trans, root, objectid, 1); +} + +static void btrfs_corrupt_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + int i; + + if (!eb) + return; + + if (btrfs_is_leaf(eb)) { + btrfs_corrupt_extent_leaf(trans, root, eb); + return; + } + + if (btrfs_header_level(eb) == 1 && eb != root->node) { + if (rand() % 5) + return; + } + + for (i = 0; i < btrfs_header_nritems(eb); i++) { + struct extent_buffer *next; + + next = read_tree_block(root, btrfs_node_blockptr(eb, i), + 
root->leafsize, + btrfs_node_ptr_generation(eb, i)); + if (!extent_buffer_uptodate(next)) + continue; + btrfs_corrupt_extent_tree(trans, root, next); + free_extent_buffer(next); + } +} + +enum btrfs_inode_field { + BTRFS_INODE_FIELD_ISIZE, + BTRFS_INODE_FIELD_NBYTES, + BTRFS_INODE_FIELD_BAD, +}; + +enum btrfs_file_extent_field { + BTRFS_FILE_EXTENT_DISK_BYTENR, + BTRFS_FILE_EXTENT_BAD, +}; + +enum btrfs_dir_item_field { + BTRFS_DIR_ITEM_NAME, + BTRFS_DIR_ITEM_LOCATION_OBJECTID, + BTRFS_DIR_ITEM_BAD, +}; + +enum btrfs_metadata_block_field { + BTRFS_METADATA_BLOCK_GENERATION, + BTRFS_METADATA_BLOCK_SHIFT_ITEMS, + BTRFS_METADATA_BLOCK_BAD, +}; + +enum btrfs_item_field { + BTRFS_ITEM_OFFSET, + BTRFS_ITEM_BAD, +}; + +enum btrfs_key_field { + BTRFS_KEY_OBJECTID, + BTRFS_KEY_TYPE, + BTRFS_KEY_OFFSET, + BTRFS_KEY_BAD, +}; + +static enum btrfs_inode_field convert_inode_field(char *field) +{ + if (!strncmp(field, "isize", FIELD_BUF_LEN)) + return BTRFS_INODE_FIELD_ISIZE; + if (!strncmp(field, "nbytes", FIELD_BUF_LEN)) + return BTRFS_INODE_FIELD_NBYTES; + return BTRFS_INODE_FIELD_BAD; +} + +static enum btrfs_file_extent_field convert_file_extent_field(char *field) +{ + if (!strncmp(field, "disk_bytenr", FIELD_BUF_LEN)) + return BTRFS_FILE_EXTENT_DISK_BYTENR; + return BTRFS_FILE_EXTENT_BAD; +} + +static enum btrfs_metadata_block_field +convert_metadata_block_field(char *field) +{ + if (!strncmp(field, "generation", FIELD_BUF_LEN)) + return BTRFS_METADATA_BLOCK_GENERATION; + if (!strncmp(field, "shift_items", FIELD_BUF_LEN)) + return BTRFS_METADATA_BLOCK_SHIFT_ITEMS; + return BTRFS_METADATA_BLOCK_BAD; +} + +static enum btrfs_key_field convert_key_field(char *field) +{ + if (!strncmp(field, "objectid", FIELD_BUF_LEN)) + return BTRFS_KEY_OBJECTID; + if (!strncmp(field, "type", FIELD_BUF_LEN)) + return BTRFS_KEY_TYPE; + if (!strncmp(field, "offset", FIELD_BUF_LEN)) + return BTRFS_KEY_OFFSET; + return BTRFS_KEY_BAD; +} + +static enum btrfs_item_field convert_item_field(char *field) 
+{ + if (!strncmp(field, "offset", FIELD_BUF_LEN)) + return BTRFS_ITEM_OFFSET; + return BTRFS_ITEM_BAD; +} + +static enum btrfs_dir_item_field convert_dir_item_field(char *field) +{ + if (!strncmp(field, "name", FIELD_BUF_LEN)) + return BTRFS_DIR_ITEM_NAME; + if (!strncmp(field, "location_objectid", FIELD_BUF_LEN)) + return BTRFS_DIR_ITEM_LOCATION_OBJECTID; + return BTRFS_DIR_ITEM_BAD; +} + +static u64 generate_u64(u64 orig) +{ + u64 ret; + do { + ret = rand(); + } while (ret == orig); + return ret; +} + +static u32 generate_u32(u32 orig) +{ + u32 ret; + do { + ret = rand(); + } while (ret == orig); + return ret; +} + +static u8 generate_u8(u8 orig) +{ + u8 ret; + do { + ret = rand(); + } while (ret == orig); + return ret; +} + +static int corrupt_key(struct btrfs_root *root, struct btrfs_key *key, + char *field) +{ + enum btrfs_key_field corrupt_field = convert_key_field(field); + struct btrfs_path *path; + struct btrfs_trans_handle *trans; + int ret; + + root = root->fs_info->fs_root; + if (corrupt_field == BTRFS_KEY_BAD) { + fprintf(stderr, "Invalid field %s\n", field); + return -EINVAL; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, key, path, 0, 1); + if (ret < 0) + goto out; + if (ret > 0) { + fprintf(stderr, "Couldn't find the key to corrupt\n"); + ret = -ENOENT; + goto out; + } + + switch (corrupt_field) { + case BTRFS_KEY_OBJECTID: + key->objectid = generate_u64(key->objectid); + break; + case BTRFS_KEY_TYPE: + key->type = generate_u8(key->type); + break; + case BTRFS_KEY_OFFSET: + key->offset = generate_u64(key->objectid); + break; + default: + fprintf(stderr, "Invalid field %s, %d\n", field, + corrupt_field); + ret = -EINVAL; + goto out; + } + + btrfs_set_item_key_unsafe(root, path, key); +out: + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; 
+} + +static int corrupt_dir_item(struct btrfs_root *root, struct btrfs_key *key, + char *field) +{ + struct btrfs_trans_handle *trans; + struct btrfs_dir_item *di; + struct btrfs_path *path; + char name[PATH_MAX]; + struct btrfs_key location; + struct btrfs_disk_key disk_key; + unsigned long name_ptr; + enum btrfs_dir_item_field corrupt_field = + convert_dir_item_field(field); + u64 bogus; + u16 name_len; + int ret; + + if (corrupt_field == BTRFS_DIR_ITEM_BAD) { + fprintf(stderr, "Invalid field %s\n", field); + return -EINVAL; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, key, path, 0, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + fprintf(stderr, "Error searching for dir item %d\n", ret); + goto out; + } + + di = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_item); + + switch (corrupt_field) { + case BTRFS_DIR_ITEM_NAME: + name_len = btrfs_dir_name_len(path->nodes[0], di); + name_ptr = (unsigned long)(di + 1); + read_extent_buffer(path->nodes[0], name, name_ptr, name_len); + name[0]++; + write_extent_buffer(path->nodes[0], name, name_ptr, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + goto out; + case BTRFS_DIR_ITEM_LOCATION_OBJECTID: + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + bogus = generate_u64(location.objectid); + location.objectid = bogus; + btrfs_cpu_key_to_disk(&disk_key, &location); + btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); + btrfs_mark_buffer_dirty(path->nodes[0]); + goto out; + default: + ret = -EINVAL; + goto out; + } +out: + btrfs_commit_transaction(trans, root); + btrfs_free_path(path); + return ret; +} + +static int corrupt_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 inode, char *field) +{ + struct btrfs_inode_item *ei; + struct btrfs_path *path; + struct btrfs_key key; + 
enum btrfs_inode_field corrupt_field = convert_inode_field(field); + u64 bogus; + u64 orig; + int ret; + + if (corrupt_field == BTRFS_INODE_FIELD_BAD) { + fprintf(stderr, "Invalid field %s\n", field); + return -EINVAL; + } + + key.objectid = inode; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + if (ret) { + if (!path->slots[0]) { + fprintf(stderr, "Couldn't find inode %Lu\n", inode); + ret = -ENOENT; + goto out; + } + path->slots[0]--; + ret = 0; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid != inode) { + fprintf(stderr, "Couldn't find inode %Lu\n", inode); + ret = -ENOENT; + goto out; + } + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + switch (corrupt_field) { + case BTRFS_INODE_FIELD_ISIZE: + orig = btrfs_inode_size(path->nodes[0], ei); + bogus = generate_u64(orig); + btrfs_set_inode_size(path->nodes[0], ei, bogus); + break; + case BTRFS_INODE_FIELD_NBYTES: + orig = btrfs_inode_nbytes(path->nodes[0], ei); + bogus = generate_u64(orig); + btrfs_set_inode_nbytes(path->nodes[0], ei, bogus); + break; + default: + ret = -EINVAL; + break; + } + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_free_path(path); + return ret; +} + +static int corrupt_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 inode, u64 extent, + char *field) +{ + struct btrfs_file_extent_item *fi; + struct btrfs_path *path; + struct btrfs_key key; + enum btrfs_file_extent_field corrupt_field; + u64 bogus; + u64 orig; + int ret = 0; + + corrupt_field = convert_file_extent_field(field); + if (corrupt_field == BTRFS_FILE_EXTENT_BAD) { + fprintf(stderr, "Invalid field %s\n", field); + return -EINVAL; + } + + key.objectid = inode; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = extent; + + path = btrfs_alloc_path(); + if (!path) 
+ return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + if (ret) { + fprintf(stderr, "Couldn't find extent %llu for inode %llu\n", + extent, inode); + ret = -ENOENT; + goto out; + } + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + switch (corrupt_field) { + case BTRFS_FILE_EXTENT_DISK_BYTENR: + orig = btrfs_file_extent_disk_bytenr(path->nodes[0], fi); + bogus = generate_u64(orig); + btrfs_set_file_extent_disk_bytenr(path->nodes[0], fi, bogus); + break; + default: + ret = -EINVAL; + break; + } + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_free_path(path); + return ret; +} + +static void shift_items(struct btrfs_root *root, struct extent_buffer *eb) +{ + int nritems = btrfs_header_nritems(eb); + int shift_space = btrfs_leaf_free_space(root, eb) / 2; + int slot = nritems / 2; + int i = 0; + unsigned int data_end = btrfs_item_offset_nr(eb, nritems - 1); + + /* Shift the item data up to and including slot back by shift space */ + memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end - shift_space, + btrfs_leaf_data(eb) + data_end, + btrfs_item_offset_nr(eb, slot - 1) - data_end); + + /* Now update the item pointers. 
*/ + for (i = nritems - 1; i >= slot; i--) { + u32 offset = btrfs_item_offset_nr(eb, i); + offset -= shift_space; + btrfs_set_item_offset(eb, btrfs_item_nr(i), offset); + } +} + +static int corrupt_metadata_block(struct btrfs_root *root, u64 block, + char *field) +{ + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + struct extent_buffer *eb; + struct btrfs_key key, root_key; + enum btrfs_metadata_block_field corrupt_field; + u64 root_objectid; + u64 orig, bogus; + u8 level; + int ret; + + corrupt_field = convert_metadata_block_field(field); + if (corrupt_field == BTRFS_METADATA_BLOCK_BAD) { + fprintf(stderr, "Invalid field %s\n", field); + return -EINVAL; + } + + eb = read_tree_block(root, block, root->leafsize, 0); + if (!extent_buffer_uptodate(eb)) { + fprintf(stderr, "Couldn't read in tree block %s\n", field); + return -EINVAL; + } + root_objectid = btrfs_header_owner(eb); + level = btrfs_header_level(eb); + if (level) + btrfs_node_key_to_cpu(eb, &key, 0); + else + btrfs_item_key_to_cpu(eb, &key, 0); + free_extent_buffer(eb); + + root_key.objectid = root_objectid; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + + root = btrfs_read_fs_root(root->fs_info, &root_key); + if (IS_ERR(root)) { + fprintf(stderr, "Couldn't finde owner root %llu\n", + key.objectid); + return PTR_ERR(root); + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + btrfs_free_path(path); + fprintf(stderr, "Couldn't start transaction %ld\n", + PTR_ERR(trans)); + return PTR_ERR(trans); + } + + path->lowest_level = level; + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + fprintf(stderr, "Error searching to node %d\n", ret); + goto out; + } + eb = path->nodes[level]; + + ret = 0; + switch (corrupt_field) { + case BTRFS_METADATA_BLOCK_GENERATION: + orig = btrfs_header_generation(eb); + bogus = generate_u64(orig); + btrfs_set_header_generation(eb, bogus); + 
break; + case BTRFS_METADATA_BLOCK_SHIFT_ITEMS: + shift_items(root, path->nodes[level]); + break; + default: + ret = -EINVAL; + break; + } + btrfs_mark_buffer_dirty(path->nodes[level]); +out: + btrfs_commit_transaction(trans, root); + btrfs_free_path(path); + return ret; +} + +static int corrupt_btrfs_item(struct btrfs_root *root, struct btrfs_key *key, + char *field) +{ + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + enum btrfs_item_field corrupt_field; + u32 orig, bogus; + int ret; + + corrupt_field = convert_item_field(field); + if (corrupt_field == BTRFS_ITEM_BAD) { + fprintf(stderr, "Invalid field %s\n", field); + return -EINVAL; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + btrfs_free_path(path); + fprintf(stderr, "Couldn't start transaction %ld\n", + PTR_ERR(trans)); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, key, path, 0, 1); + if (ret != 0) { + fprintf(stderr, "Error searching to node %d\n", ret); + goto out; + } + + ret = 0; + switch (corrupt_field) { + case BTRFS_ITEM_OFFSET: + orig = btrfs_item_offset_nr(path->nodes[0], path->slots[0]); + bogus = generate_u32(orig); + btrfs_set_item_offset(path->nodes[0], + btrfs_item_nr(path->slots[0]), bogus); + break; + default: + ret = -EINVAL; + break; + } + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_commit_transaction(trans, root); + btrfs_free_path(path); + return ret; +} + +static int delete_item(struct btrfs_root *root, struct btrfs_key *key) +{ + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + btrfs_free_path(path); + fprintf(stderr, "Couldn't start transaction %ld\n", + PTR_ERR(trans)); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, key, path, -1, 1); + if (ret) { + if (ret > 0) + ret = 
-ENOENT; + fprintf(stderr, "Error searching to node %d\n", ret); + goto out; + } + ret = btrfs_del_item(trans, root, path); + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_commit_transaction(trans, root); + btrfs_free_path(path); + return ret; +} + +static int delete_csum(struct btrfs_root *root, u64 bytenr, u64 bytes) +{ + struct btrfs_trans_handle *trans; + int ret; + + root = root->fs_info->csum_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + fprintf(stderr, "Couldn't start transaction %ld\n", + PTR_ERR(trans)); + return PTR_ERR(trans); + } + + ret = btrfs_del_csums(trans, root, bytenr, bytes); + if (ret) + fprintf(stderr, "Error deleting csums %d\n", ret); + btrfs_commit_transaction(trans, root); + return ret; +} + +/* corrupt item using NO cow. + * Because chunk recover will recover based on whole partition scaning, + * If using COW, chunk recover will use the old item to recover, + * which is still OK but we want to check the ability to rebuild chunk + * not only restore the old ones */ +static int corrupt_item_nocow(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + int del) +{ + int ret = 0; + struct btrfs_key key; + struct extent_buffer *leaf; + unsigned long ptr; + int slot; + u32 item_size; + + leaf = path->nodes[0]; + slot = path->slots[0]; + /* Not deleting the first item of a leaf to keep leaf structure */ + if (slot == 0) + del = 0; + /* Only accept valid eb */ + BUG_ON(!leaf->data || slot >= btrfs_header_nritems(leaf)); + btrfs_item_key_to_cpu(leaf, &key, slot); + if (del) { + fprintf(stdout, "Deleting key and data [%llu, %u, %llu].\n", + key.objectid, key.type, key.offset); + btrfs_del_item(trans, root, path); + } else { + fprintf(stdout, "Corrupting key and data [%llu, %u, %llu].\n", + key.objectid, key.type, key.offset); + ptr = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(leaf, slot); + memset_extent_buffer(leaf, 0, ptr, item_size); + 
btrfs_mark_buffer_dirty(leaf); + } + return ret; +} +static int corrupt_chunk_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + int del; + int slot; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = (u64)-1; + key.offset = (u64)-1; + key.type = (u8)-1; + + /* Here, cow and ins_len must equals 0 for the following reasons: + * 1) chunk recover is based on disk scanning, so COW should be + * disabled in case the original chunk being scanned and + * recovered using the old chunk. + * 2) if cow = 0, ins_len must also be set to 0, or BUG_ON will be + * triggered. + */ + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + BUG_ON(ret == 0); + if (ret < 0) { + fprintf(stderr, "Error searching tree\n"); + goto free_out; + } + /* corrupt/del dev_item first */ + while (!btrfs_previous_item(root, path, 0, BTRFS_DEV_ITEM_KEY)) { + slot = path->slots[0]; + leaf = path->nodes[0]; + del = rand() % 3; + /* Never delete the first item to keep the leaf structure */ + if (path->slots[0] == 0) + del = 0; + ret = corrupt_item_nocow(trans, root, path, del); + if (ret) + goto free_out; + } + btrfs_release_path(path); + + /* Here, cow and ins_len must equals 0 for the following reasons: + * 1) chunk recover is based on disk scanning, so COW should be + * disabled in case the original chunk being scanned and + * recovered using the old chunk. + * 2) if cow = 0, ins_len must also be set to 0, or BUG_ON will be + * triggered. 
+ */ + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + BUG_ON(ret == 0); + if (ret < 0) { + fprintf(stderr, "Error searching tree\n"); + goto free_out; + } + /* corrupt/del chunk then*/ + while (!btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY)) { + slot = path->slots[0]; + leaf = path->nodes[0]; + del = rand() % 3; + btrfs_item_key_to_cpu(leaf, &found_key, slot); + ret = corrupt_item_nocow(trans, root, path, del); + if (ret) + goto free_out; + } +free_out: + btrfs_free_path(path); + return ret; +} +static int find_chunk_offset(struct btrfs_root *root, + struct btrfs_path *path, u64 offset) +{ + struct btrfs_key key; + int ret; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = offset; + + /* Here, cow and ins_len must equals 0 for following reasons: + * 1) chunk recover is based on disk scanning, so COW should + * be disabled in case the original chunk being scanned + * and recovered using the old chunk. + * 2) if cow = 0, ins_len must also be set to 0, or BUG_ON + * will be triggered. 
+ */ + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret > 0) { + fprintf(stderr, "Can't find chunk with given offset %llu\n", + offset); + goto out; + } + if (ret < 0) { + fprintf(stderr, "Error searching chunk\n"); + goto out; + } +out: + return ret; + +} +int main(int ac, char **av) +{ + struct cache_tree root_cache; + struct btrfs_key key; + struct btrfs_root *root; + struct extent_buffer *eb; + char *dev; + /* chunk offset can be 0,so change to (u64)-1 */ + u64 logical = (u64)-1; + int ret = 0; + u64 copy = 0; + u64 bytes = 4096; + int extent_rec = 0; + int extent_tree = 0; + int corrupt_block_keys = 0; + int chunk_rec = 0; + int chunk_tree = 0; + int corrupt_item = 0; + int corrupt_di = 0; + int delete = 0; + u64 metadata_block = 0; + u64 inode = 0; + u64 file_extent = (u64)-1; + u64 root_objectid = 0; + u64 csum_bytenr = 0; + char field[FIELD_BUF_LEN]; + + field[0] = '\0'; + srand(128); + memset(&key, 0, sizeof(key)); + + while(1) { + int c; + static const struct option long_options[] = { + /* { "byte-count", 1, NULL, 'b' }, */ + { "logical", required_argument, NULL, 'l' }, + { "copy", required_argument, NULL, 'c' }, + { "bytes", required_argument, NULL, 'b' }, + { "extent-record", no_argument, NULL, 'e' }, + { "extent-tree", no_argument, NULL, 'E' }, + { "keys", no_argument, NULL, 'k' }, + { "chunk-record", no_argument, NULL, 'u' }, + { "chunk-tree", no_argument, NULL, 'U' }, + { "inode", required_argument, NULL, 'i'}, + { "file-extent", required_argument, NULL, 'x'}, + { "metadata-block", required_argument, NULL, 'm'}, + { "field", required_argument, NULL, 'f'}, + { "key", required_argument, NULL, 'K'}, + { "item", no_argument, NULL, 'I'}, + { "dir-item", no_argument, NULL, 'D'}, + { "delete", no_argument, NULL, 'd'}, + { "root", no_argument, NULL, 'r'}, + { "csum", required_argument, NULL, 'C'}, + { "help", no_argument, NULL, GETOPT_VAL_HELP}, + { NULL, 0, NULL, 0 } + }; + + c = getopt_long(ac, av, "l:c:b:eEkuUi:f:x:m:K:IDdr:C:", + 
long_options, NULL); + if (c < 0) + break; + switch(c) { + case 'l': + logical = arg_strtou64(optarg); + break; + case 'c': + copy = arg_strtou64(optarg); + break; + case 'b': + bytes = arg_strtou64(optarg); + break; + case 'e': + extent_rec = 1; + break; + case 'E': + extent_tree = 1; + break; + case 'k': + corrupt_block_keys = 1; + break; + case 'u': + chunk_rec = 1; + break; + case 'U': + chunk_tree = 1; + break; + case 'i': + inode = arg_strtou64(optarg); + break; + case 'f': + strncpy(field, optarg, FIELD_BUF_LEN); + break; + case 'x': + file_extent = arg_strtou64(optarg); + break; + case 'm': + metadata_block = arg_strtou64(optarg); + break; + case 'K': + ret = sscanf(optarg, "%llu,%u,%llu", + &key.objectid, + (unsigned int *)&key.type, + &key.offset); + if (ret != 3) { + fprintf(stderr, "error reading key " + "%d\n", errno); + print_usage(1); + } + break; + case 'D': + corrupt_di = 1; + break; + case 'I': + corrupt_item = 1; + break; + case 'd': + delete = 1; + break; + case 'r': + root_objectid = arg_strtou64(optarg); + break; + case 'C': + csum_bytenr = arg_strtou64(optarg); + break; + case GETOPT_VAL_HELP: + default: + print_usage(c != GETOPT_VAL_HELP); + } + } + set_argv0(av); + ac = ac - optind; + if (check_argc_min(ac, 1)) + print_usage(1); + dev = av[optind]; + + radix_tree_init(); + cache_tree_init(&root_cache); + + root = open_ctree(dev, 0, OPEN_CTREE_WRITES); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + exit(1); + } + if (extent_rec) { + struct btrfs_trans_handle *trans; + + if (logical == (u64)-1) + print_usage(1); + trans = btrfs_start_transaction(root, 1); + ret = corrupt_extent (trans, root, logical, 0); + btrfs_commit_transaction(trans, root); + goto out_close; + } + if (extent_tree) { + struct btrfs_trans_handle *trans; + trans = btrfs_start_transaction(root, 1); + btrfs_corrupt_extent_tree(trans, root->fs_info->extent_root, + root->fs_info->extent_root->node); + btrfs_commit_transaction(trans, root); + goto out_close; + } + if 
(chunk_rec) { + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + int del; + + if (logical == (u64)-1) + print_usage(1); + del = rand() % 3; + path = btrfs_alloc_path(); + if (!path) { + fprintf(stderr, "path allocation failed\n"); + goto out_close; + } + + if (find_chunk_offset(root->fs_info->chunk_root, path, + logical) != 0) { + btrfs_free_path(path); + goto out_close; + } + trans = btrfs_start_transaction(root, 1); + ret = corrupt_item_nocow(trans, root->fs_info->chunk_root, + path, del); + if (ret < 0) + fprintf(stderr, "Failed to corrupt chunk record\n"); + btrfs_commit_transaction(trans, root); + goto out_close; + } + if (chunk_tree) { + struct btrfs_trans_handle *trans; + trans = btrfs_start_transaction(root, 1); + ret = corrupt_chunk_tree(trans, root->fs_info->chunk_root); + if (ret < 0) + fprintf(stderr, "Failed to corrupt chunk tree\n"); + btrfs_commit_transaction(trans, root); + goto out_close; + } + if (inode) { + struct btrfs_trans_handle *trans; + + if (*field == 0) + print_usage(1); + + trans = btrfs_start_transaction(root, 1); + if (file_extent == (u64)-1) { + printf("corrupting inode\n"); + ret = corrupt_inode(trans, root, inode, field); + } else { + printf("corrupting file extent\n"); + ret = corrupt_file_extent(trans, root, inode, + file_extent, field); + } + btrfs_commit_transaction(trans, root); + goto out_close; + } + if (metadata_block) { + if (*field == 0) + print_usage(1); + ret = corrupt_metadata_block(root, metadata_block, field); + goto out_close; + } + if (corrupt_di) { + if (!key.objectid || *field == 0) + print_usage(1); + ret = corrupt_dir_item(root, &key, field); + goto out_close; + } + if (csum_bytenr) { + ret = delete_csum(root, csum_bytenr, bytes); + goto out_close; + } + if (corrupt_item) { + if (!key.objectid) + print_usage(1); + ret = corrupt_btrfs_item(root, &key, field); + } + if (delete) { + struct btrfs_root *target = root; + + if (!key.objectid) + print_usage(1); + if (root_objectid) { + struct btrfs_key 
root_key; + + root_key.objectid = root_objectid; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + + target = btrfs_read_fs_root(root->fs_info, &root_key); + if (IS_ERR(target)) { + fprintf(stderr, "Couldn't find root %llu\n", + (unsigned long long)root_objectid); + print_usage(1); + } + } + ret = delete_item(target, &key); + goto out_close; + } + if (key.objectid || key.offset || key.type) { + if (*field == 0) + print_usage(1); + ret = corrupt_key(root, &key, field); + goto out_close; + } + /* + * If we made it here and we have extent set then we didn't specify + * inode and we're screwed. + */ + if (file_extent != (u64)-1) + print_usage(1); + + if (logical == (u64)-1) + print_usage(1); + + if (bytes == 0) + bytes = root->sectorsize; + + bytes = (bytes + root->sectorsize - 1) / root->sectorsize; + bytes *= root->sectorsize; + + while (bytes > 0) { + if (corrupt_block_keys) { + corrupt_keys_in_block(root, logical); + } else { + eb = debug_corrupt_block(root, logical, + root->sectorsize, copy); + free_extent_buffer(eb); + } + logical += root->sectorsize; + bytes -= root->sectorsize; + } + return ret; +out_close: + close_ctree(root); + return ret; +} diff --git a/btrfs-crc.c b/btrfs-crc.c new file mode 100644 index 00000000..723e0b7a --- /dev/null +++ b/btrfs-crc.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2013 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "crc32c.h" +#include "utils.h" + +void usage(void) +{ + printf("usage: btrfs-crc filename\n"); + printf(" print out the btrfs crc for \"filename\"\n"); + printf("usage: btrfs-crc filename -c crc [-s seed] [-l length]\n"); + printf(" brute force search for file names with the given crc\n"); + printf(" -s seed the random seed (default: random)\n"); + printf(" -l length the length of the file names (default: 10)\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int c; + unsigned long checksum = 0; + char *str; + char *buf; + int length = 10; + int seed = getpid() ^ getppid(); + int loop = 0; + int i; + + while ((c = getopt(argc, argv, "l:c:s:h")) != -1) { + switch (c) { + case 'l': + length = atol(optarg); + break; + case 'c': + sscanf(optarg, "%li", &checksum); + loop = 1; + break; + case 's': + seed = atol(optarg); + break; + case 'h': + usage(); + case '?': + return 255; + } + } + + set_argv0(argv); + str = argv[optind]; + + if (!loop) { + if (check_argc_min(argc - optind, 1)) + return 255; + + printf("%12u - %s\n", crc32c(~1, str, strlen(str)), str); + return 0; + } + + buf = malloc(length); + if (!buf) + return -ENOMEM; + srand(seed); + + while (1) { + for (i = 0; i < length; i++) + buf[i] = rand() % 94 + 33; + if (crc32c(~1, buf, length) == checksum) + printf("%12lu - %.*s\n", checksum, length, buf); + } + + return 0; +} diff --git a/btrfs-debug-tree.c b/btrfs-debug-tree.c new file mode 100644 index 00000000..266176f3 --- /dev/null +++ b/btrfs-debug-tree.c @@ -0,0 +1,462 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <uuid/uuid.h> +#include <getopt.h> + +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" +#include "transaction.h" +#include "volumes.h" +#include "utils.h" + +static int print_usage(int ret) +{ + fprintf(stderr, "usage: btrfs-debug-tree [-e] [-d] [-r] [-R] [-u]\n"); + fprintf(stderr, " [-b block_num ] device\n"); + fprintf(stderr, "\t-e : print detailed extents info\n"); + fprintf(stderr, "\t-d : print info of btrfs device and root tree dirs" + " only\n"); + fprintf(stderr, "\t-r : print info of roots only\n"); + fprintf(stderr, "\t-R : print info of roots and root backups\n"); + fprintf(stderr, "\t-u : print info of uuid tree only\n"); + fprintf(stderr, "\t-b block_num : print info of the specified block" + " only\n"); + fprintf(stderr, + "\t-t tree_id : print only the tree with the given id\n"); + fprintf(stderr, "%s\n", PACKAGE_STRING); + exit(ret); +} + +static void print_extents(struct btrfs_root *root, struct extent_buffer *eb) +{ + int i; + u32 nr; + u32 size; + + if (!eb) + return; + + if (btrfs_is_leaf(eb)) { + btrfs_print_leaf(root, eb); + return; + } + + size = btrfs_level_size(root, btrfs_header_level(eb) - 1); + nr = btrfs_header_nritems(eb); + for (i = 0; i < 
nr; i++) { + struct extent_buffer *next = read_tree_block(root, + btrfs_node_blockptr(eb, i), + size, + btrfs_node_ptr_generation(eb, i)); + if (!extent_buffer_uptodate(next)) + continue; + if (btrfs_is_leaf(next) && + btrfs_header_level(eb) != 1) + BUG(); + if (btrfs_header_level(next) != + btrfs_header_level(eb) - 1) + BUG(); + print_extents(root, next); + free_extent_buffer(next); + } +} + +static void print_old_roots(struct btrfs_super_block *super) +{ + struct btrfs_root_backup *backup; + int i; + + for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { + backup = super->super_roots + i; + printf("btrfs root backup slot %d\n", i); + printf("\ttree root gen %llu block %llu\n", + (unsigned long long)btrfs_backup_tree_root_gen(backup), + (unsigned long long)btrfs_backup_tree_root(backup)); + + printf("\t\textent root gen %llu block %llu\n", + (unsigned long long)btrfs_backup_extent_root_gen(backup), + (unsigned long long)btrfs_backup_extent_root(backup)); + + printf("\t\tchunk root gen %llu block %llu\n", + (unsigned long long)btrfs_backup_chunk_root_gen(backup), + (unsigned long long)btrfs_backup_chunk_root(backup)); + + printf("\t\tdevice root gen %llu block %llu\n", + (unsigned long long)btrfs_backup_dev_root_gen(backup), + (unsigned long long)btrfs_backup_dev_root(backup)); + + printf("\t\tcsum root gen %llu block %llu\n", + (unsigned long long)btrfs_backup_csum_root_gen(backup), + (unsigned long long)btrfs_backup_csum_root(backup)); + + printf("\t\tfs root gen %llu block %llu\n", + (unsigned long long)btrfs_backup_fs_root_gen(backup), + (unsigned long long)btrfs_backup_fs_root(backup)); + + printf("\t\t%llu used %llu total %llu devices\n", + (unsigned long long)btrfs_backup_bytes_used(backup), + (unsigned long long)btrfs_backup_total_bytes(backup), + (unsigned long long)btrfs_backup_num_devices(backup)); + } +} + +int main(int ac, char **av) +{ + struct btrfs_root *root; + struct btrfs_fs_info *info; + struct btrfs_path path; + struct btrfs_key key; + struct 
btrfs_root_item ri; + struct extent_buffer *leaf; + struct btrfs_disk_key disk_key; + struct btrfs_key found_key; + char uuidbuf[BTRFS_UUID_UNPARSED_SIZE]; + int ret; + int slot; + int extent_only = 0; + int device_only = 0; + int uuid_tree_only = 0; + int roots_only = 0; + int root_backups = 0; + u64 block_only = 0; + struct btrfs_root *tree_root_scan; + u64 tree_id = 0; + + radix_tree_init(); + + while(1) { + int c; + static const struct option long_options[] = { + { "help", no_argument, NULL, GETOPT_VAL_HELP}, + { NULL, 0, NULL, 0 } + }; + + c = getopt_long(ac, av, "deb:rRut:", long_options, NULL); + if (c < 0) + break; + switch(c) { + case 'e': + extent_only = 1; + break; + case 'd': + device_only = 1; + break; + case 'r': + roots_only = 1; + break; + case 'u': + uuid_tree_only = 1; + break; + case 'R': + roots_only = 1; + root_backups = 1; + break; + case 'b': + block_only = arg_strtou64(optarg); + break; + case 't': + tree_id = arg_strtou64(optarg); + break; + case GETOPT_VAL_HELP: + default: + print_usage(c != GETOPT_VAL_HELP); + } + } + set_argv0(av); + ac = ac - optind; + if (check_argc_exact(ac, 1)) + print_usage(1); + + ret = check_arg_type(av[optind]); + if (ret != BTRFS_ARG_BLKDEV && ret != BTRFS_ARG_REG) { + fprintf(stderr, "'%s' is not a block device or regular file\n", + av[optind]); + exit(1); + } + + info = open_ctree_fs_info(av[optind], 0, 0, OPEN_CTREE_PARTIAL); + if (!info) { + fprintf(stderr, "unable to open %s\n", av[optind]); + exit(1); + } + + root = info->fs_root; + if (!root) { + fprintf(stderr, "unable to open %s\n", av[optind]); + exit(1); + } + + if (block_only) { + leaf = read_tree_block(root, + block_only, + root->leafsize, 0); + + if (extent_buffer_uptodate(leaf) && + btrfs_header_level(leaf) != 0) { + free_extent_buffer(leaf); + leaf = NULL; + } + + if (!leaf) { + leaf = read_tree_block(root, + block_only, + root->nodesize, 0); + } + if (!extent_buffer_uptodate(leaf)) { + fprintf(stderr, "failed to read %llu\n", + (unsigned long 
long)block_only); + goto close_root; + } + btrfs_print_tree(root, leaf, 0); + free_extent_buffer(leaf); + goto close_root; + } + + if (!(extent_only || uuid_tree_only || tree_id)) { + if (roots_only) { + printf("root tree: %llu level %d\n", + (unsigned long long)info->tree_root->node->start, + btrfs_header_level(info->tree_root->node)); + printf("chunk tree: %llu level %d\n", + (unsigned long long)info->chunk_root->node->start, + btrfs_header_level(info->chunk_root->node)); + } else { + if (info->tree_root->node) { + printf("root tree\n"); + btrfs_print_tree(info->tree_root, + info->tree_root->node, 1); + } + + if (info->chunk_root->node) { + printf("chunk tree\n"); + btrfs_print_tree(info->chunk_root, + info->chunk_root->node, 1); + } + } + } + tree_root_scan = info->tree_root; + + btrfs_init_path(&path); +again: + if (!extent_buffer_uptodate(tree_root_scan->node)) + goto no_node; + + /* + * Tree's that are not pointed by the tree of tree roots + */ + if (tree_id && tree_id == BTRFS_ROOT_TREE_OBJECTID) { + if (!info->tree_root->node) { + error("cannot print root tree, invalid pointer"); + goto no_node; + } + printf("root tree\n"); + btrfs_print_tree(info->tree_root, info->tree_root->node, 1); + goto no_node; + } + + if (tree_id && tree_id == BTRFS_CHUNK_TREE_OBJECTID) { + if (!info->chunk_root->node) { + error("cannot print chunk tree, invalid pointer"); + goto no_node; + } + printf("chunk tree\n"); + btrfs_print_tree(info->chunk_root, info->chunk_root->node, 1); + goto no_node; + } + + key.offset = 0; + key.objectid = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_search_slot(NULL, tree_root_scan, &key, &path, 0, 0); + BUG_ON(ret < 0); + while(1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(tree_root_scan, &path); + if (ret != 0) + break; + leaf = path.nodes[0]; + slot = path.slots[0]; + } + btrfs_item_key(leaf, &disk_key, path.slots[0]); + 
btrfs_disk_key_to_cpu(&found_key, &disk_key); + if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { + unsigned long offset; + struct extent_buffer *buf; + int skip = extent_only | device_only | uuid_tree_only; + + offset = btrfs_item_ptr_offset(leaf, slot); + read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + buf = read_tree_block(tree_root_scan, + btrfs_root_bytenr(&ri), + btrfs_level_size(tree_root_scan, + btrfs_root_level(&ri)), + 0); + if (!extent_buffer_uptodate(buf)) + goto next; + if (tree_id && found_key.objectid != tree_id) { + free_extent_buffer(buf); + goto next; + } + + switch(found_key.objectid) { + case BTRFS_ROOT_TREE_OBJECTID: + if (!skip) + printf("root"); + break; + case BTRFS_EXTENT_TREE_OBJECTID: + if (!device_only && !uuid_tree_only) + skip = 0; + if (!skip) + printf("extent"); + break; + case BTRFS_CHUNK_TREE_OBJECTID: + if (!skip) { + printf("chunk"); + } + break; + case BTRFS_DEV_TREE_OBJECTID: + if (!uuid_tree_only) + skip = 0; + if (!skip) + printf("device"); + break; + case BTRFS_FS_TREE_OBJECTID: + if (!skip) { + printf("fs"); + } + break; + case BTRFS_ROOT_TREE_DIR_OBJECTID: + skip = 0; + printf("directory"); + break; + case BTRFS_CSUM_TREE_OBJECTID: + if (!skip) { + printf("checksum"); + } + break; + case BTRFS_ORPHAN_OBJECTID: + if (!skip) { + printf("orphan"); + } + break; + case BTRFS_TREE_LOG_OBJECTID: + if (!skip) { + printf("log"); + } + break; + case BTRFS_TREE_LOG_FIXUP_OBJECTID: + if (!skip) { + printf("log fixup"); + } + break; + case BTRFS_TREE_RELOC_OBJECTID: + if (!skip) { + printf("reloc"); + } + break; + case BTRFS_DATA_RELOC_TREE_OBJECTID: + if (!skip) { + printf("data reloc"); + } + break; + case BTRFS_EXTENT_CSUM_OBJECTID: + if (!skip) { + printf("extent checksum"); + } + break; + case BTRFS_QUOTA_TREE_OBJECTID: + if (!skip) { + printf("quota"); + } + break; + case BTRFS_UUID_TREE_OBJECTID: + if (!extent_only && !device_only) + skip = 0; + if (!skip) + printf("uuid"); + break; + case 
BTRFS_FREE_SPACE_TREE_OBJECTID: + if (!skip) + printf("free space"); + break; + case BTRFS_MULTIPLE_OBJECTIDS: + if (!skip) { + printf("multiple"); + } + break; + default: + if (!skip) { + printf("file"); + } + } + if (extent_only && !skip) { + print_extents(tree_root_scan, buf); + } else if (!skip) { + printf(" tree "); + btrfs_print_key(&disk_key); + if (roots_only) { + printf(" %llu level %d\n", + (unsigned long long)buf->start, + btrfs_header_level(buf)); + } else { + printf(" \n"); + btrfs_print_tree(tree_root_scan, buf, 1); + } + } + free_extent_buffer(buf); + } +next: + path.slots[0]++; + } +no_node: + btrfs_release_path(&path); + + if (tree_root_scan == info->tree_root && + info->log_root_tree) { + tree_root_scan = info->log_root_tree; + goto again; + } + + if (extent_only || device_only || uuid_tree_only) + goto close_root; + + if (root_backups) + print_old_roots(info->super_copy); + + printf("total bytes %llu\n", + (unsigned long long)btrfs_super_total_bytes(info->super_copy)); + printf("bytes used %llu\n", + (unsigned long long)btrfs_super_bytes_used(info->super_copy)); + uuidbuf[BTRFS_UUID_UNPARSED_SIZE - 1] = '\0'; + uuid_unparse(info->super_copy->fsid, uuidbuf); + printf("uuid %s\n", uuidbuf); + printf("%s\n", PACKAGE_STRING); +close_root: + ret = close_ctree(root); + btrfs_close_all_devices(); + return ret; +} diff --git a/btrfs-debugfs b/btrfs-debugfs new file mode 100755 index 00000000..cf1d285c --- /dev/null +++ b/btrfs-debugfs @@ -0,0 +1,296 @@ +#!/usr/bin/env python2 +# +# Simple python program to print out all the extents of a single file +# LGPLv2 license +# Copyright Facebook 2014 + +import sys,os,struct,fcntl,ctypes,stat + +# helpers for max ints +maxu64 = (1L << 64) - 1 +maxu32 = (1L << 32) - 1 + +# the inode (like form stat) +BTRFS_INODE_ITEM_KEY = 1 +# backref to the directory +BTRFS_INODE_REF_KEY = 12 +# backref to the directory v2 +BTRFS_INODE_EXTREF_KEY = 13 +# xattr items +BTRFS_XATTR_ITEM_KEY = 24 +# orphans for list files 
+BTRFS_ORPHAN_ITEM_KEY = 48 +# treelog items for dirs +BTRFS_DIR_LOG_ITEM_KEY = 60 +BTRFS_DIR_LOG_INDEX_KEY = 72 +# dir items and dir indexes both hold filenames +BTRFS_DIR_ITEM_KEY = 84 +BTRFS_DIR_INDEX_KEY = 96 +# these are the file extent pointers +BTRFS_EXTENT_DATA_KEY = 108 +# csums +BTRFS_EXTENT_CSUM_KEY = 128 +# root item for subvols and snapshots +BTRFS_ROOT_ITEM_KEY = 132 +# root item backrefs +BTRFS_ROOT_BACKREF_KEY = 144 +BTRFS_ROOT_REF_KEY = 156 +# each allocated extent has an extent item +BTRFS_EXTENT_ITEM_KEY = 168 +# optimized extents for metadata only +BTRFS_METADATA_ITEM_KEY = 169 +# backrefs for extents +BTRFS_TREE_BLOCK_REF_KEY = 176 +BTRFS_EXTENT_DATA_REF_KEY = 178 +BTRFS_EXTENT_REF_V0_KEY = 180 +BTRFS_SHARED_BLOCK_REF_KEY = 182 +BTRFS_SHARED_DATA_REF_KEY = 184 +# one of these for each block group +BTRFS_BLOCK_GROUP_ITEM_KEY = 192 +# dev extents records which part of each device is allocated +BTRFS_DEV_EXTENT_KEY = 204 +# dev items describe devs +BTRFS_DEV_ITEM_KEY = 216 +# one for each chunk +BTRFS_CHUNK_ITEM_KEY = 228 +# qgroup info +BTRFS_QGROUP_STATUS_KEY = 240 +BTRFS_QGROUP_INFO_KEY = 242 +BTRFS_QGROUP_LIMIT_KEY = 244 +BTRFS_QGROUP_RELATION_KEY = 246 +# records balance progress +BTRFS_BALANCE_ITEM_KEY = 248 +# stats on device errors +BTRFS_DEV_STATS_KEY = 249 +BTRFS_DEV_REPLACE_KEY = 250 +BTRFS_STRING_ITEM_KEY = 253 + +# in the kernel sources, this is flattened +# btrfs_ioctl_search_args_v2. It includes both the btrfs_ioctl_search_key +# and the buffer. We're using a 64K buffer size. 
# the search ioctl returns one header for each item
#
# print_file_extents() copies one of these out of the results buffer per
# item and then skips 'len' bytes to reach the next header, so 'len' is the
# size in bytes of the item data that follows the header.
class btrfs_ioctl_search_header(ctypes.Structure):
    # no padding between the fields
    _pack_ = 1
    _fields_ = [ ("transid", ctypes.c_ulonglong),
                 # objectid/type/offset: key of the returned item
                 ("objectid", ctypes.c_ulonglong),
                 ("offset", ctypes.c_ulonglong),
                 ("type", ctypes.c_uint),
                 # number of item bytes stored right after this header
                 ("len", ctypes.c_uint),
               ]
# this moves the search key forward by one.  The minimum key is the
# composite (min_objectid, min_type, min_offset): when a lower field is
# already at its maximum the next higher field is bumped and every lower
# field restarts at zero, so no key in between is skipped.
#
# If the end result is still a valid search key (all mins less than or
# equal to all maxes), we return True.  Otherwise False
#
def advance_search(search):
    args = search.args

    if args.min_offset < maxu64:
        args.min_offset += 1
    elif args.min_type < 255:
        # offset is exhausted: carry into the type
        args.min_type += 1
        args.min_offset = 0
    elif args.min_objectid < maxu64:
        # offset and type are exhausted: carry into the objectid
        args.min_objectid += 1
        args.min_type = 0
        args.min_offset = 0
    else:
        # the whole key space has been walked
        return False

    if args.min_offset > args.max_offset:
        return False
    if args.min_type > args.max_type:
        return False
    if args.min_objectid > args.max_objectid:
        return False

    return True
+ # this may or may not be compressed + inline_len = header.len - 21 + if fi.compression: + ram_bytes = fi.ram_bytes + else: + ram_bytes = inline_len + print "(%Lu %Lu): ram %Lu disk 0 disk_size %Lu -- inline" % \ + (header.objectid, header.offset, ram_bytes, inline_len) + extent_hash[-1] = inline_len + return + + if fi.disk_bytenr == 0: + tag = " -- hole" + else: + tag = "" + print "(%Lu %Lu): ram %Lu disk %Lu disk_size %Lu%s" % (header.objectid, + header.offset, fi.num_bytes, fi.disk_bytenr, fi.disk_num_bytes, tag) + + if fi.disk_bytenr: + extent_hash[fi.disk_bytenr] = fi.disk_num_bytes + +# open 'filename' and run the search ioctl against it, printing all the extents +# we find +def print_file_extents(filename): + extent_hash = {} + + s = btrfs_ioctl_search() + s.args.min_type = BTRFS_EXTENT_DATA_KEY + s.args.max_type = BTRFS_EXTENT_DATA_KEY + + try: + fd = os.open(filename, os.O_RDONLY) + st = os.fstat(fd) + except Exception, e: + sys.stderr.write("Failed to open %s (%s)\n" % (filename, e)) + return -1 + + if not stat.S_ISREG(st.st_mode): + sys.stderr.write("%s not a regular file\n" % filename) + return 0 + + s.args.min_objectid = st.st_ino + s.args.max_objectid = st.st_ino + + size = st.st_size + + while True: + try: + s.search(fd) + except Exception, e: + sys.stderr.write("Search ioctl failed for %s (%s)\n" % (filename, e)) + return -1 + + if s.args.nr_items == 0: + break + + # p is the results buffer from the kernel + p = ctypes.addressof(s.args.buf) + header = btrfs_ioctl_search_header() + header_size = ctypes.sizeof(header) + h = ctypes.addressof(header) + p_left = args_buffer_size + + for x in xrange(0, s.args.nr_items): + # for each item, copy the header from the buffer into + # our header struct. 
+ ctypes.memmove(h, p, header_size) + p += header_size + p_left -= header_size + + # this would be a kernel bug it shouldn't be sending malformed + # items + if p_left <= 0: + break + + if header.type == BTRFS_EXTENT_DATA_KEY: + fi = btrfs_file_extent_item() + + # this would also be a kernel bug + if p_left < ctypes.sizeof(fi): + break + + # Copy the file item out of the results buffer + ctypes.memmove(ctypes.addressof(fi), p, ctypes.sizeof(fi)) + print_one_extent(header, fi, extent_hash) + + p += header.len + p_left -= header.len + if p_left <= 0: + break + + s.args.min_offset = header.offset + + if not advance_search(s): + break + + total_on_disk = 0 + total_extents = 0 + for x in extent_hash.itervalues(): + total_on_disk += x + total_extents += 1 + + # don't divide by zero + if total_on_disk == 0: + total_on_disk = 1 + + print "file: %s extents %Lu disk size %Lu logical size %Lu ratio %.2f" % \ + (filename, total_extents, total_on_disk, st.st_size, + float(st.st_size) / float(total_on_disk)) + return 0 + +if len(sys.argv) == 1: + sys.stderr.write("Usage: btrfs-debug filename ...\n") + sys.exit(1) + +for f in sys.argv[1:]: + print_file_extents(f) diff --git a/btrfs-find-root.c b/btrfs-find-root.c new file mode 100644 index 00000000..2d5bbb2a --- /dev/null +++ b/btrfs-find-root.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2011 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
/* Print the one-line command synopsis on stderr. */
static void usage(void)
{
	static const char synopsis[] =
		"Usage: find-roots [-a] [-o search_objectid] "
		"[ -g search_generation ] [ -l search_level ] <device>\n";

	fputs(synopsis, stderr);
}
+ */ +static void get_root_gen_and_level(u64 objectid, struct btrfs_fs_info *fs_info, + u64 *ret_gen, u8 *ret_level) +{ + struct btrfs_super_block *super = fs_info->super_copy; + u64 gen = (u64)-1; + u8 level = (u8)-1; + + switch (objectid) { + case BTRFS_ROOT_TREE_OBJECTID: + level = btrfs_super_root_level(super); + gen = btrfs_super_generation(super); + break; + case BTRFS_CHUNK_TREE_OBJECTID: + level = btrfs_super_chunk_root_level(super); + gen = btrfs_super_chunk_root_generation(super); + break; + case BTRFS_TREE_LOG_OBJECTID: + level = btrfs_super_log_root_level(super); + gen = btrfs_super_log_root_transid(super); + break; + case BTRFS_UUID_TREE_OBJECTID: + gen = btrfs_super_uuid_tree_generation(super); + break; + } + if (gen != (u64)-1) { + printf("Superblock thinks the generation is %llu\n", gen); + if (ret_gen) + *ret_gen = gen; + } else { + printf("Superblock doesn't contain generation info for root %llu\n", + objectid); + } + if (level != (u8)-1) { + printf("Superblock thinks the level is %u\n", level); + if (ret_level) + *ret_level = level; + } else { + printf("Superblock doesn't contain the level info for root %llu\n", + objectid); + } +} + +static void print_one_result(struct cache_extent *tree_block, + u8 level, u64 generation, + struct btrfs_find_root_filter *filter) +{ + int unsure = 0; + + if (filter->match_gen == (u64)-1 || filter->match_level == (u8)-1) + unsure = 1; + printf("Well block %llu(gen: %llu level: %u) seems good, ", + tree_block->start, generation, level); + if (unsure) + printf("but we are unsure about the correct generation/level\n"); + else if (level == filter->match_level && + generation == filter->match_gen) + printf("and it matches superblock\n"); + else + printf("but generation/level doesn't match, want gen: %llu level: %u\n", + filter->match_gen, filter->match_level); +} + +static void print_find_root_result(struct cache_tree *result, + struct btrfs_find_root_filter *filter) +{ + struct btrfs_find_root_gen_cache *gen_cache; + 
struct cache_extent *cache; + struct cache_extent *tree_block; + u64 generation = 0; + u8 level = 0; + + for (cache = last_cache_extent(result); + cache; cache = prev_cache_extent(cache)) { + gen_cache = container_of(cache, + struct btrfs_find_root_gen_cache, cache); + level = gen_cache->highest_level; + generation = cache->start; + /* For exact found one, skip it as it's output before */ + if (level == filter->match_level && + generation == filter->match_gen && + !filter->search_all) + continue; + for (tree_block = last_cache_extent(&gen_cache->eb_tree); + tree_block; tree_block = prev_cache_extent(tree_block)) + print_one_result(tree_block, level, generation, filter); + } +} + +int main(int argc, char **argv) +{ + struct btrfs_fs_info *fs_info; + struct btrfs_find_root_filter filter = {0}; + struct cache_tree result; + struct cache_extent *found; + int ret; + + /* Default to search root tree */ + filter.objectid = BTRFS_ROOT_TREE_OBJECTID; + filter.match_gen = (u64)-1; + filter.match_level = (u8)-1; + while (1) { + static const struct option long_options[] = { + { "help", no_argument, NULL, GETOPT_VAL_HELP}, + { NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "al:o:g:", long_options, NULL); + + if (c < 0) + break; + + switch (c) { + case 'a': + filter.search_all = 1; + break; + case 'o': + filter.objectid = arg_strtou64(optarg); + break; + case 'g': + filter.generation = arg_strtou64(optarg); + break; + case 'l': + filter.level = arg_strtou64(optarg); + break; + case GETOPT_VAL_HELP: + default: + usage(); + exit(c != GETOPT_VAL_HELP); + } + } + + set_argv0(argv); + argc = argc - optind; + if (check_argc_min(argc, 1)) { + usage(); + exit(1); + } + + fs_info = open_ctree_fs_info(argv[optind], 0, 0, + OPEN_CTREE_CHUNK_ROOT_ONLY | + OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR); + if (!fs_info) { + error("open ctree failed"); + exit(1); + } + cache_tree_init(&result); + + get_root_gen_and_level(filter.objectid, fs_info, + &filter.match_gen, &filter.match_level); + 
ret = btrfs_find_root_search(fs_info, &filter, &result, &found); + if (ret < 0) { + fprintf(stderr, "Fail to search the tree root: %s\n", + strerror(-ret)); + goto out; + } + if (ret > 0) { + printf("Found tree root at %llu gen %llu level %u\n", + found->start, filter.match_gen, filter.match_level); + ret = 0; + } + print_find_root_result(&result, &filter); +out: + btrfs_find_root_free(&result); + close_ctree_fs_info(fs_info); + btrfs_close_all_devices(); + return ret; +} diff --git a/btrfs-fragments.c b/btrfs-fragments.c new file mode 100644 index 00000000..17768c3f --- /dev/null +++ b/btrfs-fragments.c @@ -0,0 +1,452 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <dirent.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <libgen.h> +#include <limits.h> +#include <uuid/uuid.h> +#include <ctype.h> + +#include <gd.h> + +#undef ULONG_MAX + +#include "kerncompat.h" +#include "ctree.h" +#include "ioctl.h" +#include "utils.h" + +static int use_color; +static void +push_im(gdImagePtr im, char *name, char *dir) +{ + char fullname[2000]; + FILE *pngout; + + if (!im) + return; + + snprintf(fullname, sizeof(fullname), "%s/%s", dir, name); + pngout = fopen(fullname, "w"); + if (!pngout) { + printf("unable to create file %s\n", fullname); + exit(1); + } + + gdImagePng(im, pngout); + + fclose(pngout); + gdImageDestroy(im); +} + +static char * +chunk_type(u64 flags) +{ + switch (flags & (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_DATA | + BTRFS_BLOCK_GROUP_METADATA)) { + case BTRFS_BLOCK_GROUP_SYSTEM: + return "system"; + case BTRFS_BLOCK_GROUP_DATA: + return "data"; + case BTRFS_BLOCK_GROUP_METADATA: + return "metadata"; + case BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA: + return "mixed"; + default: + return "invalid"; + } +} + +static void +print_bg(FILE *html, char *name, u64 start, u64 len, u64 used, u64 flags, + u64 areas) +{ + double frag = (double)areas / (len / 4096) * 2; + + fprintf(html, "<p>%s chunk starts at %lld, size is %s, %.2f%% used, " + "%.2f%% fragmented</p>\n", chunk_type(flags), start, + pretty_size(len), 100.0 * used / len, 100.0 * frag); + fprintf(html, "<img src=\"%s\" border=\"1\" />\n", name); +} + +enum tree_colors { + COLOR_ROOT = 0, + COLOR_EXTENT, + COLOR_CHUNK, + COLOR_DEV, + COLOR_FS, + COLOR_CSUM, + COLOR_RELOC, + COLOR_DATA, + COLOR_UNKNOWN, + COLOR_MAX +}; + +static int +get_color(struct btrfs_extent_item *item, int len) +{ + u64 refs; + u64 flags; + u8 type; + u64 offset; + struct btrfs_extent_inline_ref *ref; + + refs = 
btrfs_stack_extent_refs(item); + flags = btrfs_stack_extent_flags(item); + + if (flags & BTRFS_EXTENT_FLAG_DATA) + return COLOR_DATA; + if (refs > 1) { + /* this must be an fs tree */ + return COLOR_FS; + } + + ref = (void *)item + sizeof(struct btrfs_extent_item) + + sizeof(struct btrfs_tree_block_info); + type = btrfs_stack_extent_inline_ref_type(ref); + offset = btrfs_stack_extent_inline_ref_offset(ref); + + switch (type) { + case BTRFS_EXTENT_DATA_REF_KEY: + return COLOR_DATA; + case BTRFS_SHARED_BLOCK_REF_KEY: + case BTRFS_SHARED_DATA_REF_KEY: + return COLOR_FS; + case BTRFS_TREE_BLOCK_REF_KEY: + break; + default: + return COLOR_UNKNOWN; + } + + switch (offset) { + case BTRFS_ROOT_TREE_OBJECTID: + return COLOR_ROOT; + case BTRFS_EXTENT_TREE_OBJECTID: + return COLOR_EXTENT; + case BTRFS_CHUNK_TREE_OBJECTID: + return COLOR_CHUNK; + case BTRFS_DEV_TREE_OBJECTID: + return COLOR_DEV; + case BTRFS_FS_TREE_OBJECTID: + return COLOR_FS; + case BTRFS_CSUM_TREE_OBJECTID: + return COLOR_CSUM; + case BTRFS_DATA_RELOC_TREE_OBJECTID: + return COLOR_RELOC; + } + + return COLOR_UNKNOWN; +} + +static void +init_colors(gdImagePtr im, int *colors) +{ + colors[COLOR_ROOT] = gdImageColorAllocate(im, 255, 0, 0); + colors[COLOR_EXTENT] = gdImageColorAllocate(im, 0, 255, 0); + colors[COLOR_CHUNK] = gdImageColorAllocate(im, 255, 0, 0); + colors[COLOR_DEV] = gdImageColorAllocate(im, 255, 0, 0); + colors[COLOR_FS] = gdImageColorAllocate(im, 0, 0, 0); + colors[COLOR_CSUM] = gdImageColorAllocate(im, 0, 0, 255); + colors[COLOR_RELOC] = gdImageColorAllocate(im, 128, 128, 128); + colors[COLOR_DATA] = gdImageColorAllocate(im, 100, 0, 0); + colors[COLOR_UNKNOWN] = gdImageColorAllocate(im, 50, 50, 50); +} + +int +list_fragments(int fd, u64 flags, char *dir) +{ + int ret; + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + int i; + struct btrfs_ioctl_search_header *sh; + unsigned long off = 0; + int bgnum = 0; + u64 bgstart = 0; + u64 bglen = 0; + u64 bgend = 
0; + u64 bgflags = 0; + u64 bgused = 0; + u64 saved_extent = 0; + u64 saved_len = 0; + int saved_color = 0; + u64 last_end = 0; + u64 areas = 0; + long px; + char name[1000]; + FILE *html; + int colors[COLOR_MAX]; + + gdImagePtr im = NULL; + int black = 0; + int width = 800; + + snprintf(name, sizeof(name), "%s/index.html", dir); + html = fopen(name, "w"); + if (!html) { + printf("unable to create %s\n", name); + exit(1); + } + + fprintf(html, "<html><header>\n"); + fprintf(html, "<title>Btrfs Block Group Allocation Map</title>\n"); + fprintf(html, "<style type=\"text/css\">\n"); + fprintf(html, "img {margin-left: 1em; margin-bottom: 2em;}\n"); + fprintf(html, "</style>\n"); + fprintf(html, "</header><body>\n"); + + memset(&args, 0, sizeof(args)); + + sk->tree_id = 2; + sk->max_type = -1; + sk->min_type = 0; + sk->max_objectid = (u64)-1; + sk->max_offset = (u64)-1; + sk->max_transid = (u64)-1; + + /* just a big number, doesn't matter much */ + sk->nr_items = 4096; + + while(1) { + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args); + if (ret < 0) { + fprintf(stderr, "ERROR: can't perform the search\n"); + goto out_close; + } + /* the ioctl returns the number of item it found in nr_items */ + if (sk->nr_items == 0) + break; + + off = 0; + for (i = 0; i < sk->nr_items; i++) { + int j; + + sh = (struct btrfs_ioctl_search_header *)(args.buf + + off); + off += sizeof(*sh); + if (sh->type == BTRFS_BLOCK_GROUP_ITEM_KEY) { + struct btrfs_block_group_item *bg; + + if (im) { + push_im(im, name, dir); + im = NULL; + + print_bg(html, name, bgstart, bglen, + bgused, bgflags, areas); + } + + ++bgnum; + + bg = (struct btrfs_block_group_item *) + (args.buf + off); + bgflags = btrfs_block_group_flags(bg); + bgused = btrfs_block_group_used(bg); + + printf("found block group %lld len %lld " + "flags %lld\n", sh->objectid, + sh->offset, bgflags); + if (!(bgflags & flags)) { + /* skip this block group */ + sk->min_objectid = sh->objectid + + sh->offset; + sk->min_type = 0; + sk->min_offset 
= 0; + break; + } + im = gdImageCreate(width, + (sh->offset / 4096 + 799) / width); + + black = gdImageColorAllocate(im, 0, 0, 0); + + for (j = 0; j < ARRAY_SIZE(colors); ++j) + colors[j] = black; + + init_colors(im, colors); + bgstart = sh->objectid; + bglen = sh->offset; + bgend = bgstart + bglen; + + snprintf(name, sizeof(name), "bg%d.png", bgnum); + + last_end = bgstart; + if (saved_len) { + px = (saved_extent - bgstart) / 4096; + for (j = 0; j < saved_len / 4096; ++j) { + int x = (px + j) % width; + int y = (px + j) / width; + gdImageSetPixel(im, x, y, + saved_color); + } + last_end += saved_len; + } + areas = 0; + saved_len = 0; + } + if (im && sh->type == BTRFS_EXTENT_ITEM_KEY) { + int c; + struct btrfs_extent_item *item; + + item = (struct btrfs_extent_item *) + (args.buf + off); + + if (use_color) + c = colors[get_color(item, sh->len)]; + else + c = black; + if (sh->objectid > bgend) { + printf("WARN: extent %lld is without " + "block group\n", sh->objectid); + goto skip; + } + if (sh->objectid == bgend) { + saved_extent = sh->objectid; + saved_len = sh->offset; + saved_color = c; + goto skip; + } + px = (sh->objectid - bgstart) / 4096; + for (j = 0; j < sh->offset / 4096; ++j) { + int x = (px + j) % width; + int y = (px + j) / width; + gdImageSetPixel(im, x, y, c); + } + if (sh->objectid != last_end) + ++areas; + last_end = sh->objectid + sh->offset; +skip:; + } + off += sh->len; + + /* + * record the mins in sk so we can make sure the + * next search doesn't repeat this root + */ + sk->min_objectid = sh->objectid; + sk->min_type = sh->type; + sk->min_offset = sh->offset; + } + sk->nr_items = 4096; + + /* increment by one */ + if (++sk->min_offset == 0) + if (++sk->min_type == 0) + if (++sk->min_objectid == 0) + break; + } + + if (im) { + push_im(im, name, dir); + print_bg(html, name, bgstart, bglen, bgused, bgflags, areas); + } + + if (use_color) { + fprintf(html, "<p>"); + fprintf(html, "data - dark red, "); + fprintf(html, "fs tree - black, "); + 
fprintf(html, "extent tree - green, "); + fprintf(html, "csum tree - blue, "); + fprintf(html, "reloc tree - grey, "); + fprintf(html, "other trees - red, "); + fprintf(html, "unknown tree - dark grey"); + fprintf(html, "</p>"); + } + fprintf(html, "</body></html>\n"); + +out_close: + fclose(html); + + return ret; +} + +void +usage(void) +{ + printf("usage: btrfs-fragments [options] <path>\n"); + printf(" -c use color\n"); + printf(" -d print data chunks\n"); + printf(" -m print metadata chunks\n"); + printf(" -s print system chunks\n"); + printf(" (default is data+metadata)\n"); + printf(" -o <dir> output directory, default is html\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + char *path; + int fd; + int ret; + u64 flags = 0; + char *dir = "html"; + DIR *dirstream = NULL; + + while (1) { + int c = getopt(argc, argv, "cmso:h"); + if (c < 0) + break; + switch (c) { + case 'c': + use_color = 1; + break; + case 'd': + flags |= BTRFS_BLOCK_GROUP_DATA; + break; + case 'm': + flags |= BTRFS_BLOCK_GROUP_METADATA; + break; + case 's': + flags |= BTRFS_BLOCK_GROUP_SYSTEM; + break; + case 'o': + dir = optarg; + break; + case 'h': + default: + usage(); + } + } + + set_argv0(argv); + argc = argc - optind; + if (check_argc_min(argc, 1)) { + usage(); + exit(1); + } + + path = argv[optind++]; + + fd = btrfs_open_dir(path, &dirstream, 1); + if (fd < 0) + exit(1); + + if (flags == 0) + flags = BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA; + + ret = list_fragments(fd, flags, dir); + close_file_or_dir(fd, dirstream); + if (ret) + exit(1); + + exit(0); +} diff --git a/btrfs-image.c b/btrfs-image.c new file mode 100644 index 00000000..c7fa18fb --- /dev/null +++ b/btrfs-image.c @@ -0,0 +1,2880 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
/*
 * One chunk mapping, linked into two rb-trees at once so it can be looked
 * up by either address.
 */
struct fs_chunk {
	/* first logical address covered by the chunk */
	u64 logical;
	/* physical address that 'logical' maps to */
	u64 physical;
	/* length of the chunk; used for both the logical and physical range */
	u64 bytes;
	/* rb-tree node ordered by 'logical' (compared by chunk_cmp) */
	struct rb_node l;
	/* rb-tree node ordered by 'physical' (compared by physical_cmp) */
	struct rb_node p;
	/* list linkage -- presumably for mdrestore_struct.overlapping_chunks; TODO confirm */
	struct list_head list;
};
*threads; + size_t num_threads; + pthread_mutex_t mutex; + pthread_cond_t cond; + struct rb_root name_tree; + + struct list_head list; + struct list_head ordered; + size_t num_items; + size_t num_ready; + + u64 pending_start; + u64 pending_size; + + int compress_level; + int done; + int data; + int sanitize_names; + + int error; +}; + +struct name { + struct rb_node n; + char *val; + char *sub; + u32 len; +}; + +struct mdrestore_struct { + FILE *in; + FILE *out; + + pthread_t *threads; + size_t num_threads; + pthread_mutex_t mutex; + pthread_cond_t cond; + + struct rb_root chunk_tree; + struct rb_root physical_tree; + struct list_head list; + struct list_head overlapping_chunks; + size_t num_items; + u32 leafsize; + u64 devid; + u64 alloced_chunks; + u64 last_physical_offset; + u8 uuid[BTRFS_UUID_SIZE]; + u8 fsid[BTRFS_FSID_SIZE]; + + int compress_method; + int done; + int error; + int old_restore; + int fixup_offset; + int multi_devices; + int clear_space_cache; + struct btrfs_fs_info *info; +}; + +static int search_for_chunk_blocks(struct mdrestore_struct *mdres, + u64 search, u64 cluster_bytenr); +static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size); + +static void csum_block(u8 *buf, size_t len) +{ + char result[BTRFS_CRC32_SIZE]; + u32 crc = ~(u32)0; + crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, result); + memcpy(buf, result, BTRFS_CRC32_SIZE); +} + +static int has_name(struct btrfs_key *key) +{ + switch (key->type) { + case BTRFS_DIR_ITEM_KEY: + case BTRFS_DIR_INDEX_KEY: + case BTRFS_INODE_REF_KEY: + case BTRFS_INODE_EXTREF_KEY: + case BTRFS_XATTR_ITEM_KEY: + return 1; + default: + break; + } + + return 0; +} + +static char *generate_garbage(u32 name_len) +{ + char *buf = malloc(name_len); + int i; + + if (!buf) + return NULL; + + for (i = 0; i < name_len; i++) { + char c = rand() % 94 + 33; + + if (c == '/') + c++; + buf[i] = c; + } + + return buf; +} + +static int name_cmp(struct rb_node *a, struct 
rb_node *b, int fuzz) +{ + struct name *entry = rb_entry(a, struct name, n); + struct name *ins = rb_entry(b, struct name, n); + u32 len; + + len = min(ins->len, entry->len); + return memcmp(ins->val, entry->val, len); +} + +static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz) +{ + struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l); + struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l); + + if (fuzz && ins->logical >= entry->logical && + ins->logical < entry->logical + entry->bytes) + return 0; + + if (ins->logical < entry->logical) + return -1; + else if (ins->logical > entry->logical) + return 1; + return 0; +} + +static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz) +{ + struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p); + struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p); + + if (fuzz && ins->physical >= entry->physical && + ins->physical < entry->physical + entry->bytes) + return 0; + + if (fuzz && entry->physical >= ins->physical && + entry->physical < ins->physical + ins->bytes) + return 0; + + if (ins->physical < entry->physical) + return -1; + else if (ins->physical > entry->physical) + return 1; + return 0; +} + +static void tree_insert(struct rb_root *root, struct rb_node *ins, + int (*cmp)(struct rb_node *a, struct rb_node *b, + int fuzz)) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + int dir; + + while(*p) { + parent = *p; + + dir = cmp(*p, ins, 1); + if (dir < 0) + p = &(*p)->rb_left; + else if (dir > 0) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(ins, parent, p); + rb_insert_color(ins, root); +} + +static struct rb_node *tree_search(struct rb_root *root, + struct rb_node *search, + int (*cmp)(struct rb_node *a, + struct rb_node *b, int fuzz), + int fuzz) +{ + struct rb_node *n = root->rb_node; + int dir; + + while (n) { + dir = cmp(n, search, fuzz); + if (dir < 0) + n = n->rb_left; + else if (dir > 0) + n = n->rb_right; + else + return n; + } + + 
return NULL; +} + +static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size) +{ + struct fs_chunk *fs_chunk; + struct rb_node *entry; + struct fs_chunk search; + u64 offset; + + if (logical == BTRFS_SUPER_INFO_OFFSET) + return logical; + + search.logical = logical; + entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1); + if (!entry) { + if (mdres->in != stdin) + printf("Couldn't find a chunk, using logical\n"); + return logical; + } + fs_chunk = rb_entry(entry, struct fs_chunk, l); + if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical) + BUG(); + offset = search.logical - fs_chunk->logical; + + *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical); + return fs_chunk->physical + offset; +} + + +static char *find_collision(struct metadump_struct *md, char *name, + u32 name_len) +{ + struct name *val; + struct rb_node *entry; + struct name tmp; + unsigned long checksum; + int found = 0; + int i; + + tmp.val = name; + tmp.len = name_len; + entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0); + if (entry) { + val = rb_entry(entry, struct name, n); + free(name); + return val->sub; + } + + val = malloc(sizeof(struct name)); + if (!val) { + fprintf(stderr, "Couldn't sanitize name, enomem\n"); + free(name); + return NULL; + } + + memset(val, 0, sizeof(*val)); + + val->val = name; + val->len = name_len; + val->sub = malloc(name_len); + if (!val->sub) { + fprintf(stderr, "Couldn't sanitize name, enomem\n"); + free(val); + free(name); + return NULL; + } + + checksum = crc32c(~1, val->val, name_len); + memset(val->sub, ' ', name_len); + i = 0; + while (1) { + if (crc32c(~1, val->sub, name_len) == checksum && + memcmp(val->sub, val->val, val->len)) { + found = 1; + break; + } + + if (val->sub[i] == 127) { + do { + i++; + if (i >= name_len) + break; + } while (val->sub[i] == 127); + + if (i >= name_len) + break; + val->sub[i]++; + if (val->sub[i] == '/') + val->sub[i]++; + memset(val->sub, ' 
', i); + i = 0; + continue; + } else { + val->sub[i]++; + if (val->sub[i] == '/') + val->sub[i]++; + } + } + + if (!found) { + fprintf(stderr, "Couldn't find a collision for '%.*s', " + "generating normal garbage, it won't match indexes\n", + val->len, val->val); + for (i = 0; i < name_len; i++) { + char c = rand() % 94 + 33; + + if (c == '/') + c++; + val->sub[i] = c; + } + } + + tree_insert(&md->name_tree, &val->n, name_cmp); + return val->sub; +} + +static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb, + int slot) +{ + struct btrfs_dir_item *dir_item; + char *buf; + char *garbage; + unsigned long name_ptr; + u32 total_len; + u32 cur = 0; + u32 this_len; + u32 name_len; + int free_garbage = (md->sanitize_names == 1); + + dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); + total_len = btrfs_item_size_nr(eb, slot); + while (cur < total_len) { + this_len = sizeof(*dir_item) + + btrfs_dir_name_len(eb, dir_item) + + btrfs_dir_data_len(eb, dir_item); + name_ptr = (unsigned long)(dir_item + 1); + name_len = btrfs_dir_name_len(eb, dir_item); + + if (md->sanitize_names > 1) { + buf = malloc(name_len); + if (!buf) { + fprintf(stderr, "Couldn't sanitize name, " + "enomem\n"); + return; + } + read_extent_buffer(eb, buf, name_ptr, name_len); + garbage = find_collision(md, buf, name_len); + } else { + garbage = generate_garbage(name_len); + } + if (!garbage) { + fprintf(stderr, "Couldn't sanitize name, enomem\n"); + return; + } + write_extent_buffer(eb, garbage, name_ptr, name_len); + cur += this_len; + dir_item = (struct btrfs_dir_item *)((char *)dir_item + + this_len); + if (free_garbage) + free(garbage); + } +} + +static void sanitize_inode_ref(struct metadump_struct *md, + struct extent_buffer *eb, int slot, int ext) +{ + struct btrfs_inode_extref *extref; + struct btrfs_inode_ref *ref; + char *garbage, *buf; + unsigned long ptr; + unsigned long name_ptr; + u32 item_size; + u32 cur_offset = 0; + int len; + int free_garbage = 
(md->sanitize_names == 1); + + item_size = btrfs_item_size_nr(eb, slot); + ptr = btrfs_item_ptr_offset(eb, slot); + while (cur_offset < item_size) { + if (ext) { + extref = (struct btrfs_inode_extref *)(ptr + + cur_offset); + name_ptr = (unsigned long)(&extref->name); + len = btrfs_inode_extref_name_len(eb, extref); + cur_offset += sizeof(*extref); + } else { + ref = (struct btrfs_inode_ref *)(ptr + cur_offset); + len = btrfs_inode_ref_name_len(eb, ref); + name_ptr = (unsigned long)(ref + 1); + cur_offset += sizeof(*ref); + } + cur_offset += len; + + if (md->sanitize_names > 1) { + buf = malloc(len); + if (!buf) { + fprintf(stderr, "Couldn't sanitize name, " + "enomem\n"); + return; + } + read_extent_buffer(eb, buf, name_ptr, len); + garbage = find_collision(md, buf, len); + } else { + garbage = generate_garbage(len); + } + + if (!garbage) { + fprintf(stderr, "Couldn't sanitize name, enomem\n"); + return; + } + write_extent_buffer(eb, garbage, name_ptr, len); + if (free_garbage) + free(garbage); + } +} + +static void sanitize_xattr(struct metadump_struct *md, + struct extent_buffer *eb, int slot) +{ + struct btrfs_dir_item *dir_item; + unsigned long data_ptr; + u32 data_len; + + dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); + data_len = btrfs_dir_data_len(eb, dir_item); + + data_ptr = (unsigned long)((char *)(dir_item + 1) + + btrfs_dir_name_len(eb, dir_item)); + memset_extent_buffer(eb, 0, data_ptr, data_len); +} + +static void sanitize_name(struct metadump_struct *md, u8 *dst, + struct extent_buffer *src, struct btrfs_key *key, + int slot) +{ + struct extent_buffer *eb; + + eb = alloc_dummy_eb(src->start, src->len); + if (!eb) { + fprintf(stderr, "Couldn't sanitize name, no memory\n"); + return; + } + + memcpy(eb->data, dst, eb->len); + + switch (key->type) { + case BTRFS_DIR_ITEM_KEY: + case BTRFS_DIR_INDEX_KEY: + sanitize_dir_item(md, eb, slot); + break; + case BTRFS_INODE_REF_KEY: + sanitize_inode_ref(md, eb, slot, 0); + break; + case 
BTRFS_INODE_EXTREF_KEY: + sanitize_inode_ref(md, eb, slot, 1); + break; + case BTRFS_XATTR_ITEM_KEY: + sanitize_xattr(md, eb, slot); + break; + default: + break; + } + + memcpy(dst, eb->data, eb->len); + free(eb); +} + +/* + * zero inline extents and csum items + */ +static void zero_items(struct metadump_struct *md, u8 *dst, + struct extent_buffer *src) +{ + struct btrfs_file_extent_item *fi; + struct btrfs_item *item; + struct btrfs_key key; + u32 nritems = btrfs_header_nritems(src); + size_t size; + unsigned long ptr; + int i, extent_type; + + for (i = 0; i < nritems; i++) { + item = btrfs_item_nr(i); + btrfs_item_key_to_cpu(src, &key, i); + if (key.type == BTRFS_CSUM_ITEM_KEY) { + size = btrfs_item_size_nr(src, i); + memset(dst + btrfs_leaf_data(src) + + btrfs_item_offset_nr(src, i), 0, size); + continue; + } + + if (md->sanitize_names && has_name(&key)) { + sanitize_name(md, dst, src, &key, i); + continue; + } + + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + + fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(src, fi); + if (extent_type != BTRFS_FILE_EXTENT_INLINE) + continue; + + ptr = btrfs_file_extent_inline_start(fi); + size = btrfs_file_extent_inline_item_len(src, item); + memset(dst + ptr, 0, size); + } +} + +/* + * copy buffer and zero useless data in the buffer + */ +static void copy_buffer(struct metadump_struct *md, u8 *dst, + struct extent_buffer *src) +{ + int level; + size_t size; + u32 nritems; + + memcpy(dst, src->data, src->len); + if (src->start == BTRFS_SUPER_INFO_OFFSET) + return; + + level = btrfs_header_level(src); + nritems = btrfs_header_nritems(src); + + if (nritems == 0) { + size = sizeof(struct btrfs_header); + memset(dst + size, 0, src->len - size); + } else if (level == 0) { + size = btrfs_leaf_data(src) + + btrfs_item_offset_nr(src, nritems - 1) - + btrfs_item_nr_offset(nritems); + memset(dst + btrfs_item_nr_offset(nritems), 0, size); + zero_items(md, dst, src); + } else { + 
size = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nritems; + memset(dst + size, 0, src->len - size); + } + csum_block(dst, src->len); +} + +static void *dump_worker(void *data) +{ + struct metadump_struct *md = (struct metadump_struct *)data; + struct async_work *async; + int ret; + + while (1) { + pthread_mutex_lock(&md->mutex); + while (list_empty(&md->list)) { + if (md->done) { + pthread_mutex_unlock(&md->mutex); + goto out; + } + pthread_cond_wait(&md->cond, &md->mutex); + } + async = list_entry(md->list.next, struct async_work, list); + list_del_init(&async->list); + pthread_mutex_unlock(&md->mutex); + + if (md->compress_level > 0) { + u8 *orig = async->buffer; + + async->bufsize = compressBound(async->size); + async->buffer = malloc(async->bufsize); + if (!async->buffer) { + fprintf(stderr, "Error allocing buffer\n"); + pthread_mutex_lock(&md->mutex); + if (!md->error) + md->error = -ENOMEM; + pthread_mutex_unlock(&md->mutex); + pthread_exit(NULL); + } + + ret = compress2(async->buffer, + (unsigned long *)&async->bufsize, + orig, async->size, md->compress_level); + + if (ret != Z_OK) + async->error = 1; + + free(orig); + } + + pthread_mutex_lock(&md->mutex); + md->num_ready++; + pthread_mutex_unlock(&md->mutex); + } +out: + pthread_exit(NULL); +} + +static void meta_cluster_init(struct metadump_struct *md, u64 start) +{ + struct meta_cluster_header *header; + + md->num_items = 0; + md->num_ready = 0; + header = &md->cluster->header; + header->magic = cpu_to_le64(HEADER_MAGIC); + header->bytenr = cpu_to_le64(start); + header->nritems = cpu_to_le32(0); + header->compress = md->compress_level > 0 ? 
+ COMPRESS_ZLIB : COMPRESS_NONE; +} + +static void metadump_destroy(struct metadump_struct *md, int num_threads) +{ + int i; + struct rb_node *n; + + pthread_mutex_lock(&md->mutex); + md->done = 1; + pthread_cond_broadcast(&md->cond); + pthread_mutex_unlock(&md->mutex); + + for (i = 0; i < num_threads; i++) + pthread_join(md->threads[i], NULL); + + pthread_cond_destroy(&md->cond); + pthread_mutex_destroy(&md->mutex); + + while ((n = rb_first(&md->name_tree))) { + struct name *name; + + name = rb_entry(n, struct name, n); + rb_erase(n, &md->name_tree); + free(name->val); + free(name->sub); + free(name); + } + free(md->threads); + free(md->cluster); +} + +static int metadump_init(struct metadump_struct *md, struct btrfs_root *root, + FILE *out, int num_threads, int compress_level, + int sanitize_names) +{ + int i, ret = 0; + + memset(md, 0, sizeof(*md)); + md->cluster = calloc(1, BLOCK_SIZE); + if (!md->cluster) + return -ENOMEM; + md->threads = calloc(num_threads, sizeof(pthread_t)); + if (!md->threads) { + free(md->cluster); + return -ENOMEM; + } + INIT_LIST_HEAD(&md->list); + INIT_LIST_HEAD(&md->ordered); + md->root = root; + md->out = out; + md->pending_start = (u64)-1; + md->compress_level = compress_level; + md->sanitize_names = sanitize_names; + if (sanitize_names > 1) + crc32c_optimization_init(); + + md->name_tree.rb_node = NULL; + md->num_threads = num_threads; + pthread_cond_init(&md->cond, NULL); + pthread_mutex_init(&md->mutex, NULL); + meta_cluster_init(md, 0); + + if (!num_threads) + return 0; + + for (i = 0; i < num_threads; i++) { + ret = pthread_create(md->threads + i, NULL, dump_worker, md); + if (ret) + break; + } + + if (ret) + metadump_destroy(md, i + 1); + + return ret; +} + +static int write_zero(FILE *out, size_t size) +{ + static char zero[BLOCK_SIZE]; + return fwrite(zero, size, 1, out); +} + +static int write_buffers(struct metadump_struct *md, u64 *next) +{ + struct meta_cluster_header *header = &md->cluster->header; + struct 
meta_cluster_item *item; + struct async_work *async; + u64 bytenr = 0; + u32 nritems = 0; + int ret; + int err = 0; + + if (list_empty(&md->ordered)) + goto out; + + /* wait until all buffers are compressed */ + while (!err && md->num_items > md->num_ready) { + struct timespec ts = { + .tv_sec = 0, + .tv_nsec = 10000000, + }; + pthread_mutex_unlock(&md->mutex); + nanosleep(&ts, NULL); + pthread_mutex_lock(&md->mutex); + err = md->error; + } + + if (err) { + fprintf(stderr, "One of the threads errored out %s\n", + strerror(err)); + goto out; + } + + /* setup and write index block */ + list_for_each_entry(async, &md->ordered, ordered) { + item = md->cluster->items + nritems; + item->bytenr = cpu_to_le64(async->start); + item->size = cpu_to_le32(async->bufsize); + nritems++; + } + header->nritems = cpu_to_le32(nritems); + + ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out); + if (ret != 1) { + fprintf(stderr, "Error writing out cluster: %d\n", errno); + return -EIO; + } + + /* write buffers */ + bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE; + while (!list_empty(&md->ordered)) { + async = list_entry(md->ordered.next, struct async_work, + ordered); + list_del_init(&async->ordered); + + bytenr += async->bufsize; + if (!err) + ret = fwrite(async->buffer, async->bufsize, 1, + md->out); + if (ret != 1) { + err = -EIO; + ret = 0; + fprintf(stderr, "Error writing out cluster: %d\n", + errno); + } + + free(async->buffer); + free(async); + } + + /* zero unused space in the last block */ + if (!err && bytenr & BLOCK_MASK) { + size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK); + + bytenr += size; + ret = write_zero(md->out, size); + if (ret != 1) { + fprintf(stderr, "Error zeroing out buffer: %d\n", + errno); + err = -EIO; + } + } +out: + *next = bytenr; + return err; +} + +static int read_data_extent(struct metadump_struct *md, + struct async_work *async) +{ + struct btrfs_root *root = md->root; + u64 bytes_left = async->size; + u64 logical = async->start; + u64 offset = 0; 
+ u64 read_len; + int num_copies; + int cur_mirror; + int ret; + + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, logical, + bytes_left); + + /* Try our best to read data, just like read_tree_block() */ + for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) { + while (bytes_left) { + read_len = bytes_left; + ret = read_extent_data(root, + (char *)(async->buffer + offset), + logical, &read_len, cur_mirror); + if (ret < 0) + break; + offset += read_len; + logical += read_len; + bytes_left -= read_len; + } + } + if (bytes_left) + return -EIO; + return 0; +} + +static int get_dev_fd(struct btrfs_root *root) +{ + struct btrfs_device *dev; + + dev = list_first_entry(&root->fs_info->fs_devices->devices, + struct btrfs_device, dev_list); + return dev->fd; +} + +static int flush_pending(struct metadump_struct *md, int done) +{ + struct async_work *async = NULL; + struct extent_buffer *eb; + u64 blocksize = md->root->nodesize; + u64 start; + u64 size; + size_t offset; + int ret = 0; + + if (md->pending_size) { + async = calloc(1, sizeof(*async)); + if (!async) + return -ENOMEM; + + async->start = md->pending_start; + async->size = md->pending_size; + async->bufsize = async->size; + async->buffer = malloc(async->bufsize); + if (!async->buffer) { + free(async); + return -ENOMEM; + } + offset = 0; + start = async->start; + size = async->size; + + if (md->data) { + ret = read_data_extent(md, async); + if (ret) { + free(async->buffer); + free(async); + return ret; + } + } + + /* + * Balance can make the mapping not cover the super block, so + * just copy directly from one of the devices. 
+ */ + if (start == BTRFS_SUPER_INFO_OFFSET) { + int fd = get_dev_fd(md->root); + + ret = pread64(fd, async->buffer, size, start); + if (ret < size) { + free(async->buffer); + free(async); + fprintf(stderr, "Error reading superblock\n"); + return -EIO; + } + size = 0; + ret = 0; + } + + while (!md->data && size > 0) { + u64 this_read = min(blocksize, size); + eb = read_tree_block(md->root, start, this_read, 0); + if (!extent_buffer_uptodate(eb)) { + free(async->buffer); + free(async); + fprintf(stderr, + "Error reading metadata block\n"); + return -EIO; + } + copy_buffer(md, async->buffer + offset, eb); + free_extent_buffer(eb); + start += this_read; + offset += this_read; + size -= this_read; + } + + md->pending_start = (u64)-1; + md->pending_size = 0; + } else if (!done) { + return 0; + } + + pthread_mutex_lock(&md->mutex); + if (async) { + list_add_tail(&async->ordered, &md->ordered); + md->num_items++; + if (md->compress_level > 0) { + list_add_tail(&async->list, &md->list); + pthread_cond_signal(&md->cond); + } else { + md->num_ready++; + } + } + if (md->num_items >= ITEMS_PER_CLUSTER || done) { + ret = write_buffers(md, &start); + if (ret) + fprintf(stderr, "Error writing buffers %d\n", + errno); + else + meta_cluster_init(md, start); + } + pthread_mutex_unlock(&md->mutex); + return ret; +} + +static int add_extent(u64 start, u64 size, struct metadump_struct *md, + int data) +{ + int ret; + if (md->data != data || + md->pending_size + size > MAX_PENDING_SIZE || + md->pending_start + md->pending_size != start) { + ret = flush_pending(md, 0); + if (ret) + return ret; + md->pending_start = start; + } + readahead_tree_block(md->root, start, size, 0); + md->pending_size += size; + md->data = data; + return 0; +} + +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 +static int is_tree_block(struct btrfs_root *extent_root, + struct btrfs_path *path, u64 bytenr) +{ + struct extent_buffer *leaf; + struct btrfs_key key; + u64 ref_objectid; + int ret; + + leaf = path->nodes[0]; + 
while (1) { + struct btrfs_extent_ref_v0 *ref_item; + path->slots[0]++; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + return ret; + if (ret > 0) + break; + leaf = path->nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != bytenr) + break; + if (key.type != BTRFS_EXTENT_REF_V0_KEY) + continue; + ref_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item); + if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) + return 1; + break; + } + return 0; +} +#endif + +static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb, + struct metadump_struct *metadump, int root_tree) +{ + struct extent_buffer *tmp; + struct btrfs_root_item *ri; + struct btrfs_key key; + u64 bytenr; + int level; + int nritems = 0; + int i = 0; + int ret; + + ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0); + if (ret) { + fprintf(stderr, "Error adding metadata block\n"); + return ret; + } + + if (btrfs_header_level(eb) == 0 && !root_tree) + return 0; + + level = btrfs_header_level(eb); + nritems = btrfs_header_nritems(eb); + for (i = 0; i < nritems; i++) { + if (level == 0) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_ROOT_ITEM_KEY) + continue; + ri = btrfs_item_ptr(eb, i, struct btrfs_root_item); + bytenr = btrfs_disk_root_bytenr(eb, ri); + tmp = read_tree_block(root, bytenr, root->leafsize, 0); + if (!extent_buffer_uptodate(tmp)) { + fprintf(stderr, + "Error reading log root block\n"); + return -EIO; + } + ret = copy_tree_blocks(root, tmp, metadump, 0); + free_extent_buffer(tmp); + if (ret) + return ret; + } else { + bytenr = btrfs_node_blockptr(eb, i); + tmp = read_tree_block(root, bytenr, root->leafsize, 0); + if (!extent_buffer_uptodate(tmp)) { + fprintf(stderr, "Error reading log block\n"); + return -EIO; + } + ret = copy_tree_blocks(root, tmp, metadump, root_tree); 
+ free_extent_buffer(tmp); + if (ret) + return ret; + } + } + + return 0; +} + +static int copy_log_trees(struct btrfs_root *root, + struct metadump_struct *metadump, + struct btrfs_path *path) +{ + u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy); + + if (blocknr == 0) + return 0; + + if (!root->fs_info->log_root_tree || + !root->fs_info->log_root_tree->node) { + fprintf(stderr, "Error copying tree log, it wasn't setup\n"); + return -EIO; + } + + return copy_tree_blocks(root, root->fs_info->log_root_tree->node, + metadump, 1); +} + +static int copy_space_cache(struct btrfs_root *root, + struct metadump_struct *metadump, + struct btrfs_path *path) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 bytenr, num_bytes; + int ret; + + root = root->fs_info->tree_root; + + key.objectid = 0; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + fprintf(stderr, "Error searching for free space inode %d\n", + ret); + return ret; + } + + leaf = path->nodes[0]; + + while (1) { + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + return ret; + } + if (ret > 0) + break; + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.type != BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + continue; + } + + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) != + BTRFS_FILE_EXTENT_REG) { + path->slots[0]++; + continue; + } + + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + ret = add_extent(bytenr, num_bytes, metadump, 1); + if (ret) { + fprintf(stderr, "Error adding space cache blocks %d\n", + ret); + btrfs_release_path(path); + return ret; + } + path->slots[0]++; + } + + 
return 0; +} + +static int copy_from_extent_tree(struct metadump_struct *metadump, + struct btrfs_path *path) +{ + struct btrfs_root *extent_root; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 bytenr; + u64 num_bytes; + int ret; + + extent_root = metadump->root->fs_info->extent_root; + bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE; + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + if (ret < 0) { + fprintf(stderr, "Error searching extent root %d\n", ret); + return ret; + } + ret = 0; + + leaf = path->nodes[0]; + + while (1) { + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf %d" + "\n", ret); + break; + } + if (ret > 0) { + ret = 0; + break; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid < bytenr || + (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY)) { + path->slots[0]++; + continue; + } + + bytenr = key.objectid; + if (key.type == BTRFS_METADATA_ITEM_KEY) + num_bytes = extent_root->leafsize; + else + num_bytes = key.offset; + + if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) { + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + if (btrfs_extent_flags(leaf, ei) & + BTRFS_EXTENT_FLAG_TREE_BLOCK) { + ret = add_extent(bytenr, num_bytes, metadump, + 0); + if (ret) { + fprintf(stderr, "Error adding block " + "%d\n", ret); + break; + } + } + } else { +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + ret = is_tree_block(extent_root, path, bytenr); + if (ret < 0) { + fprintf(stderr, "Error checking tree block " + "%d\n", ret); + break; + } + + if (ret) { + ret = add_extent(bytenr, num_bytes, metadump, + 0); + if (ret) { + fprintf(stderr, "Error adding block " + "%d\n", ret); + break; + } + } + ret = 0; 
+#else + fprintf(stderr, "Either extent tree corruption or " + "you haven't built with V0 support\n"); + ret = -EIO; + break; +#endif + } + bytenr += num_bytes; + } + + btrfs_release_path(path); + + return ret; +} + +static int create_metadump(const char *input, FILE *out, int num_threads, + int compress_level, int sanitize, int walk_trees) +{ + struct btrfs_root *root; + struct btrfs_path *path = NULL; + struct metadump_struct metadump; + int ret; + int err = 0; + + root = open_ctree(input, 0, 0); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + return -EIO; + } + + BUG_ON(root->nodesize != root->leafsize); + + ret = metadump_init(&metadump, root, out, num_threads, + compress_level, sanitize); + if (ret) { + fprintf(stderr, "Error initing metadump %d\n", ret); + close_ctree(root); + return ret; + } + + ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE, + &metadump, 0); + if (ret) { + fprintf(stderr, "Error adding metadata %d\n", ret); + err = ret; + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + fprintf(stderr, "Out of memory allocing path\n"); + err = -ENOMEM; + goto out; + } + + if (walk_trees) { + ret = copy_tree_blocks(root, root->fs_info->chunk_root->node, + &metadump, 1); + if (ret) { + err = ret; + goto out; + } + + ret = copy_tree_blocks(root, root->fs_info->tree_root->node, + &metadump, 1); + if (ret) { + err = ret; + goto out; + } + } else { + ret = copy_from_extent_tree(&metadump, path); + if (ret) { + err = ret; + goto out; + } + } + + ret = copy_log_trees(root, &metadump, path); + if (ret) { + err = ret; + goto out; + } + + ret = copy_space_cache(root, &metadump, path); +out: + ret = flush_pending(&metadump, 1); + if (ret) { + if (!err) + err = ret; + fprintf(stderr, "Error flushing pending %d\n", ret); + } + + metadump_destroy(&metadump, num_threads); + + btrfs_free_path(path); + ret = close_ctree(root); + return err ? 
err : ret; +} + +static void update_super_old(u8 *buffer) +{ + struct btrfs_super_block *super = (struct btrfs_super_block *)buffer; + struct btrfs_chunk *chunk; + struct btrfs_disk_key *key; + u32 sectorsize = btrfs_super_sectorsize(super); + u64 flags = btrfs_super_flags(super); + + flags |= BTRFS_SUPER_FLAG_METADUMP; + btrfs_set_super_flags(super, flags); + + key = (struct btrfs_disk_key *)(super->sys_chunk_array); + chunk = (struct btrfs_chunk *)(super->sys_chunk_array + + sizeof(struct btrfs_disk_key)); + + btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY); + btrfs_set_disk_key_offset(key, 0); + + btrfs_set_stack_chunk_length(chunk, (u64)-1); + btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN); + btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM); + btrfs_set_stack_chunk_io_align(chunk, sectorsize); + btrfs_set_stack_chunk_io_width(chunk, sectorsize); + btrfs_set_stack_chunk_sector_size(chunk, sectorsize); + btrfs_set_stack_chunk_num_stripes(chunk, 1); + btrfs_set_stack_chunk_sub_stripes(chunk, 0); + chunk->stripe.devid = super->dev_item.devid; + btrfs_set_stack_stripe_offset(&chunk->stripe, 0); + memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE); + btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk)); + csum_block(buffer, BTRFS_SUPER_INFO_SIZE); +} + +static int update_super(struct mdrestore_struct *mdres, u8 *buffer) +{ + struct btrfs_super_block *super = (struct btrfs_super_block *)buffer; + struct btrfs_chunk *chunk; + struct btrfs_disk_key *disk_key; + struct btrfs_key key; + u64 flags = btrfs_super_flags(super); + u32 new_array_size = 0; + u32 array_size; + u32 cur = 0; + u8 *ptr, *write_ptr; + int old_num_stripes; + + write_ptr = ptr = super->sys_chunk_array; + array_size = btrfs_super_sys_array_size(super); + + while (cur < array_size) { + disk_key = (struct 
btrfs_disk_key *)ptr; + btrfs_disk_key_to_cpu(&key, disk_key); + + new_array_size += sizeof(*disk_key); + memmove(write_ptr, ptr, sizeof(*disk_key)); + + write_ptr += sizeof(*disk_key); + ptr += sizeof(*disk_key); + cur += sizeof(*disk_key); + + if (key.type == BTRFS_CHUNK_ITEM_KEY) { + u64 physical, size = 0; + + chunk = (struct btrfs_chunk *)ptr; + old_num_stripes = btrfs_stack_chunk_num_stripes(chunk); + chunk = (struct btrfs_chunk *)write_ptr; + + memmove(write_ptr, ptr, sizeof(*chunk)); + btrfs_set_stack_chunk_num_stripes(chunk, 1); + btrfs_set_stack_chunk_sub_stripes(chunk, 0); + btrfs_set_stack_chunk_type(chunk, + BTRFS_BLOCK_GROUP_SYSTEM); + chunk->stripe.devid = super->dev_item.devid; + physical = logical_to_physical(mdres, key.offset, + &size); + if (size != (u64)-1) + btrfs_set_stack_stripe_offset(&chunk->stripe, + physical); + memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, + BTRFS_UUID_SIZE); + new_array_size += sizeof(*chunk); + } else { + fprintf(stderr, "Bogus key in the sys chunk array " + "%d\n", key.type); + return -EIO; + } + write_ptr += sizeof(*chunk); + ptr += btrfs_chunk_item_size(old_num_stripes); + cur += btrfs_chunk_item_size(old_num_stripes); + } + + if (mdres->clear_space_cache) + btrfs_set_super_cache_generation(super, 0); + + flags |= BTRFS_SUPER_FLAG_METADUMP_V2; + btrfs_set_super_flags(super, flags); + btrfs_set_super_sys_array_size(super, new_array_size); + csum_block(buffer, BTRFS_SUPER_INFO_SIZE); + + return 0; +} + +static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size) +{ + struct extent_buffer *eb; + + eb = calloc(1, sizeof(struct extent_buffer) + size); + if (!eb) + return NULL; + + eb->start = bytenr; + eb->len = size; + return eb; +} + +static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size) +{ + struct btrfs_item *item; + u32 nritems; + u32 old_size; + u32 old_data_start; + u32 size_diff; + u32 data_end; + int i; + + old_size = btrfs_item_size_nr(eb, slot); + if (old_size == new_size) + 
return; + + nritems = btrfs_header_nritems(eb); + data_end = btrfs_item_offset_nr(eb, nritems - 1); + + old_data_start = btrfs_item_offset_nr(eb, slot); + size_diff = old_size - new_size; + + for (i = slot; i < nritems; i++) { + u32 ioff; + item = btrfs_item_nr(i); + ioff = btrfs_item_offset(eb, item); + btrfs_set_item_offset(eb, item, ioff + size_diff); + } + + memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff, + btrfs_leaf_data(eb) + data_end, + old_data_start + new_size - data_end); + item = btrfs_item_nr(slot); + btrfs_set_item_size(eb, item, new_size); +} + +static int fixup_chunk_tree_block(struct mdrestore_struct *mdres, + struct async_work *async, u8 *buffer, + size_t size) +{ + struct extent_buffer *eb; + size_t size_left = size; + u64 bytenr = async->start; + int i; + + if (size_left % mdres->leafsize) + return 0; + + eb = alloc_dummy_eb(bytenr, mdres->leafsize); + if (!eb) + return -ENOMEM; + + while (size_left) { + eb->start = bytenr; + memcpy(eb->data, buffer, mdres->leafsize); + + if (btrfs_header_bytenr(eb) != bytenr) + break; + if (memcmp(mdres->fsid, + eb->data + offsetof(struct btrfs_header, fsid), + BTRFS_FSID_SIZE)) + break; + + if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) + goto next; + + if (btrfs_header_level(eb) != 0) + goto next; + + for (i = 0; i < btrfs_header_nritems(eb); i++) { + struct btrfs_chunk chunk; + struct btrfs_key key; + u64 type, physical, size = (u64)-1; + + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_CHUNK_ITEM_KEY) + continue; + truncate_item(eb, i, sizeof(chunk)); + read_extent_buffer(eb, &chunk, + btrfs_item_ptr_offset(eb, i), + sizeof(chunk)); + + size = 0; + physical = logical_to_physical(mdres, key.offset, + &size); + + /* Zero out the RAID profile */ + type = btrfs_stack_chunk_type(&chunk); + type &= (BTRFS_BLOCK_GROUP_DATA | + BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_DUP); + btrfs_set_stack_chunk_type(&chunk, type); + + 
btrfs_set_stack_chunk_num_stripes(&chunk, 1); + btrfs_set_stack_chunk_sub_stripes(&chunk, 0); + btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid); + if (size != (u64)-1) + btrfs_set_stack_stripe_offset(&chunk.stripe, + physical); + memcpy(chunk.stripe.dev_uuid, mdres->uuid, + BTRFS_UUID_SIZE); + write_extent_buffer(eb, &chunk, + btrfs_item_ptr_offset(eb, i), + sizeof(chunk)); + } + memcpy(buffer, eb->data, eb->len); + csum_block(buffer, eb->len); +next: + size_left -= mdres->leafsize; + buffer += mdres->leafsize; + bytenr += mdres->leafsize; + } + + free(eb); + return 0; +} + +static void write_backup_supers(int fd, u8 *buf) +{ + struct btrfs_super_block *super = (struct btrfs_super_block *)buf; + struct stat st; + u64 size; + u64 bytenr; + int i; + int ret; + + if (fstat(fd, &st)) { + fprintf(stderr, "Couldn't stat restore point, won't be able " + "to write backup supers: %d\n", errno); + return; + } + + size = btrfs_device_size(fd, &st); + + for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE > size) + break; + btrfs_set_super_bytenr(super, bytenr); + csum_block(buf, BTRFS_SUPER_INFO_SIZE); + ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr); + if (ret < BTRFS_SUPER_INFO_SIZE) { + if (ret < 0) + fprintf(stderr, "Problem writing out backup " + "super block %d, err %d\n", i, errno); + else + fprintf(stderr, "Short write writing out " + "backup super block\n"); + break; + } + } +} + +static void *restore_worker(void *data) +{ + struct mdrestore_struct *mdres = (struct mdrestore_struct *)data; + struct async_work *async; + size_t size; + u8 *buffer; + u8 *outbuf; + int outfd; + int ret; + int compress_size = MAX_PENDING_SIZE * 4; + + outfd = fileno(mdres->out); + buffer = malloc(compress_size); + if (!buffer) { + fprintf(stderr, "Error allocing buffer\n"); + pthread_mutex_lock(&mdres->mutex); + if (!mdres->error) + mdres->error = -ENOMEM; + pthread_mutex_unlock(&mdres->mutex); + 
pthread_exit(NULL); + } + + while (1) { + u64 bytenr; + off_t offset = 0; + int err = 0; + + pthread_mutex_lock(&mdres->mutex); + while (!mdres->leafsize || list_empty(&mdres->list)) { + if (mdres->done) { + pthread_mutex_unlock(&mdres->mutex); + goto out; + } + pthread_cond_wait(&mdres->cond, &mdres->mutex); + } + async = list_entry(mdres->list.next, struct async_work, list); + list_del_init(&async->list); + pthread_mutex_unlock(&mdres->mutex); + + if (mdres->compress_method == COMPRESS_ZLIB) { + size = compress_size; + ret = uncompress(buffer, (unsigned long *)&size, + async->buffer, async->bufsize); + if (ret != Z_OK) { + fprintf(stderr, "Error decompressing %d\n", + ret); + err = -EIO; + } + outbuf = buffer; + } else { + outbuf = async->buffer; + size = async->bufsize; + } + + if (!mdres->multi_devices) { + if (async->start == BTRFS_SUPER_INFO_OFFSET) { + if (mdres->old_restore) { + update_super_old(outbuf); + } else { + ret = update_super(mdres, outbuf); + if (ret) + err = ret; + } + } else if (!mdres->old_restore) { + ret = fixup_chunk_tree_block(mdres, async, outbuf, size); + if (ret) + err = ret; + } + } + + if (!mdres->fixup_offset) { + while (size) { + u64 chunk_size = size; + if (!mdres->multi_devices && !mdres->old_restore) + bytenr = logical_to_physical(mdres, + async->start + offset, + &chunk_size); + else + bytenr = async->start + offset; + + ret = pwrite64(outfd, outbuf+offset, chunk_size, + bytenr); + if (ret != chunk_size) { + if (ret < 0) { + fprintf(stderr, "Error writing to " + "device %d\n", errno); + err = errno; + break; + } else { + fprintf(stderr, "Short write\n"); + err = -EIO; + break; + } + } + size -= chunk_size; + offset += chunk_size; + } + } else if (async->start != BTRFS_SUPER_INFO_OFFSET) { + ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0); + if (ret) { + printk("Error write data\n"); + exit(1); + } + } + + + /* backup super blocks are already there at fixup_offset stage */ + if (!mdres->multi_devices && 
async->start == BTRFS_SUPER_INFO_OFFSET) + write_backup_supers(outfd, outbuf); + + pthread_mutex_lock(&mdres->mutex); + if (err && !mdres->error) + mdres->error = err; + mdres->num_items--; + pthread_mutex_unlock(&mdres->mutex); + + free(async->buffer); + free(async); + } +out: + free(buffer); + pthread_exit(NULL); +} + +static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads) +{ + struct rb_node *n; + int i; + + while ((n = rb_first(&mdres->chunk_tree))) { + struct fs_chunk *entry; + + entry = rb_entry(n, struct fs_chunk, l); + rb_erase(n, &mdres->chunk_tree); + rb_erase(&entry->p, &mdres->physical_tree); + free(entry); + } + pthread_mutex_lock(&mdres->mutex); + mdres->done = 1; + pthread_cond_broadcast(&mdres->cond); + pthread_mutex_unlock(&mdres->mutex); + + for (i = 0; i < num_threads; i++) + pthread_join(mdres->threads[i], NULL); + + pthread_cond_destroy(&mdres->cond); + pthread_mutex_destroy(&mdres->mutex); + free(mdres->threads); +} + +static int mdrestore_init(struct mdrestore_struct *mdres, + FILE *in, FILE *out, int old_restore, + int num_threads, int fixup_offset, + struct btrfs_fs_info *info, int multi_devices) +{ + int i, ret = 0; + + memset(mdres, 0, sizeof(*mdres)); + pthread_cond_init(&mdres->cond, NULL); + pthread_mutex_init(&mdres->mutex, NULL); + INIT_LIST_HEAD(&mdres->list); + INIT_LIST_HEAD(&mdres->overlapping_chunks); + mdres->in = in; + mdres->out = out; + mdres->old_restore = old_restore; + mdres->chunk_tree.rb_node = NULL; + mdres->fixup_offset = fixup_offset; + mdres->info = info; + mdres->multi_devices = multi_devices; + mdres->clear_space_cache = 0; + mdres->last_physical_offset = 0; + mdres->alloced_chunks = 0; + + if (!num_threads) + return 0; + + mdres->num_threads = num_threads; + mdres->threads = calloc(num_threads, sizeof(pthread_t)); + if (!mdres->threads) + return -ENOMEM; + for (i = 0; i < num_threads; i++) { + ret = pthread_create(mdres->threads + i, NULL, restore_worker, + mdres); + if (ret) + break; + } 
+ if (ret) + mdrestore_destroy(mdres, i + 1); + return ret; +} + +static int fill_mdres_info(struct mdrestore_struct *mdres, + struct async_work *async) +{ + struct btrfs_super_block *super; + u8 *buffer = NULL; + u8 *outbuf; + int ret; + + /* We've already been initialized */ + if (mdres->leafsize) + return 0; + + if (mdres->compress_method == COMPRESS_ZLIB) { + size_t size = MAX_PENDING_SIZE * 2; + + buffer = malloc(MAX_PENDING_SIZE * 2); + if (!buffer) + return -ENOMEM; + ret = uncompress(buffer, (unsigned long *)&size, + async->buffer, async->bufsize); + if (ret != Z_OK) { + fprintf(stderr, "Error decompressing %d\n", ret); + free(buffer); + return -EIO; + } + outbuf = buffer; + } else { + outbuf = async->buffer; + } + + super = (struct btrfs_super_block *)outbuf; + mdres->leafsize = btrfs_super_leafsize(super); + memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE); + memcpy(mdres->uuid, super->dev_item.uuid, + BTRFS_UUID_SIZE); + mdres->devid = le64_to_cpu(super->dev_item.devid); + free(buffer); + return 0; +} + +static int add_cluster(struct meta_cluster *cluster, + struct mdrestore_struct *mdres, u64 *next) +{ + struct meta_cluster_item *item; + struct meta_cluster_header *header = &cluster->header; + struct async_work *async; + u64 bytenr; + u32 i, nritems; + int ret; + + mdres->compress_method = header->compress; + + bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE; + nritems = le32_to_cpu(header->nritems); + for (i = 0; i < nritems; i++) { + item = &cluster->items[i]; + async = calloc(1, sizeof(*async)); + if (!async) { + fprintf(stderr, "Error allocating async\n"); + return -ENOMEM; + } + async->start = le64_to_cpu(item->bytenr); + async->bufsize = le32_to_cpu(item->size); + async->buffer = malloc(async->bufsize); + if (!async->buffer) { + fprintf(stderr, "Error allocing async buffer\n"); + free(async); + return -ENOMEM; + } + ret = fread(async->buffer, async->bufsize, 1, mdres->in); + if (ret != 1) { + fprintf(stderr, "Error reading buffer %d\n", errno); 
+ free(async->buffer); + free(async); + return -EIO; + } + bytenr += async->bufsize; + + pthread_mutex_lock(&mdres->mutex); + if (async->start == BTRFS_SUPER_INFO_OFFSET) { + ret = fill_mdres_info(mdres, async); + if (ret) { + fprintf(stderr, "Error setting up restore\n"); + pthread_mutex_unlock(&mdres->mutex); + free(async->buffer); + free(async); + return ret; + } + } + list_add_tail(&async->list, &mdres->list); + mdres->num_items++; + pthread_cond_signal(&mdres->cond); + pthread_mutex_unlock(&mdres->mutex); + } + if (bytenr & BLOCK_MASK) { + char buffer[BLOCK_MASK]; + size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK); + + bytenr += size; + ret = fread(buffer, size, 1, mdres->in); + if (ret != 1) { + fprintf(stderr, "Error reading in buffer %d\n", errno); + return -EIO; + } + } + *next = bytenr; + return 0; +} + +static int wait_for_worker(struct mdrestore_struct *mdres) +{ + int ret = 0; + + pthread_mutex_lock(&mdres->mutex); + ret = mdres->error; + while (!ret && mdres->num_items > 0) { + struct timespec ts = { + .tv_sec = 0, + .tv_nsec = 10000000, + }; + pthread_mutex_unlock(&mdres->mutex); + nanosleep(&ts, NULL); + pthread_mutex_lock(&mdres->mutex); + ret = mdres->error; + } + pthread_mutex_unlock(&mdres->mutex); + return ret; +} + +static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer, + u64 bytenr, u64 item_bytenr, u32 bufsize, + u64 cluster_bytenr) +{ + struct extent_buffer *eb; + int ret = 0; + int i; + + eb = alloc_dummy_eb(bytenr, mdres->leafsize); + if (!eb) { + ret = -ENOMEM; + goto out; + } + + while (item_bytenr != bytenr) { + buffer += mdres->leafsize; + item_bytenr += mdres->leafsize; + } + + memcpy(eb->data, buffer, mdres->leafsize); + if (btrfs_header_bytenr(eb) != bytenr) { + fprintf(stderr, "Eb bytenr doesn't match found bytenr\n"); + ret = -EIO; + goto out; + } + + if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid), + BTRFS_FSID_SIZE)) { + fprintf(stderr, "Fsid doesn't match\n"); + ret = -EIO; + goto out; + 
/*
 * Scan the metadump image for the cluster item whose range covers the
 * logical address @search and pass that item's data to read_chunk_block().
 *
 * The scan starts at image offset @cluster_bytenr.  If the end of the image
 * is hit and the scan did not start at offset 0, it wraps around to offset 0
 * once.  read_chunk_block() may call back into this function for child
 * blocks, so the two functions are mutually recursive.
 *
 * Returns 0 when the block was found and read_chunk_block() succeeded,
 * -ENOMEM on allocation failure, -EIO on seek/read/decompress/format errors,
 * or the negative error returned by read_chunk_block().
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
				   u64 search, u64 cluster_bytenr)
{
	struct meta_cluster *cluster;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item;
	u64 current_cluster = cluster_bytenr, bytenr;
	u64 item_bytenr;
	u32 bufsize, nritems, i;
	u32 max_size = MAX_PENDING_SIZE * 2;
	u8 *buffer, *tmp = NULL;
	int ret = 0;

	cluster = malloc(BLOCK_SIZE);
	if (!cluster) {
		fprintf(stderr, "Error allocating cluster\n");
		return -ENOMEM;
	}

	buffer = malloc(max_size);
	if (!buffer) {
		fprintf(stderr, "Error allocing buffer\n");
		free(cluster);
		return -ENOMEM;
	}

	/* A second buffer holds the compressed bytes before inflating. */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		tmp = malloc(max_size);
		if (!tmp) {
			fprintf(stderr, "Error allocing tmp buffer\n");
			free(cluster);
			free(buffer);
			return -ENOMEM;
		}
	}

	bytenr = current_cluster;
	while (1) {
		if (fseek(mdres->in, current_cluster, SEEK_SET)) {
			fprintf(stderr, "Error seeking: %d\n", errno);
			ret = -EIO;
			break;
		}

		ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
		if (ret == 0) {
			/*
			 * Nothing more to read: wrap to the start of the
			 * image once if the scan began at a later cluster.
			 */
			if (cluster_bytenr != 0) {
				cluster_bytenr = 0;
				current_cluster = 0;
				bytenr = 0;
				continue;
			}
			printf("ok this is where we screwed up?\n");
			ret = -EIO;
			break;
		} else if (ret < 0) {
			/*
			 * NOTE(review): fread() returns an item count, so this
			 * branch looks unreachable - confirm before relying
			 * on it.
			 */
			fprintf(stderr, "Error reading image\n");
			break;
		}
		ret = 0;

		/* The header must carry the magic and its own image offset. */
		header = &cluster->header;
		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
		    le64_to_cpu(header->bytenr) != current_cluster) {
			fprintf(stderr, "bad header in metadump image\n");
			ret = -EIO;
			break;
		}

		/* Item payloads follow the BLOCK_SIZE cluster header. */
		bytenr += BLOCK_SIZE;
		nritems = le32_to_cpu(header->nritems);
		for (i = 0; i < nritems; i++) {
			size_t size;

			item = &cluster->items[i];
			bufsize = le32_to_cpu(item->size);
			item_bytenr = le64_to_cpu(item->bytenr);

			if (bufsize > max_size) {
				fprintf(stderr, "item %u size %u too big\n",
					i, bufsize);
				ret = -EIO;
				break;
			}

			if (mdres->compress_method == COMPRESS_ZLIB) {
				ret = fread(tmp, bufsize, 1, mdres->in);
				if (ret != 1) {
					fprintf(stderr, "Error reading: %d\n",
						errno);
					ret = -EIO;
					break;
				}

				size = max_size;
				ret = uncompress(buffer,
						 (unsigned long *)&size, tmp,
						 bufsize);
				if (ret != Z_OK) {
					fprintf(stderr, "Error decompressing "
						"%d\n", ret);
					ret = -EIO;
					break;
				}
			} else {
				ret = fread(buffer, bufsize, 1, mdres->in);
				if (ret != 1) {
					fprintf(stderr, "Error reading: %d\n",
						errno);
					ret = -EIO;
					break;
				}
				size = bufsize;
			}
			ret = 0;

			/*
			 * Found the item covering @search.  A successful parse
			 * is flagged with ret == 1 so the outer loop stops; it
			 * is turned back into 0 below.
			 */
			if (item_bytenr <= search &&
			    item_bytenr + size > search) {
				ret = read_chunk_block(mdres, buffer, search,
						       item_bytenr, size,
						       current_cluster);
				if (!ret)
					ret = 1;
				break;
			}
			bytenr += bufsize;
		}
		if (ret) {
			if (ret > 0)
				ret = 0;
			break;
		}
		/* Round up to the next BLOCK_SIZE boundary for the next cluster. */
		if (bytenr & BLOCK_MASK)
			bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
		current_cluster = bytenr;
	}

	free(tmp);
	free(buffer);
	free(cluster);
	return ret;
}
+ + ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in); + if (ret != 1) { + fprintf(stderr, "Error reading buffer: %d\n", errno); + free(buffer); + return -EIO; + } + + if (mdres->compress_method == COMPRESS_ZLIB) { + size_t size = MAX_PENDING_SIZE * 2; + u8 *tmp; + + tmp = malloc(MAX_PENDING_SIZE * 2); + if (!tmp) { + free(buffer); + return -ENOMEM; + } + ret = uncompress(tmp, (unsigned long *)&size, + buffer, le32_to_cpu(item->size)); + if (ret != Z_OK) { + fprintf(stderr, "Error decompressing %d\n", ret); + free(buffer); + free(tmp); + return -EIO; + } + free(buffer); + buffer = tmp; + } + + pthread_mutex_lock(&mdres->mutex); + super = (struct btrfs_super_block *)buffer; + chunk_root_bytenr = btrfs_super_chunk_root(super); + mdres->leafsize = btrfs_super_leafsize(super); + memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE); + memcpy(mdres->uuid, super->dev_item.uuid, + BTRFS_UUID_SIZE); + mdres->devid = le64_to_cpu(super->dev_item.devid); + free(buffer); + pthread_mutex_unlock(&mdres->mutex); + + return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0); +} + +static int range_contains_super(u64 physical, u64 bytes) +{ + u64 super_bytenr; + int i; + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + super_bytenr = btrfs_sb_offset(i); + if (super_bytenr >= physical && + super_bytenr < physical + bytes) + return 1; + } + + return 0; +} + +static void remap_overlapping_chunks(struct mdrestore_struct *mdres) +{ + struct fs_chunk *fs_chunk; + + while (!list_empty(&mdres->overlapping_chunks)) { + fs_chunk = list_first_entry(&mdres->overlapping_chunks, + struct fs_chunk, list); + list_del_init(&fs_chunk->list); + if (range_contains_super(fs_chunk->physical, + fs_chunk->bytes)) { + fprintf(stderr, "Remapping a chunk that had a super " + "mirror inside of it, clearing space cache " + "so we don't end up with corruption\n"); + mdres->clear_space_cache = 1; + } + fs_chunk->physical = mdres->last_physical_offset; + tree_insert(&mdres->physical_tree, &fs_chunk->p, 
/*
 * Make the restored filesystem describe exactly one device of size @dev_size.
 *
 * The dev item embedded in fs_info->super_copy gets @dev_size as total bytes
 * and mdres->alloced_chunks as bytes used.  Then the DEV_ITEM keys in the
 * chunk root are walked: items whose devid differs from the super's devid are
 * deleted, the matching item gets the same size/usage values.  The changes
 * are committed in one transaction.
 *
 * Returns 0 on success, -ENOMEM if no path can be allocated, the error from
 * starting or committing the transaction otherwise.  Search, next-leaf and
 * delete failures terminate the process with exit(1).
 */
static int fixup_devices(struct btrfs_fs_info *fs_info,
			 struct mdrestore_struct *mdres, off_t dev_size)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_dev_item *dev_item;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_key key;
	u64 devid, cur_devid;
	int ret;

	path = btrfs_alloc_path();
	if (!path) {
		fprintf(stderr, "Error alloc'ing path\n");
		return -ENOMEM;
	}

	trans = btrfs_start_transaction(fs_info->tree_root, 1);
	if (IS_ERR(trans)) {
		fprintf(stderr, "Error starting transaction %ld\n",
			PTR_ERR(trans));
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	/* Update the in-memory super block's dev item first. */
	dev_item = &fs_info->super_copy->dev_item;

	devid = btrfs_stack_device_id(dev_item);

	btrfs_set_stack_device_total_bytes(dev_item, dev_size);
	btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = 0;

	/* The search is restarted from here after every deleted item. */
again:
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		fprintf(stderr, "search failed %d\n", ret);
		exit(1);
	}

	while (1) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				exit(1);
			}
			/* No further leaf: the walk is complete. */
			if (ret > 0) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type > BTRFS_DEV_ITEM_KEY)
			break;
		if (key.type != BTRFS_DEV_ITEM_KEY) {
			path->slots[0]++;
			continue;
		}

		dev_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_dev_item);
		cur_devid = btrfs_device_id(leaf, dev_item);
		if (devid != cur_devid) {
			/* Not the device named by the super: drop the item. */
			ret = btrfs_del_item(trans, root, path);
			if (ret) {
				fprintf(stderr, "Error deleting item %d\n",
					ret);
				exit(1);
			}
			btrfs_release_path(path);
			goto again;
		}

		btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
		btrfs_set_device_bytes_used(leaf, dev_item,
					    mdres->alloced_chunks);
		btrfs_mark_buffer_dirty(leaf);
		path->slots[0]++;
	}

	btrfs_free_path(path);
	ret = btrfs_commit_transaction(trans, fs_info->tree_root);
	if (ret) {
		fprintf(stderr, "Commit failed %d\n", ret);
		return ret;
	}
	return 0;
}
(le64_to_cpu(header->magic) != HEADER_MAGIC || + le64_to_cpu(header->bytenr) != bytenr) { + fprintf(stderr, "bad header in metadump image\n"); + ret = -EIO; + break; + } + ret = add_cluster(cluster, &mdrestore, &bytenr); + if (ret) { + fprintf(stderr, "Error adding cluster\n"); + break; + } + } + ret = wait_for_worker(&mdrestore); + + if (!ret && !multi_devices && !old_restore) { + struct btrfs_root *root; + struct stat st; + + root = open_ctree_fd(fileno(out), target, 0, + OPEN_CTREE_PARTIAL | + OPEN_CTREE_WRITES | + OPEN_CTREE_NO_DEVICES); + if (!root) { + fprintf(stderr, "unable to open %s\n", target); + ret = -EIO; + goto out; + } + info = root->fs_info; + + if (stat(target, &st)) { + fprintf(stderr, "statting %s failed\n", target); + close_ctree(info->chunk_root); + return 1; + } + + ret = fixup_devices(info, &mdrestore, st.st_size); + close_ctree(info->chunk_root); + if (ret) + goto out; + } +out: + mdrestore_destroy(&mdrestore, num_threads); +failed_cluster: + free(cluster); +failed_info: + if (fixup_offset && info) + close_ctree(info->chunk_root); +failed_open: + if (in != stdin) + fclose(in); + return ret; +} + +static int update_disk_super_on_device(struct btrfs_fs_info *info, + const char *other_dev, u64 cur_devid) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_path path; + struct btrfs_dev_item *dev_item; + struct btrfs_super_block *disk_super; + char dev_uuid[BTRFS_UUID_SIZE]; + char fs_uuid[BTRFS_UUID_SIZE]; + u64 devid, type, io_align, io_width; + u64 sector_size, total_bytes, bytes_used; + char buf[BTRFS_SUPER_INFO_SIZE]; + int fp = -1; + int ret; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = cur_devid; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); + if (ret) { + fprintf(stderr, "ERROR: search key failed\n"); + ret = -EIO; + goto out; + } + + leaf = path.nodes[0]; + dev_item = btrfs_item_ptr(leaf, path.slots[0], + struct 
btrfs_dev_item); + + devid = btrfs_device_id(leaf, dev_item); + if (devid != cur_devid) { + printk("ERROR: devid %llu mismatch with %llu\n", devid, cur_devid); + ret = -EIO; + goto out; + } + + type = btrfs_device_type(leaf, dev_item); + io_align = btrfs_device_io_align(leaf, dev_item); + io_width = btrfs_device_io_width(leaf, dev_item); + sector_size = btrfs_device_sector_size(leaf, dev_item); + total_bytes = btrfs_device_total_bytes(leaf, dev_item); + bytes_used = btrfs_device_bytes_used(leaf, dev_item); + read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); + read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); + + btrfs_release_path(&path); + + printk("update disk super on %s devid=%llu\n", other_dev, devid); + + /* update other devices' super block */ + fp = open(other_dev, O_CREAT | O_RDWR, 0600); + if (fp < 0) { + fprintf(stderr, "ERROR: could not open %s\n", other_dev); + ret = -EIO; + goto out; + } + + memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE); + + disk_super = (struct btrfs_super_block *)buf; + dev_item = &disk_super->dev_item; + + btrfs_set_stack_device_type(dev_item, type); + btrfs_set_stack_device_id(dev_item, devid); + btrfs_set_stack_device_total_bytes(dev_item, total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, bytes_used); + btrfs_set_stack_device_io_align(dev_item, io_align); + btrfs_set_stack_device_io_width(dev_item, io_width); + btrfs_set_stack_device_sector_size(dev_item, sector_size); + memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE); + memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE); + csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE); + + ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET); + if (ret != BTRFS_SUPER_INFO_SIZE) { + if (ret < 0) + fprintf(stderr, "ERROR: cannot write superblock: %s\n", strerror(ret)); + else + fprintf(stderr, "ERROR: cannot write superblock\n"); + ret = -EIO; + goto out; + } + + 
write_backup_supers(fp, (u8 *)buf); + +out: + if (fp != -1) + close(fp); + return ret; +} + +static void print_usage(int ret) +{ + fprintf(stderr, "usage: btrfs-image [options] source target\n"); + fprintf(stderr, "\t-r \trestore metadump image\n"); + fprintf(stderr, "\t-c value\tcompression level (0 ~ 9)\n"); + fprintf(stderr, "\t-t value\tnumber of threads (1 ~ 32)\n"); + fprintf(stderr, "\t-o \tdon't mess with the chunk tree when restoring\n"); + fprintf(stderr, "\t-s \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n"); + fprintf(stderr, "\t-w \twalk all trees instead of using extent tree, do this if your extent tree is broken\n"); + fprintf(stderr, "\t-m \trestore for multiple devices\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n"); + fprintf(stderr, "\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n"); + exit(ret); +} + +int main(int argc, char *argv[]) +{ + char *source; + char *target; + u64 num_threads = 0; + u64 compress_level = 0; + int create = 1; + int old_restore = 0; + int walk_trees = 0; + int multi_devices = 0; + int ret; + int sanitize = 0; + int dev_cnt = 0; + int usage_error = 0; + FILE *out; + + while (1) { + static const struct option long_options[] = { + { "help", no_argument, NULL, GETOPT_VAL_HELP}, + { NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL); + if (c < 0) + break; + switch (c) { + case 'r': + create = 0; + break; + case 't': + num_threads = arg_strtou64(optarg); + if (num_threads > 32) + print_usage(1); + break; + case 'c': + compress_level = arg_strtou64(optarg); + if (compress_level > 9) + print_usage(1); + break; + case 'o': + old_restore = 1; + break; + case 's': + sanitize++; + break; + case 'w': + walk_trees = 1; + break; + case 'm': + create = 0; + multi_devices = 1; + break; + case GETOPT_VAL_HELP: + 
default: + print_usage(c != GETOPT_VAL_HELP); + } + } + + argc = argc - optind; + set_argv0(argv); + if (check_argc_min(argc, 2)) + print_usage(1); + + dev_cnt = argc - 1; + + if (create) { + if (old_restore) { + fprintf(stderr, "Usage error: create and restore cannot be used at the same time\n"); + usage_error++; + } + } else { + if (walk_trees || sanitize || compress_level) { + fprintf(stderr, "Usage error: use -w, -s, -c options for restore makes no sense\n"); + usage_error++; + } + if (multi_devices && dev_cnt < 2) { + fprintf(stderr, "Usage error: not enough devices specified for -m option\n"); + usage_error++; + } + if (!multi_devices && dev_cnt != 1) { + fprintf(stderr, "Usage error: accepts only 1 device without -m option\n"); + usage_error++; + } + } + + if (usage_error) + print_usage(1); + + source = argv[optind]; + target = argv[optind + 1]; + + if (create && !strcmp(target, "-")) { + out = stdout; + } else { + out = fopen(target, "w+"); + if (!out) { + perror("unable to create target file"); + exit(1); + } + } + + if (compress_level > 0 || create == 0) { + if (num_threads == 0) { + long tmp = sysconf(_SC_NPROCESSORS_ONLN); + + if (tmp <= 0) + tmp = 1; + num_threads = tmp; + } + } else { + num_threads = 0; + } + + if (create) { + ret = check_mounted(source); + if (ret < 0) { + fprintf(stderr, "Could not check mount status: %s\n", + strerror(-ret)); + exit(1); + } else if (ret) + fprintf(stderr, + "WARNING: The device is mounted. Make sure the filesystem is quiescent.\n"); + + ret = create_metadump(source, out, num_threads, + compress_level, sanitize, walk_trees); + } else { + ret = restore_metadump(source, out, old_restore, num_threads, + 0, target, multi_devices); + } + if (ret) { + printk("%s failed (%s)\n", (create) ? 
"create" : "restore", + strerror(errno)); + goto out; + } + + /* extended support for multiple devices */ + if (!create && multi_devices) { + struct btrfs_fs_info *info; + u64 total_devs; + int i; + + info = open_ctree_fs_info(target, 0, 0, + OPEN_CTREE_PARTIAL | + OPEN_CTREE_RESTORE); + if (!info) { + fprintf(stderr, "unable to open %s error = %s\n", + target, strerror(errno)); + return 1; + } + + total_devs = btrfs_super_num_devices(info->super_copy); + if (total_devs != dev_cnt) { + printk("it needs %llu devices but has only %d\n", + total_devs, dev_cnt); + close_ctree(info->chunk_root); + goto out; + } + + /* update super block on other disks */ + for (i = 2; i <= dev_cnt; i++) { + ret = update_disk_super_on_device(info, + argv[optind + i], (u64)i); + if (ret) { + printk("update disk super failed devid=%d (error=%d)\n", + i, ret); + close_ctree(info->chunk_root); + exit(1); + } + } + + close_ctree(info->chunk_root); + + /* fix metadata block to map correct chunk */ + ret = restore_metadump(source, out, 0, num_threads, 1, + target, 1); + if (ret) { + fprintf(stderr, "fix metadump failed (error=%d)\n", + ret); + exit(1); + } + } +out: + if (out == stdout) { + fflush(out); + } else { + fclose(out); + if (ret && create) { + int unlink_ret; + + unlink_ret = unlink(target); + if (unlink_ret) + fprintf(stderr, + "unlink output file failed : %s\n", + strerror(errno)); + } + } + + btrfs_close_all_devices(); + + return !!ret; +} diff --git a/btrfs-list.c b/btrfs-list.c new file mode 100644 index 00000000..2da54bf7 --- /dev/null +++ b/btrfs-list.c @@ -0,0 +1,1914 @@ +/* + * Copyright (C) 2010 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <dirent.h> +#include <libgen.h> +#include "ctree.h" +#include "transaction.h" +#include "utils.h" +#include "ioctl.h" +#include <uuid/uuid.h> +#include "btrfs-list.h" +#include "rbtree-utils.h" + +#define BTRFS_LIST_NFILTERS_INCREASE (2 * BTRFS_LIST_FILTER_MAX) +#define BTRFS_LIST_NCOMPS_INCREASE (2 * BTRFS_LIST_COMP_MAX) + +/* we store all the roots we find in an rbtree so that we can + * search for them later. 
+ */ +struct root_lookup { + struct rb_root root; +}; + +static struct { + char *name; + char *column_name; + int need_print; +} btrfs_list_columns[] = { + { + .name = "ID", + .column_name = "ID", + .need_print = 0, + }, + { + .name = "gen", + .column_name = "Gen", + .need_print = 0, + }, + { + .name = "cgen", + .column_name = "CGen", + .need_print = 0, + }, + { + .name = "parent", + .column_name = "Parent", + .need_print = 0, + }, + { + .name = "top level", + .column_name = "Top Level", + .need_print = 0, + }, + { + .name = "otime", + .column_name = "OTime", + .need_print = 0, + }, + { + .name = "parent_uuid", + .column_name = "Parent UUID", + .need_print = 0, + }, + { + .name = "received_uuid", + .column_name = "Received UUID", + .need_print = 0, + }, + { + .name = "uuid", + .column_name = "UUID", + .need_print = 0, + }, + { + .name = "path", + .column_name = "Path", + .need_print = 0, + }, + { + .name = NULL, + .column_name = NULL, + .need_print = 0, + }, +}; + +static btrfs_list_filter_func all_filter_funcs[]; +static btrfs_list_comp_func all_comp_funcs[]; + +void btrfs_list_setup_print_column(enum btrfs_list_column_enum column) +{ + int i; + + BUG_ON(column < 0 || column > BTRFS_LIST_ALL); + + if (column < BTRFS_LIST_ALL) { + btrfs_list_columns[column].need_print = 1; + return; + } + + for (i = 0; i < BTRFS_LIST_ALL; i++) + btrfs_list_columns[i].need_print = 1; +} + +static void root_lookup_init(struct root_lookup *tree) +{ + tree->root.rb_node = NULL; +} + +static int comp_entry_with_rootid(struct root_info *entry1, + struct root_info *entry2, + int is_descending) +{ + int ret; + + if (entry1->root_id > entry2->root_id) + ret = 1; + else if (entry1->root_id < entry2->root_id) + ret = -1; + else + ret = 0; + + return is_descending ? 
-ret : ret; +} + +static int comp_entry_with_gen(struct root_info *entry1, + struct root_info *entry2, + int is_descending) +{ + int ret; + + if (entry1->gen > entry2->gen) + ret = 1; + else if (entry1->gen < entry2->gen) + ret = -1; + else + ret = 0; + + return is_descending ? -ret : ret; +} + +static int comp_entry_with_ogen(struct root_info *entry1, + struct root_info *entry2, + int is_descending) +{ + int ret; + + if (entry1->ogen > entry2->ogen) + ret = 1; + else if (entry1->ogen < entry2->ogen) + ret = -1; + else + ret = 0; + + return is_descending ? -ret : ret; +} + +static int comp_entry_with_path(struct root_info *entry1, + struct root_info *entry2, + int is_descending) +{ + int ret; + + if (strcmp(entry1->full_path, entry2->full_path) > 0) + ret = 1; + else if (strcmp(entry1->full_path, entry2->full_path) < 0) + ret = -1; + else + ret = 0; + + return is_descending ? -ret : ret; +} + +static btrfs_list_comp_func all_comp_funcs[] = { + [BTRFS_LIST_COMP_ROOTID] = comp_entry_with_rootid, + [BTRFS_LIST_COMP_OGEN] = comp_entry_with_ogen, + [BTRFS_LIST_COMP_GEN] = comp_entry_with_gen, + [BTRFS_LIST_COMP_PATH] = comp_entry_with_path, +}; + +static char *all_sort_items[] = { + [BTRFS_LIST_COMP_ROOTID] = "rootid", + [BTRFS_LIST_COMP_OGEN] = "ogen", + [BTRFS_LIST_COMP_GEN] = "gen", + [BTRFS_LIST_COMP_PATH] = "path", + [BTRFS_LIST_COMP_MAX] = NULL, +}; + +static int btrfs_list_get_sort_item(char *sort_name) +{ + int i; + + for (i = 0; i < BTRFS_LIST_COMP_MAX; i++) { + if (strcmp(sort_name, all_sort_items[i]) == 0) + return i; + } + return -1; +} + +struct btrfs_list_comparer_set *btrfs_list_alloc_comparer_set(void) +{ + struct btrfs_list_comparer_set *set; + int size; + + size = sizeof(struct btrfs_list_comparer_set) + + BTRFS_LIST_NCOMPS_INCREASE * sizeof(struct btrfs_list_comparer); + set = calloc(1, size); + if (!set) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + + set->total = BTRFS_LIST_NCOMPS_INCREASE; + + return set; +} + +void 
btrfs_list_free_comparer_set(struct btrfs_list_comparer_set *comp_set) +{ + free(comp_set); +} + +static int btrfs_list_setup_comparer(struct btrfs_list_comparer_set **comp_set, + enum btrfs_list_comp_enum comparer, int is_descending) +{ + struct btrfs_list_comparer_set *set = *comp_set; + int size; + + BUG_ON(!set); + BUG_ON(comparer >= BTRFS_LIST_COMP_MAX); + BUG_ON(set->ncomps > set->total); + + if (set->ncomps == set->total) { + void *tmp; + + size = set->total + BTRFS_LIST_NCOMPS_INCREASE; + size = sizeof(*set) + size * sizeof(struct btrfs_list_comparer); + tmp = set; + set = realloc(set, size); + if (!set) { + fprintf(stderr, "memory allocation failed\n"); + free(tmp); + exit(1); + } + + memset(&set->comps[set->total], 0, + BTRFS_LIST_NCOMPS_INCREASE * + sizeof(struct btrfs_list_comparer)); + set->total += BTRFS_LIST_NCOMPS_INCREASE; + *comp_set = set; + } + + BUG_ON(set->comps[set->ncomps].comp_func); + + set->comps[set->ncomps].comp_func = all_comp_funcs[comparer]; + set->comps[set->ncomps].is_descending = is_descending; + set->ncomps++; + return 0; +} + +static int sort_comp(struct root_info *entry1, struct root_info *entry2, + struct btrfs_list_comparer_set *set) +{ + int rootid_compared = 0; + int i, ret = 0; + + if (!set || !set->ncomps) + goto comp_rootid; + + for (i = 0; i < set->ncomps; i++) { + if (!set->comps[i].comp_func) + break; + + ret = set->comps[i].comp_func(entry1, entry2, + set->comps[i].is_descending); + if (ret) + return ret; + + if (set->comps[i].comp_func == comp_entry_with_rootid) + rootid_compared = 1; + } + + if (!rootid_compared) { +comp_rootid: + ret = comp_entry_with_rootid(entry1, entry2, 0); + } + + return ret; +} + +static int sort_tree_insert(struct root_lookup *sort_tree, + struct root_info *ins, + struct btrfs_list_comparer_set *comp_set) +{ + struct rb_node **p = &sort_tree->root.rb_node; + struct rb_node *parent = NULL; + struct root_info *curr; + int ret; + + while (*p) { + parent = *p; + curr = rb_entry(parent, struct 
root_info, sort_node); + + ret = sort_comp(ins, curr, comp_set); + if (ret < 0) + p = &(*p)->rb_left; + else if (ret > 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + rb_link_node(&ins->sort_node, parent, p); + rb_insert_color(&ins->sort_node, &sort_tree->root); + return 0; +} + +/* + * insert a new root into the tree. returns the existing root entry + * if one is already there. Both root_id and ref_tree are used + * as the key + */ +static int root_tree_insert(struct root_lookup *root_tree, + struct root_info *ins) +{ + struct rb_node **p = &root_tree->root.rb_node; + struct rb_node * parent = NULL; + struct root_info *curr; + int ret; + + while(*p) { + parent = *p; + curr = rb_entry(parent, struct root_info, rb_node); + + ret = comp_entry_with_rootid(ins, curr, 0); + if (ret < 0) + p = &(*p)->rb_left; + else if (ret > 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + rb_link_node(&ins->rb_node, parent, p); + rb_insert_color(&ins->rb_node, &root_tree->root); + return 0; +} + +/* + * find a given root id in the tree. 
We return the smallest one, + * rb_next can be used to move forward looking for more if required + */ +static struct root_info *root_tree_search(struct root_lookup *root_tree, + u64 root_id) +{ + struct rb_node *n = root_tree->root.rb_node; + struct root_info *entry; + struct root_info tmp; + int ret; + + tmp.root_id = root_id; + + while(n) { + entry = rb_entry(n, struct root_info, rb_node); + + ret = comp_entry_with_rootid(&tmp, entry, 0); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + return entry; + } + return NULL; +} + +static int update_root(struct root_lookup *root_lookup, + u64 root_id, u64 ref_tree, u64 root_offset, u64 flags, + u64 dir_id, char *name, int name_len, u64 ogen, u64 gen, + time_t ot, void *uuid, void *puuid, void *ruuid) +{ + struct root_info *ri; + + ri = root_tree_search(root_lookup, root_id); + if (!ri || ri->root_id != root_id) + return -ENOENT; + if (name && name_len > 0) { + free(ri->name); + + ri->name = malloc(name_len + 1); + if (!ri->name) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + strncpy(ri->name, name, name_len); + ri->name[name_len] = 0; + } + if (ref_tree) + ri->ref_tree = ref_tree; + if (root_offset) + ri->root_offset = root_offset; + if (flags) + ri->flags = flags; + if (dir_id) + ri->dir_id = dir_id; + if (gen) + ri->gen = gen; + if (ogen) + ri->ogen = ogen; + if (!ri->ogen && root_offset) + ri->ogen = root_offset; + if (ot) + ri->otime = ot; + if (uuid) + memcpy(&ri->uuid, uuid, BTRFS_UUID_SIZE); + if (puuid) + memcpy(&ri->puuid, puuid, BTRFS_UUID_SIZE); + if (ruuid) + memcpy(&ri->ruuid, ruuid, BTRFS_UUID_SIZE); + + return 0; +} + +/* + * add_root - update the existed root, or allocate a new root and insert it + * into the lookup tree. + * root_id: object id of the root + * ref_tree: object id of the referring root. + * root_offset: offset value of the root'key + * dir_id: inode id of the directory in ref_tree where this root can be found. 
+ * name: the name of root_id in that directory + * name_len: the length of name + * ogen: the original generation of the root + * gen: the current generation of the root + * ot: the original time(create time) of the root + * uuid: uuid of the root + * puuid: uuid of the root parent if any + * ruuid: uuid of the received subvol, if any + */ +static int add_root(struct root_lookup *root_lookup, + u64 root_id, u64 ref_tree, u64 root_offset, u64 flags, + u64 dir_id, char *name, int name_len, u64 ogen, u64 gen, + time_t ot, void *uuid, void *puuid, void *ruuid) +{ + struct root_info *ri; + int ret; + + ret = update_root(root_lookup, root_id, ref_tree, root_offset, flags, + dir_id, name, name_len, ogen, gen, ot, + uuid, puuid, ruuid); + if (!ret) + return 0; + + ri = calloc(1, sizeof(*ri)); + if (!ri) { + printf("memory allocation failed\n"); + exit(1); + } + ri->root_id = root_id; + + if (name && name_len > 0) { + ri->name = malloc(name_len + 1); + if (!ri->name) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + strncpy(ri->name, name, name_len); + ri->name[name_len] = 0; + } + if (ref_tree) + ri->ref_tree = ref_tree; + if (dir_id) + ri->dir_id = dir_id; + if (root_offset) + ri->root_offset = root_offset; + if (flags) + ri->flags = flags; + if (gen) + ri->gen = gen; + if (ogen) + ri->ogen = ogen; + if (!ri->ogen && root_offset) + ri->ogen = root_offset; + if (ot) + ri->otime = ot; + + if (uuid) + memcpy(&ri->uuid, uuid, BTRFS_UUID_SIZE); + + if (puuid) + memcpy(&ri->puuid, puuid, BTRFS_UUID_SIZE); + + if (ruuid) + memcpy(&ri->ruuid, ruuid, BTRFS_UUID_SIZE); + + ret = root_tree_insert(root_lookup, ri); + if (ret) { + printf("failed to insert tree %llu\n", (unsigned long long)root_id); + exit(1); + } + return 0; +} + +static void __free_root_info(struct rb_node *node) +{ + struct root_info *ri; + + ri = rb_entry(node, struct root_info, rb_node); + free(ri->name); + free(ri->path); + free(ri->full_path); + free(ri); +} + +static inline void 
__free_all_subvolumn(struct root_lookup *root_tree) +{ + rb_free_nodes(&root_tree->root, __free_root_info); +} + +/* + * for a given root_info, search through the root_lookup tree to construct + * the full path name to it. + * + * This can't be called until all the root_info->path fields are filled + * in by lookup_ino_path + */ +static int resolve_root(struct root_lookup *rl, struct root_info *ri, + u64 top_id) +{ + char *full_path = NULL; + int len = 0; + struct root_info *found; + + /* + * we go backwards from the root_info object and add pathnames + * from parent directories as we go. + */ + found = ri; + while (1) { + char *tmp; + u64 next; + int add_len; + + /* + * ref_tree = 0 indicates the subvolumes + * has been deleted. + */ + if (!found->ref_tree) { + free(full_path); + return -ENOENT; + } + + add_len = strlen(found->path); + + if (full_path) { + /* room for / and for null */ + tmp = malloc(add_len + 2 + len); + if (!tmp) { + perror("malloc failed"); + exit(1); + } + memcpy(tmp + add_len + 1, full_path, len); + tmp[add_len] = '/'; + memcpy(tmp, found->path, add_len); + tmp [add_len + len + 1] = '\0'; + free(full_path); + full_path = tmp; + len += add_len + 1; + } else { + full_path = strdup(found->path); + len = add_len; + } + if (!ri->top_id) + ri->top_id = found->ref_tree; + + next = found->ref_tree; + if (next == top_id) + break; + /* + * if the ref_tree = BTRFS_FS_TREE_OBJECTID, + * we are at the top + */ + if (next == BTRFS_FS_TREE_OBJECTID) + break; + /* + * if the ref_tree wasn't in our tree of roots, the + * subvolume was deleted. + */ + found = root_tree_search(rl, next); + if (!found) { + free(full_path); + return -ENOENT; + } + } + + ri->full_path = full_path; + + return 0; +} + +/* + * for a single root_info, ask the kernel to give us a path name + * inside it's ref_root for the dir_id where it lives. + * + * This fills in root_info->path with the path to the directory and and + * appends this root's name. 
+ */ +static int lookup_ino_path(int fd, struct root_info *ri) +{ + struct btrfs_ioctl_ino_lookup_args args; + int ret; + + if (ri->path) + return 0; + + if (!ri->ref_tree) + return -ENOENT; + + memset(&args, 0, sizeof(args)); + args.treeid = ri->ref_tree; + args.objectid = ri->dir_id; + + ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args); + if (ret < 0) { + if (errno == ENOENT) { + ri->ref_tree = 0; + return -ENOENT; + } + fprintf(stderr, "ERROR: Failed to lookup path for root %llu - %s\n", + (unsigned long long)ri->ref_tree, + strerror(errno)); + return ret; + } + + if (args.name[0]) { + /* + * we're in a subdirectory of ref_tree, the kernel ioctl + * puts a / in there for us + */ + ri->path = malloc(strlen(ri->name) + strlen(args.name) + 1); + if (!ri->path) { + perror("malloc failed"); + exit(1); + } + strcpy(ri->path, args.name); + strcat(ri->path, ri->name); + } else { + /* we're at the root of ref_tree */ + ri->path = strdup(ri->name); + if (!ri->path) { + perror("strdup failed"); + exit(1); + } + } + return 0; +} + +/* finding the generation for a given path is a two step process. 
+ * First we use the inode loookup routine to find out the root id + * + * Then we use the tree search ioctl to scan all the root items for a + * given root id and spit out the latest generation we can find + */ +static u64 find_root_gen(int fd) +{ + struct btrfs_ioctl_ino_lookup_args ino_args; + int ret; + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + struct btrfs_ioctl_search_header sh; + unsigned long off = 0; + u64 max_found = 0; + int i; + + memset(&ino_args, 0, sizeof(ino_args)); + ino_args.objectid = BTRFS_FIRST_FREE_OBJECTID; + + /* this ioctl fills in ino_args->treeid */ + ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args); + if (ret < 0) { + fprintf(stderr, "ERROR: Failed to lookup path for dirid %llu - %s\n", + (unsigned long long)BTRFS_FIRST_FREE_OBJECTID, + strerror(errno)); + return 0; + } + + memset(&args, 0, sizeof(args)); + + sk->tree_id = 1; + + /* + * there may be more than one ROOT_ITEM key if there are + * snapshots pending deletion, we have to loop through + * them. 
+ */ + sk->min_objectid = ino_args.treeid; + sk->max_objectid = ino_args.treeid; + sk->max_type = BTRFS_ROOT_ITEM_KEY; + sk->min_type = BTRFS_ROOT_ITEM_KEY; + sk->max_offset = (u64)-1; + sk->max_transid = (u64)-1; + sk->nr_items = 4096; + + while (1) { + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args); + if (ret < 0) { + fprintf(stderr, "ERROR: can't perform the search - %s\n", + strerror(errno)); + return 0; + } + /* the ioctl returns the number of item it found in nr_items */ + if (sk->nr_items == 0) + break; + + off = 0; + for (i = 0; i < sk->nr_items; i++) { + struct btrfs_root_item *item; + + memcpy(&sh, args.buf + off, sizeof(sh)); + off += sizeof(sh); + item = (struct btrfs_root_item *)(args.buf + off); + off += sh.len; + + sk->min_objectid = sh.objectid; + sk->min_type = sh.type; + sk->min_offset = sh.offset; + + if (sh.objectid > ino_args.treeid) + break; + + if (sh.objectid == ino_args.treeid && + sh.type == BTRFS_ROOT_ITEM_KEY) { + max_found = max(max_found, + btrfs_root_generation(item)); + } + } + if (sk->min_offset < (u64)-1) + sk->min_offset++; + else + break; + + if (sk->min_type != BTRFS_ROOT_ITEM_KEY) + break; + if (sk->min_objectid != ino_args.treeid) + break; + } + return max_found; +} + +/* pass in a directory id and this will return + * the full path of the parent directory inside its + * subvolume root. + * + * It may return NULL if it is in the root, or an ERR_PTR if things + * go badly. 
+ */ +static char *__ino_resolve(int fd, u64 dirid) +{ + struct btrfs_ioctl_ino_lookup_args args; + int ret; + char *full; + + memset(&args, 0, sizeof(args)); + args.objectid = dirid; + + ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args); + if (ret < 0) { + fprintf(stderr, "ERROR: Failed to lookup path for dirid %llu - %s\n", + (unsigned long long)dirid, strerror(errno)); + return ERR_PTR(ret); + } + + if (args.name[0]) { + /* + * we're in a subdirectory of ref_tree, the kernel ioctl + * puts a / in there for us + */ + full = strdup(args.name); + if (!full) { + perror("malloc failed"); + return ERR_PTR(-ENOMEM); + } + } else { + /* we're at the root of ref_tree */ + full = NULL; + } + return full; +} + +/* + * simple string builder, returning a new string with both + * dirid and name + */ +static char *build_name(char *dirid, char *name) +{ + char *full; + if (!dirid) + return strdup(name); + + full = malloc(strlen(dirid) + strlen(name) + 1); + if (!full) + return NULL; + strcpy(full, dirid); + strcat(full, name); + return full; +} + +/* + * given an inode number, this returns the full path name inside the subvolume + * to that file/directory. cache_dirid and cache_name are used to + * cache the results so we can avoid tree searches if a later call goes + * to the same directory or file name + */ +static char *ino_resolve(int fd, u64 ino, u64 *cache_dirid, char **cache_name) + +{ + u64 dirid; + char *dirname; + char *name; + char *full; + int ret; + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + struct btrfs_ioctl_search_header *sh; + unsigned long off = 0; + int namelen; + + memset(&args, 0, sizeof(args)); + + sk->tree_id = 0; + + /* + * step one, we search for the inode back ref. 
We just use the first + * one + */ + sk->min_objectid = ino; + sk->max_objectid = ino; + sk->max_type = BTRFS_INODE_REF_KEY; + sk->max_offset = (u64)-1; + sk->min_type = BTRFS_INODE_REF_KEY; + sk->max_transid = (u64)-1; + sk->nr_items = 1; + + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args); + if (ret < 0) { + fprintf(stderr, "ERROR: can't perform the search - %s\n", + strerror(errno)); + return NULL; + } + /* the ioctl returns the number of item it found in nr_items */ + if (sk->nr_items == 0) + return NULL; + + off = 0; + sh = (struct btrfs_ioctl_search_header *)(args.buf + off); + + if (sh->type == BTRFS_INODE_REF_KEY) { + struct btrfs_inode_ref *ref; + dirid = sh->offset; + + ref = (struct btrfs_inode_ref *)(sh + 1); + namelen = btrfs_stack_inode_ref_name_len(ref); + + name = (char *)(ref + 1); + name = strndup(name, namelen); + + /* use our cached value */ + if (dirid == *cache_dirid && *cache_name) { + dirname = *cache_name; + goto build; + } + } else { + return NULL; + } + /* + * the inode backref gives us the file name and the parent directory id. 
+ * From here we use __ino_resolve to get the path to the parent + */ + dirname = __ino_resolve(fd, dirid); +build: + full = build_name(dirname, name); + if (*cache_name && dirname != *cache_name) + free(*cache_name); + + *cache_name = dirname; + *cache_dirid = dirid; + free(name); + + return full; +} + +int btrfs_list_get_default_subvolume(int fd, u64 *default_id) +{ + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + struct btrfs_ioctl_search_header *sh; + u64 found = 0; + int ret; + + memset(&args, 0, sizeof(args)); + + /* + * search for a dir item with a name 'default' in the tree of + * tree roots, it should point us to a default root + */ + sk->tree_id = 1; + + /* don't worry about ancient format and request only one item */ + sk->nr_items = 1; + + sk->max_objectid = BTRFS_ROOT_TREE_DIR_OBJECTID; + sk->min_objectid = BTRFS_ROOT_TREE_DIR_OBJECTID; + sk->max_type = BTRFS_DIR_ITEM_KEY; + sk->min_type = BTRFS_DIR_ITEM_KEY; + sk->max_offset = (u64)-1; + sk->max_transid = (u64)-1; + + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args); + if (ret < 0) + return ret; + + /* the ioctl returns the number of items it found in nr_items */ + if (sk->nr_items == 0) + goto out; + + sh = (struct btrfs_ioctl_search_header *)args.buf; + + if (sh->type == BTRFS_DIR_ITEM_KEY) { + struct btrfs_dir_item *di; + int name_len; + char *name; + + di = (struct btrfs_dir_item *)(sh + 1); + name_len = btrfs_stack_dir_name_len(di); + name = (char *)(di + 1); + + if (!strncmp("default", name, name_len)) + found = btrfs_disk_key_objectid(&di->location); + } + +out: + *default_id = found; + return 0; +} + +static int __list_subvol_search(int fd, struct root_lookup *root_lookup) +{ + int ret; + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + struct btrfs_ioctl_search_header sh; + struct btrfs_root_ref *ref; + struct btrfs_root_item *ri; + unsigned long off = 0; + int name_len; + char *name; + u64 dir_id; + u64 gen = 0; + 
u64 ogen; + u64 flags; + int i; + time_t t; + u8 uuid[BTRFS_UUID_SIZE]; + u8 puuid[BTRFS_UUID_SIZE]; + u8 ruuid[BTRFS_UUID_SIZE]; + + root_lookup_init(root_lookup); + memset(&args, 0, sizeof(args)); + + /* search in the tree of tree roots */ + sk->tree_id = 1; + + /* + * set the min and max to backref keys. The search will + * only send back this type of key now. + */ + sk->max_type = BTRFS_ROOT_BACKREF_KEY; + sk->min_type = BTRFS_ROOT_ITEM_KEY; + + sk->min_objectid = BTRFS_FIRST_FREE_OBJECTID; + + /* + * set all the other params to the max, we'll take any objectid + * and any trans + */ + sk->max_objectid = BTRFS_LAST_FREE_OBJECTID; + sk->max_offset = (u64)-1; + sk->max_transid = (u64)-1; + + /* just a big number, doesn't matter much */ + sk->nr_items = 4096; + + while(1) { + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args); + if (ret < 0) + return ret; + /* the ioctl returns the number of item it found in nr_items */ + if (sk->nr_items == 0) + break; + + off = 0; + + /* + * for each item, pull the key out of the header and then + * read the root_ref item it contains + */ + for (i = 0; i < sk->nr_items; i++) { + memcpy(&sh, args.buf + off, sizeof(sh)); + off += sizeof(sh); + if (sh.type == BTRFS_ROOT_BACKREF_KEY) { + ref = (struct btrfs_root_ref *)(args.buf + off); + name_len = btrfs_stack_root_ref_name_len(ref); + name = (char *)(ref + 1); + dir_id = btrfs_stack_root_ref_dirid(ref); + + add_root(root_lookup, sh.objectid, sh.offset, + 0, 0, dir_id, name, name_len, 0, 0, 0, + NULL, NULL, NULL); + } else if (sh.type == BTRFS_ROOT_ITEM_KEY) { + ri = (struct btrfs_root_item *)(args.buf + off); + gen = btrfs_root_generation(ri); + flags = btrfs_root_flags(ri); + if(sh.len > + sizeof(struct btrfs_root_item_v0)) { + t = btrfs_stack_timespec_sec(&ri->otime); + ogen = btrfs_root_otransid(ri); + memcpy(uuid, ri->uuid, BTRFS_UUID_SIZE); + memcpy(puuid, ri->parent_uuid, BTRFS_UUID_SIZE); + memcpy(ruuid, ri->received_uuid, BTRFS_UUID_SIZE); + } else { + t = 0; + ogen = 0; + 
memset(uuid, 0, BTRFS_UUID_SIZE); + memset(puuid, 0, BTRFS_UUID_SIZE); + memset(ruuid, 0, BTRFS_UUID_SIZE); + } + + add_root(root_lookup, sh.objectid, 0, + sh.offset, flags, 0, NULL, 0, ogen, + gen, t, uuid, puuid, ruuid); + } + + off += sh.len; + + /* + * record the mins in sk so we can make sure the + * next search doesn't repeat this root + */ + sk->min_objectid = sh.objectid; + sk->min_type = sh.type; + sk->min_offset = sh.offset; + } + sk->nr_items = 4096; + sk->min_offset++; + if (!sk->min_offset) /* overflow */ + sk->min_type++; + else + continue; + + if (sk->min_type > BTRFS_ROOT_BACKREF_KEY) { + sk->min_type = BTRFS_ROOT_ITEM_KEY; + sk->min_objectid++; + } else + continue; + + if (sk->min_objectid > sk->max_objectid) + break; + } + + return 0; +} + +static int filter_by_rootid(struct root_info *ri, u64 data) +{ + return ri->root_id == data; +} + +static int filter_snapshot(struct root_info *ri, u64 data) +{ + return !!ri->root_offset; +} + +static int filter_flags(struct root_info *ri, u64 flags) +{ + return ri->flags & flags; +} + +static int filter_gen_more(struct root_info *ri, u64 data) +{ + return ri->gen >= data; +} + +static int filter_gen_less(struct root_info *ri, u64 data) +{ + return ri->gen <= data; +} + +static int filter_gen_equal(struct root_info *ri, u64 data) +{ + return ri->gen == data; +} + +static int filter_cgen_more(struct root_info *ri, u64 data) +{ + return ri->ogen >= data; +} + +static int filter_cgen_less(struct root_info *ri, u64 data) +{ + return ri->ogen <= data; +} + +static int filter_cgen_equal(struct root_info *ri, u64 data) +{ + return ri->ogen == data; +} + +static int filter_topid_equal(struct root_info *ri, u64 data) +{ + return ri->top_id == data; +} + +static int filter_full_path(struct root_info *ri, u64 data) +{ + if (ri->full_path && ri->top_id != data) { + char *tmp; + char p[] = "<FS_TREE>"; + int add_len = strlen(p); + int len = strlen(ri->full_path); + + tmp = malloc(len + add_len + 2); + if (!tmp) { + 
fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + memcpy(tmp + add_len + 1, ri->full_path, len); + tmp[len + add_len + 1] = '\0'; + tmp[add_len] = '/'; + memcpy(tmp, p, add_len); + free(ri->full_path); + ri->full_path = tmp; + } + return 1; +} + +static int filter_by_parent(struct root_info *ri, u64 data) +{ + return !uuid_compare(ri->puuid, (u8 *)(unsigned long)data); +} + +static int filter_deleted(struct root_info *ri, u64 data) +{ + return ri->deleted; +} + +static btrfs_list_filter_func all_filter_funcs[] = { + [BTRFS_LIST_FILTER_ROOTID] = filter_by_rootid, + [BTRFS_LIST_FILTER_SNAPSHOT_ONLY] = filter_snapshot, + [BTRFS_LIST_FILTER_FLAGS] = filter_flags, + [BTRFS_LIST_FILTER_GEN_MORE] = filter_gen_more, + [BTRFS_LIST_FILTER_GEN_LESS] = filter_gen_less, + [BTRFS_LIST_FILTER_GEN_EQUAL] = filter_gen_equal, + [BTRFS_LIST_FILTER_CGEN_MORE] = filter_cgen_more, + [BTRFS_LIST_FILTER_CGEN_LESS] = filter_cgen_less, + [BTRFS_LIST_FILTER_CGEN_EQUAL] = filter_cgen_equal, + [BTRFS_LIST_FILTER_TOPID_EQUAL] = filter_topid_equal, + [BTRFS_LIST_FILTER_FULL_PATH] = filter_full_path, + [BTRFS_LIST_FILTER_BY_PARENT] = filter_by_parent, + [BTRFS_LIST_FILTER_DELETED] = filter_deleted, +}; + +struct btrfs_list_filter_set *btrfs_list_alloc_filter_set(void) +{ + struct btrfs_list_filter_set *set; + int size; + + size = sizeof(struct btrfs_list_filter_set) + + BTRFS_LIST_NFILTERS_INCREASE * sizeof(struct btrfs_list_filter); + set = calloc(1, size); + if (!set) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + + set->total = BTRFS_LIST_NFILTERS_INCREASE; + + return set; +} + +void btrfs_list_free_filter_set(struct btrfs_list_filter_set *filter_set) +{ + free(filter_set); +} + +int btrfs_list_setup_filter(struct btrfs_list_filter_set **filter_set, + enum btrfs_list_filter_enum filter, u64 data) +{ + struct btrfs_list_filter_set *set = *filter_set; + int size; + + BUG_ON(!set); + BUG_ON(filter >= BTRFS_LIST_FILTER_MAX); + BUG_ON(set->nfilters > set->total); 
+ + if (set->nfilters == set->total) { + void *tmp; + + size = set->total + BTRFS_LIST_NFILTERS_INCREASE; + size = sizeof(*set) + size * sizeof(struct btrfs_list_filter); + tmp = set; + set = realloc(set, size); + if (!set) { + fprintf(stderr, "memory allocation failed\n"); + free(tmp); + exit(1); + } + + memset(&set->filters[set->total], 0, + BTRFS_LIST_NFILTERS_INCREASE * + sizeof(struct btrfs_list_filter)); + set->total += BTRFS_LIST_NFILTERS_INCREASE; + *filter_set = set; + } + + BUG_ON(set->filters[set->nfilters].filter_func); + + if (filter == BTRFS_LIST_FILTER_DELETED) + set->only_deleted = 1; + + set->filters[set->nfilters].filter_func = all_filter_funcs[filter]; + set->filters[set->nfilters].data = data; + set->nfilters++; + return 0; +} + +static int filter_root(struct root_info *ri, + struct btrfs_list_filter_set *set) +{ + int i, ret; + + if (!set) + return 1; + + if (set->only_deleted && !ri->deleted) + return 0; + + if (!set->only_deleted && ri->deleted) + return 0; + + for (i = 0; i < set->nfilters; i++) { + if (!set->filters[i].filter_func) + break; + ret = set->filters[i].filter_func(ri, set->filters[i].data); + if (!ret) + return 0; + } + return 1; +} + +static void __filter_and_sort_subvol(struct root_lookup *all_subvols, + struct root_lookup *sort_tree, + struct btrfs_list_filter_set *filter_set, + struct btrfs_list_comparer_set *comp_set, + u64 top_id) +{ + struct rb_node *n; + struct root_info *entry; + int ret; + + root_lookup_init(sort_tree); + + n = rb_last(&all_subvols->root); + while (n) { + entry = rb_entry(n, struct root_info, rb_node); + + ret = resolve_root(all_subvols, entry, top_id); + if (ret == -ENOENT) { + entry->full_path = strdup("DELETED"); + entry->deleted = 1; + } + ret = filter_root(entry, filter_set); + if (ret) + sort_tree_insert(sort_tree, entry, comp_set); + n = rb_prev(n); + } +} + +static int __list_subvol_fill_paths(int fd, struct root_lookup *root_lookup) +{ + struct rb_node *n; + + n = rb_first(&root_lookup->root); 
+ while (n) { + struct root_info *entry; + int ret; + entry = rb_entry(n, struct root_info, rb_node); + ret = lookup_ino_path(fd, entry); + if (ret && ret != -ENOENT) + return ret; + n = rb_next(n); + } + + return 0; +} + +static void print_subvolume_column(struct root_info *subv, + enum btrfs_list_column_enum column) +{ + char tstr[256]; + char uuidparse[BTRFS_UUID_UNPARSED_SIZE]; + + BUG_ON(column >= BTRFS_LIST_ALL || column < 0); + + switch (column) { + case BTRFS_LIST_OBJECTID: + printf("%llu", subv->root_id); + break; + case BTRFS_LIST_GENERATION: + printf("%llu", subv->gen); + break; + case BTRFS_LIST_OGENERATION: + printf("%llu", subv->ogen); + break; + case BTRFS_LIST_PARENT: + printf("%llu", subv->ref_tree); + break; + case BTRFS_LIST_TOP_LEVEL: + printf("%llu", subv->top_id); + break; + case BTRFS_LIST_OTIME: + if (subv->otime) { + struct tm tm; + + localtime_r(&subv->otime, &tm); + strftime(tstr, 256, "%Y-%m-%d %X", &tm); + } else + strcpy(tstr, "-"); + printf("%s", tstr); + break; + case BTRFS_LIST_UUID: + if (uuid_is_null(subv->uuid)) + strcpy(uuidparse, "-"); + else + uuid_unparse(subv->uuid, uuidparse); + printf("%s", uuidparse); + break; + case BTRFS_LIST_PUUID: + if (uuid_is_null(subv->puuid)) + strcpy(uuidparse, "-"); + else + uuid_unparse(subv->puuid, uuidparse); + printf("%s", uuidparse); + break; + case BTRFS_LIST_RUUID: + if (uuid_is_null(subv->ruuid)) + strcpy(uuidparse, "-"); + else + uuid_unparse(subv->ruuid, uuidparse); + printf("%s", uuidparse); + break; + case BTRFS_LIST_PATH: + BUG_ON(!subv->full_path); + printf("%s", subv->full_path); + break; + default: + break; + } +} + +static void print_single_volume_info_raw(struct root_info *subv, char *raw_prefix) +{ + int i; + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + if (!btrfs_list_columns[i].need_print) + continue; + + if (raw_prefix) + printf("%s",raw_prefix); + + print_subvolume_column(subv, i); + } + printf("\n"); +} + +static void print_single_volume_info_table(struct root_info *subv) 
+{ + int i; + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + if (!btrfs_list_columns[i].need_print) + continue; + + print_subvolume_column(subv, i); + + if (i != BTRFS_LIST_PATH) + printf("\t"); + + if (i == BTRFS_LIST_TOP_LEVEL) + printf("\t"); + } + printf("\n"); +} + +static void print_single_volume_info_default(struct root_info *subv) +{ + int i; + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + if (!btrfs_list_columns[i].need_print) + continue; + + printf("%s ", btrfs_list_columns[i].name); + print_subvolume_column(subv, i); + + if (i != BTRFS_LIST_PATH) + printf(" "); + } + printf("\n"); +} + +static void print_all_volume_info_tab_head(void) +{ + int i; + int len; + char barrier[20]; + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + if (btrfs_list_columns[i].need_print) + printf("%s\t", btrfs_list_columns[i].name); + + if (i == BTRFS_LIST_ALL-1) + printf("\n"); + } + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + memset(barrier, 0, sizeof(barrier)); + + if (btrfs_list_columns[i].need_print) { + len = strlen(btrfs_list_columns[i].name); + while (len--) + strcat(barrier, "-"); + + printf("%s\t", barrier); + } + if (i == BTRFS_LIST_ALL-1) + printf("\n"); + } +} + +static void print_all_volume_info(struct root_lookup *sorted_tree, + int layout, char *raw_prefix) +{ + struct rb_node *n; + struct root_info *entry; + + if (layout == BTRFS_LIST_LAYOUT_TABLE) + print_all_volume_info_tab_head(); + + n = rb_first(&sorted_tree->root); + while (n) { + entry = rb_entry(n, struct root_info, sort_node); + switch (layout) { + case BTRFS_LIST_LAYOUT_DEFAULT: + print_single_volume_info_default(entry); + break; + case BTRFS_LIST_LAYOUT_TABLE: + print_single_volume_info_table(entry); + break; + case BTRFS_LIST_LAYOUT_RAW: + print_single_volume_info_raw(entry, raw_prefix); + break; + } + n = rb_next(n); + } +} + +static int btrfs_list_subvols(int fd, struct root_lookup *root_lookup) +{ + int ret; + + ret = __list_subvol_search(fd, root_lookup); + if (ret) { + fprintf(stderr, "ERROR: can't perform 
the search - %s\n", + strerror(errno)); + return ret; + } + + /* + * now we have an rbtree full of root_info objects, but we need to fill + * in their path names within the subvol that is referencing each one. + */ + ret = __list_subvol_fill_paths(fd, root_lookup); + return ret; +} + +int btrfs_list_subvols_print(int fd, struct btrfs_list_filter_set *filter_set, + struct btrfs_list_comparer_set *comp_set, + int layout, int full_path, char *raw_prefix) +{ + struct root_lookup root_lookup; + struct root_lookup root_sort; + int ret = 0; + u64 top_id = 0; + + if (full_path) + ret = btrfs_list_get_path_rootid(fd, &top_id); + if (ret) + return ret; + + ret = btrfs_list_subvols(fd, &root_lookup); + if (ret) + return ret; + __filter_and_sort_subvol(&root_lookup, &root_sort, filter_set, + comp_set, top_id); + + print_all_volume_info(&root_sort, layout, raw_prefix); + __free_all_subvolumn(&root_lookup); + + return 0; +} + +static char *strdup_or_null(const char *s) +{ + if (!s) + return NULL; + return strdup(s); +} + +int btrfs_get_subvol(int fd, struct root_info *the_ri) +{ + int ret, rr; + struct root_lookup rl; + struct rb_node *rbn; + struct root_info *ri; + u64 root_id; + + ret = btrfs_list_get_path_rootid(fd, &root_id); + if (ret) + return ret; + + ret = btrfs_list_subvols(fd, &rl); + if (ret) + return ret; + + rbn = rb_first(&rl.root); + while(rbn) { + ri = rb_entry(rbn, struct root_info, rb_node); + rr = resolve_root(&rl, ri, root_id); + if (rr == -ENOENT) { + ret = -ENOENT; + rbn = rb_next(rbn); + continue; + } + if (!comp_entry_with_rootid(the_ri, ri, 0)) { + memcpy(the_ri, ri, offsetof(struct root_info, path)); + the_ri->path = strdup_or_null(ri->path); + the_ri->name = strdup_or_null(ri->name); + the_ri->full_path = strdup_or_null(ri->full_path); + ret = 0; + break; + } + rbn = rb_next(rbn); + } + __free_all_subvolumn(&rl); + return ret; +} + +static int print_one_extent(int fd, struct btrfs_ioctl_search_header *sh, + struct btrfs_file_extent_item *item, + u64 
found_gen, u64 *cache_dirid, + char **cache_dir_name, u64 *cache_ino, + char **cache_full_name) +{ + u64 len = 0; + u64 disk_start = 0; + u64 disk_offset = 0; + u8 type; + int compressed = 0; + int flags = 0; + char *name = NULL; + + if (sh->objectid == *cache_ino) { + name = *cache_full_name; + } else if (*cache_full_name) { + free(*cache_full_name); + *cache_full_name = NULL; + } + if (!name) { + name = ino_resolve(fd, sh->objectid, cache_dirid, + cache_dir_name); + *cache_full_name = name; + *cache_ino = sh->objectid; + } + if (!name) + return -EIO; + + type = btrfs_stack_file_extent_type(item); + compressed = btrfs_stack_file_extent_compression(item); + + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { + disk_start = btrfs_stack_file_extent_disk_bytenr(item); + disk_offset = btrfs_stack_file_extent_offset(item); + len = btrfs_stack_file_extent_num_bytes(item); + } else if (type == BTRFS_FILE_EXTENT_INLINE) { + disk_start = 0; + disk_offset = 0; + len = btrfs_stack_file_extent_ram_bytes(item); + } else { + printf("unhandled extent type %d for inode %llu " + "file offset %llu gen %llu\n", + type, + (unsigned long long)sh->objectid, + (unsigned long long)sh->offset, + (unsigned long long)found_gen); + + return -EIO; |