From cef291134744f2385b02820a52f1fd28c3c354d1 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Fri, 28 Sep 2018 10:19:41 +0100 Subject: Import bmap-tools_3.5.orig.tar.gz [dgit import orig bmap-tools_3.5.orig.tar.gz] --- .coveragerc | 2 + .gitignore | 61 +++ .travis.yml | 38 ++ COPYING | 339 +++++++++++++++ TODO | 13 + __main__.py | 11 + bmaptool | 1 + bmaptools/BmapCopy.py | 794 +++++++++++++++++++++++++++++++++++ bmaptools/BmapCreate.py | 356 ++++++++++++++++ bmaptools/BmapHelpers.py | 85 ++++ bmaptools/CLI.py | 736 ++++++++++++++++++++++++++++++++ bmaptools/Filemap.py | 526 +++++++++++++++++++++++ bmaptools/TransRead.py | 611 +++++++++++++++++++++++++++ bmaptools/__init__.py | 0 contrib/bmap_write.py | 70 +++ debian/bmap-tools.docs | 1 + debian/bmap-tools.install | 1 + debian/changelog | 207 +++++++++ debian/compat | 1 + debian/control | 31 ++ debian/copyright | 28 ++ debian/manpages | 1 + debian/rules | 4 + docs/README | 341 +++++++++++++++ docs/RELEASE_NOTES | 289 +++++++++++++ docs/man1/bmaptool.1 | 315 ++++++++++++++ make_a_release.sh | 169 ++++++++ packaging/bmap-tools.changes | 124 ++++++ packaging/bmap-tools.spec | 89 ++++ setup.cfg | 2 + setup.py | 42 ++ tests/__init__.py | 0 tests/helpers.py | 304 ++++++++++++++ tests/oldcodebase/BmapCopy1_0.py | 710 +++++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_0.py | 634 ++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_1.py | 633 ++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_2.py | 635 ++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_3.py | 670 +++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_4.py | 670 +++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_5.py | 727 ++++++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_6.py | 727 ++++++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy3_0.py | 767 +++++++++++++++++++++++++++++++++ tests/oldcodebase/__init__.py | 0 tests/test-data/test.image.bmap.v1.2 | 90 ++++ tests/test-data/test.image.bmap.v1.3 | 94 
+++++ tests/test-data/test.image.bmap.v1.4 | 97 +++++ tests/test-data/test.image.bmap.v2.0 | 97 +++++ tests/test-data/test.image.gz | Bin 0 -> 1612 bytes tests/test_api_base.py | 259 ++++++++++++ tests/test_compat.py | 155 +++++++ tests/test_filemap.py | 161 +++++++ 51 files changed, 12718 insertions(+) create mode 100644 .coveragerc create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 COPYING create mode 100644 TODO create mode 100755 __main__.py create mode 120000 bmaptool create mode 100644 bmaptools/BmapCopy.py create mode 100644 bmaptools/BmapCreate.py create mode 100644 bmaptools/BmapHelpers.py create mode 100644 bmaptools/CLI.py create mode 100644 bmaptools/Filemap.py create mode 100644 bmaptools/TransRead.py create mode 100644 bmaptools/__init__.py create mode 100755 contrib/bmap_write.py create mode 100644 debian/bmap-tools.docs create mode 100644 debian/bmap-tools.install create mode 100644 debian/changelog create mode 100644 debian/compat create mode 100644 debian/control create mode 100644 debian/copyright create mode 100644 debian/manpages create mode 100755 debian/rules create mode 100644 docs/README create mode 100644 docs/RELEASE_NOTES create mode 100644 docs/man1/bmaptool.1 create mode 100755 make_a_release.sh create mode 100644 packaging/bmap-tools.changes create mode 100644 packaging/bmap-tools.spec create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/helpers.py create mode 100644 tests/oldcodebase/BmapCopy1_0.py create mode 100644 tests/oldcodebase/BmapCopy2_0.py create mode 100644 tests/oldcodebase/BmapCopy2_1.py create mode 100644 tests/oldcodebase/BmapCopy2_2.py create mode 100644 tests/oldcodebase/BmapCopy2_3.py create mode 100644 tests/oldcodebase/BmapCopy2_4.py create mode 100644 tests/oldcodebase/BmapCopy2_5.py create mode 100644 tests/oldcodebase/BmapCopy2_6.py create mode 100644 tests/oldcodebase/BmapCopy3_0.py create mode 100644 
tests/oldcodebase/__init__.py create mode 100644 tests/test-data/test.image.bmap.v1.2 create mode 100644 tests/test-data/test.image.bmap.v1.3 create mode 100644 tests/test-data/test.image.bmap.v1.4 create mode 100644 tests/test-data/test.image.bmap.v2.0 create mode 100644 tests/test-data/test.image.gz create mode 100644 tests/test_api_base.py create mode 100644 tests/test_compat.py create mode 100644 tests/test_filemap.py diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..cc764ca --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +include = bmaptools/* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9a83996 --- /dev/null +++ b/.gitignore @@ -0,0 +1,61 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Sphinx documentation +docs/_build/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# dotenv +.env + +# virtualenv +venv/ +ENV/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..e229a36 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,38 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.3" + - "3.4" + - "3.5" + - "3.6" + - "3.7-dev" # 3.7 development branch + - "nightly" # currently points to 3.6-dev +# pypy 2.x currently disabled, until testing fixed. +# - "pypy" +# - "pypy3" +# command to install dependencies +install: + - pip install codecov + - pip install . 
+# command to run tests +script: python setup.py nosetests --with-coverage --cover-xml +after_success: + - codecov +# Don't bug people yet +notifications: + email: false + +# Perform testing also using optional compressors +sudo: required +dist: trusty +before_install: + - sudo apt-get -qq update + - sudo apt-get install -y pbzip2 pigz lzop liblz4-tool +# addons: +# apt: +# packages: +# - pbzip2 +# - pigz +# - lzop +# - liblz4-tool diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. 
+These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/TODO b/TODO new file mode 100644 index 0000000..f252e3f --- /dev/null +++ b/TODO @@ -0,0 +1,13 @@ +Current TODO list, any help with these is appreciated. + +1. Teach bmaptool to update the alternate GPT partition +2. Add a test for bmap with invalid checksums +3. When writing to a file, and the file did not exist, so we create it, + and then fail, we do not remove the half-written file. +4. Teach make_a_release.sh to modify the version in the 'doc/man1/bmaptool.1' + file too. +5. Use __author__ and __version__ in bmaptool, and import them from + 'setup.py' +6. Move all the documentation from tizen.org to 01.org +7. Make sure the web documentation describes all features of releases starting + from 3.1. diff --git a/__main__.py b/__main__.py new file mode 100755 index 0000000..ea24b7b --- /dev/null +++ b/__main__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +# -*- coding: utf-8 -*- +import re +import sys + +from bmaptools.CLI import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe|\.pyz)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bmaptool b/bmaptool new file mode 120000 index 0000000..5a427d1 --- /dev/null +++ b/bmaptool @@ -0,0 +1 @@ +__main__.py \ No newline at end of file diff --git a/bmaptools/BmapCopy.py b/bmaptools/BmapCopy.py new file mode 100644 index 0000000..b0e251e --- /dev/null +++ b/bmaptools/BmapCopy.py @@ -0,0 +1,794 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. +# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+ +""" +This module implements copying of images with bmap and provides the following +API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. 
+""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# * Too many arguments - R0913 +# * Too many statements (R0915) +# pylint: disable=R0902 +# pylint: disable=R0913 +# pylint: disable=R0915 + +import os +import stat +import sys +import hashlib +import logging +import datetime +from six import reraise +from six.moves import queue as Queue +from six.moves import _thread as thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +_log = logging.getLogger(__name__) # pylint: disable=C0103 + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = "2.0" + + +class Error(Exception): + """ + A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. + """ + pass + + +class BmapCopy(object): + """ + This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. 
+ + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the checksum while copying or not. Note, this is done only in case + of bmap-based copying and only if bmap contains checksums (e.g., bmap + version 1.0 did not have checksums support). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. + """ + + def __init__(self, image, dest, bmap=None, image_size=None): + """ + The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. 
+ """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 6 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # The bmap file checksum type and length + self._cs_type = None + self._cs_len = None + self._cs_attrib_name = None + self._bmap_cs_attrib_name = None + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes // self.block_size + + def set_progress_indicator(self, file_obj, format_string): + """ + Setup the progress indicator which shows how much data has been copied + in percent. 
+ + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. + """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ + Set image size and initialize various other geometry-related attributes. + """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = (self.image_size + self.block_size - 1) // self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ + This is a helper function which verifies the bmap file checksum. + """ + + import mmap + + correct_chksum = self._xml.find(self._bmap_cs_attrib_name).text.strip() + + # Before verifying the shecksum, we have to substitute the checksum + # value stored in the file with all zeroes. For these purposes we + # create private memory mapping of the bmap file. 
+ mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access=mmap.ACCESS_COPY) + + chksum_pos = mapped_bmap.find(correct_chksum.encode()) + assert chksum_pos != -1 + + mapped_bmap[chksum_pos:chksum_pos + self._cs_len] = b'0' * self._cs_len + + hash_obj = hashlib.new(self._cs_type) + hash_obj.update(mapped_bmap) + calculated_chksum = hash_obj.hexdigest() + + mapped_bmap.close() + + if calculated_chksum != correct_chksum: + raise Error("checksum mismatch for bmap file '%s': calculated " + "'%s', should be '%s'" + % (self._bmap_path, calculated_chksum, correct_chksum)) + + def _parse_bmap(self): + """ + Parse the bmap file and initialize corresponding class instance attributs. + """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + # Extrace the erroneous line with some context + self._f_bmap.seek(0) + xml_extract = "" + for num, line in enumerate(self._f_bmap): + if num >= err.position[0] - 4 and num <= err.position[0] + 4: + xml_extract += "Line %d: %s" % (num, line) + + raise Error("cannot parse the bmap file '%s' which should be a " + "proper XML file: %s, the XML extract:\n%s" % + (self._bmap_path, err, xml_extract)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > int(SUPPORTED_BMAP_VERSION.split('.', 1)[0]): + raise Error("only bmap format version up to %d is supported, " + "version %d is not supported" + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = 
human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) // self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " + "blocks count (%d bytes != %d blocks * %d bytes)" + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major > 1 or \ + (self.bmap_version_major == 1 and self.bmap_version_minor == 4): + # In bmap format version 1.0-1.3 the only supported checksum type + # was SHA1. Version 2.0 started supporting arbitrary checksum + # types. A new "ChecksumType" tag was introduce to specify the + # checksum function name. And all XML tags which contained "sha1" + # in their name were renamed to something more neutral. This was an + # change incompatible with previous formats. + # + # There is a special format version 1.4, which should not have been + # ever issued, but was released by a mistake. The mistake was that + # when implementing version 2.0 support we mistakenly gave it + # version number 1.4. This was later on fixed and format version + # 1.4 became version 2.0. So 1.4 and 2.0 formats are identical. + # + # Note, bmap files did not contain checksums prior to version 1.3. 
+ self._cs_type = xml.find("ChecksumType").text.strip() + self._cs_attrib_name = "chksum" + self._bmap_cs_attrib_name = "BmapFileChecksum" + elif self.bmap_version_minor == 3: + self._cs_type = "sha1" + self._cs_attrib_name = "sha1" + self._bmap_cs_attrib_name = "BmapFileSHA1" + + if self._cs_type: + try: + self._cs_len = len(hashlib.new(self._cs_type).hexdigest()) + except ValueError as err: + raise Error("cannot initialize hash function \"%s\": %s" % + (self._cs_type, err)) + self._verify_bmap_checksum() + + def _update_progress(self, blocks_written): + """ + Print the progress indicator if the mapped area size is known and if + the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. + """ + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + _log.debug("wrote %d blocks out of %d (%d%%)" % + (blocks_written, self.mapped_cnt, percent)) + else: + _log.debug("wrote %d blocks" % blocks_written) + + if not self._progress_file: + return + + if self.mapped_cnt: + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds=250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. 
+ if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ + This is a helper generator that parses the bmap XML file and for each + block range in the XML file it yields ('first', 'last', 'chksum') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'chksum' is the checksum of the range ('None' is used if it is + missing). + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. + """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. 
+ split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if self._cs_attrib_name in xml_element.attrib: + chksum = xml_element.attrib[self._cs_attrib_name] + else: + chksum = None + + yield (first, last, chksum) + + def _get_batches(self, first, last): + """ + This is a helper generator which splits block ranges from the bmap file + to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). + """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ + This is generator which reads the image file in '_batch_blocks' chunks + and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. 
+ """ + + _log.debug("the reader thread has started") + try: + for (first, last, chksum) in self._get_block_ranges(): + if verify and chksum: + hash_obj = hashlib.new(self._cs_type) + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " + "image file '%s': %s" + % (start, end, self._image_path, err)) + + if not buf: + _log.debug("no more data to read from file '%s'", + self._image_path) + self._batch_queue.put(None) + return + + if verify and chksum: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) // self.block_size + _log.debug("queueing %d blocks, queue length is %d" % + (blocks, self._batch_queue.qsize())) + + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and chksum and hash_obj.hexdigest() != chksum: + raise Error("checksum mismatch for blocks range %d-%d: " + "calculated %s, should be %s (image file %s)" + % (first, last, hash_obj.hexdigest(), + chksum, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync=True, verify=True): + """ + Copy the image to the destination file using bmap. The 'sync' argument + defines whether the destination file has to be synchronized upon + return. The 'verify' argument defines whether the checksum has to be + verified while copying. + """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. 
+ self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + if self.image_size and self._dest_is_regfile: + # If we already know image size, make sure that destination file + # has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" + % (self._dest_path, err)) + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + reraise(exc_info[0], exc_info[1], exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. 
+ if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " + "have %u - bmap file '%s' does not belong to this " + "image" + % (blocks_written, self._image_path, self._dest_path, + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ + Synchronize the destination file to make sure all the data are actually + written to the disk. + """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ + This class is a specialized version of 'BmapCopy' which copies the image to + a block device. Unlike the base 'BmapCopy' class, this class does various + optimizations specific to block devices, e.g., switching to the 'noop' I/O + scheduler. + """ + + def __init__(self, image, dest, bmap=None, image_size=None): + """ + The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. 
+ """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._dest_fsync_watermark = (6 * 1024 * 1024) // self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " + "fit the block device '%s' which has %s capacity" + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" % \ + (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" + + def _tune_block_device(self): + """ + Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. 
+ + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + _log.debug("failed to enable I/O optimization, expect " + "suboptimal speed (reason: cannot switch to the " + "'noop' I/O scheduler: %s or blk-mq in use)" % err) + else: + # The file contains a list of schedulers with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the name of the current scheduler. + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering, because we do not need too much of it when + # writing sequntially. Excessive buffering makes some systems not very + # responsive, e.g., this was observed in Fedora 17. + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + _log.warning("failed to disable excessive buffering, expect " + "worse system responsiveness (reason: cannot set " + "max. I/O ratio to 1: %s)" % err) + + def _restore_bdev_settings(self): + """ + Restore old block device settings which we changed in + '_tune_block_device()'. + """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. 
I/O ratio back to '%s': %s" + % (self._old_max_ratio_value, err)) + + def copy(self, sync=True, verify=True): + """ + The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. + """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() diff --git a/bmaptools/BmapCreate.py b/bmaptools/BmapCreate.py new file mode 100644 index 0000000..f4fb413 --- /dev/null +++ b/bmaptools/BmapCreate.py @@ -0,0 +1,356 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. +# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements the block map (bmap) creation functionality and provides +the corresponding API in form of the 'BmapCreate' class. + +The idea is that while images files may generally be very large (e.g., 4GiB), +they may nevertheless contain only little real data, e.g., 512MiB. 
This data +are files, directories, file-system meta-data, partition table, etc. When +copying the image to the target device, you do not have to copy all the 4GiB of +data, you can copy only 512MiB of it, which is 4 times less, so copying should +presumably be 4 times faster. + +The block map file is an XML file which contains a list of blocks which have to +be copied to the target device. The other blocks are not used and there is no +need to copy them. The XML file also contains some additional information like +block size, image size, count of mapped blocks, etc. There are also many +commentaries, so it is human-readable. + +The image has to be a sparse file. Generally, this means that when you generate +this image file, you should start with a huge sparse file which contains a +single hole spanning the entire file. Then you should partition it, write all +the data (probably by means of loop-back mounting the image or parts of it), +etc. The end result should be a sparse file where mapped areas represent useful +parts of the image and holes represent useless parts of the image, which do not +have to be copied when copying the image to the target device. + +This module uses the FIEMAP ioctl to detect holes. +""" + +# Disable the following pylint recommendations: +# * Too many instance attributes - R0902 +# * Too few public methods - R0903 +# pylint: disable=R0902,R0903 + +import hashlib +from bmaptools.BmapHelpers import human_size +from bmaptools import Filemap + +# The bmap format version we generate. +# +# Changelog: +# o 1.3 -> 2.0: +# Support SHA256 and SHA512 checksums, in 1.3 only SHA1 was supported. +# "BmapFileChecksum" is used instead of "BmapFileSHA1", and "chksum=" +# attribute is used instead "sha1=". Introduced "ChecksumType" tag. This is +# an incompatible change. +# Note, bmap format 1.4 is identical to 2.0. Version 1.4 was a mistake, +# instead of incrementing the major version number, we incremented minor +# version number. 
Unfortunately, the mistake slipped into bmap-tools version +# 3.0, and was only fixed in bmap-tools v3.1. +SUPPORTED_BMAP_VERSION = "2.0" + +_BMAP_START_TEMPLATE = \ + """ + + + + + %u + + + %u + + + %u + +""" + + +class Error(Exception): + """ + A class for exceptions generated by this module. We currently support only + one type of exceptions, and we basically throw human-readable problem + description in case of errors. + """ + pass + + +class BmapCreate(object): + """ + This class implements the bmap creation functionality. To generate a bmap + for an image (which is supposedly a sparse file), you should first create + an instance of 'BmapCreate' and provide: + + * full path or a file-like object of the image to create bmap for + * full path or a file object to use for writing the results to + + Then you should invoke the 'generate()' method of this class. It will use + the FIEMAP ioctl to generate the bmap. + """ + + def __init__(self, image, bmap, chksum_type="sha256"): + """ + Initialize a class instance: + * image - full path or a file-like object of the image to create bmap + for + * bmap - full path or a file object to use for writing the resulting + bmap to + * chksum - type of the check sum to use in the bmap file (all checksum + types which python's "hashlib" module supports are allowed). 
+ """ + + self.image_size = None + self.image_size_human = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._mapped_count_pos1 = None + self._mapped_count_pos2 = None + self._chksum_pos = None + + self._f_image_needs_close = False + self._f_bmap_needs_close = False + + self._cs_type = chksum_type.lower() + try: + self._cs_len = len(hashlib.new(self._cs_type).hexdigest()) + except ValueError as err: + raise Error("cannot initialize hash function \"%s\": %s" % + (self._cs_type, err)) + + if hasattr(image, "read"): + self._f_image = image + self._image_path = image.name + else: + self._image_path = image + self._open_image_file() + + if hasattr(bmap, "read"): + self._f_bmap = bmap + self._bmap_path = bmap.name + else: + self._bmap_path = bmap + self._open_bmap_file() + + try: + self.filemap = Filemap.filemap(self._f_image) + except (Filemap.Error, Filemap.ErrorNotSupp) as err: + raise Error("cannot generate bmap for file '%s': %s" + % (self._image_path, err)) + + self.image_size = self.filemap.image_size + self.image_size_human = human_size(self.image_size) + if self.image_size == 0: + raise Error("cannot generate bmap for zero-sized image file '%s'" + % self._image_path) + + self.block_size = self.filemap.block_size + self.blocks_cnt = self.filemap.blocks_cnt + + def __del__(self): + """The class destructor which closes the opened files.""" + if self._f_image_needs_close: + self._f_image.close() + if self._f_bmap_needs_close: + self._f_bmap.close() + + def _open_image_file(self): + """Open the image file.""" + try: + self._f_image = open(self._image_path, 'rb') + except IOError as err: + raise Error("cannot open image file '%s': %s" + % (self._image_path, err)) + + self._f_image_needs_close = True + + def _open_bmap_file(self): + """Open the bmap file.""" + try: + self._f_bmap = open(self._bmap_path, 'w+') + except IOError as err: + raise 
Error("cannot open bmap file '%s': %s" + % (self._bmap_path, err)) + + self._f_bmap_needs_close = True + + def _bmap_file_start(self): + """ + A helper function which generates the starting contents of the block + map file: the header comment, image size, block size, etc. + """ + + # We do not know the amount of mapped blocks at the moment, so just put + # whitespaces instead of real numbers. Assume the longest possible + # numbers. + + xml = _BMAP_START_TEMPLATE \ + % (SUPPORTED_BMAP_VERSION, self.image_size_human, + self.image_size, self.block_size, self.blocks_cnt) + xml += " \n" % (' ' * len(self.image_size_human), + ' ' * len("100.0%")) + xml += " " + + self._f_bmap.write(xml) + self._mapped_count_pos2 = self._f_bmap.tell() + + xml = "%s \n\n" % (' ' * len(str(self.blocks_cnt))) + + # pylint: disable=C0301 + xml += " \n" + xml += " %s \n\n" % self._cs_type + + xml += " \n" + xml += " " + + self._f_bmap.write(xml) + self._chksum_pos = self._f_bmap.tell() + + xml = "0" * self._cs_len + " \n\n" + xml += " \n" + xml += " \n" + # pylint: enable=C0301 + + self._f_bmap.write(xml) + + def _bmap_file_end(self): + """ + A helper function which generates the final parts of the block map + file: the ending tags and the information about the amount of mapped + blocks. + """ + + xml = " \n" + xml += "\n" + + self._f_bmap.write(xml) + + self._f_bmap.seek(self._mapped_count_pos1) + self._f_bmap.write("%s or %.1f%%" + % (self.mapped_size_human, self.mapped_percent)) + + self._f_bmap.seek(self._mapped_count_pos2) + self._f_bmap.write("%u" % self.mapped_cnt) + + self._f_bmap.seek(0) + hash_obj = hashlib.new(self._cs_type) + hash_obj.update(self._f_bmap.read().encode()) + chksum = hash_obj.hexdigest() + self._f_bmap.seek(self._chksum_pos) + self._f_bmap.write("%s" % chksum) + + def _calculate_chksum(self, first, last): + """ + A helper function which calculates checksum for the range of blocks of + the image file: from block 'first' to block 'last'. 
+ """ + + start = first * self.block_size + end = (last + 1) * self.block_size + + self._f_image.seek(start) + hash_obj = hashlib.new(self._cs_type) + + chunk_size = 1024 * 1024 + to_read = end - start + read = 0 + + while read < to_read: + if read + chunk_size > to_read: + chunk_size = to_read - read + chunk = self._f_image.read(chunk_size) + hash_obj.update(chunk) + read += chunk_size + + return hash_obj.hexdigest() + + def generate(self, include_checksums=True): + """ + Generate bmap for the image file. If 'include_checksums' is 'True', + also generate checksums for block ranges. + """ + + # Save image file position in order to restore it at the end + image_pos = self._f_image.tell() + + self._bmap_file_start() + + # Generate the block map and write it to the XML block map + # file as we go. + self.mapped_cnt = 0 + for first, last in self.filemap.get_mapped_ranges(0, self.blocks_cnt): + self.mapped_cnt += last - first + 1 + if include_checksums: + chksum = self._calculate_chksum(first, last) + chksum = " chksum=\"%s\"" % chksum + else: + chksum = "" + + if first != last: + self._f_bmap.write(" %s-%s \n" + % (chksum, first, last)) + else: + self._f_bmap.write(" %s \n" + % (chksum, first)) + + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + self._bmap_file_end() + + try: + self._f_bmap.flush() + except IOError as err: + raise Error("cannot flush the bmap file '%s': %s" + % (self._bmap_path, err)) + + self._f_image.seek(image_pos) diff --git a/bmaptools/BmapHelpers.py b/bmaptools/BmapHelpers.py new file mode 100644 index 0000000..790e713 --- /dev/null +++ b/bmaptools/BmapHelpers.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. 
+# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module contains various shared helper functions. +""" + +import os +import struct +from fcntl import ioctl + +def human_size(size): + """Transform size in bytes into a human-readable form.""" + if size == 1: + return "1 byte" + + if size < 512: + return "%d bytes" % size + + for modifier in ["KiB", "MiB", "GiB", "TiB"]: + size /= 1024.0 + if size < 1024: + return "%.1f %s" % (size, modifier) + + return "%.1f %s" % (size, 'EiB') + +def human_time(seconds): + """Transform time in seconds to the HH:MM:SS format.""" + (minutes, seconds) = divmod(seconds, 60) + (hours, minutes) = divmod(minutes, 60) + + result = "" + if hours: + result = "%dh " % hours + if minutes: + result += "%dm " % minutes + + return result + "%.1fs" % seconds + +def get_block_size(file_obj): + """ + Return block size for file object 'file_obj'. Errors are indicated by the + 'IOError' exception. + """ + + # Get the block size of the host file-system for the image file by calling + # the FIGETBSZ ioctl (number 2). 
+ try: + binary_data = ioctl(file_obj, 2, struct.pack('I', 0)) + bsize = struct.unpack('I', binary_data)[0] + if not bsize: + raise IOError("get 0 bsize by FIGETBSZ ioctl") + except IOError as err: + stat = os.fstat(file_obj.fileno()) + if hasattr(stat, 'st_blksize'): + bsize = stat.st_blksize + else: + raise IOError("Unable to determine block size") + return bsize + +def program_is_available(name): + """ + This is a helper function which check if the external program 'name' is + available in the system. + """ + + for path in os.environ["PATH"].split(os.pathsep): + program = os.path.join(path.strip('"'), name) + if os.path.isfile(program) and os.access(program, os.X_OK): + return True + + return False diff --git a/bmaptools/CLI.py b/bmaptools/CLI.py new file mode 100644 index 0000000..ea7b880 --- /dev/null +++ b/bmaptools/CLI.py @@ -0,0 +1,736 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. +# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +A tool for creating block maps (bmap) and copying disk images using bmap files. 
+Documentation can be found here: +source.tizen.org/documentation/reference/bmaptool +""" + +# Disable the following pylint recommendations: +# * Too few public methods (R0903) +# * Too many statements (R0915) +# * Too many branches (R0912) +# pylint: disable=R0903 +# pylint: disable=R0915 +# pylint: disable=R0912 + +import argparse +import sys +import os +import stat +import time +import logging +import tempfile +import traceback +import shutil +import io +from bmaptools import BmapCreate, BmapCopy, BmapHelpers, TransRead + +VERSION = "3.5" + +log = logging.getLogger() # pylint: disable=C0103 + +def print_error_with_tb(msgformat, *args): + """Print an error message occurred along with the traceback.""" + + tback = [] + + if sys.exc_info()[0]: + lines = traceback.format_exc().splitlines() + else: + lines = [line.strip() for line in traceback.format_stack()] + + idx = 0 + last_idx = len(lines) - 1 + while idx < len(lines): + if lines[idx].startswith(' File "'): + idx += 2 + last_idx = idx + else: + idx += 1 + + tback = lines[0:last_idx] + if tback: + log.error("An error occurred, here is the traceback:\n%s\n", "\n".join(tback)) + + if args: + errmsg = msgformat % args + else: + errmsg = str(msgformat) + log.error(errmsg) + + +def error_out(msgformat, *args): + """Print an error message and terminate program execution.""" + + print_error_with_tb(str(msgformat) + "\n", *args) + raise SystemExit(1) + + +class NamedFile(object): + """ + This simple class allows us to override the 'name' attribute of a file + object. The reason is that some classes use the 'name' attribute of the + file object to print file path. But, for example, 'os.fdopen()' sets the + name to "", which is not very user-friendly. Also, sometimes we + want to substitute the file name with something else. 
+ """ + + def __init__(self, file_obj, name): + self._file_obj = file_obj + self.name = name + + def __getattr__(self, name): + return getattr(self._file_obj, name) + +def open_block_device(path): + """ + This is a helper function for 'open_files()' which is called if the + destination file of the "copy" command is a block device. We handle block + devices a little bit different to regular files. Namely, we are trying to + make sure that we do not write to a mounted block device, otherwise the + user could corrupt, say, the root file system by a mistake. This is + achieved by opening the block device in exclusive mode, which guarantees + that we are the only users of the block device. + + This function opens a block device specified by 'path' in exclusive mode. + Returns opened file object. + """ + + try: + descriptor = os.open(path, os.O_WRONLY | os.O_EXCL) + except OSError as err: + error_out("cannot open block device '%s' in exclusive mode: %s", + path, err) + + # Turn the block device file descriptor into a file object + try: + file_obj = os.fdopen(descriptor, "wb") + except OSError as err: + os.close(descriptor) + error_out("cannot open block device '%s':\n%s", path, err) + + return NamedFile(file_obj, path) + +def report_verification_results(context, sigs): + """ + This is a helper function which reports the GPG signature verification + results. The 'context' argument is the gpgme context object, and the 'sigs' + argument contains the results of the 'gpgme.verify()' function. 
+ """ + + for sig in sigs: + if not sig.status: + key = context.get_key(sig.fpr) + author = "%s <%s>" % (key.uids[0].name, key.uids[0].email) + log.info("successfully verified bmap file signature of %s " + "(fingerprint %s)" % (author, sig.fpr)) + else: + error_out("signature verification failed (fingerprint %s): %s\n" + "Either fix the problem or use --no-sig-verify to " + "disable signature verification", + sig.fpr, sig.status[2].lower()) + +def verify_detached_bmap_signature(args, bmap_obj, bmap_path): + """ + This is a helper function for 'verify_bmap_signature()' which handles the + detached signature case. + """ + + if args.no_sig_verify: + return None + + if args.bmap_sig: + try: + sig_obj = TransRead.TransRead(args.bmap_sig) + except TransRead.Error as err: + error_out("cannot open bmap signature file '%s':\n%s", + args.bmap_sig, err) + sig_path = args.bmap_sig + else: + # Check if there is a stand-alone signature file + try: + sig_path = bmap_path + ".asc" + sig_obj = TransRead.TransRead(sig_path) + except TransRead.Error: + try: + sig_path = bmap_path + ".sig" + sig_obj = TransRead.TransRead(sig_path) + except TransRead.Error: + # No signatures found + return None + + log.info("discovered signature file for bmap '%s'" % sig_path) + + # If the stand-alone signature file is not local, make a local copy + if sig_obj.is_url: + try: + tmp_obj = tempfile.NamedTemporaryFile("wb+") + except IOError as err: + error_out("cannot create a temporary file for the signature:\n%s", + err) + + shutil.copyfileobj(sig_obj, tmp_obj) + tmp_obj.seek(0) + sig_obj.close() + sig_obj = tmp_obj + + try: + import gpgme + except ImportError: + error_out("cannot verify the signature because the python \"gpgme\" " + "module is not installed on your system\nPlease, either " + "install the module or use --no-sig-verify") + + try: + context = gpgme.Context() + signature = io.FileIO(sig_obj.name) + signed_data = io.FileIO(bmap_obj.name) + sigs = context.verify(signature, signed_data, None) 
+ except gpgme.GpgmeError as err: + error_out("failure when trying to verify GPG signature: %s\n" + "Make sure file \"%s\" has proper GPG format", + err[2].lower(), sig_path) + + sig_obj.close() + + if len(sigs) == 0: + log.warning("the \"%s\" signature file does not actually contain " + "any valid signatures" % sig_path) + else: + report_verification_results(context, sigs) + + return None + + +def verify_clearsign_bmap_signature(args, bmap_obj): + """ + This is a helper function for 'verify_bmap_signature()' which handles the + clearsign signature case. + """ + + if args.bmap_sig: + error_out("the bmap file has clearsign format and already contains " + "the signature, so --bmap-sig option should not be used") + + try: + import gpgme + except ImportError: + error_out("cannot verify the signature because the python \"gpgme\"" + "module is not installed on your system\nCannot extract " + "block map from the bmap file which has clearsign format, " + "please, install the module") + + try: + context = gpgme.Context() + signature = io.FileIO(bmap_obj.name) + plaintext = io.BytesIO() + sigs = context.verify(signature, None, plaintext) + except gpgme.GpgmeError as err: + error_out("failure when trying to verify GPG signature: %s\n" + "make sure the bmap file has proper GPG format", + err[2].lower()) + + if not args.no_sig_verify: + if len(sigs) == 0: + log.warning("the bmap file clearsign signature does not actually " + "contain any valid signatures") + else: + report_verification_results(context, sigs) + + try: + tmp_obj = tempfile.TemporaryFile("w+") + except IOError as err: + error_out("cannot create a temporary file for bmap:\n%s", err) + + tmp_obj.write(plaintext.getvalue()) + tmp_obj.seek(0) + return tmp_obj + + +def verify_bmap_signature(args, bmap_obj, bmap_path): + """ + Verify GPG signature of the bmap file if it is present. The signature may + be in a separate file (detached) or it may be inside the bmap file itself + (clearsign signature). 
+ + If user specifies the --bmap-sig option, the signature is assumed to be + detached and is taken from the user-specified file. Otherwise, this + function verifies whether the bmap file has clearsign signature, and if + not, it tries to automatically discover the detached signature by searching + for a ".sig" or ".asc" file at the same path and with the same basename as + the bmap file. This function then verifies the signature and reports the + results. + + In case of the clearsign signature, the bmap file has "invalid" format, + meaning that the proper bmap XML contents is in the GPG clearsign + container. The XML contents has to be extracted from the container before + further processing. And this is be done even if user specified the + --no-sig-verify option. This function returns an open file object with the + extracted XML bmap file contents in this case. Otherwise, this function + returns None. + """ + + if not bmap_obj: + return None + + clearsign_marker = "-----BEGIN PGP SIGNED MESSAGE-----" + buf = bmap_obj.read(len(clearsign_marker)) + bmap_obj.seek(0) + + if buf == clearsign_marker: + return verify_clearsign_bmap_signature(args, bmap_obj) + else: + return verify_detached_bmap_signature(args, bmap_obj, bmap_path) + + +def find_and_open_bmap(args): + """ + This is a helper function for 'open_files()' which discovers and opens the + bmap file, then returns the corresponding file object and the bmap file + path. + + If the user specified the bmap file explicitly, we just open the provided + path. Otherwise, we try to discover the bmap file at the same place where + the image file is located. We search for a file with the same path and + basename, but with a ".bmap" extension. + + Additionally, this function makes sure that the returned file object + corresponds to a local file, not a remote file. We do this by creating a + temporary local copy of the bmap file. 
The reason is that further on we may + need to check the GPG signature of the file, which requires it to be a + local file. On top of that, the BmapCopy class requires the bmap file to be + memory-mappable ('mmap()'). + """ + + if args.nobmap: + return (None, None) + + if args.bmap: + try: + bmap_obj = TransRead.TransRead(args.bmap) + except TransRead.Error as err: + error_out("cannot open bmap file '%s':\n%s", args.bmap, err) + bmap_path = args.bmap + else: + # Automatically discover the bmap file + image_path = args.image + while True: + bmap_path = image_path + ".bmap" + try: + bmap_obj = TransRead.TransRead(bmap_path) + log.info("discovered bmap file '%s'" % bmap_path) + break + except TransRead.Error: + pass + + image_path, ext = os.path.splitext(image_path) + if ext == '': + return (None, None) + + if not bmap_obj.is_url: + return (bmap_obj, bmap_path) + + try: + # Create a temporary file for the bmap + tmp_obj = tempfile.NamedTemporaryFile("wb+") + except IOError as err: + error_out("cannot create a temporary file for bmap:\n%s", err) + + shutil.copyfileobj(bmap_obj, tmp_obj) + + tmp_obj.flush() + tmp_obj.seek(0) + bmap_obj.close() + return (tmp_obj, bmap_path) + + +def open_files(args): + """ + This is a helper function for 'copy_command()' which the image, bmap, and + the destination files. Returns a tuple of 5 elements: + 1 file-like object for the image + 2 file object for the destination file + 3 file-like object for the bmap + 4 full path to the bmap file + 5 image size in bytes + 6 'True' if the destination file is a block device, otherwise 'False' + """ + + # Open the image file using the TransRead module, which will automatically + # recognize whether it is compressed or whether file path is an URL, etc. + try: + image_obj = TransRead.TransRead(args.image) + except TransRead.Error as err: + error_out("cannot open image:\n%s" % err) + + # Open the bmap file. Try to discover the bmap file automatically if it + # was not specified. 
+ (bmap_obj, bmap_path) = find_and_open_bmap(args) + + if bmap_path == args.image: + # Most probably the specified the bmap file instead of the image file + # by mistake. + log.warning("image has the same path as the bmap file, dropping bmap") + bmap_obj.close() + bmap_obj = None + bmap_path = None + args.nobmap = True + + # If the destination file is under "/dev", but does not exist, print a + # warning. This is done in order to be more user-friendly, because + # sometimes users mean to write to a block device, them misspell its name. + # We just create the "/dev/misspelled" file, write the data there, and + # report success. Later on the user finds out that the image was not really + # written to the device, and gets confused. Similar confusion may happen if + # the destination file is not a special device for some reasons. + if os.path.normpath(args.dest).startswith("/dev/"): + if not os.path.exists(args.dest): + log.warning("\"%s\" does not exist, creating a regular file " + "\"%s\"" % (args.dest, args.dest)) + elif stat.S_ISREG(os.stat(args.dest).st_mode): + log.warning("\"%s\" is under \"/dev\", but it is a regular file, " + "not a device node" % args.dest) + + # Try to open the destination file. If it does not exist, a new regular + # file will be created. If it exists and it is a regular file - it'll be + # truncated. If this is a block device, it'll just be opened. 
+ try: + dest_obj = open(args.dest, 'wb+') + except IOError as err: + error_out("cannot open destination file '%s':\n%s", args.dest, err) + + # Check whether the destination file is a block device + dest_is_blkdev = stat.S_ISBLK(os.fstat(dest_obj.fileno()).st_mode) + if dest_is_blkdev: + dest_obj.close() + dest_obj = open_block_device(args.dest) + + return (image_obj, dest_obj, bmap_obj, bmap_path, image_obj.size, + dest_is_blkdev) + + +def copy_command(args): + """Copy an image to a block device or a regular file using bmap.""" + + if args.nobmap and args.bmap: + error_out("--nobmap and --bmap cannot be used together") + + if args.bmap_sig and args.no_sig_verify: + error_out("--bmap-sig and --no-sig-verify cannot be used together") + + image_obj, dest_obj, bmap_obj, bmap_path, image_size, dest_is_blkdev = \ + open_files(args) + + if args.bmap_sig and not bmap_obj: + error_out("the bmap signature file was specified, but bmap file was " + "not found") + + f_obj = verify_bmap_signature(args, bmap_obj, bmap_path) + if f_obj: + bmap_obj.close() + bmap_obj = f_obj + + if bmap_obj: + bmap_obj = NamedFile(bmap_obj, bmap_path) + + try: + if dest_is_blkdev: + dest_str = "block device '%s'" % args.dest + # For block devices, use the specialized class + writer = BmapCopy.BmapBdevCopy(image_obj, dest_obj, bmap_obj, + image_size) + else: + dest_str = "file '%s'" % os.path.basename(args.dest) + writer = BmapCopy.BmapCopy(image_obj, dest_obj, bmap_obj, + image_size) + except BmapCopy.Error as err: + error_out(err) + + # Print the progress indicator while copying + if not args.quiet and not args.debug and \ + sys.stderr.isatty() and sys.stdout.isatty(): + writer.set_progress_indicator(sys.stderr, "bmaptool: info: %d%% copied") + + start_time = time.time() + if not bmap_obj: + if args.nobmap: + log.info("no bmap given, copy entire image to '%s'" % args.dest) + else: + error_out("bmap file not found, please, use --nobmap option to " + "flash without bmap") + else: + log.info("block 
map format version %s" % writer.bmap_version) + log.info("%d blocks of size %d (%s), mapped %d blocks (%s or %.1f%%)" + % (writer.blocks_cnt, writer.block_size, + writer.image_size_human, writer.mapped_cnt, + writer.mapped_size_human, writer.mapped_percent)) + log.info("copying image '%s' to %s using bmap file '%s'" + % (os.path.basename(args.image), dest_str, + os.path.basename(bmap_path))) + + try: + try: + writer.copy(False, not args.no_verify) + except (BmapCopy.Error, TransRead.Error) as err: + error_out(err) + + # Synchronize the block device + log.info("synchronizing '%s'" % args.dest) + try: + writer.sync() + except BmapCopy.Error as err: + error_out(err) + except KeyboardInterrupt: + error_out("interrupted, exiting") + + copying_time = time.time() - start_time + copying_speed = writer.mapped_size // copying_time + log.info("copying time: %s, copying speed %s/sec" + % (BmapHelpers.human_time(copying_time), + BmapHelpers.human_size(copying_speed))) + + dest_obj.close() + if bmap_obj: + bmap_obj.close() + image_obj.close() + + +def create_command(args): + """ + Generate block map (AKA bmap) for an image. The idea is that while images + files may generally be very large (e.g., 4GiB), they may nevertheless + contain only little real data, e.g., 512MiB. This data are files, + directories, file-system meta-data, partition table, etc. When copying the + image to the target device, you do not have to copy all the 4GiB of data, + you can copy only 512MiB of it, which is 4 times less, so copying should + presumably be 4 times faster. + + The block map file is an XML file which contains a list of blocks which + have to be copied to the target device. The other blocks are not used and + there is no need to copy them. The XML file also contains some additional + information like block size, image size, count of mapped blocks, etc. There + are also many commentaries, so it is human-readable. + + The image has to be a sparse file. 
Generally, this means that when you + generate this image file, you should start with a huge sparse file which + contains a single hole spanning the entire file. Then you should partition + it, write all the data (probably by means of loop-back mounting the image + or parts of it), etc. The end result should be a sparse file where mapped + areas represent useful parts of the image and holes represent useless parts + of the image, which do not have to be copied when copying the image to the + target device. + """ + + # Create and setup the output stream + if args.output: + try: + output = open(args.output, "w+") + except IOError as err: + error_out("cannot open the output file '%s':\n%s", args.output, err) + else: + try: + # Create a temporary file for the bmap + output = tempfile.TemporaryFile("w+") + except IOError as err: + error_out("cannot create a temporary file:\n%s", err) + + try: + creator = BmapCreate.BmapCreate(args.image, output, "sha256") + creator.generate(not args.no_checksum) + except BmapCreate.Error as err: + error_out(err) + + if not args.output: + output.seek(0) + sys.stdout.write(output.read()) + + if creator.mapped_cnt == creator.blocks_cnt: + log.warning("all %s are mapped, no holes in '%s'" + % (creator.image_size_human, args.image)) + log.warning("was the image handled incorrectly and holes " + "were expanded?") + + +def parse_arguments(): + """A helper function which parses the input arguments.""" + text = sys.modules[__name__].__doc__ + parser = argparse.ArgumentParser(description=text, prog='bmaptool') + + # The --version option + parser.add_argument("--version", action="version", + version="%(prog)s " + "%s" % VERSION) + + # The --quiet option + text = "be quiet" + parser.add_argument("-q", "--quiet", action="store_true", help=text) + + # The --debug option + text = "print debugging information" + parser.add_argument("-d", "--debug", action="store_true", help=text) + + subparsers = parser.add_subparsers(title="commands", dest="command") 
+ subparsers.required = True + + # + # Create parser for the "create" command + # + text = "generate bmap for an image file (which should be a sparse file)" + parser_create = subparsers.add_parser("create", help=text) + parser_create.set_defaults(func=create_command) + + # Mandatory command-line argument - image file + text = "the image to generate bmap for" + parser_create.add_argument("image", help=text) + + # The --output option + text = "the output file name (otherwise stdout is used)" + parser_create.add_argument("-o", "--output", help=text) + + # The --no-checksum option + text = "do not generate the checksum for block ranges in the bmap" + parser_create.add_argument("--no-checksum", action="store_true", help=text) + + # + # Create parser for the "copy" command + # + text = "write an image to a block device using bmap" + parser_copy = subparsers.add_parser("copy", help=text) + parser_copy.set_defaults(func=copy_command) + + # The first positional argument - image file + text = "the image file to copy. 
Supported formats: uncompressed, " + \ + ", ".join(TransRead.SUPPORTED_COMPRESSION_TYPES) + parser_copy.add_argument("image", help=text) + + # The second positional argument - block device node + text = "the destination file or device node to copy the image to" + parser_copy.add_argument("dest", help=text) + + # The --bmap option + text = "the block map file for the image" + parser_copy.add_argument("--bmap", help=text) + + # The --nobmap option + text = "allow copying without a bmap file" + parser_copy.add_argument("--nobmap", action="store_true", help=text) + + # The --bmap-sig option + text = "the detached GPG signature for the bmap file" + parser_copy.add_argument("--bmap-sig", help=text) + + # The --no-sig-verify option + text = "do not verify bmap file GPG signatrue" + parser_copy.add_argument("--no-sig-verify", action="store_true", help=text) + + # The --no-verify option + text = "do not verify the data checksum while writing" + parser_copy.add_argument("--no-verify", action="store_true", help=text) + + return parser.parse_args() + + +def setup_logger(loglevel): + """ + A helper function which configures the root logger. The log level is + initialized to 'loglevel'. + """ + + # Esc-sequences for coloured output + esc_red = '\033[91m' # pylint: disable=W1401 + esc_yellow = '\033[93m' # pylint: disable=W1401 + esc_green = '\033[92m' # pylint: disable=W1401 + esc_end = '\033[0m' # pylint: disable=W1401 + + class MyFormatter(logging.Formatter): + """ + A custom formatter for logging messages. The reason we have it is to + have different format for different log levels. 
+ """ + + def __init__(self, fmt=None, datefmt=None): + """The constructor.""" + logging.Formatter.__init__(self, fmt, datefmt) + + self._orig_fmt = self._fmt + # Prefix with green-colored time-stamp, as well as with module name + # and line number + self._dbg_fmt = "[" + esc_green + "%(asctime)s" + esc_end + \ + "] [%(module)s,%(lineno)d] " + self._fmt + + def format(self, record): + """ + The formatter which which simply prefixes all debugging messages + with a time-stamp. + """ + + if record.levelno == logging.DEBUG: + self._fmt = self._dbg_fmt + + result = logging.Formatter.format(self, record) + self._fmt = self._orig_fmt + return result + + # Change log level names to something nicer than the default all-capital + # 'INFO' etc. + logging.addLevelName(logging.ERROR, esc_red + "ERROR" + esc_end) + logging.addLevelName(logging.WARNING, esc_yellow + "WARNING" + esc_end) + logging.addLevelName(logging.DEBUG, "debug") + logging.addLevelName(logging.INFO, "info") + + log.setLevel(loglevel) + formatter = MyFormatter("bmaptool: %(levelname)s: %(message)s", "%H:%M:%S") + where = logging.StreamHandler(sys.stderr) + where.setFormatter(formatter) + log.addHandler(where) + + +def main(): + """Script entry point.""" + args = parse_arguments() + + if args.quiet: + loglevel = logging.WARNING + elif args.debug: + loglevel = logging.DEBUG + else: + loglevel = logging.INFO + + setup_logger(loglevel) + + if args.quiet and args.debug: + error_out("--quiet and --debug cannot be used together") + + try: + args.func(args) + except MemoryError: + log.error("Out of memory!") + traceback.print_exc() + + log.info("The contents of /proc/meminfo:") + with open('/proc/meminfo', 'rt') as file_obj: + for line in file_obj: + print(line.strip()) + + log.info("The contents of /proc/self/status:") + with open('/proc/self/status', 'rt') as file_obj: + for line in file_obj: + print(line.strip()) + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bmaptools/Filemap.py 
b/bmaptools/Filemap.py new file mode 100644 index 0000000..3e56798 --- /dev/null +++ b/bmaptools/Filemap.py @@ -0,0 +1,526 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. +# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements a way to get file block mapping. Two methods +are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of +the file seek syscall. The former is implemented by the 'FilemapFiemap' class, +the latter is implemented by the 'FilemapSeek' class. Both classes provide the +same API. The 'filemap' function automatically selects which class can be used +and returns an instance of the class. +""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import errno +import struct +import array +import fcntl +import tempfile +import logging +from bmaptools import BmapHelpers + +_log = logging.getLogger(__name__) # pylint: disable=C0103 + + +class ErrorNotSupp(Exception): + """ + An exception of this type is raised when the 'FIEMAP' or 'SEEK_HOLE' feature + is not supported either by the kernel or the file-system. + """ + pass + + +class Error(Exception): + """A class for all the other exceptions raised by this module.""" + pass + + +class _FilemapBase(object): + """ + This is a base class for a couple of other classes in this module. This + class simply performs the common parts of the initialization process: opens + the image file, gets its size, etc. 
+ """ + + def __init__(self, image): + """ + Initialize a class instance. The 'image' argument is full path to the + file or file object to operate on. + """ + + self._f_image_needs_close = False + + if hasattr(image, "fileno"): + self._f_image = image + self._image_path = image.name + else: + self._image_path = image + self._open_image_file() + + try: + self.image_size = os.fstat(self._f_image.fileno()).st_size + except IOError as err: + raise Error("cannot get information about file '%s': %s" + % (self._f_image.name, err)) + + try: + self.block_size = BmapHelpers.get_block_size(self._f_image) + except IOError as err: + raise Error("cannot get block size for '%s': %s" + % (self._image_path, err)) + + self.blocks_cnt = (self.image_size + self.block_size - 1) // self.block_size + + try: + self._f_image.flush() + except IOError as err: + raise Error("cannot flush image file '%s': %s" + % (self._image_path, err)) + + try: + os.fsync(self._f_image.fileno()), + except OSError as err: + raise Error("cannot synchronize image file '%s': %s " + % (self._image_path, err.strerror)) + + _log.debug("opened image \"%s\"" % self._image_path) + _log.debug("block size %d, blocks count %d, image size %d" + % (self.block_size, self.blocks_cnt, self.image_size)) + + def __del__(self): + """The class destructor which just closes the image file.""" + if self._f_image_needs_close: + self._f_image.close() + + def _open_image_file(self): + """Open the image file.""" + try: + self._f_image = open(self._image_path, 'rb') + except IOError as err: + raise Error("cannot open image file '%s': %s" + % (self._image_path, err)) + + self._f_image_needs_close = True + + def block_is_mapped(self, block): # pylint: disable=W0613,R0201 + """ + This method has to be implemented by child classes. It returns + 'True' if block number 'block' of the image file is mapped and 'False' + otherwise. 
+ """ + + raise Error("the method is not implemented") + + def block_is_unmapped(self, block): # pylint: disable=W0613,R0201 + """ + This method has has to be implemented by child classes. It returns + 'True' if block number 'block' of the image file is not mapped (hole) + and 'False' otherwise. + """ + + raise Error("the method is not implemented") + + def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201 + """ + This method has has to be implemented by child classes. This is a + generator which yields ranges of mapped blocks in the file. The ranges + are tuples of 2 elements: [first, last], where 'first' is the first + mapped block and 'last' is the last mapped block. + + The ranges are yielded for the area of the file of size 'count' blocks, + starting from block 'start'. + """ + + raise Error("the method is not implemented") + + def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201 + """ + This method has has to be implemented by child classes. Just like + 'get_mapped_ranges()', but yields unmapped block ranges instead + (holes). + """ + + raise Error("the method is not implemented") + + +# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call +_SEEK_DATA = 3 +_SEEK_HOLE = 4 + + +def _lseek(file_obj, offset, whence): + """This is a helper function which invokes 'os.lseek' for file object + 'file_obj' and with specified 'offset' and 'whence'. The 'whence' + argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When + there is no more data or hole starting from 'offset', this function + returns '-1'. Otherwise the data or hole position is returned.""" + + try: + return os.lseek(file_obj.fileno(), offset, whence) + except OSError as err: + # The 'lseek' system call returns the ENXIO if there is no data or + # hole starting from the specified offset. 
+ if err.errno == errno.ENXIO: + return -1 + elif err.errno == errno.EINVAL: + raise ErrorNotSupp("the kernel or file-system does not support " + "\"SEEK_HOLE\" and \"SEEK_DATA\"") + else: + raise + + +class FilemapSeek(_FilemapBase): + """ + This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping. + Unfortunately, the current implementation requires the caller to have write + access to the image file. + """ + + def __init__(self, image): + """Refer the '_FilemapBase' class for the documentation.""" + + # Call the base class constructor first + _FilemapBase.__init__(self, image) + _log.debug("FilemapSeek: initializing") + + self._probe_seek_hole() + + def _probe_seek_hole(self): + """ + Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'. + Unfortunately, there seems to be no clean way for detecting this, + because often the system just fakes them by just assuming that all + files are fully mapped, so 'SEEK_HOLE' always returns EOF and + 'SEEK_DATA' always returns the requested offset. + + I could not invent a better way of detecting the fake 'SEEK_HOLE' + implementation than just to create a temporary file in the same + directory where the image file resides. It would be nice to change this + to something better. + """ + + directory = os.path.dirname(self._image_path) + + try: + tmp_obj = tempfile.TemporaryFile("w+", dir=directory) + except OSError as err: + raise ErrorNotSupp("cannot create a temporary in \"%s\": %s" + % (directory, err)) + + try: + os.ftruncate(tmp_obj.fileno(), self.block_size) + except OSError as err: + raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s" + % (directory, err)) + + offs = _lseek(tmp_obj, 0, _SEEK_HOLE) + if offs != 0: + # We are dealing with the stub 'SEEK_HOLE' implementation which + # always returns EOF. 
+ _log.debug("lseek(0, SEEK_HOLE) returned %d" % offs) + raise ErrorNotSupp("the file-system does not support " + "\"SEEK_HOLE\" and \"SEEK_DATA\" but only " + "provides a stub implementation") + + tmp_obj.close() + + def block_is_mapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA) + if offs == -1: + result = False + else: + result = (offs // self.block_size == block) + + _log.debug("FilemapSeek: block_is_mapped(%d) returns %s" + % (block, result)) + return result + + def block_is_unmapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + return not self.block_is_mapped(block) + + def _get_ranges(self, start, count, whence1, whence2): + """ + This function implements 'get_mapped_ranges()' and + 'get_unmapped_ranges()' depending on what is passed in the 'whence1' + and 'whence2' arguments. + """ + + assert whence1 != whence2 + end = start * self.block_size + limit = end + count * self.block_size + + while True: + start = _lseek(self._f_image, end, whence1) + if start == -1 or start >= limit or start == self.image_size: + break + + end = _lseek(self._f_image, start, whence2) + if end == -1 or end == self.image_size: + end = self.blocks_cnt * self.block_size + if end > limit: + end = limit + + start_blk = start // self.block_size + end_blk = end // self.block_size - 1 + _log.debug("FilemapSeek: yielding range (%d, %d)" + % (start_blk, end_blk)) + yield (start_blk, end_blk) + + def get_mapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + _log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE) + + def get_unmapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + _log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) 
+ return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA) + + +# Below goes the FIEMAP ioctl implementation, which is not very readable +# because it deals with the rather complex FIEMAP ioctl. To understand the +# code, you need to know the FIEMAP interface, which is documented in the +# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources. + +# Format string for 'struct fiemap' +_FIEMAP_FORMAT = "=QQLLLL" +# sizeof(struct fiemap) +_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT) +# Format string for 'struct fiemap_extent' +_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL" +# sizeof(struct fiemap_extent) +_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT) +# The FIEMAP ioctl number +_FIEMAP_IOCTL = 0xC020660B +# This FIEMAP ioctl flag which instructs the kernel to sync the file before +# reading the block map +_FIEMAP_FLAG_SYNC = 0x00000001 +# Size of the buffer for 'struct fiemap_extent' elements which will be used +# when invoking the FIEMAP ioctl. The larger is the buffer, the less times the +# FIEMAP ioctl will be invoked. +_FIEMAP_BUFFER_SIZE = 256 * 1024 + + +class FilemapFiemap(_FilemapBase): + """ + This class provides API to the FIEMAP ioctl. Namely, it allows to iterate + over all mapped blocks and over all holes. + + This class synchronizes the image file every time it invokes the FIEMAP + ioctl in order to work-around early FIEMAP implementation kernel bugs. + """ + + def __init__(self, image): + """ + Initialize a class instance. The 'image' argument is full the file + object to operate on. 
+ """ + + # Call the base class constructor first + _FilemapBase.__init__(self, image) + _log.debug("FilemapFiemap: initializing") + + self._buf_size = _FIEMAP_BUFFER_SIZE + + # Calculate how many 'struct fiemap_extent' elements fit the buffer + self._buf_size -= _FIEMAP_SIZE + self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE + assert self._fiemap_extent_cnt > 0 + self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE + self._buf_size += _FIEMAP_SIZE + + # Allocate a mutable buffer for the FIEMAP ioctl + self._buf = array.array('B', [0] * self._buf_size) + + # Check if the FIEMAP ioctl is supported + self.block_is_mapped(0) + + def _invoke_fiemap(self, block, count): + """ + Invoke the FIEMAP ioctl for 'count' blocks of the file starting from + block number 'block'. + + The full result of the operation is stored in 'self._buf' on exit. + Returns the unpacked 'struct fiemap' data structure in form of a python + list (just like 'struct.upack()'). + """ + + if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt): + raise Error("bad block number %d, should be within [0, %d]" + % (block, self.blocks_cnt)) + + # Initialize the 'struct fiemap' part of the buffer. We use the + # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is + # synchronized. The reason for this is that early FIEMAP + # implementations had many bugs related to cached dirty data, and + # synchronizing the file is a necessary work-around. + struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size, + count * self.block_size, _FIEMAP_FLAG_SYNC, 0, + self._fiemap_extent_cnt, 0) + + try: + fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1) + except IOError as err: + # Note, the FIEMAP ioctl is supported by the Linux kernel starting + # from version 2.6.28 (year 2008). 
+ if err.errno == errno.EOPNOTSUPP: + errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \ + "by the file-system" + _log.debug(errstr) + raise ErrorNotSupp(errstr) + if err.errno == errno.ENOTTY: + errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \ + "by the kernel" + _log.debug(errstr) + raise ErrorNotSupp(errstr) + raise Error("the FIEMAP ioctl failed for '%s': %s" + % (self._image_path, err)) + + return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE]) + + def block_is_mapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + struct_fiemap = self._invoke_fiemap(block, 1) + + # The 3rd element of 'struct_fiemap' is the 'fm_mapped_extents' field. + # If it contains zero, the block is not mapped, otherwise it is + # mapped. + result = bool(struct_fiemap[3]) + _log.debug("FilemapFiemap: block_is_mapped(%d) returns %s" + % (block, result)) + return result + + def block_is_unmapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + return not self.block_is_mapped(block) + + def _unpack_fiemap_extent(self, index): + """ + Unpack a 'struct fiemap_extent' structure object number 'index' from + the internal 'self._buf' buffer. + """ + + offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index + return struct.unpack(_FIEMAP_EXTENT_FORMAT, + self._buf[offset: offset + _FIEMAP_EXTENT_SIZE]) + + def _do_get_mapped_ranges(self, start, count): + """ + Implements most the functionality for the 'get_mapped_ranges()' + generator: invokes the FIEMAP ioctl, walks through the mapped extents + and yields mapped block ranges. However, the ranges may be consecutive + (e.g., (1, 100), (100, 200)) and 'get_mapped_ranges()' simply merges + them. 
+ """ + + block = start + while block < start + count: + struct_fiemap = self._invoke_fiemap(block, count) + + mapped_extents = struct_fiemap[3] + if mapped_extents == 0: + # No more mapped blocks + return + + extent = 0 + while extent < mapped_extents: + fiemap_extent = self._unpack_fiemap_extent(extent) + + # Start of the extent + extent_start = fiemap_extent[0] + # Starting block number of the extent + extent_block = extent_start // self.block_size + # Length of the extent + extent_len = fiemap_extent[2] + # Count of blocks in the extent + extent_count = extent_len // self.block_size + + # Extent length and offset have to be block-aligned + assert extent_start % self.block_size == 0 + assert extent_len % self.block_size == 0 + + if extent_block > start + count - 1: + return + + first = max(extent_block, block) + last = min(extent_block + extent_count, start + count) - 1 + yield (first, last) + + extent += 1 + + block = extent_block + extent_count + + def get_mapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + _log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + iterator = self._do_get_mapped_ranges(start, count) + first_prev, last_prev = next(iterator) + + for first, last in iterator: + if last_prev == first - 1: + last_prev = last + else: + _log.debug("FilemapFiemap: yielding range (%d, %d)" + % (first_prev, last_prev)) + yield (first_prev, last_prev) + first_prev, last_prev = first, last + + _log.debug("FilemapFiemap: yielding range (%d, %d)" + % (first_prev, last_prev)) + yield (first_prev, last_prev) + + def get_unmapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + _log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + hole_first = start + for first, last in self._do_get_mapped_ranges(start, count): + if first > hole_first: + _log.debug("FilemapFiemap: yielding range (%d, 
%d)" + % (hole_first, first - 1)) + yield (hole_first, first - 1) + + hole_first = last + 1 + + if hole_first < start + count: + _log.debug("FilemapFiemap: yielding range (%d, %d)" + % (hole_first, start + count - 1)) + yield (hole_first, start + count - 1) + + +def filemap(image): + """ + Create and return an instance of a Filemap class - 'FilemapFiemap' or + 'FilemapSeek', depending on what the system we run on supports. If the + FIEMAP ioctl is supported, an instance of the 'FilemapFiemap' class is + returned. Otherwise, if 'SEEK_HOLE' is supported an instance of the + 'FilemapSeek' class is returned. If none of these are supported, the + function generates an 'Error' type exception. + """ + + try: + return FilemapFiemap(image) + except ErrorNotSupp: + return FilemapSeek(image) diff --git a/bmaptools/TransRead.py b/bmaptools/TransRead.py new file mode 100644 index 0000000..fbf1712 --- /dev/null +++ b/bmaptools/TransRead.py @@ -0,0 +1,611 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. +# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module allows opening and reading local and remote files and decompress +them on-the-fly if needed. Remote files are read using urllib (except of +"ssh://" URLs, which are handled differently). Supported file extentions are: +'bz2', 'gz', 'xz', 'lzo' and a "tar" version of them: 'tar.bz2', 'tbz2', 'tbz', +'tb2', 'tar.gz', 'tgz', 'tar.xz', 'txz', 'tar.lzo', 'tzo', 'tar.lz4', 'tlz4'. 
+This module uses the following system programs for decompressing: pbzip2, bzip2, +gzip, pigz, xz, lzop, lz4, tar and unzip. +""" + +import os +import io +import errno +import sys +import logging +import threading +import subprocess +from six.moves.urllib import parse as urlparse +from bmaptools import BmapHelpers + +_log = logging.getLogger(__name__) # pylint: disable=C0103 + +# Disable the following pylint errors and recommendations: +# * Instance of X has no member Y (E1101), because it produces +# false-positives for many of 'subprocess' class members, e.g. +# "Instance of 'Popen' has no 'wait' member". +# * Too many instance attributes (R0902) +# * Too many branches (R0912) +# * Too many local variables (R0914) +# * Too many statements (R0915) +# pylint: disable=E1101 +# pylint: disable=R0902 +# pylint: disable=R0912 +# pylint: disable=R0914 +# pylint: disable=R0915 + +# A list of supported compression types +SUPPORTED_COMPRESSION_TYPES = ('bz2', 'gz', 'xz', 'lzo', 'lz4', 'tar.gz', + 'tar.bz2', 'tar.xz', 'tar.lzo', 'tar.lz4', + 'zip') + + +def _fake_seek_forward(file_obj, cur_pos, offset, whence=os.SEEK_SET): + """ + This function implements the 'seek()' method for file object 'file_obj'. + Only seeking forward and is allowed, and 'whence' may be either + 'os.SEEK_SET' or 'os.SEEK_CUR'. 
+ """ + + if whence == os.SEEK_SET: + new_pos = offset + elif whence == os.SEEK_CUR: + new_pos = cur_pos + offset + else: + raise Error("'seek()' method requires the 'whence' argument " + "to be %d or %d, but %d was passed" + % (os.SEEK_SET, os.SEEK_CUR, whence)) + + if new_pos < cur_pos: + raise Error("''seek()' method supports only seeking forward, " + "seeking from %d to %d is not allowed" + % (cur_pos, new_pos)) + + length = new_pos - cur_pos + to_read = length + while to_read > 0: + chunk_size = min(to_read, 1024 * 1024) + buf = file_obj.read(chunk_size) + if not buf: + break + to_read -= len(buf) + + if to_read < 0: + raise Error("seeked too far: %d instead of %d" + % (new_pos - to_read, new_pos)) + + return new_pos - to_read + + +class Error(Exception): + """ + A class for exceptions generated by this module. We currently support only + one type of exceptions, and we basically throw human-readable problem + description in case of errors. + """ + pass + + +def _decode_sshpass_exit_code(code): + """ + A helper function which converts "sshpass" command-line tool's exit code + into a human-readable string. See "man sshpass". + """ + + if code == 1: + result = "invalid command line argument" + elif code == 2: + result = "conflicting arguments given" + elif code == 3: + result = "general run-time error" + elif code == 4: + result = "unrecognized response from ssh (parse error)" + elif code == 5: + result = "invalid/incorrect password" + elif code == 6: + result = "host public key is unknown. sshpass exits without " \ + "confirming the new key" + elif code == 255: + # SSH result =s 255 on any error + result = "ssh error" + else: + result = "unknown" + + return result + + +class TransRead(object): + """ + This class implement the transparent reading functionality. Instances of + this class are file-like objects which you can read and seek only forward. + """ + + def __init__(self, filepath): + """ + Class constructor. 
The 'filepath' argument is the full path to the file + to read transparently. + """ + + self.name = filepath + # Size of the file (in uncompressed form), may be 'None' if the size is + # unknown + self.size = None + # Type of the compression of the file + self.compression_type = 'none' + # Whether the 'bz2file' PyPI module was found + self.bz2file_found = False + # Whether the file is behind an URL + self.is_url = False + # List of child processes we forked + self._child_processes = [] + # The reader thread + self._rthread = None + # This variable becomes 'True' when the instance of this class is not + # usable any longer. + self._done = False + # There may be a chain of open files, and we save the intermediate file + # objects in the 'self._f_objs' list. The final file object is stored + # in th elast element of the list. + # + # For example, when the path is an URL to a bz2 file, the chain of + # opened file will be: + # o self._f_objs[0] is the urllib2 file-like object + # o self._f_objs[1] is the stdout of the 'bzip2' process + self._f_objs = [] + + self._fake_seek = False + self._pos = 0 + + try: + self._f_objs.append(open(self.name, "rb")) + except IOError as err: + if err.errno == errno.ENOENT: + # This is probably an URL + self._open_url(filepath) + else: + raise Error("cannot open file '%s': %s" % (filepath, err)) + + self._open_compressed_file() + + def __del__(self): + """The class destructor which closes opened files.""" + self._done = True + + for child in self._child_processes: + child.kill() + + if self._rthread: + self._rthread.join() + + for file_obj in self._f_objs: + file_obj.close() + + def _read_thread(self, f_from, f_to): + """ + This function is used when reading compressed files. It runs in a + spearate thread, reads data from the 'f_from' file-like object, and + writes them to the 'f_to' file-like object. 'F_from' may be a urllib + object, while 'f_to' is usually stdin of the decompressor process. 
+ """ + + chunk_size = 1024 * 1024 + while not self._done: + buf = f_from.read(chunk_size) + if not buf: + break + + f_to.write(buf) + + # This will make sure the process decompressor gets EOF and exits, as + # well as ublocks processes waiting on decompressor's stdin. + f_to.close() + + def _open_compressed_file(self): + """ + Detect file compression type and open it with the corresponding + compression module, or just plain 'open() if the file is not + compressed. + """ + + def is_gzip(name): + """Returns 'True' if file 'name' is compressed with 'gzip'.""" + if name.endswith('.gzip') or \ + (name.endswith('.gz') and not name.endswith('.tar.gz')): + return True + return False + + def is_bzip2(name): + """Returns 'True' if file 'name' is compressed with 'bzip2'.""" + if name.endswith('.bz2') and not name.endswith('.tar.bz2'): + return True + return False + + def is_xz(name): + """Returns 'True' if file 'name' is compressed with 'xz'.""" + if name.endswith('.xz') and not name.endswith('.tar.xz'): + return True + return False + + def is_lzop(name): + """Returns 'True' if file 'name' is compressed with 'lzop'.""" + if name.endswith('.lzo') and not name.endswith('.tar.lzo'): + return True + return False + + def is_lz4(name): + """Returns 'True' if file 'name' is compressed with 'lz4'.""" + if name.endswith('.lz4') and not name.endswith('.tar.lz4'): + return True + return False + + def is_tar_gz(name): + """ + Returns 'True' if file 'name' is a tar archive compressed with + 'gzip'. + """ + + if name.endswith('.tar.gz') or name.endswith('.tgz'): + return True + return False + + def is_tar_bz2(name): + """ + Returns 'True' if file 'name' is a tar archive compressed with + 'bzip2'. + """ + + if name.endswith('.tar.bz2') or name.endswith('.tbz') or \ + name.endswith('.tbz2') or name.endswith('.tb2'): + return True + return False + + def is_tar_xz(name): + """ + Returns 'True' if file 'name' is a tar archive compressed with 'xz'. 
+ """ + + if name.endswith('.tar.xz') or name.endswith('.txz'): + return True + return False + + def is_tar_lzo(name): + """ + Returns 'True' if file 'name' is a tar archive compressed with + 'lzop'. + """ + + if name.endswith('.tar.lzo') or name.endswith('.tzo'): + return True + return False + + def is_tar_lz4(name): + """ + Returns 'True' if file 'name' is a tar archive compressed with + 'lz4'. + """ + + if name.endswith('.tar.lz4') or name.endswith('.tlz4'): + return True + return False + + archiver = None + if is_tar_gz(self.name) or is_gzip(self.name): + self.compression_type = 'gzip' + if BmapHelpers.program_is_available("pigz"): + decompressor = "pigz" + else: + decompressor = "gzip" + + if is_gzip(self.name): + args = "-d -c" + else: + archiver = "tar" + args = "-x -z -O" + elif is_tar_bz2(self.name) or is_bzip2(self.name): + self.compression_type = 'bzip2' + if BmapHelpers.program_is_available("pbzip2"): + decompressor = "pbzip2" + else: + decompressor = "bzip2" + + if is_bzip2(self.name): + args = "-d -c" + else: + archiver = "tar" + args = "-x -j -O" + elif is_tar_xz(self.name) or is_xz(self.name): + self.compression_type = 'xz' + decompressor = "xz" + if is_xz(self.name): + args = "-d -c" + else: + archiver = "tar" + args = "-x -J -O" + elif is_tar_lzo(self.name) or is_lzop(self.name): + self.compression_type = 'lzo' + decompressor = "lzop" + if is_lzop(self.name): + args = "-d -c" + else: + archiver = "tar" + args = "-x --lzo -O" + elif self.name.endswith(".zip"): + self.compression_type = 'zip' + decompressor = "funzip" + args = "" + elif is_tar_lz4(self.name) or is_lz4(self.name): + self.compression_type = 'lz4' + decompressor = "lz4" + if is_lz4(self.name): + args = "-d -c" + else: + archiver = "tar" + args = "-x -Ilz4 -O" + else: + if not self.is_url: + self.size = os.fstat(self._f_objs[-1].fileno()).st_size + return + + # Make sure decompressor and the archiver programs are available + if not BmapHelpers.program_is_available(decompressor): + raise 
Error("the \"%s\" program is not available but it is " + "required decompressing \"%s\"" + % (decompressor, self.name)) + if archiver and not BmapHelpers.program_is_available(archiver): + raise Error("the \"%s\" program is not available but it is " + "required reading \"%s\"" % (archiver, self.name)) + + # Start the decompressor process. We'll send the data to its stdin and + # read the decompressed data from its stdout. + if archiver: + args = archiver + " " + args + else: + args = decompressor + " " + args + + if self.is_url: + child_stdin = subprocess.PIPE + else: + child_stdin = self._f_objs[-1].fileno() + + child_process = subprocess.Popen(args, shell=True, + bufsize=1024 * 1024, + stdin=child_stdin, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + if child_stdin == subprocess.PIPE: + # A separate reader thread is created only when we are reading via + # urllib2. + args = (self._f_objs[-1], child_process.stdin, ) + self._rthread = threading.Thread(target=self._read_thread, args=args) + self._rthread.daemon = True + self._rthread.start() + + self._fake_seek = True + self._f_objs.append(child_process.stdout) + self._child_processes.append(child_process) + + def _open_url_ssh(self, parsed_url): + """ + This function opens a file on a remote host using SSH. The URL has to + have this format: "ssh://username@hostname:path". Currently we only + support password-based authentication. 
+ """ + + username = parsed_url.username + password = parsed_url.password + path = parsed_url.path + hostname = parsed_url.hostname + if username: + hostname = username + "@" + hostname + + # Make sure the ssh client program is installed + if not BmapHelpers.program_is_available("ssh"): + raise Error("the \"ssh\" program is not available but it is " + "required for downloading over the ssh protocol") + + # Prepare the commands that we are going to run + if password: + # In case of password we have to use the sshpass tool to pass the + # password to the ssh client utility + popen_args = ["sshpass", + "-p" + password, + "ssh", + "-o StrictHostKeyChecking=no", + "-o PubkeyAuthentication=no", + "-o PasswordAuthentication=yes", + hostname] + + # Make sure the sshpass program is installed + if not BmapHelpers.program_is_available("ssh"): + raise Error("the \"sshpass\" program is not available but it " + "is required for password-based SSH authentication") + else: + popen_args = ["ssh", + "-o StrictHostKeyChecking=no", + "-o PubkeyAuthentication=yes", + "-o PasswordAuthentication=no", + "-o BatchMode=yes", + hostname] + + # Test if we can successfully connect + child_process = subprocess.Popen(popen_args + ["true"]) + child_process.wait() + retcode = child_process.returncode + if retcode != 0: + decoded = _decode_sshpass_exit_code(retcode) + raise Error("cannot connect to \"%s\": %s (error code %d)" + % (hostname, decoded, retcode)) + + # Test if file exists by running "test -f path && test -r path" on the + # host + command = "test -f " + path + " && test -r " + path + child_process = subprocess.Popen(popen_args + [command], + bufsize=1024 * 1024, + stdout=subprocess.PIPE) + child_process.wait() + if child_process.returncode != 0: + raise Error("\"%s\" on \"%s\" cannot be read: make sure it " + "exists, is a regular file, and you have read " + "permissions" % (path, hostname)) + + # Read the entire file using 'cat' + child_process = subprocess.Popen(popen_args + ["cat " 
+ path], + stdout=subprocess.PIPE) + + # Now the contents of the file should be available from sub-processes + # stdout + self._f_objs.append(child_process.stdout) + + self._child_processes.append(child_process) + self.is_url = True + self._fake_seek = True + + def _open_url(self, url): + """ + Open an URL 'url' and return the file-like object of the opened URL. + """ + + def _print_warning(timeout): + """ + This is a small helper function for printing a warning if we cannot + open the URL for some time. + """ + _log.warning("failed to open the URL with %d sec timeout, is the " + "proxy configured correctly? Keep trying ..." % + timeout) + + import socket + + from six.moves import http_client as httplib + from six.moves.urllib import request as urllib + from six.moves.urllib.error import URLError + + parsed_url = urlparse.urlparse(url) + + if parsed_url.scheme == "ssh": + # Unfortunately, urllib2 does not handle "ssh://" URLs + self._open_url_ssh(parsed_url) + return + + username = parsed_url.username + password = parsed_url.password + + if username and password: + # Unfortunately, in order to handle URLs which contain user name + # and password (e.g., http://user:password@my.site.org), we need to + # do few extra things. + new_url = list(parsed_url) + if parsed_url.port: + new_url[1] = "%s:%s" % (parsed_url.hostname, parsed_url.port) + else: + new_url[1] = parsed_url.hostname + url = urlparse.urlunparse(new_url) + + # Build an URL opener which will do the authentication + password_manager = urllib.HTTPPasswordMgrWithDefaultRealm() + password_manager.add_password(None, url, username, password) + auth_handler = urllib.HTTPBasicAuthHandler(password_manager) + opener = urllib.build_opener(auth_handler) + else: + opener = urllib.build_opener() + + opener.addheaders = [('User-Agent', 'Mozilla/5.0')] + urllib.install_opener(opener) + + # Open the URL. 
First try with a short timeout, and print a message + # which should supposedly give the a clue that something may be going + # wrong. + # The overall purpose of this is to improve user experience. For + # example, if one tries to open a file but did not setup the proxy + # environment variables propely, there will be a very long delay before + # the failure message. And it is much nicer to pre-warn the user early + # about something possibly being wrong. + for timeout in (10, None): + try: + f_obj = opener.open(url, timeout=timeout) + # Handling the timeout case in Python 2.7 + except socket.timeout as err: + if timeout is not None: + _print_warning(timeout) + else: + raise Error("cannot open URL '%s': %s" % (url, err)) + except URLError as err: + # Handling the timeout case in Python 2.6 + if timeout is not None and \ + isinstance(err.reason, socket.timeout): + _print_warning(timeout) + else: + raise Error("cannot open URL '%s': %s" % (url, err)) + except (IOError, ValueError, httplib.InvalidURL) as err: + raise Error("cannot open URL '%s': %s" % (url, err)) + except httplib.BadStatusLine: + raise Error("cannot open URL '%s': server responds with an " + "HTTP status code that we don't understand" % url) + + self.is_url = True + self._f_objs.append(f_obj) + + def read(self, size=-1): + """ + Read the data from the file or URL and and uncompress it on-the-fly if + necessary. 
+ """ + + if size < 0: + size = 0xFFFFFFFFFFFFFFFF + buf = self._f_objs[-1].read(size) + self._pos += len(buf) + + return buf + + def seek(self, offset, whence=os.SEEK_SET): + """The 'seek()' method, similar to the one file objects have.""" + if self._fake_seek or not hasattr(self._f_objs[-1], "seek"): + self._pos = _fake_seek_forward(self._f_objs[-1], self._pos, + offset, whence) + else: + try: + self._f_objs[-1].seek(offset, whence) + except io.UnsupportedOperation: + self._fake_seek = True + self._pos = _fake_seek_forward(self._f_objs[-1], self._pos, + offset, whence) + + def tell(self): + """The 'tell()' method, similar to the one file objects have.""" + if self._fake_seek or not hasattr(self._f_objs[-1], "tell"): + return self._pos + else: + return self._f_objs[-1].tell() + + def close(self): + """Close the file-like object.""" + self.__del__() + + def __getattr__(self, name): + """ + If we are backed by a local uncompressed file, then fall-back to using + its operations. + """ + + if self.compression_type == 'none' and not self.is_url: + return getattr(self._f_objs[-1], name) + else: + raise AttributeError diff --git a/bmaptools/__init__.py b/bmaptools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/contrib/bmap_write.py b/contrib/bmap_write.py new file mode 100755 index 0000000..41ba04c --- /dev/null +++ b/contrib/bmap_write.py @@ -0,0 +1,70 @@ +#!/usr/bin/python + +# A super-simple standalone script (works with both Python2 / Python3 and has +# no external dependencies) to show how easily .bmap files can be parsed. +# (Also demonstrates how little code it takes - which might be a useful starting +# point for other languages) +# +# This is effectively a minimal version of 'bmaptool copy'. 
It only supports +# uncompressed images, it does no verification, and if the image is named +# mydata.img it assumes the corresponding bmap is named mydata.bmap + +# Copyright (C) 2018 Andrew Scheller +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import sys +import xml.etree.ElementTree as ET +import re +import os + +if len(sys.argv) != 3: + print("Usage: %s " % os.path.basename(sys.argv[0])) + sys.exit(1) +raw_file = sys.argv[1] +output_file = sys.argv[2] +if not os.path.isfile(raw_file): + print("raw-file '%s' doesn't exist" % raw_file) + sys.exit(1) +file_root, file_ext = os.path.splitext(raw_file) +bmap_file = file_root + '.bmap' +if not os.path.isfile(bmap_file): + print("bmap-file '%s' doesn't exist" % bmap_file) + sys.exit(1) + +bmap_root = ET.parse(bmap_file).getroot() +blocksize = int(bmap_root.find('BlockSize').text) +with open(raw_file, 'rb') as filedata: + with open(output_file, 'wb') as outdata: + try: + outdata.truncate(int(bmap_root.find('ImageSize').text)) # optional + except: + pass + for bmap_range in bmap_root.find('BlockMap').findall('Range'): + blockrange = bmap_range.text + m = re.match('^\s*(\d+)\s*-\s*(\d+)\s*$', blockrange) + if m: + start = int(m.group(1)) + end = int(m.group(2)) + else: + start = int(blockrange) + end = start + start_offset = start * blocksize + 
filedata.seek(start_offset, 0) + outdata.seek(start_offset, 0) + for i in range(end - start + 1): + outdata.write(filedata.read(blocksize)) + outdata.flush() + os.fsync(outdata.fileno()) diff --git a/debian/bmap-tools.docs b/debian/bmap-tools.docs new file mode 100644 index 0000000..d608dd0 --- /dev/null +++ b/debian/bmap-tools.docs @@ -0,0 +1 @@ +docs/RELEASE_NOTES diff --git a/debian/bmap-tools.install b/debian/bmap-tools.install new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/debian/bmap-tools.install @@ -0,0 +1 @@ + diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..8ccd270 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,207 @@ +bmap-tools (3.5) unstable; urgency=low + + * Fixed copying of compressed files from URLs + * Python 3.x support fixes and improvements. + + -- Artem Bityutskiy Thu, 23 Aug 2018 10:34:31 +0300 + +bmap-tools (3.4) unstable; urgency=low + + * New homepage: https://github.com/01org/bmap-tools + * Python 3.x support. + * bmaptool can now be shipped as standalone application. + * Added support for ZIP archives. + * Added support for LZ4 archives. + * Fixed bugs related to specific filesystems. + + -- Alexander Kanevskiy Thu, 31 Aug 2017 15:40:12 +0300 + +bmap-tools (3.2) unstable; urgency=low + + * Add support for LZO and archives ('.lzo' and '.tar.lzo'). + * Add support for multi-stream bzip2 archives (creted with "pbzip2"). + * Support tmpfs by using the SEEK_HOLE method instead of FIEMAP. + * Use external tools like 'gzip' and 'bzip2' for decompressing, instead of + using internal python libraries. + + -- Artem Bityutskiy Wed, 19 Feb 2014 16:50:12 +0200 + +bmap-tools (3.2~rc2) unstable; urgency=low + + * Bump the version number to 3.2~rc2. 
+ + -- Artem Bityutskiy Fri, 31 Jan 2014 12:54:42 +0200 + +bmap-tools (3.1) unstable; urgency=low + + * Change bmap format version from 1.4 to 2.0, because there are incompatible + changes in 1.4 comparing to 1.3, so the right version number is 2.0 + * Add backward and forward bmap format compatibility unit-tests + + -- Artem Bityutskiy Thu, 07 Nov 2013 17:26:57 +0200 + +bmap-tools (3.0) unstable; urgency=low + + * Switch from using SHA1 for checksumming to SHA256. + * Start supporting OpenPGP signatures. Both detached and clearsign signatures + are supported. + * Always sync the image file before creating the bmap for it, to work-around + kernel bugs in early FIEMAP implementations. + + -- Artem Bityutskiy Wed, 02 Oct 2013 09:30:22 +0300 + +bmap-tools (2.6) unstable; urgency=low + + * Add support for on-the-fly decompression of '.xz' and '.tar.xz' files. + + -- Artem Bityutskiy Tue, 13 Aug 2013 14:53:49 +0300 + +bmap-tools (2.5) unstable; urgency=low + + * Do not fail when lacking permissions for accessing block device's sysfs + files. + * Improve debian packaging. + + -- Artem Bityutskiy Mon, 05 Aug 2013 10:05:09 +0300 + +bmap-tools (2.4) unstable; urgency=low + + * Add support for ssh:// URLs. + + -- Artem Bityutskiy Wed, 05 Jun 2013 18:15:41 +0300 + +bmap-tools (2.3) unstable; urgency=low + + * Add bmap file SHA1 verification, make tests work on btrfs. + + -- Artem Bityutskiy Mon, 06 May 2013 10:58:32 +0300 + +bmap-tools (2.2) unstable; urgency=low + + * Support username and password in URLs. + + -- Artem Bityutskiy Mon, 11 Mar 2013 14:40:17 +0200 + +bmap-tools (2.1) unstable; urgency=low + + * Fix out of memory issues when copying .bz2 files. + + -- Artem Bityutskiy Mon, 18 Feb 2013 16:38:32 +0200 + +bmap-tools (2.0) unstable; urgency=low + + * Fix an issue with running out of memory in TransRead.py. 
+ + -- Artem Bityutskiy Thu, 17 Jan 2013 11:33:15 +0200 + +bmap-tools (2.0~rc5) unstable; urgency=low + + * When block device optimzations fail - raise an exception except of muting + the error, because we really want to know about these failures and possibly + fix them. + + -- Artem Bityutskiy Tue, 15 Jan 2013 14:51:27 +0200 + +bmap-tools (2.0~rc4) unstable; urgency=low + + * Fix bmap autodiscovery. + + -- Artem Bityutskiy Thu, 10 Jan 2013 13:58:07 +0200 + +bmap-tools (2.0~rc3) unstable; urgency=low + + * Fix uncaught urllib2 exception bug introduced in rc1. + + -- Artem Bityutskiy Mon, 07 Jan 2013 10:19:49 +0200 + +bmap-tools (2.0~rc2) unstable; urgency=low + + * Fix writing to block devices, which was broken in rc1. + * Make the informational messages a bit nicer. + + -- Artem Bityutskiy Fri, 04 Jan 2013 09:52:41 +0200 + +bmap-tools (2.0~rc1) unstable; urgency=low + + * Allow copying without bmap only if --nobmap was specified. + * Auto-discover the bmap file. + * Support reading from URLs. + * Implement progress bar. + * Highlight error and warning messages with red and yellow labels. + + -- Artem Bityutskiy Thu, 20 Dec 2012 10:47:00 +0200 + +bmap-tools (1.0) unstable; urgency=low + + * Release version 1.0 of the tools - almost identical to 1.0~rc7 except of few + minor differences like spelling fixes. + + -- Artem Bityutskiy Mon, 03 Dec 2012 10:00:33 +0200 + +bmap-tools (1.0~rc7) unstable; urgency=low + + * Add a Fiemap.py module which implements python API to the linux FIEMAP ioct. + * Use the FIEMAP ioctl properly and optimally. + * Add unit-tests, current test coverage is 66%. + * A lot of core rerafactoring. + * Several bug fixes in 'BmapCopy' (e.g., .tar.gz format support was broken). + * Add README and RELEASE_NOTES files. + + -- Artem Bityutskiy Thu, 29 Nov 2012 12:29:39 +0200 + +bmap-tools (0.6) unstable; urgency=low + + * Improve the base API test to cover the case when there is no bmap. + * Fix a bug when copying without bmap. 
+ + -- Artem Bityutskiy Wed, 21 Nov 2012 16:43:49 +0200 + +bmap-tools (0.5) unstable; urgency=low + + * Fix handling of bmap files which contain ranges with only one block. + * Restore the block device settings which we change on exit. + * Change block device settings correctly for partitions. + * Rework API modules to accept file-like objects, not only paths. + * Fix and silence pylint warnings. + * Implement the base API test-case. + + -- Artem Bityutskiy Tue, 20 Nov 2012 15:40:30 +0200 + +bmap-tools (0.4) unstable; urgency=low + + * Improved compressed images flashing speed by exploiting multiple threads: + now we read/decompress the image in one thread and write it in a different + thread. + + -- Artem Bityutskiy Wed, 14 Nov 2012 12:35:06 +0200 + +bmap-tools (0.3) unstable; urgency=low + + * Fix flashing speed calculations + * Fix the Ctrl-C freeze issue - now we synchronize the block device + periodically so if a Ctrl-C interruption happens, we terminate withen few + seconds. + + -- Artem Bityutskiy Tue, 13 Nov 2012 10:56:11 +0200 + +bmap-tools (0.2) unstable; urgency=low + + * Release 0.2 - mostly internal code re-structuring and renamings, + not much functional changes. + * The 'bmap-flasher' and 'bmap-creator' tools do not exist anymore. Now + we have 'bmaptool' which supports 'copy' and 'create' sub-commands instead. + * The BmapFlasher module was also re-named to BmapCopy. + + -- Artem Bityutskiy Fri, 09 Nov 2012 12:20:37 +0200 + +bmap-tools (0.1.1) unstable; urgency=low + + * Release 0.1.1 - a lot of fixes and speed improvements. + + -- Artem Bityutskiy Wed, 07 Nov 2012 11:36:29 +0200 + +bmap-tools (0.1.0) unstable; urgency=low + + * Initial release. 
+ + -- Ed Bartosh Sun, 27 Oct 2012 22:31:28 +0300 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..ec63514 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +9 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..9de85fa --- /dev/null +++ b/debian/control @@ -0,0 +1,31 @@ +Source: bmap-tools +Maintainer: Artem Bityutskiy +Section: utils +Priority: optional +Build-Depends: debhelper (>= 9), + python-all (>= 2.7), + python-setuptools, +Standards-Version: 3.8.4 +XS-Python-Version: >= 2.7 + +Package: bmap-tools +Architecture: all +Depends: python (>=2.7), + python-gpgme, + ${misc:Depends}, + ${python:Depends}, + bzip2, + pbzip2, + gzip, + pigz, + lzop, + liblz4-tool, + xz-utils, + tar, + unzip +Description: Tools to generate block map (AKA bmap) and flash images using + bmap. Bmaptool is a generic tool for creating the block map (bmap) for a file, + and copying files using the block map. The idea is that large file containing + unused blocks, like raw system image files, can be copied or flashed a lot + faster with bmaptool than with traditional tools like "dd" or "cp". See + source.tizen.org/documentation/reference/bmaptool for more information. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..ce6fbb5 --- /dev/null +++ b/debian/copyright @@ -0,0 +1,28 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-name: bmap-tools +Upstream-Contact: Artem Bityutskiy +Source: https://github.com/01org/bmap-tools + . + The initial package was put together by Ed Bartosh + on Sun Oct 27 22:32:19 EEST 2012. + +Files: * +Copyright: © 2012-2013 Intel, Inc. +License: GPL-2 + +Files: debian/* +Copyright: © 2012-2013 Intel, Inc. +License: GPL-2 + +License: GPL-2 + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + . 
+ This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. +Comment: + On Debian systems, the full text of the GPL v2 can be found + in /usr/share/common-licenses/GPL-2. diff --git a/debian/manpages b/debian/manpages new file mode 100644 index 0000000..8a9b6db --- /dev/null +++ b/debian/manpages @@ -0,0 +1 @@ +docs/man1/bmaptool.1 diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..9c13336 --- /dev/null +++ b/debian/rules @@ -0,0 +1,4 @@ +#!/usr/bin/make -f + +%: + dh $@ --with=python2 diff --git a/docs/README b/docs/README new file mode 100644 index 0000000..e1fb7fa --- /dev/null +++ b/docs/README @@ -0,0 +1,341 @@ +Summary +~~~~~~~ + +The bmap-tools project implements bmap-related tools and API modules. The +entire project is written in python and supports python 2.7 and python 3.x. + +The project author and maintainer is Artem Bityutskiy . +Please, feel free to contact me if you have questions. + +Project git repository is here: +https://github.com/01org/bmap-tools.git + + +Introduction +~~~~~~~~~~~~ + +Bmaptool is a generic tool for creating the block map (bmap) for a file and +copying files using the block map. The idea is that large files, like raw +system image files, can be copied or flashed a lot faster and more reliably +with bmaptool than with traditional tools, like "dd" or "cp". + +Bmaptool was originally created for the "Tizen IVI" project and it was used for +flashing system images to USB sticks and other block devices. Bmaptool can also +be used for general image flashing purposes, for example, flashing Fedora Linux +OS distribution images to USB sticks. + +Originally Tizen IVI images had been flashed using the "dd" tool, but bmaptool +brought a number of advantages. + +* Faster. 
Depending on various factors, like write speed, image size, how full + is the image, and so on, bmaptool was 5-7 times faster than "dd" in the Tizen + IVI project. +* Integrity. Bmaptool verifies data integrity while flashing, which means that + possible data corruptions will be noticed immediately. +* Usability. Bmaptool can read images directly from the remote server, so users + do not have to download images and save them locally. +* Protects user's data. Unlike "dd", if you make a mistake and specify a wrong + block device name, bmaptool will less likely destroy your data because it has + protection mechanisms which, for example, prevent bmaptool from writing to a + mounted block device. + + +Usage +~~~~~ + +Bmaptool supports 2 subcommands: +* "copy" - copy a file to another file using bmap or flash an image to a block + device +* "create" - create a bmap for a file + +You can get usage reference for bmaptool and all the supported command using +the "-h" or "--help" options: + +$ bmaptool -h # General bmaptool help +$ bmaptool cmd -h # Help on the "cmd" sub-command + +You can also refer to the bmaptool manual page: +$ man bmaptool + + +Concept +~~~~~~~ + +This section provides general information about the block map (bmap) necessary +for understanding how bmaptool works. The structure of the section is: + +* "Sparse files" - the bmap ideas are based on sparse files, so it is important + to understand what sparse files are. +* "The block map" - explains what bmap is. +* "Raw images" - the main usage scenario for bmaptool is flashing raw images, + which this section discusses. +* "Usage scenarios" - describes various possible bmap and bmaptool usage + scenarios. + +Sparse files + +One of the main roles of a filesystem, generally speaking, is to map blocks of +file data to disk sectors. Different file-systems do this mapping differently, +and filesystem performance largely depends on how well the filesystem can do +the mapping. 
The filesystem block size is usually 4KiB, but may also be 8KiB or +larger. + +Obviously, to implement the mapping, the file-system has to maintain some kind +of on-disk index. For any file on the file-system, and any offset within the +file, the index allows you to find the corresponding disk sector, which stores +the file's data. Whenever we write to a file, the filesystem looks up the index +and writes to the corresponding disk sectors. Sometimes the filesystem has to +allocate new disk sectors and update the index (such as when appending data to +the file). The filesystem index is sometimes referred to as the "filesystem +metadata". + +What happens if a file area is not mapped to any disk sectors? Is this +possible? The answer is yes. It is possible and these unmapped areas are often +called "holes". And those files which have holes are often called "sparse +files". + +All reasonable file-systems like Linux ext[234], btrfs, XFS, or Solaris XFS, +and even Windows' NTFS, support sparse files. Old and less reasonable +filesystems, like FAT, do not support holes. + +Reading holes returns zeroes. Writing to a hole causes the filesystem to +allocate disk sectors for the corresponding blocks. Here is how you can create +a 4GiB file with all blocks unmapped, which means that the file consists of a +huge 4GiB hole: + +$ truncate -s4G image.raw +$ stat image.raw + File: image.raw + Size: 4294967296 Blocks: 0 IO Block: 4096 regular file + +Notice that "image.raw" is a 4GiB file, which occupies 0 blocks on the disk! +So, the entire file's contents are not mapped anywhere. Reading this file would +result in reading 4GiB of zeroes. If you write to the middle of the image.raw +file, you'll end up with 2 holes and a mapped area in the middle. + +Therefore: +* Sparse files are files with holes. +* Sparse files help save disk space, because, roughly speaking, holes do not + occupy disk space. 
+* A hole is an unmapped area of a file, meaning that it is not mapped anywhere + on the disk. +* Reading data from a hole returns zeroes. +* Writing data to a hole destroys it by forcing the filesystem to map + corresponding file areas to disk sectors. +* Filesystems usually operate with blocks, so sizes and offsets of holes are + aligned to the block boundary. + +It is also useful to know that you should work with sparse files carefully. It +is easy to accidentally expand a sparse file, that is, to map all holes to +zero-filled disk areas. For example, "scp" always expands sparse files, the +"tar" and "rsync" tools do the same, by default, unless you use the "--sparse" +option. Compressing and then decompressing a sparse file usually expands it. + +There are 2 ioctl's in Linux which allow you to find mapped and unmapped areas: +"FIBMAP" and "FIEMAP". The former is very old and is probably supported by all +Linux systems, but it is rather limited and requires root privileges. The +latter is a lot more advanced and does not require root privileges, but it is +relatively new (added in Linux kernel, version 2.6.28). + +Recent versions of the Linux kernel (starting from 3.1) also support the +"SEEK_HOLE" and "SEEK_DATA" values for the "whence" argument of the standard +"lseek()" system call. They allow positioning to the next hole and the next +mapped area of the file. + +Advanced Linux filesystems, in modern kernels, also allow "punching holes", +meaning that it is possible to unmap any aligned area and turn it into a hole. +This is implemented using the "FALLOC_FL_PUNCH_HOLE" "mode" of the +"fallocate()" system call. 
+ +The bmap + +The bmap is an XML file, which contains a list of mapped areas, plus some +additional information about the file it was created for, for example: +* SHA256 checksum of the bmap file itself +* SHA256 checksum of the mapped areas +* the original file size +* amount of mapped data + +The bmap file is designed to be both easily machine-readable and +human-readable. All the machine-readable information is provided by XML tags. +The human-oriented information is in XML comments, which explain the meaning of +XML tags and provide useful information like amount of mapped data in percent +and in MiB or GiB. + +So, the best way to understand bmap is to just to read it. Here is an example +of a Tizen IVI 2.0 alpha snapshot bmap file. The vast amount of block ranges +have been removed, though, to keep it shorter. + +Raw images + +Raw images are the simplest type of system images which may be flashed to the +target block device, block-by-block, without any further processing. Raw images +just "mirror" the target block device: they usually start with the MBR sector. +There is a partition table at the beginning of the image and one or more +partitions containing filesystems, like ext4. Usually, no special tools are +required to flash a raw image to the target block device. The standard "dd" +command can do the job: + +$ dd if=tizen-ivi-image.raw of=/dev/usb_stick + +At first glance, raw images do not look very appealing because they are large +and it takes a lot of time to flash them. However, with bmap, raw images become +a much more attractive type of image. We will demonstrate this, using Tizen IVI +as an example. + +The Tizen IVI project uses raw images which take 3.7GiB in Tizen IVI 2.0 alpha. +The images are created by the MIC tool. 
Here is a brief description of how MIC +creates them: + +* create a 3.7GiB sparse file, which will become the Tizen IVI image in the end +* partition the file using the "parted" tool +* format the partitions using the "mkfs.ext4" tool +* loop-back mount all the partitions +* install all the required packages to the partitions: copy all the needed + files and do all the tweaks +* unmount all loop-back-mounted image partitions, the image is ready +* generate the block map file for the image +* compress the image using "bzip2", turning them into a small file, around + 300MiB + +The Tizen IVI raw images are initially sparse files. All the mapped blocks +represent useful data and all the holes represent unused regions, which +"contain" zeroes and do not have to be copied when flashing the image. Although +information about holes is lost once the image gets compressed, the bmap file +still has it and it can be used to reconstruct the uncompressed image or to +flash the image quickly, by copying only the mapped regions. + +Raw images compress extremely well because the holes are essentially zeroes, +which compress perfectly. This is why 3.7GiB Tizen IVI raw images, which +contain about 1.1GiB of mapped blocks, take only 300MiB in a compressed form. +And the important point is that you need to decompress them only while +flashing. The bmaptool does this "on-the-fly". + +Therefore: +* raw images are distributed in a compressed form, and they are almost as small + as a tarball (that includes all the data the image would take) +* the bmap file and the bmaptool make it possible to quickly flash the + compressed raw image to the target block device +* optionally, the bmaptool can reconstruct the original uncompressed sparse raw + image file + +And, what is even more important, is that flashing raw images is extremely fast +because you write directly to the block device, and write sequentially. 
+ +Another great thing about raw images is that they may be 100% ready-to-go and +all you need to do is to put the image on your device "as-is". You do not have +to know the image format, which partitions and filesystems it contains, etc. +This is simple and robust. + +Usage scenarios + +Flashing or copying large images is the main bmaptool use case. The idea is +that if you have a raw image file and its bmap, you can flash it to a device by +writing only the mapped blocks and skipping the unmapped blocks. + +What this basically means is that with bmap it is not necessary to try to +minimize the raw image size by making the partitions small, which would require +resizing them. The image can contain huge multi-gigabyte partitions, just like +the target device requires. The image will then be a huge sparse file, with +little mapped data. And because unmapped areas "contain" zeroes, the huge image +will compress extremely well, so the huge image will be very small in +compressed form. It can then be distributed in compressed form, and flashed +very quickly with bmaptool and the bmap file, because bmaptool will decompress +the image on-the-fly and write only mapped areas. + +The additional benefit of using bmap for flashing is the checksum verification. +Indeed, the "bmaptool create" command generates SHA256 checksums for all mapped +block ranges, and the "bmaptool copy" command verifies the checksums while +writing. Integrity of the bmap file itself is also protected by a SHA256 +checksum and bmaptool verifies it before starting flashing. + +On top of this, the bmap file can be signed using OpenPGP (gpg) and bmaptool +automatically verifies the signature if it is present. This allows for +verifying the bmap file integrity and authoring. And since the bmap file +contains SHA256 checksums for all the mapped image data, the bmap file +signature verification should be enough to guarantee integrity and authoring of +the image file. 
The second usage scenario is reconstructing sparse files. Generally speaking, if
| +| - setup.cfg | contains a piece of nose tests configuration | +| - .coveragerc | lists files to include into test coverage report | +| - TODO | Just a list of things to be done for the project. | +| - make_a_release.sh | Most people may ignore this script. It is used by | +| | maintainer when creating a new release. | +| - tests/ | Contains the project unit-tests. | +| | - test_api_base.py | Tests the base API modules: 'BmapCreate.py' and | +| | | 'BmapCopy.py'. | +| | - test_filemap.py | Tests the 'Filemap.py' module. | +| | - test_compat.py | Tests that new BmapCopy implementations support old | +| | | bmap formats, and old BmapCopy implementations | +| | | support new compatible bmap fomrats. | +| | - helpers.py | Helper functions shared between the unit-tests. | +| | - test-data/ | Data files for the unit-tests | +| | - oldcodebase/ | Copies of old BmapCopy implementations for bmap | +| | | format forward-compatibility verification. | +| - bmaptools/ | The API modules which implement all the bmap | +| | | functionality. | +| | - BmapCreate.py | Creates a bmap for a given file. | +| | - BmapCopy.py | Implements copying of an image using its bmap. | +| | - Filemap.py | Allows for reading files' block map. | +| | - BmapHelpers.py | Just helper functions used all over the project. | +| | - TransRead.py | Provides a transparent way to read various kind of | +| | | files (compressed, etc) | +| - debian/* | Debian packaging for the project. | +| - doc/* | Project documentation. | +| - packaging/* | RPM packaging (Fedora & OpenSuse) for the project. | +| - contrib/* | Various contributions that may be useful, but | +| | project maintainers do not really test or maintain. | +-------------------------------------------------------------------------------- + +How to run unit tests +~~~~~~~~~~~~~~~~~~~~~ + +Just install the 'nose' python test framework and run the 'nosetests' command in +the project root directory. 
If you want to see tests coverage report, run +'nosetests --with-coverage'. + +Credits +~~~~~~~ + +* Ed Bartosh for helping me with learning python + (this is my first python project) and working with the Tizen IVI + infrastructure. Ed also implemented the packaging. +* Alexander Kanevskiy and + Kevin Wang for helping with integrating this stuff + to the Tizen IVI infrastructure. +* Simon McVittie for improving Debian + packaging and fixing bmaptool. diff --git a/docs/RELEASE_NOTES b/docs/RELEASE_NOTES new file mode 100644 index 0000000..51af187 --- /dev/null +++ b/docs/RELEASE_NOTES @@ -0,0 +1,289 @@ +Release 3.5 +~~~~~~~~~~~ + +1. Fixed copying of compressed files from URLs, it was a regression introduced + in bmap-tools 3.4. +2. Python 3.x support fixes and improvements. +3. RPM packaging fixes. +4. Improved help and error messages. + +Release 3.4 +~~~~~~~~~~~ + +1. bmap-tools has now new home: https://github.com/01org/bmap-tools + +2. Python 3.x support: bmap-tools now compatible with Python 3.3+ + +3. bmaptool now can be shipped as standalone application. + See PEP441 (zipapp) for implementation details. + +4. ZIP archives now supported. Similar to tar.* archives, image must be + first file in archive. + +5. LZ4 compression now supported. Files with the following extensions are + recognized as LZ4-compressed: ".lz4", ".tar.lz4" and ".tlz4". + +6. Fixed copying images on XFS file system where predictive caching lead + to more blocks to be mapped than needed. + +7. Fixed detection of block size on file systems that do not report it + correctly via ioctl FIGETBSZ. + +Release 3.2 +~~~~~~~~~~~ + +1. Multi-stream bzip2 archives are now supported. These are usually created + with the 'pbzip2' compressor. + +2. LZO archives are now supported too. Files with the following extensions are + recognized as LZO-compressed: ".lzo", ".tar.lzo", ".tzo". + +3. Make 'bmaptool create' (and hence, the BmapCreate module) work with the + "tmpfs" file-system. 
Tmpfs does not, unfortunately, support the "FIEMAP" + ioctl, but it supports the "SEEK_HOLE" option of the "lseek" system call, + which is now used for finding where the holes are. However, this works only + with Linux kernels of version 3.8 or higher. + + Generally, "FIEMAP" is faster than "SEEK_HOLE" for large files, so we always + try to start with using FIEMAP, and if it is not supported, we fall-back to + using "SEEK_HOLE". Therefore, the "Fiemap" module was re-named to "Filemap", + since it is now supports more than just the FIEMAP ioctl. + + Unfortunately, our "SEEK_HOLE" method requires the directory where the image + resides to be accessible for writing, because in current implementation we + need to create a temporary file there for a short time. The temporary file + is used to detect whether tmpfs really supports SEEK_HOLE, or the system + just fakes it by always returning EOF (this is what happens in pre-3.8 + kernels). + +4. Decompression should now require less memory, which should fix + out-of-memory problems reported by some users recently. Namely, users + reported that decompressing large bz2-compressed sparse files caused + out-of-memory situation on machines with 2GB RAM. This should be fixed now. + +5. Reading and decompressing is now faster because we now use more parallelism: + reading the data form the source URL is done in separate thread, + decompressing happens in a separate process too. My measurement with Tizen + IVI images from 'tizen.org' showed 10% read speed improvement, but this + depends a lot on where the bottle-neck is: the USB stick, the network, or + the CPU load. + +Bug-fix release 3.1 +~~~~~~~~~~~~~~~~~~~ + +This bug-fix release is about fixing a small screw-up in version 3.0, where we +introduced incompatible bmap format changes, but did not properly increase the +bmap format version number. Instead of making it to be version 2.0, we made it +to be version 1.4. 
The result is that bmap-tools v2.x crash with those +1.4-formatted bmap files. + +This release changes the bmap format version from 1.4 to 2.0 in order to +lessen the versioning screw-up. Increased major bmap format version number will +make sure that older bmap-tools fail with a readable error message, instead of +crashing. + +Thus, the situation as follows: + * bmap-tools v2.x: handle bmap format versions 1.0-1.3, crash with 1.4, and + nicely exit with 2.0 + * bmap-tools v3.0: handles all 1.x bmap format versions, exits nicely with 2.0 + * bmap-tools v3.1: handles all bmap format versions + +Release 3.0 +~~~~~~~~~~~ + +1. Switch from using SHA1 checksums in the bmap file to SHA256. This required + bmap format change. The new format version is 1.4. BmapCopy (and thus, + bmaptool supports all the older versions too). Now it is possible to use any + hash functions for checksumming, not only SHA256, but SHA256 is the default + for BmapCreate. + +2. Support OpenPGP (AKA gpg) signatures for the bmap file. From now on the bmap + file can be signed with gpg, in which case bmaptool verifies the bmap file + signature. If the signature is bad, bmaptool exits with an error message. + The verification can be disabled with the --no-sig-verify option. + + Both detached and "in-band" clearsign signatures are supported. Bmaptool + automatically discovers detached signatures by checking ".sig" and ".asc" + files. + +3. The Fiemap module (and thus, bmaptool) now always synchronizes the image + before scanning it for mapped areas. This is done by using the + "FIEMAP_FLAG_SYNC" flag of the FIEMAP ioctl. + + The reason for synchronizing the file is bugs in early implementations of + FIEMAP in the kernel and file-systems, and synchronizing the image is a + known way to work around the bugs. + +Bug-fix release 2.6 +~~~~~~~~~~~~~~~~~~~ + +1. Add on-the-fly decompression support for '.xz' and '.tar.xz' files. + +Bug-fix release 2.5 +~~~~~~~~~~~~~~~~~~~ + +1. 
bmaptool (or more precisely, the BmapCopy class) has an optimization where + we switch to the "noop" I/O scheduler when writing directly to block + devices. We also lessen the allowed amount of dirty data for this block + device in order to create less memory pressure on the system. These tweaks + are done by touching the corresponding sysfs files of the block device. The + old bmaptool behavior was that it failed when it could not modify these + files. However, there are systems where users can write to some block + devices (USB sticks, for example), but they do not have permissions to + change the sysfs files, and bmaptool did not work for normal users on such + systems. In version 2.5 we change the behavior and do not fail anymore if we + do not have enough permissions for changing sysfs files, simply because this + is an optimization, although a quite important one. However, we do print a + warning message. + +2. Many improvements and fixes in the Debian packaging, which should make it + simpler for distributions to package bmap-tools. + +Bug-fix release 2.4 +~~~~~~~~~~~~~~~~~~~ + +1. Add SSH URLs support. These URLs start with "ssh://" and have the following + format: ssh://user:password@host:path, where + * user - user name (optional) + * password - the password (optional) + * host - hostname + * path - path to the image file on the remote host + + If the password was given in the URL, bmaptool will use password-based SSH + authentication, otherwise key-based SSH authentication will be used. + +Bug-fix release 2.3 +~~~~~~~~~~~~~~~~~~~ + +1. Add bmap file SHA1 checksum into the bmap file itself in order to improve + robustness of bmaptool. Now we verify bmap file integrity before using it, + and if it is corrupted or incomplete, we should be able to detect this. + + The reason for this change was a bug report from a user who somehow ended + up with a corrupted bmap file and experienced weird issues. 
+ + This also means that manual changes the bmap file will end up with a SHA1 + mismatch failure. In order to prevent the failure, one has to update the bmap + file's SHA1 by putting all ASCII "0" symbols (should be 40 zeroes) to the + "BmapFileSHA1" tag, then generating SHA1 of the resulting file, and then + put the calculated real SHA1 back to the "BmapFileSHA1" tag. + + In the future, if needed, we can create a "bmaptool checksum" command which + could update SHA1s in the bmap file. + +2. Re-structure the bmap file layout and put information about mapped blocks + count at the beginning of the bmap XML file, not after the block map table. + This will make it possible to optimize bmap file parsing in the future. This + also makes the bmap file a little bit more human-readable. + +2. Make the test-suite work on btrfs. + + +Bug-fix release 2.2 +~~~~~~~~~~~~~~~~~~~ + +1. Made bmaptool understand URLs which include user name and password + (the format is: https://user:password@server.com) + + +Bug-fix release 2.1 +~~~~~~~~~~~~~~~~~~~ + +1. Fixed the out of memory problems when copying .bz2 files. +2. Added CentOS 6 support in packaging. + + +Release 2.0 +~~~~~~~~~~~ + +There are several user-visible changes in 'bmaptool copy': + +1. In order to copy an image without bmap, the user now has to explicitly + specify the "--nobmap" option. In v1.0 this was not necessary. The reason + for this change is that users forget to use --bmap and do not realize that + they are copying entire the image. IOW, this is a usability improvement. + +2. The bmap file auto-discovery feature has been added. Now when the user does + not specify the bmap file using the --bmap option, 'bmaptool copy' will try + to find it at the same place where the image resides. It will look for files + with a similar base name and ".bmap" extension. This should make it easier + to use bmaptool. + +3. 
'bmaptool copy' now can open remote files, so it is not necessary to + download the images anymore, and you can specify the URL to bmaptool. For + example: + + bmaptool copy download.tizen.org/snapshots/ivi/.../ivi-2.0.raw.bz2 + + The tool will automatically discover the bmap file, read from the image from + the 'download.tizen.org' server, decompress it on-the-fly, and copy to the + target file/device. The proxy is supported via the standard environment + variables like 'http_proxy', 'https_proxy', 'no_proxy', etc. + +4. Now 'bmaptool' prints the progress while copying. This improves usability + as well: copying may take minutes, and it is nice to let the user know how + much has already been copied. + +5. Warnings and errors are high-lighted using yellow and red labels now. + +6. Added bmaptool man page. + +'bmaptool create' has no changes comparing to release v1.0. + + +Release 1.0 +~~~~~~~~~~~ + +The first bmap-tools release. All the planned features are implemented, +automated tests are implemented. We provide nice API modules for bmap creation +('BmapCreate.py') and copying with bmap ('BmapCopy.py'). The 'Fiemap.py' API +module provides python API to the FIEMAP Linux ioctl. + +The 'bmaptool' command-line tool is a basically a small wrapper over the +API modules. It implements the 'create' and 'copy' sub-commands, which +allow creating bmap for a given file and copying a file to another file +or to a block device using bmap. + +The 'bmaptools copy' command (and thus, 'BmapCopy.py' module) support +accept compressed files and transparently de-compress them. The following +compression types are supported: .bz2, .gz, .tar.bz2, .tar.gz. + +The original user of this project is Tizen IVI where the OS images are +sparse 2.6GiB files which are distributed as .bz2 file. Since the images +are only 40% full, the .bz2 file weights about 300MiB. 
Tizen IVI uses the +'BmapCreate.py' API module to generate the bmap file for the 2.6GiB images +(before the image was compressed, because once it is compressed with bzip2, +the information about holes gets lost). Then the bmap file is distributed +together with the .bz2 image. And Tizen IVI users are able to flash the +images to USB stick using the following command: + + $ bmaptool copy --bmap image.bmap image.bz2 /dev/usb_stick + +This command decompresses the image (image.bz2) on-the-fly, and copies all +the mapped blocks (listed in 'image.bmap') to the USB stick (the +'/dev/usb_stick' block device). + +This is a lot faster than the old method: + + $ bzcat image.bz2 | dd of=/dev/usb_stick + +Additionally, 'bmaptool copy' verifies the image - the bmap stores SHA1 +checksums for all mapped regions. + +However, bmap-tools may be useful for other projects as well - it is generic +and just implements the idea of fast block-based flashing (as opposed to +file-based flashing). Block-based flashing has a lot of benefits. + +The 'BmapCopy.py' module implements a couple of important optimization when +copying to block device: + 1. Switch the block device I/O scheduler to 'Noop', which is a lot faster + than 'CFQ' for sequential writes. + 2. Limits the amount of memory which the kernel uses for buffering, in + order to have less impact on the overall system performance. + 3. Reads in a separate thread, which is a lot faster when copying compressed + images, because we read/uncompress/verify SHA1 in parallel to writing + to a potentially slow block device. + +We support bmap format versioning. The current format is 1.2. The minor version +number must not break backward compatibility, while the major numbers indicates +some incompatibility. 
diff --git a/docs/man1/bmaptool.1 b/docs/man1/bmaptool.1 new file mode 100644 index 0000000..313c17a --- /dev/null +++ b/docs/man1/bmaptool.1 @@ -0,0 +1,315 @@ +.TH BMAPTOOL "1" "February 2014" "bmap-tools 3.2" "User Commands" + +.SH NAME + +.PP +bmaptool - create block map (bmap) for a file or copy a file using bmap + +.SH SYNOPSIS + +.PP +.B bmaptool +[\-\-help] [\-\-version] [\-\-quiet] [\-\-debug] [] + +.SH DESCRIPTION + +.PP +\fIBmaptool\fR is a generic tool for creating the block map (bmap) for a file and +copying files using the block map. The idea is that large files, like raw +system image files, can be copied or flashed a lot faster with \fIbmaptool\fR than +with traditional tools, like "dd" or "cp". + +.PP +\fIBmaptool\fR supports 2 commands: +.RS 2 +1. \fBcopy\fR - copy a file to another file using bmap or flash an image to a block device +.RE +.RS 2 +2. \fBcreate\fR - create a bmap for a file +.RE + +.PP +Please, find full documentation for the project online. + +.\" =========================================================================== +.\" Global options +.\" =========================================================================== +.SH OPTIONS + +.PP +\-\-version +.RS 2 +Print \fIbmaptool\fR version and exit. +.RE + +.PP +\-h, \-\-help +.RS 2 +Print short help text and exit. +.RE + +.PP +\-q, \-\-quiet +.RS 2 +Be quiet, do not print extra information. +.RE + +.PP +\-d, \-\-debug +.RS 2 +Print debugging messages. +.RE + +.\" =========================================================================== +.\" Commands descriptions +.\" =========================================================================== +.SH COMMANDS + +.\" +.\" The "copy" command description +.\" +.SS \fBcopy\fR [options] IMAGE DEST + +.RS 2 +Copy file IMAGE to the destination regular file or block device DEST +using bmap. IMAGE may either be a local path or an URL. DEST may either +be a regular file or a block device (only local). 
+ +.PP +Unless the bmap file is explicitly specified with the "--bmap" option, \fIbmaptool\fR +automatically discovers it by looking for a file with the same basename as IMAGE +but with the ".bmap" extension. The bmap file is only looked for in +IMAGE's directory (or base URL, in case IMAGE was specified as an URL). If the +bmap file is not found, \fIbmaptool\fR fails. To copy without bmap, use +the "--nobmap" option. + +.PP +Both IMAGE and the bmap file may be specified as an URL (http://, ftp://, +https://, file://, ssh://). In order to make \fIbmaptool\fR use a proxy server, +please, specify the proxy using the standard "$http_proxy", "$https_proxy", +"$ftp_proxy" or "$no_proxy" environment variables. + +.PP +If the server requires authentication, user name and password may be specified +in the URL, for example "https://user:password@my.server.org/image.raw.bz2", or +"ssh://user:password@host:path/to/image.raw". + +.PP +IMAGE may be compressed, in which case \fIbmaptool\fR decompresses it on-the-fly. +The compression type is detected by the file extension and the following +extensions are supported: + +.RS 4 +1. ".gz", ".gzip", ".tar.gz" and ".tgz" for files and tar archives compressed with "\fIgzip\fR" program +.RE +.RS 4 +2. ".bz2", "tar.bz2", ".tbz2", ".tbz", and ".tb2" for files and tar archives compressed with "\fIbzip2\fR" program +.RE +.RS 4 +3. ".xz", ".tar.xz", ".txz" for files and tar archives compressed with "\fIxz\fR" program +.RE +.RS 4 +4. ".lzo", "tar.lzo", ".tzo" for files and tar archives compressed with "\fIlzo\fR" program +.RE +.RS 4 +4. ".lz4", "tar.lz4", ".tlz4" for files and tar archives compressed with "\fIlz4\fR" program +.RE + +.PP +IMAGE files with other extensions are assumed to be uncompressed. Note, +\fIbmaptool\fR uses "\fIpbzip2\fR" and "\fIpigz\fR" programs for decompressing +bzip2 and gzip archives faster, unless they are not available, in which case if +falls-back to using "\fIbzip2\fR" and "\fIgzip\fR". 
+ +.PP +If DEST is a block device node (e.g., "/dev/sdg"), \fIbmaptool\fR opens it in +exclusive mode. This means that it will fail if any other process has IMAGE +block device node opened. This also means that no other processes will be able +to open IMAGE until \fIbmaptool\fR finishes the copying. Please, see semantics +of the "O_EXCL" flag of the "open()" syscall. + +.PP +The bmap file typically contains SHA-256 checksum for itself as well as SHA-256 +checksum for all the mapped data regions, which makes it possible to guarantee +data integrity. \fIbmaptool\fR verifies the checksums and exits with an error +in case of a mismatch. Checksum verification can be disabled using the +"--no-verify" option. \fIbmaptool\fR does not verify that unampped areas +contain only zeroes, because these areas are anyway dropped and are not used for +anything. + +.PP +The bmap file may be signed with OpenPGP (gpg). The signature may be either +detached (a separate file) or "built into" the bmap file (so called "clearsign" +signature). + +.PP +The detached signature can be specified with the "--bmap-sig" option, otherwise +\fIbmaptool\fR tries to automatically discover it by looking for a file with +the same basename as the bmap file but with the ".asc" or ".sig" extension. +This is very similar to the bmap file auto-discovery. So if a ".asc" or ".sig" +file exists, \fIbmaptool\fR will verify the signature. + +.PP +The clearsign signature is part of the bmap file and \fIbmaptool\fR +automatically detected and verifies it. + +.PP +If the signature is bad, \fIbmaptool\fR exits with an error. Bmap file +signature verification can be disabled using the "--no-sig-verify" option. +.RE + +.\" +.\" The "copy" command's options +.\" +.RS 2 +\fBOPTIONS\fR +.RS 2 +\-h, \-\-help +.RS 2 +Print short help text about the "copy" command and exit. +.RE + +.PP +\-\-bmap BMAP +.RS 2 +Use bmap file "BMAP" for copying. 
If this option is not specified, \fIbmaptool\fR +tries to automatically discover the bmap file. +.RE + +.PP +\-\-bmap-sig SIG +.RS 2 +Use a detached OpenPGP signature file "SIG" for verifying the bmap file +integrity and publisher. If this option is not specified, \fIbmaptool\fR +tries to automatically discover the signature file. +.RE + +.PP +\-\-nobmap +.RS 2 +Disable automatic bmap file discovery and force flashing entire IMAGE without bmap. +.RE + +.PP +\-\-no-verify +.RS 2 +Do not verify data checksums when copying (not recommended). The checksums are +stored in the bmap file, and normally \fIbmaptool\fR verifies that the data in +IMAGE matches the checksums. +.RE + +.PP +\-\-no-sig-verify +.RS 2 +Do not verify the OpenPGP bmap file signature (not recommended). +.RE +.RE + +.\" +.\" The "copy" command's examples +.\" +.RS 2 +\fBEXAMPLES\fR +.RS 2 +\fIbmaptool\fR copy image.raw.bz2 /dev/sdg +.RS 2 +Copy bz2-compressed local file "image.raw.bz2" to block device "/dev/sdg". The +image file is uncompressed on-the-fly. The bmap file is discovered +automatically. The OpenPGP signature is detected/discovered automatically +too. +.RE +.RE + +.RS 2 +\fIbmaptool\fR copy http://my-server.com/files/image.raw.bz2 $HOME/tmp/file +.RS 2 +Copy bz2-compressed remote "image.raw.bz2" to regular file "$HOME/tmp/file". +The image file is uncompressed on-the-fly. The bmap file is discovered +automatically. The OpenPGP signature is detected/discovered automatically +too. +.RE +.RE + +.RS 2 +\fIbmaptool\fR copy --bmap image.bmap --bmap-sig image.bmap.asc image.raw /dev/sdg +.RS 2 +Copy non-compressed local file "image.raw" to block device "/dev/sdg" using bmap file +"image.bmap". Verify the bmap file signature using a detached OpenPGP signature +from "imag.bmap.asc". +.RE +.RE + +.\" +.\" The "create" command description +.\" +.SS \fBcreate\fR [options] IMAGE + +.PP +Generate bmap for a regular file IMAGE. 
Internally, this command uses the +Linux "FIEMAP" ioctl to find out which IMAGE blocks are mapped. However, if +"FIEMAP" is not supported, the "SEEK_HOLE" feature of the "lseek" system call +is used instead. By default, the resulting bmap file is printed to stdout, +unless the "--output" option is used. + +.PP +The IMAGE file is always synchronized before the block map is generated. And it +is important to make sure that the IMAGE file is not modified when the bmap +file is being generated, and after the bmap file has been generated. Otherwise +the bmap file becomes invalid and checksum verification will fail. + +.PP +The image file can further be signed using OpenPGP. + +.\" +.\" The "create" command's options +.\" +.RS 2 +\fBOPTIONS\fR +.RS 2 +\-h, \-\-help +.RS 2 +Print short help text about the "create" command and exit. +.RE + +.PP +\-o, \-\-output OUTPUT +.RS 2 +Save the generated bmap in the OUTPUT file (by default the bmap is printed to +stdout). +.RE + +.PP +\-\-no-checksum +.RS 2 +Generate a bmap file without SHA1 checksums (not recommended). +.RE +.RE +.RE + +.\" +.\" The "create" command's examples +.\" +.RS 2 +\fBEXAMPLES\fR +.RS 2 +\fIbmaptool\fR create image.raw +.RS 2 +Generate bmap for the "image.raw" file and print it to stdout. +.RE +.RE + +.RS 2 +\fIbmaptool\fR create -o image.bmap image.raw +.RS 2 +Generate bmap for the "image.raw" file and save it in "image.bmap". +.RE +.RE + +.SH AUTHOR + +Artem Bityutskiy . + +.SH REPORTING BUGS + +Please, report bugs to Artem Bityutskiy or +to the bmap-tools mailing list . diff --git a/make_a_release.sh b/make_a_release.sh new file mode 100755 index 0000000..2b59e35 --- /dev/null +++ b/make_a_release.sh @@ -0,0 +1,169 @@ +#!/bin/sh -euf +# +# Copyright (c) 2012-2013 Intel, Inc. 
ask_question() {
    # Ask the user a yes/no question and loop until a definite answer is
    # given. Exits the whole script if the answer is "no".
    local question=$1

    while true; do
        printf "%s\n" "$question (yes/no)?"
        IFS= read -r answer
        # Fix: the script runs under '#!/bin/sh'; POSIX 'test'/'[' only
        # supports '=' for string comparison ('==' is a bashism and fails
        # on dash). Also read with '-r' so backslashes are taken literally.
        if [ "$answer" = "yes" ]; then
            printf "%s\n" "Very good!"
            return
        elif [ "$answer" = "no" ]; then
            printf "%s\n" "Please, do that!"
            exit 1
        else
            printf "%s\n" "Please, answer \"yes\" or \"no\""
        fi
    done
}
s/^/$pfx2/" | \ + sed -e "s/[\t ]\+$//" + done < "$logfile" +} + +[ $# -eq 0 ] && usage +[ $# -eq 1 ] || fatal "insufficient or too many argumetns" + +new_ver="$1"; shift + +# Validate the new version +printf "%s" "$new_ver" | egrep -q -x '[[:digit:]]+\.[[:digit:]]+' || + fatal "please, provide new version in X.Y format" + +# Make sure that the current branch is 'master' +current_branch="$(git branch | sed -n -e '/^*/ s/^* //p')" +if [ "$current_branch" != "master" ]; then + fatal "current branch is '$current_branch' but must be 'master'" +fi + +# Make sure the git index is up-to-date +[ -z "$(git status --porcelain)" ] || fatal "git index is not up-to-date" + +# Remind the maintainer about various important things +ask_question "Did you update the docs/RELEASE_NOTES file" +ask_question "Did you update the docs/README file" +ask_question "Did you update the man page" +ask_question "Did you update tests: test-data and oldcodebase" + +# Change the version in the 'bmaptools/CLI.py' file +sed -i -e "s/^VERSION = \"[0-9]\+\.[0-9]\+\"$/VERSION = \"$new_ver\"/" bmaptools/CLI.py +# Sed the version in the RPM spec file +sed -i -e "s/^Version: [0-9]\+\.[0-9]\+$/Version: $new_ver/" packaging/bmap-tools.spec +# Remove the "rc_num" macro from the RPM spec file to make sure we do not have +# the "-rcX" part in the release version +sed -i -e '/^%define[[:blank:]]\+rc_num[[:blank:]]\+[[:digit:]]\+[[:blank:]]*$/d' packaging/bmap-tools.spec + +# Ask the maintainer for changelog lines +logfile="$(mktemp -t "$PROG.XXXX")" +cat > "$logfile" < "$deblogfile" +format_changelog "$logfile" " * " " " >> "$deblogfile" +printf "\n%s\n\n" " -- Artem Bityutskiy $(date -R)" >> "$deblogfile" +cat debian/changelog >> "$deblogfile" +mv "$deblogfile" debian/changelog + +# Prepare RPM changelog +rpmlogfile="$(mktemp -t "$PROG.XXXX")" +printf "%s\n" "$(date --utc) - Artem Bityutskiy ${new_ver}-1" > "$rpmlogfile" +format_changelog "$logfile" "- " " " >> "$rpmlogfile" +printf "\n" >> "$rpmlogfile" +cat 
packaging/bmap-tools.changes >> "$rpmlogfile" +mv "$rpmlogfile" packaging/bmap-tools.changes + +rm "$logfile" + +# Commit the changes +git commit -a -s -m "Release version $new_ver" + +outdir="." +tag_name="v$new_ver" +release_name="bmap-tools-$new_ver" + +# Create new signed tag +printf "%s\n" "Signing tag $tag_name" +git tag -m "$release_name" -s "$tag_name" + +# Get the name of the release branch corresponding to this version +release_branch="release-$(printf "%s" "$new_ver" | sed -e 's/\(.*\)\..*/\1.0/')" + +cat < 3.5-1 +- Fixed copying of compressed files from URLs +- Python 3.x support fixes and improvements. + +Thu Aug 31 12:40:00 UTC 2017 Alexander Kanevskiy 3.4-1 +- New homepage: https://github.com/01org/bmap-tools +- Python 3.x support. +- bmaptool can now be shipped as standalone application. +- Added support for ZIP archives. +- Added support for LZ4 archives. +- Fixed bugs related to specific filesystems. + +Wed Feb 19 14:50:12 UTC 2014 - Artem Bityutskiy 3.2-1 +- Add support for LZO and archives ('.lzo' and '.tar.lzo'). +- Add support for multi-stream bzip2 archives (creted with "pbzip2"). +- Support tmpfs by using the SEEK_HOLE method instead of FIEMAP. +- Use external tools like 'gzip' and 'bzip2' for decompressing, instead of + using internal python libraries. + +Thu Nov 7 15:26:57 UTC 2013 - Artem Bityutskiy 3.1-1 +- Change bmap format version from 1.4 to 2.0, because there are incompatible + changes in 1.4 comparing to 1.3, so the right version number is 2.0 +- Add backward and forward bmap format compatibility unit-tests + +Wed Oct 2 06:30:22 UTC 2013 - Artem Bityutskiy 3.0-1 +- Switch from using SHA1 for checksumming to SHA256. +- Start supporting OpenPGP signatures. Both detached and clearsign signatures + are supported. +- Always sync the image file before creating the bmap for it, to work-around + kernel bugs in early FIEMAP implementations. 
+ +Tue Aug 13 11:54:31 UTC 2013 - Artem Bityutskiy 2.6-1 +- Add support for on-the-fly decompression of '.xz' and '.tar.xz' files. + +Mon Aug 5 07:05:59 UTC 2013 - Artem Bityutskiy 2.5-1 +- Do not fail when lacking permisssions for accessing block device's sysfs + files. +- Improve debian packaging. + +Wed Jun 5 15:16:42 UTC 2013 - Artem Bityutskiy 2.4-1 +- Add ssh:// URLs support. + +Mon May 6 07:59:26 UTC 2013 - Artem Bityutskiy 2.3-1 +-Add bmap file SHA1 verification, make tests work on btrfs. + +Mon Mar 11 12:42:03 UTC 2013 - Artem Bityutskiy 2.2-1 +- Support username and password in URLs. + +Mon Feb 18 14:39:11 UTC 2013 - Artem Bityutskiy 2.1-1 +- Fix out of memory issues when copying .bz2 files. + +Thu Jan 17 09:34:00 UTC 2013 - Artem Bityutskiy 2.0-1 +- Fix the an issue with running out of memory in TransRead.py. + +Tue Jan 15 12:52:25 UTC 2013 - Artem Bityutskiy 2.0-0.rc5 +- When block device optimzations fail - raise an exception except of muting + the error, because we really want to know about these failures and possibly + fix them. + +Thu Jan 10 11:58:57 UTC 2013 - Artem Bityutskiy 2.0-0.rc4 +- Fix bmap autodiscovery. + +Mon Jan 7 08:20:37 UTC 2013 - Artem Bityutskiy 2.0-0.rc3 +- Fix uncaught urllib2 exception bug introduced in rc1. + +Fri Jan 4 07:55:05 UTC 2013 - Artem Bityutskiy 2.0-0.rc2 +- Fix writing to block devices, which was broken in rc1. +- Make the informational messages a bit nicer. + +Thu Dec 20 08:48:26 UTC 2012 - Artem Bityutskiy 2.0-0.rc1 +- Allow copying without bmap only if --nobmap was specified. +- Auto-discover the bmap file. +- Support reading from URLs. +- Implement progress bar. +- Highlight error and warning messages with red and yellow labels. + +Mon Dec 3 08:02:03 UTC 2012 - Artem Bityutskiy 1.0-1 +- Release version 1.0 of the tools - almost identical to 1.0-rc7 except of few + minor differences like spelling fixes. 
+ +Thu Nov 29 10:30:20 UTC 2012 - Artem Bityutskiy 1.0-0.rc7 +- Add a Fiemap.py module which implements python API to the linux FIEMAP ioct. +- Use the FIEMAP ioctl properly and optimally. +- Add unit-tests, current test coverage is 66%. +- A lot of core rerafactoring. +- Several bug fixes in 'BmapCopy' (e.g., .tar.gz format support was broken). +- Add README and RELEASE_NOTES files. +- Change the versioning scheme. + +Wed Nov 21 14:45:48 UTC 2012 - Artem Bityutskiy 0.6 +- Improve the base API test to cover the case when there is no bmap. +- Fix a bug when copying without bmap. + +Tue Nov 20 15:40:30 UTC 2012 - Artem Bityutskiy 0.5 +- Fix handling of bmap files which contain ranges with only one block. +- Restore the block device settings which we change on exit. +- Change block device settings correctly for partitions. +- Rework API modules to accept file-like objects, not only paths. +- Fix and silence pylint warnings. +- Implement the base API test-case. + +Wed Nov 14 10:36:10 UTC 2012 - Artem Bityutskiy 0.4 +- Improved compressed images flashing speed by exploiting multiple threads: + now we read/decompress the image in one thread and write it in a different + thread. + +Tue Nov 13 08:56:49 UTC 2012 - Artem Bityutskiy 0.3 +- Fix flashing speed calculations +- Fix the Ctrl-C freeze issue - now we synchronize the block device + periodically so if a Ctrl-C interruption happens, we terminate withen few + seconds. + +Fri Nov 9 10:21:31 UTC 2012 - Artem Bityutskiy 0.2 +- Release 0.2 - mostly internal code re-structuring and renamings, + not much functional changes. +- The 'bmap-flasher' and 'bmap-creator' tools do not exist anymore. Now + we have 'bmaptool' which supports 'copy' and 'create' sub-commands instead. +- The BmapFlasher module was also re-named to BmapCopy. + +Wed Nov 7 09:37:59 UTC 2012 - Artem Bityutskiy 0.1.0 +- Release 0.1.1 - a lot of fixes and speed improvements. + +Sat Oct 27 19:13:31 UTC 2012 - Eduard Bartoch 0.0.1 +- Initial packaging. 
diff --git a/packaging/bmap-tools.spec b/packaging/bmap-tools.spec new file mode 100644 index 0000000..08bac10 --- /dev/null +++ b/packaging/bmap-tools.spec @@ -0,0 +1,89 @@ +# We follow the Fedora guide for versioning. Fedora recommends to use something +# like '1.0-0.rc7' for release candidate rc7 and '1.0-1' for the '1.0' release. +%define rc_str %{?rc_num:0.rc%{rc_num}}%{!?rc_num:1} + +Name: bmap-tools +Summary: Tools to generate block map (AKA bmap) and flash images using bmap +Version: 3.5 +%if 0%{?opensuse_bs} +Release: %{rc_str}.. +%else +Release: %{rc_str}.0.0 +%endif + +Group: Development/Tools/Other +License: GPL-2.0 +BuildArch: noarch +URL: https://github.com/intel/bmap-tools +Source0: %{name}-%{version}.tar.gz + +Requires: bzip2 +Requires: pbzip2 +Requires: gzip +Requires: xz +Requires: tar +Requires: unzip +Requires: lzop +%if ! 0%{?tizen_version:1} +# pigz is not present in Tizen +Requires: pigz +%endif + +%if 0%{?suse_version} +BuildRequires: python-distribute +%endif +%if 0%{?fedora_version} +BuildRequires: python-setuptools +%endif +BuildRequires: python2-rpm-macros + +%if 0%{?suse_version} +# In OpenSuse the xml.etree module is provided by the python-xml package +Requires: python-xml +# The gpgme python module is in python-gpgme +Requires: python-gpgme +%endif + +%if 0%{?fedora_version} +# In Fedora the xml.etree module is provided by the python-libs package +Requires: python-libs +# Tha gpgme python module is in pygpgme package +Requires: pygpgme +%endif + +# Centos6 uses python 2.6, which does not have the argparse module. However, +# argparse is available as a separate package there. +%if 0%{?centos_version} == 600 +Requires: python-argparse +%endif + +%description +Tools to generate block map (AKA bmap) and flash images using bmap. Bmaptool is +a generic tool for creating the block map (bmap) for a file, and copying files +using the block map. 
def get_version(path="bmaptools/CLI.py"):
    """
    Fetch the project version number from the 'bmaptools/CLI.py' file.

    The 'path' argument allows overriding the file to scan (useful for
    testing); it defaults to the project's CLI module. Returns the version
    as a string (e.g. "3.5"), or 'None' if no VERSION line was found.
    """

    # The version lives on a line of its own, e.g.: VERSION = "3.5"
    # Fix: escape the dot - the original pattern used a bare '.', which
    # matches any character (so e.g. 'VERSION = "3x5"' would also match).
    version_re = re.compile(r'^VERSION = "(\d+\.\d+)"$')

    with open(path, "r") as fobj:
        for line in fobj:
            matchobj = version_re.match(line)
            if matchobj:
                return matchobj.group(1)

    return None
The idea is that large file " + "containing unused blocks, like raw system image files, " + "can be copied or flashed a lot faster with bmaptool " + "than with traditional tools like \"dd\" or \"cp\". See " + "source.tizen.org/documentation/reference/bmaptool for " + "more information.", + classifiers=[ + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3" + ] +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/helpers.py b/tests/helpers.py new file mode 100644 index 0000000..e3710b2 --- /dev/null +++ b/tests/helpers.py @@ -0,0 +1,304 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. +# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module contains independent functions shared between various +tests. +""" + +# Disable the following pylint recommendations: +# * Too many statements (R0915) +# pylint: disable=R0915 + +import tempfile +import random +import itertools +import hashlib +import struct +import sys +import os +from bmaptools import BmapHelpers, BmapCopy, TransRead + + +def _create_random_sparse_file(file_obj, size): + """ + Create a sparse file with randomly distributed holes. The mapped areas are + filled with semi-random data. Returns a tuple containing 2 lists: + 1. a list of mapped block ranges, same as 'Filemap.get_mapped_ranges()' + 2. 
a list of unmapped block ranges (holes), same as + 'Filemap.get_unmapped_ranges()' + """ + + file_obj.truncate(size) + block_size = BmapHelpers.get_block_size(file_obj) + blocks_cnt = (size + block_size - 1) // block_size + + def process_block(block): + """ + This is a helper function which processes a block. It randomly decides + whether the block should be filled with random data or should become a + hole. Returns 'True' if the block was mapped and 'False' otherwise. + """ + + map_the_block = random.getrandbits(1) + + if map_the_block: + # Randomly select how much we are going to write + seek = random.randint(0, block_size - 1) + write = random.randint(1, block_size - seek) + assert seek + write <= block_size + file_obj.seek(block * block_size + seek) + file_obj.write(struct.pack("=B", random.getrandbits(8)) * write) + return map_the_block + + mapped = [] + unmapped = [] + iterator = range(0, blocks_cnt) + for was_mapped, group in itertools.groupby(iterator, process_block): + # Start of a mapped region or a hole. Find the last element in the + # group. + first = next(group) + last = first + for last in group: + pass + + if was_mapped: + mapped.append((first, last)) + else: + unmapped.append((first, last)) + + file_obj.truncate(size) + file_obj.flush() + + return (mapped, unmapped) + + +def _create_random_file(file_obj, size): + """ + Fill the 'file_obj' file object with semi-random data up to the size 'size'. + """ + + chunk_size = 1024 * 1024 + written = 0 + + while written < size: + if written + chunk_size > size: + chunk_size = size - written + + file_obj.write(struct.pack("=B", random.getrandbits(8)) * chunk_size) + + written += chunk_size + + file_obj.flush() + + +def generate_test_files(max_size=4 * 1024 * 1024, directory=None, delete=True): + """ + This is a generator which yields files which other tests use as the input + for the testing. The generator tries to yield "interesting" files which + cover various corner-cases. 
def generate_test_files(max_size=4 * 1024 * 1024, directory=None, delete=True):
    """
    This is a generator which yields files which other tests use as the input
    for the testing. The generator tries to yield "interesting" files which
    cover various corner-cases. For example, a large hole file, a file with
    no holes, files of unaligned length, etc.

    The 'directory' argument specifies the directory path where the yielded
    test files should be created. The 'delete' argument specifies whether the
    yielded test files have to be automatically deleted.

    The generator yields tuples consisting of the following elements:
    1. the test file object
    2. file size in bytes
    3. a list of mapped block ranges, same as 'Filemap.get_mapped_ranges()'
    4. a list of unmapped block ranges (holes), same as
       'Filemap.get_unmapped_ranges()'

    Note: each file is yielded while it is still open; the consumer runs its
    checks, and the file is closed (and, if 'delete' is set, removed) once
    the generator is resumed.
    """

    #
    # Generate sparse files with one single hole spanning the entire file
    #

    # A block-sized hole
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="4Khole_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    # The block size is queried once from this first temporary file and
    # re-used for all subsequent files (they are created in the same
    # directory, so presumably on the same file-system).
    block_size = BmapHelpers.get_block_size(file_obj)
    file_obj.truncate(block_size)
    yield (file_obj, block_size, [], [(0, 0)])
    file_obj.close()

    # A block size + 1 byte hole
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="4Khole_plus_1_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    file_obj.truncate(block_size + 1)
    # The extra byte spills into a second block, hence the (0, 1) hole range
    yield (file_obj, block_size + 1, [], [(0, 1)])
    file_obj.close()

    # A block size - 1 byte hole
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="4Khole_minus_1_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    file_obj.truncate(block_size - 1)
    yield (file_obj, block_size - 1, [], [(0, 0)])
    file_obj.close()

    # A 1-byte hole
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="1byte_hole_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    file_obj.truncate(1)
    yield (file_obj, 1, [], [(0, 0)])
    file_obj.close()

    # And 10 holes of random size
    for i in range(10):
        size = random.randint(1, max_size)
        file_obj = tempfile.NamedTemporaryFile("wb+", suffix=".img",
                                               delete=delete, dir=directory,
                                               prefix="rand_hole_%d_" % i)
        file_obj.truncate(size)
        # Round the byte count up to whole blocks for the hole range
        blocks_cnt = (size + block_size - 1) // block_size
        yield (file_obj, size, [], [(0, blocks_cnt - 1)])
        file_obj.close()

    #
    # Generate random sparse files
    #

    # The maximum size
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="sparse_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    mapped, unmapped = _create_random_sparse_file(file_obj, max_size)
    yield (file_obj, max_size, mapped, unmapped)
    file_obj.close()

    # The maximum size + 1 byte
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="sparse_plus_1_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    mapped, unmapped = _create_random_sparse_file(file_obj, max_size + 1)
    yield (file_obj, max_size + 1, mapped, unmapped)
    file_obj.close()

    # The maximum size - 1 byte
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="sparse_minus_1_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    mapped, unmapped = _create_random_sparse_file(file_obj, max_size - 1)
    yield (file_obj, max_size - 1, mapped, unmapped)
    file_obj.close()

    # And 10 files of random size
    for i in range(10):
        size = random.randint(1, max_size)
        file_obj = tempfile.NamedTemporaryFile("wb+", suffix=".img",
                                               delete=delete, dir=directory,
                                               prefix="sparse_%d_" % i)
        mapped, unmapped = _create_random_sparse_file(file_obj, size)
        yield (file_obj, size, mapped, unmapped)
        file_obj.close()

    #
    # Generate random fully-mapped files
    #

    # A block-sized file
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="4Kmapped_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    _create_random_file(file_obj, block_size)
    yield (file_obj, block_size, [(0, 0)], [])
    file_obj.close()

    # A block size + 1 byte file
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="4Kmapped_plus_1_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    _create_random_file(file_obj, block_size + 1)
    yield (file_obj, block_size + 1, [(0, 1)], [])
    file_obj.close()

    # A block size - 1 byte file
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="4Kmapped_minus_1_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    _create_random_file(file_obj, block_size - 1)
    yield (file_obj, block_size - 1, [(0, 0)], [])
    file_obj.close()

    # A 1-byte file
    file_obj = tempfile.NamedTemporaryFile("wb+", prefix="1byte_mapped_",
                                           delete=delete, dir=directory,
                                           suffix=".img")
    _create_random_file(file_obj, 1)
    yield (file_obj, 1, [(0, 0)], [])
    file_obj.close()

    # And 10 mapped files of random size
    for i in range(10):
        size = random.randint(1, max_size)
        file_obj = tempfile.NamedTemporaryFile("wb+", suffix=".img",
                                               delete=delete, dir=directory,
                                               prefix="rand_mapped_%d_" % i)
        _create_random_file(file_obj, size)
        blocks_cnt = (size + block_size - 1) // block_size
        yield (file_obj, size, [(0, blocks_cnt - 1)], [])
        file_obj.close()
delete=delete, dir=directory, + suffix=".img") + _create_random_file(file_obj, block_size - 1) + yield (file_obj, block_size - 1, [(0, 0)], []) + file_obj.close() + + # A 1-byte file + file_obj = tempfile.NamedTemporaryFile("wb+", prefix="1byte_mapped_", + delete=delete, dir=directory, + suffix=".img") + _create_random_file(file_obj, 1) + yield (file_obj, 1, [(0, 0)], []) + file_obj.close() + + # And 10 mapped files of random size + for i in range(10): + size = random.randint(1, max_size) + file_obj = tempfile.NamedTemporaryFile("wb+", suffix=".img", + delete=delete, dir=directory, + prefix="rand_mapped_%d_" % i) + _create_random_file(file_obj, size) + blocks_cnt = (size + block_size - 1) // block_size + yield (file_obj, size, [(0, blocks_cnt - 1)], []) + file_obj.close() + + +def calculate_chksum(file_path): + """Calculates checksum for the contents of file 'file_path'.""" + + file_obj = TransRead.TransRead(file_path) + hash_obj = hashlib.new("sha256") + + chunk_size = 1024 * 1024 + + while True: + chunk = file_obj.read(chunk_size) + if not chunk: + break + hash_obj.update(chunk) + + file_obj.close() + return hash_obj.hexdigest() + + +def copy_and_verify_image(image, dest, bmap, image_chksum, image_size): + """ + Copy image 'image' using bmap file 'bmap' to the destination file 'dest' + and verify the resulting image checksum. 
+ """ + + f_image = TransRead.TransRead(image) + f_dest = open(dest, "w+b") + if (bmap): + f_bmap = open(bmap, "r") + else: + f_bmap = None + + writer = BmapCopy.BmapCopy(f_image, f_dest, f_bmap, image_size) + # Randomly decide whether we want the progress bar or not + if bool(random.getrandbits(1)) and sys.stdout.isatty(): + writer.set_progress_indicator(sys.stdout, None) + writer.copy(bool(random.getrandbits(1)), bool(random.getrandbits(1))) + + # Compare the original file and the copy are identical + assert calculate_chksum(dest) == image_chksum + + if f_bmap: + f_bmap.close() + f_dest.close() + f_image.close() diff --git a/tests/oldcodebase/BmapCopy1_0.py b/tests/oldcodebase/BmapCopy1_0.py new file mode 100644 index 0000000..ff1e6b4 --- /dev/null +++ b/tests/oldcodebase/BmapCopy1_0.py @@ -0,0 +1,710 @@ +# pylint: disable-all + +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. 
And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# * Too many statements (R0915) +# * Too many branches (R0912) +# pylint: disable=R0902 +# pylint: disable=R0915 +# pylint: disable=R0912 + +import os +import stat +import sys +import hashlib +from six import reraise +from six.moves import queue as Queue +from six.moves import _thread as thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# A list of supported image formats +SUPPORTED_IMAGE_FORMATS = ('bz2', 'gz', 'tar.gz', 'tgz', 'tar.bz2') + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = "1.0" + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. 
To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file-like object of the destination file copy the image to + * full path or a file-like object of the bmap file (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + The image file may either be an uncompressed raw image or a compressed + image. Compression type is defined by the image file extension. Supported + types are listed by 'SUPPORTED_IMAGE_FORMATS'. + + IMPORTANT: if the image is given as a file-like object, the compression + type recognition is not performed - the file-like object's 'read()' method + is used directly instead. + + Once an instance of 'BmapCopy' is created, all the 'bmap_*' attributes are + initialized and available. They are read from the bmap. + + However, if bmap was not provided, this is not always the case and some of + the 'bmap_*' attributes are not initialize by the class constructor. + Instead, they are initialized only in the 'copy()' method. The reason for + this is that when bmap is absent, 'BmapCopy' uses sensible fall-back values + for the 'bmap_*' attributes assuming the entire image is "mapped". And if + the image is compressed, it cannot easily find out the image size. Thus, + this is postponed until the 'copy()' method decompresses the image for the + first time. + + The 'copy()' method implements the copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. 
+ + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. """ + + def _initialize_sizes(self, image_size): + """ This function is only used when the there is no bmap. It + initializes attributes like 'blocks_cnt', 'mapped_cnt', etc. Normally, + the values are read from the bmap file, but in this case they are just + set to something reasonable. """ + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + + def _parse_bmap(self): + """ Parse the bmap file and initialize the 'bmap_*' attributes. """ + + bmap_pos = self._f_bmap.tell() + self._f_bmap.seek(0) + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + major = int(self.bmap_version.split('.', 1)[0]) + if major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / 
self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + self._f_bmap.seek(bmap_pos) + + def _open_image_file(self): + """ Open the image file which may be compressed or not. The compression + type is recognized by the file extension. Supported types are defined + by 'SUPPORTED_IMAGE_FORMATS'. """ + + try: + is_regular_file = stat.S_ISREG(os.stat(self._image_path).st_mode) + except OSError as err: + raise Error("cannot access image file '%s': %s" \ + % (self._image_path, err.strerror)) + + if not is_regular_file: + raise Error("image file '%s' is not a regular file" \ + % self._image_path) + + try: + if self._image_path.endswith('.tar.gz') \ + or self._image_path.endswith('.tar.bz2') \ + or self._image_path.endswith('.tgz'): + import tarfile + + tar = tarfile.open(self._image_path, 'r') + # The tarball is supposed to contain only one single member + members = tar.getnames() + if len(members) > 1: + raise Error("the image tarball '%s' contains more than " \ + "one file" % self._image_path) + elif len(members) == 0: + raise Error("the image tarball '%s' is empty (no files)" \ + % self._image_path) + self._f_image = tar.extractfile(members[0]) + elif self._image_path.endswith('.gz'): + import gzip + self._f_image = gzip.GzipFile(self._image_path, 'rb') + elif self._image_path.endswith('.bz2'): + import bz2 + self._f_image = bz2.BZ2File(self._image_path, 'rb') + else: + self._image_is_compressed = False + self._f_image = open(self._image_path, 'rb') + except IOError as err: + raise Error("cannot open image file '%s': %s" \ + % (self._image_path, err)) + + self._f_image_needs_close = True + + def _validate_image_size(self): + """ Make sure that image size from bmap matches real image size. 
""" + + image_size = os.fstat(self._f_image.fileno()).st_size + if image_size != self.image_size: + raise Error("Size mismatch, bmap '%s' was created for an image " \ + "of size %d bytes, but image '%s' has size %d bytes" \ + % (self._bmap_path, self.image_size, + self._image_path, image_size)) + + def _open_destination_file(self): + """ Open the destination file. """ + + try: + self._f_dest = open(self._dest_path, 'w') + except IOError as err: + raise Error("cannot open destination file '%s': %s" \ + % (self._dest_path, err)) + + self._f_dest_needs_close = True + + def _open_bmap_file(self): + """ Open the bmap file. """ + + try: + self._f_bmap = open(self._bmap_path, 'r') + except IOError as err: + raise Error("cannot open bmap file '%s': %s" \ + % (self._bmap_path, err.strerror)) + + self._f_bmap_needs_close = True + + def __init__(self, image, dest, bmap = None): + """ The class constructor. The parameters are: + image - full path or file object of the image which should be copied + dest - full path or file-like object of the destination file to + copy the image to + bmap - full path or file-like object of the bmap file to use for + copying """ + + self._xml = None + self._image_is_compressed = True + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_dest_needs_close = False + self._f_image_needs_close = False + self._f_bmap_needs_close = False + + self._f_bmap = None + self._f_bmap_path = None + + if hasattr(dest, "write"): + self._f_dest = dest + self._dest_path = dest.name + else: + self._dest_path = dest + self._open_destination_file() + + if hasattr(image, "read"): + self._f_image = image + 
self._image_path = image.name + else: + self._image_path = image + self._open_image_file() + + st_mode = os.fstat(self._f_dest.fileno()).st_mode + self._dest_is_regfile = stat.S_ISREG(st_mode) + + if bmap: + if hasattr(bmap, "read"): + self._f_bmap = bmap + self._bmap_path = bmap.name + else: + self._bmap_path = bmap + self._open_bmap_file() + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + # We can initialize size-related attributes only if we the image is + # uncompressed. + if not self._image_is_compressed: + image_size = os.fstat(self._f_image.fileno()).st_size + self._initialize_sizes(image_size) + + if not self._image_is_compressed: + self._validate_image_size() + + self._batch_blocks = self._batch_bytes / self.block_size + + def __del__(self): + """ The class destructor which closes the opened files. """ + + if self._f_image_needs_close: + self._f_image.close() + if self._f_dest_needs_close: + self._f_dest.close() + if self._f_bmap_needs_close: + self._f_bmap.close() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown (the image is + compressed), the generator infinitely yields continuous ranges of + size '_batch_blocks'. 
""" + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). 
""" + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s" \ + % (first, last, hash_obj.hexdigest(), sha1)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + reraise(exc_info[0], exc_info[1], exc_info[2]) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The sync + argument defines whether the destination file has to be synchronized + upon return. 
The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Save file positions in order to restore them at the end + image_pos = self._f_image.tell() + dest_pos = self._f_dest.tell() + if self._f_bmap: + bmap_pos = self._f_bmap.tell() + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[1].with_traceback(exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + if not self.image_size: + # The image size was unknown up until now, probably because this is + # a compressed image. Initialize the corresponding class attributes + # now, when we know the size. + self._initialize_sizes(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. 
+ if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks, but should have %u - inconsistent " \ + "bmap file" % (blocks_written, self.mapped_cnt)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + # Restore file positions + self._f_image.seek(image_pos) + self._f_dest.seek(dest_pos) + if self._f_bmap: + self._f_bmap.seek(bmap_pos) + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _open_destination_file(self): + """ Open the block device in exclusive mode. 
""" + + try: + self._f_dest = os.open(self._dest_path, os.O_WRONLY | os.O_EXCL) + except OSError as err: + raise Error("cannot open block device '%s' in exclusive mode: %s" \ + % (self._dest_path, err.strerror)) + + try: + os.fstat(self._f_dest).st_mode + except OSError as err: + raise Error("cannot access block device '%s': %s" \ + % (self._dest_path, err.strerror)) + + # Turn the block device file descriptor into a file object + try: + self._f_dest = os.fdopen(self._f_dest, "wb") + except OSError as err: + os.close(self._f_dest) + raise Error("cannot open block device '%s': %s" \ + % (self._dest_path, err)) + + self._f_dest_needs_close = True + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError: + # No problem, this is just an optimization. + return + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError: + return + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. 
""" + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError: + # No problem, this is just an optimization. + return + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError: + return + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + try: + self._tune_block_device() + BmapCopy.copy(self, sync, verify) + except: + self._restore_bdev_settings() + raise + + def __init__(self, image, dest, bmap = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known (i.e., it is not compressed) - check that + # it fits the block device. 
+ if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + try: + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + except OSError: + # No problem, this is just an optimization. + pass + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_0.py b/tests/oldcodebase/BmapCopy2_0.py new file mode 100644 index 0000000..f376af1 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_0.py @@ -0,0 +1,634 @@ +# pylint: disable-all + +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. 
Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. 
""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import datetime +from six import reraise +from six.moves import queue as Queue +from six.moves import _thread as thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = "1.0" + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file-like object of the destination file copy the image to + * full path or a file-like object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. 
+ Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. 
""" + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + major = int(self.bmap_version.split('.', 1)[0]) + if major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % 
(self.image_size, self.blocks_cnt, self.block_size)) + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file-like object of the destination file to copy the + image to. + bmap - file-like object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. 
+ self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. 
+ + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). 
""" + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + reraise(exc_info[0], exc_info[1], exc_info[2]) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. 
The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[1].with_traceback(exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. 
+ if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - inconsistent bmap file '%s'" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. 
+ """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. 
And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. 
Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_1.py b/tests/oldcodebase/BmapCopy2_1.py new file mode 100644 index 0000000..ee75ae8 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_1.py @@ -0,0 +1,633 @@ +# pylint: disable-all + +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. 
Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import datetime +from six import reraise +from six.moves import queue as Queue +from six.moves import _thread as thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = "1.0" + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file-like object of the destination file copy the image to + * full path or a file-like object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. 
Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. 
""" + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = (self.image_size + self.block_size - 1) // self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + major = int(self.bmap_version.split('.', 1)[0]) + if major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 
100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) // self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file-like object of the destination file to copy the + image to. + bmap - file-like object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + 
self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes // self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. 
+ if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. 
+ split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. 
""" + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) // self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + reraise(exc_info[0], exc_info[1], exc_info[2]) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. 
+ self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[1].with_traceback(exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. 
+ if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - inconsistent bmap file '%s'" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. 
+ """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. 
And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes // self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) // self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. 
Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_2.py b/tests/oldcodebase/BmapCopy2_2.py new file mode 100644 index 0000000..b5f537c --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_2.py @@ -0,0 +1,635 @@ +# pylint: disable-all + +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. 
Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import datetime +from six import reraise +from six.moves import queue as Queue +from six.moves import _thread as thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = "1.0" + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file-like object of the destination file copy the image to + * full path or a file-like object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. 
Otherwise, it is beneficial to
+    specify the size because it enables extra sanity checks and makes it
+    possible to provide the progress bar.
+
+    When the image size is known either from the bmap or the caller specified
+    it to the class constructor, all the image geometry description attributes
+    ('blocks_cnt', etc) are initialized by the class constructor and available
+    for the user.
+
+    However, when the size is not known, some of the image geometry
+    description attributes are not initialized by the class constructor.
+    Instead, they are initialized only by the 'copy()' method.
+
+    The 'copy()' method implements image copying. You may choose whether to
+    verify the SHA1 checksum while copying or not. Note, this is done only in
+    case of bmap-based copying and only if bmap contains the SHA1 checksums
+    (e.g., bmap version 1.0 did not have SHA1 checksums).
+
+    You may choose whether to synchronize the destination file after writing or
+    not. To explicitly synchronize it, use the 'sync()' method.
+
+    This class supports all the bmap format versions up to version
+    'SUPPORTED_BMAP_VERSION'.
+
+    It is possible to have a simple progress indicator while copying the image.
+    Use the 'set_progress_indicator()' method.
+
+    You can copy only once with an instance of this class. This means that in
+    order to copy the image for the second time, you have to create a new class
+    instance. """
+
+    def set_progress_indicator(self, file_obj, format_string):
+        """ Setup the progress indicator which shows how much data has been
+        copied in percent.
+
+        The 'file_obj' argument is the console file object where the progress
+        has to be printed to. Pass 'None' to disable the progress indicator.
+
+        The 'format_string' argument is the format string for the progress
+        indicator. It has to contain a single '%d' placeholder which will be
+        substituted with copied data in percent. 
""" + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + major = int(self.bmap_version.split('.', 1)[0]) + if major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = 
(self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file-like object of the destination file to copy the + image to. + bmap - file-like object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = 
bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. 
+        if self._progress_started:
+            # The "move cursor up" escape sequence
+            self._progress_file.write('\033[1A')  # pylint: disable=W1401
+        else:
+            self._progress_started = True
+
+        self._progress_file.write(progress)
+        self._progress_file.flush()
+
+    def _get_block_ranges(self):
+        """ This is a helper generator that parses the bmap XML file and for
+        each block range in the XML file it yields ('first', 'last', 'sha1')
+        tuples, where:
+          * 'first' is the first block of the range;
+          * 'last' is the last block of the range;
+          * 'sha1' is the SHA1 checksum of the range ('None' is used if it is
+            missing).
+
+        If there is no bmap file, the generator just yields a single range
+        for the entire image file. If the image size is unknown, the generator
+        infinitely yields continuous ranges of size '_batch_blocks'. """
+
+        if not self._f_bmap:
+            # We do not have the bmap, yield a tuple with all blocks
+            if self.blocks_cnt:
+                yield (0, self.blocks_cnt - 1, None)
+            else:
+                # We do not know image size, keep yielding tuples with many
+                # blocks infinitely.
+                first = 0
+                while True:
+                    yield (first, first + self._batch_blocks - 1, None)
+                    first += self._batch_blocks
+            return
+
+        # We have the bmap, just read it and yield block ranges
+        xml = self._xml
+        xml_bmap = xml.find("BlockMap")
+
+        for xml_element in xml_bmap.findall("Range"):
+            blocks_range = xml_element.text.strip()
+            # The range of blocks has the "X - Y" format, or it can be just "X"
+            # in old bmap format versions. First, split the blocks range string
+            # and strip white-spaces. 
+ split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. 
""" + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + reraise(exc_info[0], exc_info[1], exc_info[2]) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. 
+ self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[1].with_traceback(exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. 
+ if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - inconsistent bmap file '%s'" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. 
+ """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. 
And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. 
Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_3.py b/tests/oldcodebase/BmapCopy2_3.py new file mode 100644 index 0000000..18393af --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_3.py @@ -0,0 +1,670 @@ +# pylint: disable-all + +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. 
Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import datetime +from six import reraise +from six.moves import queue as Queue +from six.moves import _thread as thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = "1.0" + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. 
Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. 
""" + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ This is a helper function which verifies SHA1 checksum of the bmap + file. """ + + import mmap + + correct_sha1 = self._xml.find("BmapFileSHA1").text.strip() + + # Before verifying the shecksum, we have to substitute the SHA1 value + # stored in the file with all zeroes. For these purposes we create + # private memory mapping of the bmap file. + mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + sha1_pos = mapped_bmap.find(correct_sha1) + assert sha1_pos != -1 + + mapped_bmap[sha1_pos:sha1_pos + 40] = '0' * 40 + calculated_sha1 = hashlib.sha1(mapped_bmap).hexdigest() + + mapped_bmap.close() + + if calculated_sha1 != correct_sha1: + raise Error("checksum mismatch for bmap file '%s': calculated " \ + "'%s', should be '%s'" % \ + (self._bmap_path, calculated_sha1, correct_sha1)) + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. 
""" + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 + self._verify_bmap_checksum() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. 
+ dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. 
+ self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. 
+ + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). 
""" + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + reraise(exc_info[0], exc_info[1], exc_info[2]) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. 
The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[1].with_traceback(exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. 
+ if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - bmap file '%s' does not belong to this" \ + "image" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. 
+ """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. 
And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. 
Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_4.py b/tests/oldcodebase/BmapCopy2_4.py new file mode 100644 index 0000000..18393af --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_4.py @@ -0,0 +1,670 @@ +# pylint: disable-all + +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. 
Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import datetime +from six import reraise +from six.moves import queue as Queue +from six.moves import _thread as thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = "1.0" + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. 
Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. 
""" + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ This is a helper function which verifies SHA1 checksum of the bmap + file. """ + + import mmap + + correct_sha1 = self._xml.find("BmapFileSHA1").text.strip() + + # Before verifying the shecksum, we have to substitute the SHA1 value + # stored in the file with all zeroes. For these purposes we create + # private memory mapping of the bmap file. + mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + sha1_pos = mapped_bmap.find(correct_sha1) + assert sha1_pos != -1 + + mapped_bmap[sha1_pos:sha1_pos + 40] = '0' * 40 + calculated_sha1 = hashlib.sha1(mapped_bmap).hexdigest() + + mapped_bmap.close() + + if calculated_sha1 != correct_sha1: + raise Error("checksum mismatch for bmap file '%s': calculated " \ + "'%s', should be '%s'" % \ + (self._bmap_path, calculated_sha1, correct_sha1)) + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. 
""" + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 + self._verify_bmap_checksum() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. 
+ dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. 
+ self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. 
+ + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). 
""" + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + reraise(exc_info[0], exc_info[1], exc_info[2]) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. 
The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[1].with_traceback(exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. 
+ if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - bmap file '%s' does not belong to this" \ + "image" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. 
+ """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. 
And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. 
Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_5.py b/tests/oldcodebase/BmapCopy2_5.py new file mode 100644 index 0000000..da6bab6 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_5.py @@ -0,0 +1,727 @@ +# pylint: disable-all + +# Copyright (c) 2012-2013 Intel, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements copying of images with bmap and provides the following +API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. 
+The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. +""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import logging +import datetime +from six import reraise +from six.moves import queue as Queue +from six.moves import _thread as thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = "1.0" + +class Error(Exception): + """ + A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. + """ + pass + +class BmapCopy: + """ + This class implements the bmap-based copying functionality. 
To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. 
This means that in + order to copy the image for the second time, you have to create a new class + instance. + """ + + def set_progress_indicator(self, file_obj, format_string): + """ + Setup the progress indicator which shows how much data has been copied + in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. + """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ + Set image size and initialize various other geometry-related attributes. + """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ + This is a helper function which verifies SHA1 checksum of the bmap file. + """ + + import mmap + + correct_sha1 = self._xml.find("BmapFileSHA1").text.strip() + + # Before verifying the shecksum, we have to substitute the SHA1 value + # stored in the file with all zeroes. For these purposes we create + # private memory mapping of the bmap file. 
+ mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + sha1_pos = mapped_bmap.find(correct_sha1) + assert sha1_pos != -1 + + mapped_bmap[sha1_pos:sha1_pos + 40] = '0' * 40 + calculated_sha1 = hashlib.sha1(mapped_bmap).hexdigest() + + mapped_bmap.close() + + if calculated_sha1 != correct_sha1: + raise Error("checksum mismatch for bmap file '%s': calculated " + "'%s', should be '%s'" + % (self._bmap_path, calculated_sha1, correct_sha1)) + + def _parse_bmap(self): + """ + Parse the bmap file and initialize corresponding class instance attributs. + """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " + "version %d is not supported" + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " + "blocks count (%d bytes 
!= %d blocks * %d bytes)" + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 + self._verify_bmap_checksum() + + def __init__(self, image, dest, bmap=None, image_size=None, logger=None): + """ + The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. + logger - the logger object to use for printing messages. + """ + + self._logger = logger + if self._logger is None: + self._logger = logging.getLogger(__name__) + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + 
self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ + Print the progress indicator if the mapped area size is known and if + the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. + """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds=250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. 
+ if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ + This is a helper generator that parses the bmap XML file and for each + block range in the XML file it yields ('first', 'last', 'sha1') tuples, + where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. + """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. 
+ split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ + This is a helper generator which splits block ranges from the bmap file + to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). + """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ + This is generator which reads the image file in '_batch_blocks' chunks + and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. 
+ """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " + "image file '%s': %s" + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " + "calculated %s, should be %s (image file %s)" + % (first, last, hash_obj.hexdigest(), + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + reraise(exc_info[0], exc_info[1], exc_info[2]) + + self._batch_queue.put(None) + + def copy(self, sync=True, verify=True): + """ + Copy the image to the destination file using bmap. The 'sync' argument + defines whether the destination file has to be synchronized upon + return. The 'verify' argument defines whether the SHA1 checksum has to + be verified while copying. + """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. 
+ self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[1].with_traceback(exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. 
+ if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " + "have %u - bmap file '%s' does not belong to this" + "image" + % (blocks_written, self._image_path, self._dest_path, + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ + Synchronize the destination file to make sure all the data are actually + written to the disk. + """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ + This class is a specialized version of 'BmapCopy' which copies the image to + a block device. Unlike the base 'BmapCopy' class, this class does various + optimizations specific to block devices, e.g., switching to the 'noop' I/O + scheduler. + """ + + def _tune_block_device(self): + """ + Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. 
+ """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + self._logger.warning("failed to enable I/O optimization, expect " + "suboptimal speed (reason: cannot switch " + "to the 'noop' I/O scheduler: %s)" % err) + else: + # The file contains a list of schedulers with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the name of the current scheduler. + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering, because we do not need too much of it when + # writing sequntially. Excessive buffering makes some systems not very + # responsive, e.g., this was observed in Fedora 17. + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + self._logger.warning("failed to disable excessive buffering, " + "expect worse system responsiveness " + "(reason: cannot set max. I/O ratio to " + "1: %s)" % err) + + def _restore_bdev_settings(self): + """ + Restore old block device settings which we changed in + '_tune_block_device()'. + """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. 
I/O ratio back to '%s': %s" + % (self._old_max_ratio_value, err)) + + def copy(self, sync=True, verify=True): + """ + The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. + """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap=None, image_size=None, logger=None): + """ + The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. 
+ """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size, logger=logger) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " + "fit the block device '%s' which has %s capacity" + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_6.py b/tests/oldcodebase/BmapCopy2_6.py new file mode 100644 index 0000000..d08dd51 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_6.py @@ -0,0 +1,727 @@ +# pylint: disable-all + +# Copyright (c) 2012-2013 Intel, Inc. 
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License, version 2,
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.

"""
This module implements copying of images with bmap and provides the following
API:
  1. BmapCopy class - implements copying to any kind of file, be that a block
     device or a regular file.
  2. BmapBdevCopy class - based on BmapCopy and specializes on copying to
     block devices. It does some more sanity checks and some block device
     performance tuning.

The bmap file is an XML file which contains a list of mapped blocks of the
image. Mapped blocks are the blocks which have disk sectors associated with
them, as opposed to holes, which are blocks with no associated disk sectors.
In other words, the image is considered to be a sparse file, and bmap
basically contains a list of mapped blocks of this sparse file. The bmap
additionally contains some useful information like block size (usually 4KiB),
image size, mapped blocks count, etc.

The bmap is used for copying the image to a block device or to a regular
file. The idea is that we copy quickly with bmap because we copy only mapped
blocks and ignore the holes, because they are useless. And if the image is
generated properly (starting with a huge hole and writing all the data), it
usually contains only few mapped blocks, comparing to the overall image size.
And such an image compresses very well (because holes are read as all
zeroes), so it is beneficial to distribute images as compressed files along
with the bmap.

Here is an example. Suppose you have a 4GiB image which contains only 100MiB
of user data and you need to flash it to a slow USB stick. With bmap you end
up copying only a little bit more than 100MiB of data from the image to the
USB stick (namely, you copy only mapped blocks). This is a lot faster than
copying all 4GiB of data. We say that it is a bit more than 100MiB because
things like file-system meta-data (inode tables, superblocks, etc), partition
table, etc also contribute to the mapped blocks and are also copied.
"""

# Disable the following pylint recommendations:
#   * Too many instance attributes (R0902)
# pylint: disable=R0902

import os
import re
import stat
import sys
import hashlib
import logging
import datetime
# NOTE(review): this module already relies on the Python3-only
# 'with_traceback()' API, so the stdlib modules are used directly instead of
# the 'six.moves' compatibility shims the original imported.
import queue as Queue
import _thread as thread
from xml.etree import ElementTree
from bmaptools.BmapHelpers import human_size

# The highest supported bmap format version
SUPPORTED_BMAP_VERSION = "1.0"


class Error(Exception):
    """
    A class for exceptions generated by the 'BmapCopy' module. We currently
    support only one type of exceptions, and we basically throw
    human-readable problem descriptions in case of errors.
    """
    pass


class BmapCopy:
    """
    This class implements the bmap-based copying functionality. To copy an
    image with bmap you should create an instance of this class, which
    requires the following:

    * full path or a file-like object of the image to copy
    * full path or a file object of the destination file to copy the image to
    * full path or a file object of the bmap file (optional)
    * image size in bytes (optional)

    Although the main purpose of this class is to use bmap, the bmap is not
    required, and if it was not provided then the entire image will be copied
    to the destination file.

    When the bmap is provided, it is not necessary to specify image size,
    because the size is contained in the bmap. Otherwise, it is beneficial to
    specify the size because it enables extra sanity checks and makes it
    possible to provide the progress bar.

    When the image size is known either from the bmap or the caller specified
    it to the class constructor, all the image geometry description
    attributes ('blocks_cnt', etc) are initialized by the class constructor
    and available for the user. However, when the size is not known, some of
    them are initialized only by the 'copy()' method.

    The 'copy()' method implements image copying. You may choose whether to
    verify the SHA1 checksum while copying or not. Note, this is done only in
    case of bmap-based copying and only if the bmap contains the SHA1
    checksums (e.g., bmap version 1.0 did not have SHA1 checksums).

    You may choose whether to synchronize the destination file after writing
    or not. To explicitly synchronize it, use the 'sync()' method.

    This class supports all the bmap format versions up to
    'SUPPORTED_BMAP_VERSION'.

    It is possible to have a simple progress indicator while copying the
    image. Use the 'set_progress_indicator()' method.

    You can copy only once with an instance of this class. This means that in
    order to copy the image for the second time, you have to create a new
    class instance.
    """

    def __init__(self, image, dest, bmap=None, image_size=None, logger=None):
        """
        The class constructor. The parameters are:
            image      - file-like object of the image which should be
                         copied, should only support 'read()' and 'seek()'
                         methods, and only seeking forward has to be
                         supported.
            dest       - file object of the destination file to copy the
                         image to.
            bmap       - file object of the bmap file to use for copying.
            image_size - size of the image in bytes.
            logger     - the logger object to use for printing messages.
        """

        self._logger = logger
        if self._logger is None:
            self._logger = logging.getLogger(__name__)

        self._xml = None

        self._dest_fsync_watermark = None
        self._batch_blocks = None
        self._batch_queue = None
        self._batch_bytes = 1024 * 1024
        self._batch_queue_len = 2

        self.bmap_version = None
        self.bmap_version_major = None
        self.bmap_version_minor = None
        self.block_size = None
        self.blocks_cnt = None
        self.mapped_cnt = None
        self.image_size = None
        self.image_size_human = None
        self.mapped_size = None
        self.mapped_size_human = None
        self.mapped_percent = None

        self._f_bmap = None
        self._f_bmap_path = None

        self._progress_started = None
        self._progress_index = None
        self._progress_time = None
        self._progress_file = None
        self._progress_format = None
        self.set_progress_indicator(None, None)

        self._f_image = image
        self._image_path = image.name

        self._f_dest = dest
        self._dest_path = dest.name
        st_data = os.fstat(self._f_dest.fileno())
        self._dest_is_regfile = stat.S_ISREG(st_data.st_mode)

        # Special quirk for /dev/null (char device 1:3) which does not
        # support fsync()
        if stat.S_ISCHR(st_data.st_mode) and \
           os.major(st_data.st_rdev) == 1 and \
           os.minor(st_data.st_rdev) == 3:
            self._dest_supports_fsync = False
        else:
            self._dest_supports_fsync = True

        if bmap:
            self._f_bmap = bmap
            self._bmap_path = bmap.name
            self._parse_bmap()
        else:
            # There is no bmap. Initialize user-visible attributes to
            # something sensible with an assumption that we just have all
            # blocks mapped.
            self.bmap_version = 0
            self.block_size = 4096
            self.mapped_percent = 100

        if image_size:
            self._set_image_size(image_size)

        # BUGFIX: use floor division - plain '/' yields a float on Python 3
        self._batch_blocks = self._batch_bytes // self.block_size

    def set_progress_indicator(self, file_obj, format_string):
        """
        Setup the progress indicator which shows how much data has been
        copied in percent.

        The 'file_obj' argument is the console file object where the progress
        has to be printed to. Pass 'None' to disable the progress indicator.

        The 'format_string' argument is the format string for the progress
        indicator. It has to contain a single '%d' placeholder which will be
        substituted with copied data in percent.
        """

        self._progress_file = file_obj
        if format_string:
            self._progress_format = format_string
        else:
            self._progress_format = "Copied %d%%"

    def _set_image_size(self, image_size):
        """
        Set image size and initialize various other geometry-related
        attributes.
        """

        if self.image_size is not None and self.image_size != image_size:
            raise Error("cannot set image size to %d bytes, it is known to "
                        "be %d bytes (%s)" % (image_size, self.image_size,
                                              self.image_size_human))

        self.image_size = image_size
        self.image_size_human = human_size(image_size)
        # BUGFIX: floor division (the original '/=' yields a float on
        # Python 3)
        self.blocks_cnt = \
            (self.image_size + self.block_size - 1) // self.block_size

        if self.mapped_cnt is None:
            self.mapped_cnt = self.blocks_cnt
            self.mapped_size = self.image_size
            self.mapped_size_human = self.image_size_human

    def _verify_bmap_checksum(self):
        """
        This is a helper function which verifies the SHA1 checksum of the
        bmap file.
        """

        import mmap

        correct_sha1 = self._xml.find("BmapFileSHA1").text.strip()

        # Before verifying the checksum, we have to substitute the SHA1 value
        # stored in the file with all zeroes. For these purposes we create a
        # private memory mapping of the bmap file.
        mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0,
                                access=mmap.ACCESS_COPY)

        # BUGFIX: mmap contents are bytes, so search and patch with bytes
        # objects (the original used 'str', which fails on Python 3)
        sha1_pos = mapped_bmap.find(correct_sha1.encode("ascii"))
        assert sha1_pos != -1

        mapped_bmap[sha1_pos:sha1_pos + 40] = b'0' * 40
        calculated_sha1 = hashlib.sha1(mapped_bmap).hexdigest()

        mapped_bmap.close()

        if calculated_sha1 != correct_sha1:
            raise Error("checksum mismatch for bmap file '%s': calculated "
                        "'%s', should be '%s'"
                        % (self._bmap_path, calculated_sha1, correct_sha1))

    def _parse_bmap(self):
        """
        Parse the bmap file and initialize the corresponding class instance
        attributes.
        """

        try:
            self._xml = ElementTree.parse(self._f_bmap)
        except ElementTree.ParseError as err:
            raise Error("cannot parse the bmap file '%s' which should be a "
                        "proper XML file: %s" % (self._bmap_path, err))

        xml = self._xml
        self.bmap_version = str(xml.getroot().attrib.get('version'))

        # Make sure we support this version.
        # BUGFIX: the original compared the integer major version against the
        # "major.minor" *string* SUPPORTED_BMAP_VERSION (TypeError on
        # Python 3) and formatted the string with '%d'.
        self.bmap_version_major = int(self.bmap_version.split('.', 1)[0])
        self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1])
        supported_major = int(SUPPORTED_BMAP_VERSION.split('.', 1)[0])
        if self.bmap_version_major > supported_major:
            raise Error("only bmap format version up to %d is supported, "
                        "version %d is not supported"
                        % (supported_major, self.bmap_version_major))

        # Fetch interesting data from the bmap XML file
        self.block_size = int(xml.find("BlockSize").text.strip())
        self.blocks_cnt = int(xml.find("BlocksCount").text.strip())
        self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip())
        self.image_size = int(xml.find("ImageSize").text.strip())
        self.image_size_human = human_size(self.image_size)
        self.mapped_size = self.mapped_cnt * self.block_size
        self.mapped_size_human = human_size(self.mapped_size)
        self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt

        # BUGFIX: floor division ('/' yields a float on Python 3)
        blocks_cnt = \
            (self.image_size + self.block_size - 1) // self.block_size
        if self.blocks_cnt != blocks_cnt:
            raise Error("Inconsistent bmap - image size does not match "
                        "blocks count (%d bytes != %d blocks * %d bytes)"
                        % (self.image_size, self.blocks_cnt,
                           self.block_size))

        if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3:
            # The bmap file checksum appeared in format 1.3
            self._verify_bmap_checksum()

    def _update_progress(self, blocks_written):
        """
        Print the progress indicator if the mapped area size is known and if
        the indicator has been enabled by assigning a console file object to
        the 'progress_file' attribute.
        """

        if not self._progress_file:
            return

        if self.mapped_cnt:
            assert blocks_written <= self.mapped_cnt
            percent = int((float(blocks_written) / self.mapped_cnt) * 100)
            progress = '\r' + self._progress_format % percent + '\n'
        else:
            # Do not rotate the wheel too fast
            now = datetime.datetime.now()
            min_delta = datetime.timedelta(milliseconds=250)
            if now - self._progress_time < min_delta:
                return
            self._progress_time = now

            progress_wheel = ('-', '\\', '|', '/')
            progress = '\r' + progress_wheel[self._progress_index % 4] + '\n'
            self._progress_index += 1

        # This is a little trick we do in order to make sure that the next
        # message will always start from a new line - we switch to the new
        # line after each progress update and move the cursor up. As an
        # example, this is useful when the copying is interrupted by an
        # exception - the error message will start from a new line.
        if self._progress_started:
            # The "move cursor up" escape sequence
            self._progress_file.write('\033[1A')  # pylint: disable=W1401
        else:
            self._progress_started = True

        self._progress_file.write(progress)
        self._progress_file.flush()

    def _get_block_ranges(self):
        """
        This is a helper generator that parses the bmap XML file and for each
        block range in the XML file it yields ('first', 'last', 'sha1')
        tuples, where:
          * 'first' is the first block of the range;
          * 'last' is the last block of the range;
          * 'sha1' is the SHA1 checksum of the range ('None' is used if it is
            missing).

        If there is no bmap file, the generator just yields a single range
        for the entire image file. If the image size is unknown, the
        generator infinitely yields continuous ranges of size
        '_batch_blocks'.
        """

        if not self._f_bmap:
            # We do not have the bmap, yield a tuple with all blocks
            if self.blocks_cnt:
                yield (0, self.blocks_cnt - 1, None)
            else:
                # We do not know image size, keep yielding tuples with many
                # blocks infinitely.
                first = 0
                while True:
                    yield (first, first + self._batch_blocks - 1, None)
                    first += self._batch_blocks
            return

        # We have the bmap, just read it and yield block ranges
        xml_bmap = self._xml.find("BlockMap")

        for xml_element in xml_bmap.findall("Range"):
            blocks_range = xml_element.text.strip()
            # The range of blocks has the "X - Y" format, or it can be just
            # "X" in old bmap format versions. First, split the blocks range
            # string and strip white-spaces.
            split = [x.strip() for x in blocks_range.split('-', 1)]

            first = int(split[0])
            if len(split) > 1:
                last = int(split[1])
                if first > last:
                    raise Error("bad range (first > last): '%s'"
                                % blocks_range)
            else:
                last = first

            if 'sha1' in xml_element.attrib:
                sha1 = xml_element.attrib['sha1']
            else:
                sha1 = None

            yield (first, last, sha1)

    def _get_batches(self, first, last):
        """
        This is a helper generator which splits block ranges from the bmap
        file to smaller batches. Indeed, we cannot read and write entire
        block ranges from the image file, because a range can be very large.
        So we perform the I/O in batches. Batch size is defined by the
        '_batch_blocks' attribute. Thus, for each (first, last) block range,
        the generator yields smaller (start, end, length) batch ranges,
        where:
          * 'start' is the starting batch block number;
          * 'end' is the ending batch block number;
          * 'length' is the batch length in blocks (same as
            'end' - 'start' + 1).
        """

        batch_blocks = self._batch_blocks

        while first + batch_blocks - 1 <= last:
            yield (first, first + batch_blocks - 1, batch_blocks)
            first += batch_blocks

        batch_blocks = last - first + 1
        if batch_blocks:
            yield (first, first + batch_blocks - 1, batch_blocks)

    def _get_data(self, verify):
        """
        This is a generator which reads the image file in '_batch_blocks'
        chunks and puts ('range', 'start', 'end', 'buf') tuples to the batch
        queue, where:
          * 'start' is the starting block number of the batch;
          * 'end' is the last block of the batch;
          * 'buf' is a buffer containing the batch data.

        It runs in the reader thread; any failure is forwarded to the main
        thread as an ('error', exc_info) queue item instead of being raised
        here.
        """

        try:
            for (first, last, sha1) in self._get_block_ranges():
                if verify and sha1:
                    hash_obj = hashlib.new('sha1')

                self._f_image.seek(first * self.block_size)

                iterator = self._get_batches(first, last)
                for (start, end, length) in iterator:
                    try:
                        buf = self._f_image.read(length * self.block_size)
                    except IOError as err:
                        raise Error("error while reading blocks %d-%d of "
                                    "the image file '%s': %s"
                                    % (start, end, self._image_path, err))

                    if not buf:
                        self._batch_queue.put(None)
                        return

                    if verify and sha1:
                        hash_obj.update(buf)

                    # BUGFIX: floor division ('/' yields a float on Python 3)
                    blocks = \
                        (len(buf) + self.block_size - 1) // self.block_size
                    self._batch_queue.put(("range", start,
                                           start + blocks - 1, buf))

                if verify and sha1 and hash_obj.hexdigest() != sha1:
                    raise Error("checksum mismatch for blocks range %d-%d: "
                                "calculated %s, should be %s (image file %s)"
                                % (first, last, hash_obj.hexdigest(),
                                   sha1, self._image_path))
        # Silence pylint warning about catching too general exception
        # pylint: disable=W0703
        except Exception:
            # pylint: enable=W0703
            # BUGFIX: the original referenced an undefined 'exc_info' name
            # here, so any reader-thread failure died with a NameError. Pass
            # the exception info to the main thread through the queue - the
            # 'copy()' method re-raises it.
            self._batch_queue.put(("error", sys.exc_info()))
            return

        self._batch_queue.put(None)

    def copy(self, sync=True, verify=True):
        """
        Copy the image to the destination file using bmap. The 'sync'
        argument defines whether the destination file has to be synchronized
        upon return. The 'verify' argument defines whether the SHA1 checksum
        has to be verified while copying.
        """

        # Create the queue for block batches and start the reader thread,
        # which will read the image in batches and put the results to
        # '_batch_queue'.
        self._batch_queue = Queue.Queue(self._batch_queue_len)
        thread.start_new_thread(self._get_data, (verify, ))

        blocks_written = 0
        bytes_written = 0
        fsync_last = 0

        self._progress_started = False
        self._progress_index = 0
        self._progress_time = datetime.datetime.now()

        # Read the image in '_batch_blocks' chunks and write them to the
        # destination file
        while True:
            batch = self._batch_queue.get()
            if batch is None:
                # No more data, the image is written
                break
            elif batch[0] == "error":
                # The reader thread encountered an error and passed us the
                # exception.
                exc_info = batch[1]
                raise exc_info[1].with_traceback(exc_info[2])

            (start, end, buf) = batch[1:4]

            assert len(buf) <= (end - start + 1) * self.block_size
            assert len(buf) > (end - start) * self.block_size

            self._f_dest.seek(start * self.block_size)

            # Synchronize the destination file if we reached the watermark
            if self._dest_fsync_watermark:
                if blocks_written >= fsync_last + self._dest_fsync_watermark:
                    fsync_last = blocks_written
                    self.sync()

            try:
                self._f_dest.write(buf)
            except IOError as err:
                raise Error("error while writing blocks %d-%d of '%s': %s"
                            % (start, end, self._dest_path, err))

            self._batch_queue.task_done()
            blocks_written += (end - start + 1)
            bytes_written += len(buf)

            self._update_progress(blocks_written)

        if not self.image_size:
            # The image size was unknown up until now, set it
            self._set_image_size(bytes_written)

        # This is just a sanity check - we should have written exactly
        # 'mapped_cnt' blocks.
        if blocks_written != self.mapped_cnt:
            raise Error("wrote %u blocks from image '%s' to '%s', but "
                        "should have %u - bmap file '%s' does not belong to "
                        "this image"
                        % (blocks_written, self._image_path, self._dest_path,
                           self.mapped_cnt, self._bmap_path))

        if self._dest_is_regfile:
            # Make sure the destination file has the same size as the image
            try:
                os.ftruncate(self._f_dest.fileno(), self.image_size)
            except OSError as err:
                raise Error("cannot truncate file '%s': %s"
                            % (self._dest_path, err))

        try:
            self._f_dest.flush()
        except IOError as err:
            raise Error("cannot flush '%s': %s" % (self._dest_path, err))

        if sync:
            self.sync()

    def sync(self):
        """
        Synchronize the destination file to make sure all the data are
        actually written to the disk.
        """

        if self._dest_supports_fsync:
            try:
                # BUGFIX: removed the stray trailing comma which made this
                # statement a pointless 1-tuple expression
                os.fsync(self._f_dest.fileno())
            except OSError as err:
                raise Error("cannot synchronize '%s': %s "
                            % (self._dest_path, err.strerror))


class BmapBdevCopy(BmapCopy):
    """
    This class is a specialized version of 'BmapCopy' which copies the image
    to a block device. Unlike the base 'BmapCopy' class, this class does
    various optimizations specific to block devices, e.g., switching to the
    'noop' I/O scheduler.
    """

    def __init__(self, image, dest, bmap=None, image_size=None, logger=None):
        """
        The same as the constructor of the 'BmapCopy' base class, but adds
        useful guard-checks specific to block devices.
        """

        # Call the base class constructor first
        BmapCopy.__init__(self, image, dest, bmap, image_size, logger=logger)

        # BUGFIX: floor division in the two computations below ('/' yields a
        # float on Python 3)
        self._batch_bytes = 1024 * 1024
        self._batch_blocks = self._batch_bytes // self.block_size
        self._batch_queue_len = 6
        self._dest_fsync_watermark = (6 * 1024 * 1024) // self.block_size

        self._sysfs_base = None
        self._sysfs_scheduler_path = None
        self._sysfs_max_ratio_path = None
        self._old_scheduler_value = None
        self._old_max_ratio_value = None

        # If the image size is known, check that it fits the block device
        if self.image_size:
            try:
                bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END)
                os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET)
            except OSError as err:
                # BUGFIX: the message said "seed" instead of "seek"
                raise Error("cannot seek block device '%s': %s "
                            % (self._dest_path, err.strerror))

            if bdev_size < self.image_size:
                raise Error("the image file '%s' has size %s and it will "
                            "not fit the block device '%s' which has %s "
                            "capacity"
                            % (self._image_path, self.image_size_human,
                               self._dest_path, human_size(bdev_size)))

        # Construct the path to the sysfs directory of our block device
        st_rdev = os.fstat(self._f_dest.fileno()).st_rdev
        self._sysfs_base = "/sys/dev/block/%s:%s/" \
                           % (os.major(st_rdev), os.minor(st_rdev))

        # Check if the 'queue' sub-directory exists. If yes, then our block
        # device is an entire disk. Otherwise, it is a partition, in which
        # case we need to go one level up in the sysfs hierarchy.
        if not os.path.exists(self._sysfs_base + "queue"):
            self._sysfs_base = self._sysfs_base + "../"

        self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler"
        self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio"

    def _tune_block_device(self):
        """
        Tune the block device for better performance:
        1. Switch to the 'noop' I/O scheduler if it is available - sequential
           write to the block device becomes a lot faster comparing to CFQ.
        2. Limit the write buffering - we do not need the kernel to buffer a
           lot of the data we send to the block device, because we write
           sequentially.

        The old settings are saved in order to be able to restore them later.
        """

        # Switch to the 'noop' I/O scheduler
        try:
            with open(self._sysfs_scheduler_path, "r+") as f_scheduler:
                contents = f_scheduler.read()
                f_scheduler.seek(0)
                f_scheduler.write("noop")
        except IOError as err:
            self._logger.warning("failed to enable I/O optimization, expect "
                                 "suboptimal speed (reason: cannot switch "
                                 "to the 'noop' I/O scheduler: %s)" % err)
        else:
            # The file contains a list of schedulers with the current
            # scheduler in square brackets, e.g., "noop deadline [cfq]".
            # Fetch the name of the current scheduler.
            match = re.match(r'.*\[(.+)\].*', contents)
            if match:
                self._old_scheduler_value = match.group(1)

        # Limit the write buffering, because we do not need too much of it
        # when writing sequentially. Excessive buffering makes some systems
        # not very responsive, e.g., this was observed in Fedora 17.
        try:
            with open(self._sysfs_max_ratio_path, "r+") as f_ratio:
                self._old_max_ratio_value = f_ratio.read()
                f_ratio.seek(0)
                f_ratio.write("1")
        except IOError as err:
            self._logger.warning("failed to disable excessive buffering, "
                                 "expect worse system responsiveness "
                                 "(reason: cannot set max. I/O ratio to "
                                 "1: %s)" % err)

    def _restore_bdev_settings(self):
        """
        Restore old block device settings which we changed in
        '_tune_block_device()'.
        """

        if self._old_scheduler_value is not None:
            try:
                with open(self._sysfs_scheduler_path, "w") as f_scheduler:
                    f_scheduler.write(self._old_scheduler_value)
            except IOError as err:
                raise Error("cannot restore the '%s' I/O scheduler: %s"
                            % (self._old_scheduler_value, err))

        if self._old_max_ratio_value is not None:
            try:
                with open(self._sysfs_max_ratio_path, "w") as f_ratio:
                    f_ratio.write(self._old_max_ratio_value)
            except IOError as err:
                raise Error("cannot set the max. I/O ratio back to '%s': %s"
                            % (self._old_max_ratio_value, err))

    def copy(self, sync=True, verify=True):
        """
        The same as in the base class but tunes the block device for better
        performance before starting writing. Additionally, it forces block
        device synchronization from time to time in order to make sure we do
        not get stuck in 'fsync()' for too long. The problem is that the
        kernel synchronizes block devices when the file is closed. And the
        result is that if the user interrupts us while we are copying the
        data, the program will be blocked in 'close()' waiting for the block
        device synchronization, which may last minutes for a slow USB stick.
        This is a very bad user experience, and we work around this effect by
        synchronizing from time to time.
        """

        self._tune_block_device()

        try:
            BmapCopy.copy(self, sync, verify)
        finally:
            self._restore_bdev_settings()
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements copying of images with bmap and provides the following +API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. 
class Error(Exception):
    """
    Exceptions raised by the 'BmapCopy' module. A single exception type is
    used; a human-readable description of the problem is carried in the
    exception message.
    """
+ + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the checksum while copying or not. Note, this is done only in case + of bmap-based copying and only if bmap contains checksums (e.g., bmap + version 1.0 did not have checksums support). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. + """ + + def __init__(self, image, dest, bmap=None, image_size=None, log=None): + """ + The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. + log - the logger object to use for printing messages. 
+ """ + + self._log = log + if self._log is None: + self._log = logging.getLogger(__name__) + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 6 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # The bmap file checksum type and length + self._cs_type = None + self._cs_len = None + self._cs_attrib_name = None + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def set_progress_indicator(self, file_obj, format_string): + """ + Setup the progress indicator which shows how much data has been copied + in percent. 
+ + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. + """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ + Set image size and initialize various other geometry-related attributes. + """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ + This is a helper function which verifies the bmap file checksum. + """ + + import mmap + + if self.bmap_version_minor == 3: + correct_chksum = self._xml.find("BmapFileSHA1").text.strip() + else: + correct_chksum = self._xml.find("BmapFileChecksum").text.strip() + + # Before verifying the shecksum, we have to substitute the checksum + # value stored in the file with all zeroes. For these purposes we + # create private memory mapping of the bmap file. 
+ mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + chksum_pos = mapped_bmap.find(correct_chksum) + assert chksum_pos != -1 + + mapped_bmap[chksum_pos:chksum_pos + self._cs_len] = '0' * self._cs_len + + hash_obj = hashlib.new(self._cs_type) + hash_obj.update(mapped_bmap) + calculated_chksum = hash_obj.hexdigest() + + mapped_bmap.close() + + if calculated_chksum != correct_chksum: + raise Error("checksum mismatch for bmap file '%s': calculated " + "'%s', should be '%s'" + % (self._bmap_path, calculated_chksum, correct_chksum)) + + def _parse_bmap(self): + """ + Parse the bmap file and initialize corresponding class instance attributs. + """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + # Extrace the erroneous line with some context + self._f_bmap.seek(0) + xml_extract = "" + for num, line in enumerate(self._f_bmap): + if num >= err.position[0] - 4 and num <= err.position[0] + 4: + xml_extract += "Line %d: %s" % (num, line) + + raise Error("cannot parse the bmap file '%s' which should be a " + "proper XML file: %s, the XML extract:\n%s" % + (self._bmap_path, err, xml_extract)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " + "version %d is not supported" + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + 
self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " + "blocks count (%d bytes != %d blocks * %d bytes)" + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 and the only supported + # checksum type was SHA1. Version 1.4 started supporting arbitrary + # checksum types. A new "ChecksumType" tag was introduce to specify + # the checksum function name. And all XML tags which contained + # "sha1" in their name were renamed to something more neutral. + if self.bmap_version_minor == 3: + self._cs_type = "sha1" + self._cs_attrib_name = "sha1" + else: + self._cs_type = xml.find("ChecksumType").text.strip() + self._cs_attrib_name = "chksum" + + try: + self._cs_len = len(hashlib.new(self._cs_type).hexdigest()) + except ValueError as err: + raise Error("cannot initialize hash function \"%s\": %s" % + (self._cs_type, err)) + self._verify_bmap_checksum() + + def _update_progress(self, blocks_written): + """ + Print the progress indicator if the mapped area size is known and if + the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. 
+ """ + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + self._log.debug("wrote %d blocks out of %d (%d%%)" % + (blocks_written, self.mapped_cnt, percent)) + else: + self._log.debug("wrote %d blocks" % blocks_written) + + if not self._progress_file: + return + + if self.mapped_cnt: + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds=250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ + This is a helper generator that parses the bmap XML file and for each + block range in the XML file it yields ('first', 'last', 'chksum') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'chksum' is the checksum of the range ('None' is used if it is + missing). + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. 
+ """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if self._cs_attrib_name in xml_element.attrib: + chksum = xml_element.attrib[self._cs_attrib_name] + else: + chksum = None + + yield (first, last, chksum) + + def _get_batches(self, first, last): + """ + This is a helper generator which splits block ranges from the bmap file + to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). 
+ """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ + This is generator which reads the image file in '_batch_blocks' chunks + and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. + """ + + try: + for (first, last, chksum) in self._get_block_ranges(): + if verify and chksum: + hash_obj = hashlib.new(self._cs_type) + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " + "image file '%s': %s" + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and chksum: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._log.debug("queueing %d blocks, queue length is %d" % + (blocks, self._batch_queue.qsize())) + + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and chksum and hash_obj.hexdigest() != chksum: + raise Error("checksum mismatch for blocks range %d-%d: " + "calculated %s, should be %s (image file %s)" + % (first, last, hash_obj.hexdigest(), + chksum, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. 
+ reraise(exc_info[0], exc_info[1], exc_info[2]) + + self._batch_queue.put(None) + + def copy(self, sync=True, verify=True): + """ + Copy the image to the destination file using bmap. The 'sync' argument + defines whether the destination file has to be synchronized upon + return. The 'verify' argument defines whether the checksum has to be + verified while copying. + """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. 
+ exc_info = batch[1] + raise exc_info[1].with_traceback(exc_info[2]) + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " + "have %u - bmap file '%s' does not belong to this " + "image" + % (blocks_written, self._image_path, self._dest_path, + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ + Synchronize the destination file to make sure all the data are actually + written to the disk. 
+ """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ + This class is a specialized version of 'BmapCopy' which copies the image to + a block device. Unlike the base 'BmapCopy' class, this class does various + optimizations specific to block devices, e.g., switching to the 'noop' I/O + scheduler. + """ + + def __init__(self, image, dest, bmap=None, image_size=None, log=None): + """ + The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. + """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size, log=log) + + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " + "fit the block device '%s' which has %s capacity" + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" % \ + (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. 
Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" + + def _tune_block_device(self): + """ + Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + self._log.warning("failed to enable I/O optimization, expect " + "suboptimal speed (reason: cannot switch " + "to the 'noop' I/O scheduler: %s)" % err) + else: + # The file contains a list of schedulers with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the name of the current scheduler. + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering, because we do not need too much of it when + # writing sequntially. Excessive buffering makes some systems not very + # responsive, e.g., this was observed in Fedora 17. + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + self._log.warning("failed to disable excessive buffering, expect " + "worse system responsiveness (reason: cannot set " + "max. 
I/O ratio to 1: %s)" % err) + + def _restore_bdev_settings(self): + """ + Restore old block device settings which we changed in + '_tune_block_device()'. + """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" + % (self._old_max_ratio_value, err)) + + def copy(self, sync=True, verify=True): + """ + The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. 
+ """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() diff --git a/tests/oldcodebase/__init__.py b/tests/oldcodebase/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test-data/test.image.bmap.v1.2 b/tests/test-data/test.image.bmap.v1.2 new file mode 100644 index 0000000..9764bbb --- /dev/null +++ b/tests/test-data/test.image.bmap.v1.2 @@ -0,0 +1,90 @@ + + + + + + 821752 + + + 4096 + + + 201 + + + + 0-1 + 3-5 + 9-10 + 12 + 15-18 + 20 + 22 + 24 + 30-32 + 34-35 + 40 + 42-43 + 45 + 47 + 49-50 + 52-53 + 55-56 + 60-63 + 65-67 + 70 + 72 + 78-80 + 82-83 + 85 + 88 + 90-91 + 96 + 98-105 + 111 + 114-116 + 119-133 + 135 + 137 + 140 + 142-144 + 146-147 + 150-151 + 155 + 157 + 159-160 + 163-174 + 177 + 181-186 + 188-189 + 191 + 193 + 195 + 198-199 + + + + 117 + diff --git a/tests/test-data/test.image.bmap.v1.3 b/tests/test-data/test.image.bmap.v1.3 new file mode 100644 index 0000000..5c20964 --- /dev/null +++ b/tests/test-data/test.image.bmap.v1.3 @@ -0,0 +1,94 @@ + + + + + + 821752 + + + 4096 + + + 201 + + + 117 + + + e235f7cd0c6b8c07a2e6f2538510fb763e7790a6 + + + + 0-1 + 3-5 + 9-10 + 12 + 15-18 + 20 + 22 + 24 + 30-32 + 34-35 + 40 + 42-43 + 45 + 47 + 49-50 + 52-53 + 55-56 + 60-63 + 65-67 + 70 + 72 + 78-80 + 82-83 + 85 + 88 + 90-91 + 96 + 98-105 + 111 + 114-116 + 119-133 + 135 + 137 + 140 + 142-144 + 146-147 + 150-151 + 155 + 157 + 159-160 + 163-174 + 177 + 181-186 + 188-189 + 191 + 193 + 195 + 198-199 + + diff --git a/tests/test-data/test.image.bmap.v1.4 b/tests/test-data/test.image.bmap.v1.4 new file mode 100644 index 0000000..b1efaea --- /dev/null +++ b/tests/test-data/test.image.bmap.v1.4 @@ -0,0 +1,97 @@ + + + + + + 821752 + + + 4096 + + + 201 + + + 117 + + + sha256 + + + 4310fd457a88d307abeeb593a7888e1fa3cae0cfc01d905158967c904c5375e5 + + + + 0-1 + 3-5 + 9-10 + 12 + 15-18 + 20 + 22 + 24 + 30-32 + 34-35 + 40 + 42-43 + 45 + 47 + 49-50 + 52-53 + 55-56 + 60-63 + 
65-67 + 70 + 72 + 78-80 + 82-83 + 85 + 88 + 90-91 + 96 + 98-105 + 111 + 114-116 + 119-133 + 135 + 137 + 140 + 142-144 + 146-147 + 150-151 + 155 + 157 + 159-160 + 163-174 + 177 + 181-186 + 188-189 + 191 + 193 + 195 + 198-199 + + diff --git a/tests/test-data/test.image.bmap.v2.0 b/tests/test-data/test.image.bmap.v2.0 new file mode 100644 index 0000000..a9df54d --- /dev/null +++ b/tests/test-data/test.image.bmap.v2.0 @@ -0,0 +1,97 @@ + + + + + + 821752 + + + 4096 + + + 201 + + + 117 + + + sha256 + + + d9cf7d44790d04fcbb089c5eeec7700e9233439ab6e4bd759035906e20f90070 + + + + 0-1 + 3-5 + 9-10 + 12 + 15-18 + 20 + 22 + 24 + 30-32 + 34-35 + 40 + 42-43 + 45 + 47 + 49-50 + 52-53 + 55-56 + 60-63 + 65-67 + 70 + 72 + 78-80 + 82-83 + 85 + 88 + 90-91 + 96 + 98-105 + 111 + 114-116 + 119-133 + 135 + 137 + 140 + 142-144 + 146-147 + 150-151 + 155 + 157 + 159-160 + 163-174 + 177 + 181-186 + 188-189 + 191 + 193 + 195 + 198-199 + + diff --git a/tests/test-data/test.image.gz b/tests/test-data/test.image.gz new file mode 100644 index 0000000..66d7341 Binary files /dev/null and b/tests/test-data/test.image.gz differ diff --git a/tests/test_api_base.py b/tests/test_api_base.py new file mode 100644 index 0000000..dfad251 --- /dev/null +++ b/tests/test_api_base.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. +# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This test verifies the base bmap creation and copying API functionality. 
It +generates a random sparse file, then creates a bmap fir this file and copies it +to a different file using the bmap. Then it compares the original random sparse +file and the copy and verifies that they are identical. +""" + +# Disable the following pylint recommendations: +# * Too many public methods (R0904) +# * Too many local variables (R0914) +# * Too many statements (R0915) +# pylint: disable=R0904 +# pylint: disable=R0914 +# pylint: disable=R0915 + +import os +import sys +import tempfile +import filecmp +import subprocess +from six.moves import zip_longest +from tests import helpers +from bmaptools import BmapHelpers, BmapCreate, Filemap + +# This is a work-around for Centos 6 +try: + import unittest2 as unittest # pylint: disable=F0401 +except ImportError: + import unittest + + +class Error(Exception): + """A class for exceptions generated by this test.""" + pass + + +def _compare_holes(file1, file2): + """ + Make sure that files 'file1' and 'file2' have holes at the same places. + The 'file1' and 'file2' arguments may be full file paths or file objects. + """ + + filemap1 = Filemap.filemap(file1) + filemap2 = Filemap.filemap(file2) + + iterator1 = filemap1.get_unmapped_ranges(0, filemap1.blocks_cnt) + iterator2 = filemap2.get_unmapped_ranges(0, filemap2.blocks_cnt) + + iterator = zip_longest(iterator1, iterator2) + for range1, range2 in iterator: + if range1 != range2: + raise Error("mismatch for hole %d-%d, it is %d-%d in file2" + % (range1[0], range1[1], range2[0], range2[1])) + + +def _generate_compressed_files(file_path, delete=True): + """ + This is a generator which yields compressed versions of a file + 'file_path'. + + The 'delete' argument specifies whether the compressed files that this + generator yields have to be automatically deleted. + """ + + # Make sure the temporary files start with the same name as 'file_obj' in + # order to simplify debugging. + prefix = os.path.splitext(os.path.basename(file_path))[0] + '.' 
+ # Put the temporary files in the directory with 'file_obj' + directory = os.path.dirname(file_path) + + compressors = [("bzip2", None, ".bz2", "-c -k"), + ("pbzip2", None, ".p.bz2", "-c -k"), + ("gzip", None, ".gz", "-c"), + ("pigz", None, ".p.gz", "-c -k"), + ("xz", None, ".xz", "-c -k"), + ("lzop", None, ".lzo", "-c -k"), + ("lz4", None, ".lz4", "-c -k"), + # The "-P -C /" trick is used to avoid silly warnings: + # "tar: Removing leading `/' from member names" + ("bzip2", "tar", ".tar.bz2", "-c -j -O -P -C /"), + ("gzip", "tar", ".tar.gz", "-c -z -O -P -C /"), + ("xz", "tar", ".tar.xz", "-c -J -O -P -C /"), + ("lzop", "tar", ".tar.lzo", "-c --lzo -O -P -C /"), + ("lz4", "tar", ".tar.lz4", "-c -Ilz4 -O -P -C /"), + ("zip", None, ".zip", "-q -j -")] + + for decompressor, archiver, suffix, options in compressors: + if not BmapHelpers.program_is_available(decompressor): + continue + if archiver and not BmapHelpers.program_is_available(archiver): + continue + + tmp_file_obj = tempfile.NamedTemporaryFile('wb+', prefix=prefix, + delete=delete, dir=directory, + suffix=suffix) + + if archiver: + args = archiver + " " + options + " " + file_path + else: + args = decompressor + " " + options + " " + file_path + child_process = subprocess.Popen(args, shell=True, + stderr=subprocess.PIPE, + stdout=tmp_file_obj) + child_process.wait() + tmp_file_obj.flush() + yield tmp_file_obj.name + tmp_file_obj.close() + + +def _do_test(image, image_size, delete=True): + """ + A basic test for the bmap creation and copying functionality. It first + generates a bmap for file 'image', and then copies the sparse file to a + different file, and then checks that the original file and the copy are + identical. + + The 'image_size' argument is size of the image in bytes. The 'delete' + argument specifies whether the temporary files that this function creates + have to be automatically deleted. 
+ """ + + # Make sure the temporary files start with the same name as 'image' in + # order to simplify debugging. + prefix = os.path.splitext(os.path.basename(image))[0] + '.' + # Put the temporary files in the directory with the image + directory = os.path.dirname(image) + + # Create and open a temporary file for a copy of the image + f_copy = tempfile.NamedTemporaryFile("wb+", prefix=prefix, + delete=delete, dir=directory, + suffix=".copy") + + # Create and open 2 temporary files for the bmap + f_bmap1 = tempfile.NamedTemporaryFile("w+", prefix=prefix, + delete=delete, dir=directory, + suffix=".bmap1") + f_bmap2 = tempfile.NamedTemporaryFile("w+", prefix=prefix, + delete=delete, dir=directory, + suffix=".bmap2") + + image_chksum = helpers.calculate_chksum(image) + + # + # Pass 1: generate the bmap, copy and compare + # + + # Create bmap for the random sparse file + creator = BmapCreate.BmapCreate(image, f_bmap1.name) + creator.generate() + + helpers.copy_and_verify_image(image, f_copy.name, f_bmap1.name, + image_chksum, image_size) + + # Make sure that holes in the copy are identical to holes in the random + # sparse file. 
+ _compare_holes(image, f_copy.name) + + # + # Pass 2: same as pass 1, but use file objects instead of paths + # + + creator = BmapCreate.BmapCreate(image, f_bmap2) + creator.generate() + helpers.copy_and_verify_image(image, f_copy.name, f_bmap2.name, + image_chksum, image_size) + _compare_holes(image, f_copy.name) + + # Make sure the bmap files generated at pass 1 and pass 2 are identical + assert filecmp.cmp(f_bmap1.name, f_bmap2.name, False) + + # + # Pass 3: test compressed files copying with bmap + # + + for compressed in _generate_compressed_files(image, delete=delete): + helpers.copy_and_verify_image(compressed, f_copy.name, + f_bmap1.name, image_chksum, image_size) + + # Test without setting the size + helpers.copy_and_verify_image(compressed, f_copy.name, f_bmap1.name, + image_chksum, None) + + # Append a "file:" prefixe to make BmapCopy use urllib + compressed = "file:" + compressed + helpers.copy_and_verify_image(compressed, f_copy.name, f_bmap1.name, + image_chksum, image_size) + helpers.copy_and_verify_image(compressed, f_copy.name, f_bmap1.name, + image_chksum, None) + + # + # Pass 5: copy without bmap and make sure it is identical to the original + # file. 
+ + helpers.copy_and_verify_image(image, f_copy.name, None, image_chksum, + image_size) + helpers.copy_and_verify_image(image, f_copy.name, None, image_chksum, None) + + # + # Pass 6: test compressed files copying without bmap + # + + for compressed in _generate_compressed_files(image, delete=delete): + helpers.copy_and_verify_image(compressed, f_copy.name, f_bmap1.name, + image_chksum, image_size) + + # Test without setting the size + helpers.copy_and_verify_image(compressed, f_copy.name, f_bmap1.name, + image_chksum, None) + + # Append a "file:" prefix to make BmapCopy use urllib + helpers.copy_and_verify_image(compressed, f_copy.name, f_bmap1.name, + image_chksum, image_size) + helpers.copy_and_verify_image(compressed, f_copy.name, f_bmap1.name, + image_chksum, None) + + # Close temporary files, which will also remove them + f_copy.close() + f_bmap1.close() + f_bmap2.close() + + +class TestCreateCopy(unittest.TestCase): + """ + The test class for this unit tests. Basically executes the '_do_test()' + function for different sparse files. + """ + + def test(self): # pylint: disable=R0201 + """ + The test entry point. Executes the '_do_test()' function for files of + different sizes, holes distribution and format. + """ + + # Delete all the test-related temporary files automatically + delete = True + # Create all the test-related temporary files in current directory + directory = '.' + + iterator = helpers.generate_test_files(delete=delete, + directory=directory) + for f_image, image_size, _, _ in iterator: + assert image_size == os.path.getsize(f_image.name) + _do_test(f_image.name, image_size, delete=delete) diff --git a/tests/test_compat.py b/tests/test_compat.py new file mode 100644 index 0000000..a875929 --- /dev/null +++ b/tests/test_compat.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. 
+# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This unit test verifies various compatibility aspects of the BmapCopy module: + * current BmapCopy has to handle all the older bmap formats + * older BmapCopy have to handle all the newer compatible bmap formats +""" + +# Disable the following pylint recommendations: +# * Too many public methods (R0904) +# * Attribute 'XYZ' defined outside __init__ (W0201), because unittest +# classes are not supposed to have '__init__()' +# pylint: disable=R0904 +# pylint: disable=W0201 + +import os +import shutil +import tempfile +from tests import helpers +from bmaptools import TransRead, BmapCopy + +# This is a work-around for Centos 6 +try: + import unittest2 as unittest # pylint: disable=F0401 +except ImportError: + import unittest + +# Test image file name +_IMAGE_NAME = "test.image.gz" +# Test bmap file names template +_BMAP_TEMPL = "test.image.bmap.v" +# Name of the subdirectory where test data are stored +_TEST_DATA_SUBDIR = "test-data" +# Name of the subdirectory where old BmapCopy modules are stored +_OLDCODEBASE_SUBDIR = "oldcodebase" + + +class TestCompat(unittest.TestCase): + """The test class for this unit test.""" + + def test(self): + """The test entry point.""" + + test_data_dir = os.path.join(os.path.dirname(__file__), + _TEST_DATA_SUBDIR) + image_path = os.path.join(test_data_dir, _IMAGE_NAME) + + # Construct the list of bmap files to test + self._bmap_paths = [] + for dentry in os.listdir(test_data_dir): + dentry_path = os.path.join(test_data_dir, dentry) + if 
os.path.isfile(dentry_path) and dentry.startswith(_BMAP_TEMPL): + self._bmap_paths.append(dentry_path) + + # Create and open a temporary file for uncompressed image and its copy + self._f_image = tempfile.NamedTemporaryFile("wb+", prefix=_IMAGE_NAME, + suffix=".image") + self._f_copy = tempfile.NamedTemporaryFile("wb+", prefix=_IMAGE_NAME, + suffix=".copy") + + # Uncompress the test image into 'self._f_image' + f_tmp_img = TransRead.TransRead(image_path) + shutil.copyfileobj(f_tmp_img, self._f_image) + f_tmp_img.close() + self._f_image.flush() + + image_chksum = helpers.calculate_chksum(self._f_image.name) + image_size = os.path.getsize(self._f_image.name) + + # Test the current version of BmapCopy + for bmap_path in self._bmap_paths: + helpers.copy_and_verify_image(image_path, self._f_copy.name, + bmap_path, image_chksum, + image_size) + + # Test the older versions of BmapCopy + self._test_older_bmapcopy() + + self._f_copy.close() + self._f_image.close() + + def _test_older_bmapcopy(self): + """Test older than the current versions of the BmapCopy class.""" + + def import_module(searched_module): + """Search and import a module by its name.""" + + modref = __import__(searched_module) + for name in searched_module.split(".")[1:]: + modref = getattr(modref, name) + return modref + + oldcodebase_dir = os.path.join(os.path.dirname(__file__), + _OLDCODEBASE_SUBDIR) + + # Construct the list of old BmapCopy modules + old_modules = [] + for dentry in os.listdir(oldcodebase_dir): + if dentry.startswith("BmapCopy") and dentry.endswith(".py"): + old_modules.append("tests." + _OLDCODEBASE_SUBDIR + "." + dentry[:-3]) + + for old_module in old_modules: + modref = import_module(old_module) + + for bmap_path in self._bmap_paths: + self._do_test_older_bmapcopy(bmap_path, modref) + + def _do_test_older_bmapcopy(self, bmap_path, modref): + """ + Test an older version of BmapCopy class, referenced to by the 'modref' + argument. 
The 'bmap_path' argument is the bmap file path to test with. + """ + + # Get a reference to the older BmapCopy class object to test with + old_bmapcopy_class = getattr(modref, "BmapCopy") + supported_ver = getattr(modref, "SUPPORTED_BMAP_VERSION") + + f_bmap = open(bmap_path, "r") + + # Find the version of the bmap file. The easiest is to simply use the + # latest BmapCopy. + bmapcopy = BmapCopy.BmapCopy(self._f_image, self._f_copy, f_bmap) + bmap_version = bmapcopy.bmap_version + bmap_version_major = bmapcopy.bmap_version_major + + try: + if supported_ver >= bmap_version: + writer = old_bmapcopy_class(self._f_image, self._f_copy, f_bmap) + writer.copy(True, True) + except: # pylint: disable=W0702 + if supported_ver >= bmap_version_major: + # The BmapCopy which we are testing is supposed to support this + # version of bmap file format. However, bmap format version 1.4 + # was a screw-up, because it actually had incompatible changes, + # so old versions of BmapCopy are supposed to fail. + if not (supported_ver == 1 and bmap_version == "1.4"): + print("Module \"%s\" failed to handle \"%s\"" % + (modref.__name__, bmap_path)) + raise + + f_bmap.close() diff --git a/tests/test_filemap.py b/tests/test_filemap.py new file mode 100644 index 0000000..c18dd8a --- /dev/null +++ b/tests/test_filemap.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et ai si +# +# Copyright (c) 2012-2014 Intel, Inc. +# License: GPLv2 +# Author: Artem Bityutskiy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This test verifies 'Filemap' module functionality. 
It generates random sparse +files and makes sure the module returns correct information about the holes. +""" + +# Disable the following pylint recommendations: +# * Too many public methods - R0904 +# * Too many arguments - R0913 +# pylint: disable=R0904 +# pylint: disable=R0913 + +import sys +import random +import itertools +import tests.helpers +from six.moves import zip_longest +from bmaptools import Filemap + +# This is a work-around for Centos 6 +try: + import unittest2 as unittest # pylint: disable=F0401 +except ImportError: + import unittest + + +class Error(Exception): + """A class for exceptions generated by this test.""" + pass + + +def _check_ranges(f_image, filemap, first_block, blocks_cnt, + ranges, ranges_type): + """ + This is a helper function for '_do_test()' which compares the correct + 'ranges' list of mapped or unmapped blocks ranges for file object 'f_image' + with what the 'Filemap' module reports. The 'ranges_type' argument defines + whether the 'ranges' list is a list of mapped or unmapped blocks. The + 'first_block' and 'blocks_cnt' define the subset of blocks in 'f_image' + that should be verified by this function. + """ + + if ranges_type is "mapped": + filemap_iterator = filemap.get_mapped_ranges(first_block, blocks_cnt) + elif ranges_type is "unmapped": + filemap_iterator = filemap.get_unmapped_ranges(first_block, blocks_cnt) + else: + raise Error("incorrect list type") + + last_block = first_block + blocks_cnt - 1 + + # The 'ranges' list contains all ranges, from block zero to the last + # block. However, we are conducting a test for 'blocks_cnt' of blocks + # starting from block 'first_block'. Create an iterator which filters + # those block ranges from the 'ranges' list, that are out of the + # 'first_block'/'blocks_cnt' file region. 
+ ranges_iterator = (x for x in ranges if x[1] >= first_block and + x[0] <= last_block) + iterator = zip_longest(ranges_iterator, filemap_iterator) + + # Iterate over both - the (filtered) 'ranges' list which contains correct + # ranges and the Filemap generator, and verify the mapped/unmapped ranges + # returned by the 'Filemap' module. + for correct, check in iterator: + + # The first and the last range of the filtered 'ranges' list may still + # be out of the limit - correct them in this case + if correct[0] < first_block: + correct = (first_block, correct[1]) + if correct[1] > last_block: + correct = (correct[0], last_block) + + if check[0] > check[1] or check != correct: + raise Error("bad or unmatching %s range for file '%s': correct " + "is %d-%d, get_%s_ranges(%d, %d) returned %d-%d" + % (ranges_type, f_image.name, correct[0], correct[1], + ranges_type, first_block, blocks_cnt, + check[0], check[1])) + + for block in range(correct[0], correct[1] + 1): + if ranges_type is "mapped" and filemap.block_is_unmapped(block): + raise Error("range %d-%d of file '%s' is mapped, but" + "'block_is_unmapped(%d) returned 'True'" + % (correct[0], correct[1], f_image.name, block)) + if ranges_type is "unmapped" and filemap.block_is_mapped(block): + raise Error("range %d-%d of file '%s' is unmapped, but" + "'block_is_mapped(%d) returned 'True'" + % (correct[0], correct[1], f_image.name, block)) + + +def _do_test(f_image, filemap, mapped, unmapped): + """ + Verify that the 'Filemap' module provides correct mapped and unmapped areas + for the 'f_image' file object. The 'mapped' and 'unmapped' lists contain + the correct ranges. The 'filemap' is one of the classed from the 'Filemap' + module. + """ + + # Check both 'get_mapped_ranges()' and 'get_unmapped_ranges()' for the + # entire file. 
+ first_block = 0 + blocks_cnt = filemap.blocks_cnt + _check_ranges(f_image, filemap, first_block, blocks_cnt, mapped, "mapped") + _check_ranges(f_image, filemap, first_block, blocks_cnt, unmapped, + "unmapped") + + # Select a random area in the file and repeat the test few times + for _ in range(0, 10): + first_block = random.randint(0, filemap.blocks_cnt - 1) + blocks_cnt = random.randint(1, filemap.blocks_cnt - first_block) + _check_ranges(f_image, filemap, first_block, blocks_cnt, mapped, + "mapped") + _check_ranges(f_image, filemap, first_block, blocks_cnt, unmapped, + "unmapped") + + +class TestFilemap(unittest.TestCase): + """ + The test class for this unit tests. Basically executes the '_do_test()' + function for different sparse files. + """ + + def test(self): # pylint: disable=R0201 + """ + The test entry point. Executes the '_do_test()' function for files of + different sizes, holes distribution and format. + """ + + # Delete all the test-related temporary files automatically + delete = True + # Create all the test-related temporary files in current directory + directory = '.' 
+ # Maximum size of the random files used in this test + max_size = 16 * 1024 * 1024 + + iterator = tests.helpers.generate_test_files(max_size, directory, + delete) + for f_image, _, mapped, unmapped in iterator: + try: + fiemap = Filemap.FilemapFiemap(f_image) + _do_test(f_image, fiemap, mapped, unmapped) + + seek = Filemap.FilemapSeek(f_image) + _do_test(f_image, seek, mapped, unmapped) + except Filemap.ErrorNotSupp: + pass -- cgit v1.2.3 From 711014cf9a0002b1de668bc36a46bead9bd92450 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 10 Dec 2018 15:43:52 +0000 Subject: Import bmap-tools_3.5-2.debian.tar.xz [dgit import tarball bmap-tools 3.5-2 bmap-tools_3.5-2.debian.tar.xz] --- changelog | 317 +++++++++++++++++++++ compat | 1 + control | 40 +++ copyright | 52 ++++ gbp.conf | 6 + manpages | 1 + ...ap-catch-StopIteration-from-next-iterator.patch | 32 +++ patches/series | 1 + rules | 11 + source/format | 1 + tests/control | 3 + tests/smoke | 25 ++ tests/test-webserver.sh | 48 ++++ watch | 2 + 14 files changed, 540 insertions(+) create mode 100644 changelog create mode 100644 compat create mode 100644 control create mode 100644 copyright create mode 100644 gbp.conf create mode 100644 manpages create mode 100644 patches/Filemap-catch-StopIteration-from-next-iterator.patch create mode 100644 patches/series create mode 100755 rules create mode 100644 source/format create mode 100644 tests/control create mode 100755 tests/smoke create mode 100755 tests/test-webserver.sh create mode 100644 watch diff --git a/changelog b/changelog new file mode 100644 index 0000000..083b997 --- /dev/null +++ b/changelog @@ -0,0 +1,317 @@ +bmap-tools (3.5-2) unstable; urgency=medium + + * d/p/Filemap-catch-StopIteration-from-next-iterator.patch: + Fix FTBFS with Python 3.7 (Closes: #915686) + + -- Simon McVittie Mon, 10 Dec 2018 15:43:52 +0000 + +bmap-tools (3.5-1) unstable; urgency=medium + + * New upstream release + - Drop all patches, applied upstream + - Silences warnings with 
blk-mq (Closes: #906328) + * Depend and build-depend on python3-six + * Remove unnecessary XS-Python-Version. python3 versions < 3.3 are + no longer supported in any supported Debian release, and in any case + this was the wrong field (it should have been X-Python3-Version). + * d/watch: Download GitHub's autogenerated archive files since upstream + no longer releases a canonical signed tarball + * Install upstream release notes as NEWS.gz (Policy §12.7) + * Standards-Version: 4.2.1 (no further changes required) + + -- Simon McVittie Fri, 28 Sep 2018 10:19:41 +0100 + +bmap-tools (3.4-2) unstable; urgency=medium + + * Standards-Version: 4.1.4 (no changes required) + * Add proposed patches from upstream pull requests to fix image + downloading under Python 3 (Closes: #896182) and to fix downloading + compressed files + * debian/tests: Add a simple autopkgtest + * debian/gbp.conf: Don't put a numbered prefix on patches + + -- Simon McVittie Wed, 25 Apr 2018 20:47:22 +0100 + +bmap-tools (3.4-1) unstable; urgency=medium + + * New upstream release + - Drop all patches, applied upstream + - Suggest lz4 and unzip for newly-supported decompressors + - d/copyright: Update + * Update Vcs-* for move to salsa.debian.org + * Standards-Version: 4.1.3 (no changes required) + * d/gbp.conf: Merge from upstream VCS tags + * d/watch, Homepage, Source: Follow new upstream location + * Use Python 3 now that upstream supports it (Closes: #830877) + * Demote python3-gpgme from Recommends to Suggests + * Use pybuild + * d/p/BmapHelpers.py-fix-Errno-25-Inappropriate-ioctl-for-devic.patch, + d/p/Fix-errno-usage.patch: + Apply some post-release bug fixes from upstream git + + -- Simon McVittie Mon, 26 Feb 2018 09:13:43 +0000 + +bmap-tools (3.2-6) unstable; urgency=medium + + * Standards-Version: 4.1.1 + - d/copyright: Use https URL for Format + * Rules-Requires-Root: no + * Move to debhelper compat level 10 + + -- Simon McVittie Mon, 30 Oct 2017 21:08:06 +0000 + +bmap-tools (3.2-5) unstable; 
urgency=medium + + * d/p/*-TransRead-Fix-differentiating-between-local-files-an.patch: + - Added. Fix downloads from https (Closes: #831655) + * Add myself to Uploaders + + -- Sjoerd Simons Fri, 11 Nov 2016 15:17:42 +0100 + +bmap-tools (3.2-4) unstable; urgency=medium + + * Switch Vcs-Git to https (see #810378) + * Switch Vcs-Browser to https and cgit + * Standards-Version: 3.9.8 (no changes needed) + * Use DEP-14 branch names debian/master, upstream/latest + * Downgrade all decompressors (bzip2, lzop, xz-utils) to Recommends. + Each decompressor is only used when operating on the relevant file + format, so none are hard dependencies. + * debian/control: normalize via "wrap-and-sort -abst" + + -- Simon McVittie Thu, 07 Jul 2016 09:44:04 +0100 + +bmap-tools (3.2-3) unstable; urgency=high + + * Downgrade python-gpgme from Depends to Recommends. It is only + used if the bmap file comes with a detached signature, and is + conditionally imported. It seems pygpgme is in danger of being removed + from Debian testing, and it would be good to avoid losing bmap-tools. 
+ * Standards-Version: 3.9.6 (no changes needed) + * Build-depend on dh-python + * Release with high urgency to get it into testing before pygpgme is + removed + + -- Simon McVittie Wed, 13 Jan 2016 20:15:15 +0000 + +bmap-tools (3.2-2) unstable; urgency=medium + + * Add patch to make the child process for transparent uncompression + read the compressed file directly if possible, avoiding blocking + forever in join() if the read thread does not terminate + + -- Simon McVittie Tue, 03 Jun 2014 15:37:17 +0100 + +bmap-tools (3.2-1) unstable; urgency=low + + * New upstream release + * Correct debian/changelog syntax in entries from upstream + * Add some patches from upstream to improve the man page + * Remove python-lzma from Depends, no longer used + * Adjust dependencies on (de)compressors: + - do not explicitly depend on gzip or tar, they're already Essential: yes + - demote pbzip2 and pigz to Suggests, they're not strictly necessary + * Standards-Version: 3.9.5 (no changes needed) + + -- Simon McVittie Sun, 09 Mar 2014 13:25:56 +0000 + +bmap-tools (3.2) unstable; urgency=low + + * Add support for LZO and archives ('.lzo' and '.tar.lzo'). + * Add support for multi-stream bzip2 archives (creted with "pbzip2"). + * Support tmpfs by using the SEEK_HOLE method instead of FIEMAP. + * Use external tools like 'gzip' and 'bzip2' for decompressing, instead of + using internal python libraries. + + -- Artem Bityutskiy Wed, 19 Feb 2014 16:50:12 +0200 + +bmap-tools (3.2~rc2) unstable; urgency=low + + * Bump the version number to 3.2~rc2. 
+ + -- Artem Bityutskiy Fri, 31 Jan 2014 12:54:42 +0200 + +bmap-tools (3.1) unstable; urgency=low + + * Change bmap format version from 1.4 to 2.0, because there are incompatible + changes in 1.4 comparing to 1.3, so the right version number is 2.0 + * Add backward and forward bmap format compatibility unit-tests + + -- Artem Bityutskiy Thu, 07 Nov 2013 17:26:57 +0200 + +bmap-tools (3.0) unstable; urgency=low + + * Switch from using SHA1 for checksumming to SHA256. + * Start supporting OpenPGP signatures. Both detached and clearsign signatures + are supported. + * Always sync the image file before creating the bmap for it, to work-around + kernel bugs in early FIEMAP implementations. + + -- Artem Bityutskiy Wed, 02 Oct 2013 09:30:22 +0300 + +bmap-tools (2.6) unstable; urgency=low + + * Add support for on-the-fly decompression of '.xz' and '.tar.xz' files. + + -- Artem Bityutskiy Tue, 13 Aug 2013 14:53:49 +0300 + +bmap-tools (2.5-1) unstable; urgency=low + + * Initial release to Debian (Closes: #714280) + * Set myself as Debian maintainer + * Add debian/gbp.conf + * Add debian/source/format + * Add debian/watch + * Standards-Version: 3.9.4 (no changes needed) + + -- Simon McVittie Mon, 19 Aug 2013 15:30:58 +0100 + +bmap-tools (2.5) unstable; urgency=low + + * Do not fail when lacking permisssions for accessing block device's sysfs + files. + * Improve debian packaging. + + -- Artem Bityutskiy Mon, 05 Aug 2013 10:05:09 +0300 + +bmap-tools (2.4) unstable; urgency=low + + * Add support for ssh:// URLs. + + -- Artem Bityutskiy Wed, 05 Jun 2013 18:15:41 +0300 + +bmap-tools (2.3) unstable; urgency=low + + * Add bmap file SHA1 verification, make tests work on btrfs. + + -- Artem Bityutskiy Mon, 06 May 2013 10:58:32 +0300 + +bmap-tools (2.2) unstable; urgency=low + + * Support username and password in URLs. + + -- Artem Bityutskiy Mon, 11 Mar 2013 14:40:17 +0200 + +bmap-tools (2.1) unstable; urgency=low + + * Fix out of memory issues when copying .bz2 files. 
+ + -- Artem Bityutskiy Mon, 18 Feb 2013 16:38:32 +0200 + +bmap-tools (2.0) unstable; urgency=low + + * Fix the an issue with running out of memory in TransRead.py. + + -- Artem Bityutskiy Thu, 17 Jan 2013 11:33:15 +0200 + +bmap-tools (2.0~rc5) unstable; urgency=low + + * When block device optimzations fail - raise an exception except of muting + the error, because we really want to know about these failures and possibly + fix them. + + -- Artem Bityutskiy Tue, 15 Jan 2013 14:51:27 +0200 + +bmap-tools (2.0~rc4) unstable; urgency=low + + * Fix bmap autodiscovery. + + -- Artem Bityutskiy Thu, 10 Jan 2013 13:58:07 +0200 + +bmap-tools (2.0~rc3) unstable; urgency=low + + * Fix uncaught urllib2 exception bug introduced in rc1. + + -- Artem Bityutskiy Mon, 07 Jan 2013 10:19:49 +0200 + +bmap-tools (2.0~rc2) unstable; urgency=low + + * Fix writing to block devices, which was broken in rc1. + * Make the informational messages a bit nicer. + + -- Artem Bityutskiy Fri, 04 Jan 2013 09:52:41 +0200 + +bmap-tools (2.0~rc1) unstable; urgency=low + + * Allow copying without bmap only if --nobmap was specified. + * Auto-discover the bmap file. + * Support reading from URLs. + * Implement progress bar. + * Highlight error and warning messages with red and yellow labels. + + -- Artem Bityutskiy Thu, 20 Dec 2012 10:47:00 +0200 + +bmap-tools (1.0) unstable; urgency=low + + * Release version 1.0 of the tools - almost identical to 1.0~rc7 except of few + minor differences like spelling fixes. + + -- Artem Bityutskiy Mon, 03 Dec 2012 10:00:33 +0200 + +bmap-tools (1.0~rc7) unstable; urgency=low + + * Add a Fiemap.py module which implements python API to the linux FIEMAP ioct. + * Use the FIEMAP ioctl properly and optimally. + * Add unit-tests, current test coverage is 66%. + * A lot of core rerafactoring. + * Several bug fixes in 'BmapCopy' (e.g., .tar.gz format support was broken). + * Add README and RELEASE_NOTES files. 
+ + -- Artem Bityutskiy Thu, 29 Nov 2012 12:29:39 +0200 + +bmap-tools (0.6) unstable; urgency=low + + * Improve the base API test to cover the case when there is no bmap. + * Fix a bug when copying without bmap. + + -- Artem Bityutskiy Wed, 21 Nov 2012 16:43:49 +0200 + +bmap-tools (0.5) unstable; urgency=low + + * Fix handling of bmap files which contain ranges with only one block. + * Restore the block device settings which we change on exit. + * Change block device settings correctly for partitions. + * Rework API modules to accept file-like objects, not only paths. + * Fix and silence pylint warnings. + * Implement the base API test-case. + + -- Artem Bityutskiy Tue, 20 Nov 2012 15:40:30 +0200 + +bmap-tools (0.4) unstable; urgency=low + + * Improved compressed images flashing speed by exploiting multiple threads: + now we read/decompress the image in one thread and write it in a different + thread. + + -- Artem Bityutskiy Wed, 14 Nov 2012 12:35:06 +0200 + +bmap-tools (0.3) unstable; urgency=low + + * Fix flashing speed calculations + * Fix the Ctrl-C freeze issue - now we synchronize the block device + periodically so if a Ctrl-C interruption happens, we terminate withen few + seconds. + + -- Artem Bityutskiy Tue, 13 Nov 2012 10:56:11 +0200 + +bmap-tools (0.2) unstable; urgency=low + + * Release 0.2 - mostly internal code re-structuring and renamings, + not much functional changes. + * The 'bmap-flasher' and 'bmap-creator' tools do not exist anymore. Now + we have 'bmaptool' which supports 'copy' and 'create' sub-commands instead. + * The BmapFlasher module was also re-named to BmapCopy. + + -- Artem Bityutskiy Fri, 09 Nov 2012 12:20:37 +0200 + +bmap-tools (0.1.1) unstable; urgency=low + + * Release 0.1.1 - a lot of fixes and speed improvements. + + -- Artem Bityutskiy Wed, 07 Nov 2012 11:36:29 +0200 + +bmap-tools (0.1.0) unstable; urgency=low + + * Initial release. 
+ + -- Ed Bartosh Sun, 27 Oct 2012 22:31:28 +0300 diff --git a/compat b/compat new file mode 100644 index 0000000..f599e28 --- /dev/null +++ b/compat @@ -0,0 +1 @@ +10 diff --git a/control b/control new file mode 100644 index 0000000..9d5d067 --- /dev/null +++ b/control @@ -0,0 +1,40 @@ +Source: bmap-tools +Maintainer: Simon McVittie +Uploaders: Sjoerd Simons +Section: utils +Priority: optional +Build-Depends: + debhelper (>= 10~), + dh-python, + python3, + python3-setuptools, + python3-six, +Standards-Version: 4.2.1 +Vcs-Git: https://salsa.debian.org/debian/bmap-tools.git +Vcs-Browser: https://salsa.debian.org/debian/bmap-tools +Rules-Requires-Root: no +Homepage: https://github.com/intel/bmap-tools + +Package: bmap-tools +Architecture: all +Depends: + python3 (>= 3.3), + python3-six, + ${misc:Depends}, + ${python3:Depends}, +Recommends: + bzip2, + lzop, + xz-utils, +Suggests: + lz4, + pbzip2, + pigz, + python3-gpgme, + unzip, +Description: tool to flash image files to block devices using the block map + bmaptool is a generic tool for creating the block map (bmap) for a + file, and copying files using the block map. The idea is that large + file containing unused blocks, like raw system image files, can be + copied or flashed a lot faster with bmaptool than with traditional + tools like "dd" or "cp". diff --git a/copyright b/copyright new file mode 100644 index 0000000..146b3d6 --- /dev/null +++ b/copyright @@ -0,0 +1,52 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-name: bmap-tools +Source: https://github.com/intel/bmap-tools/releases +Comment: + The initial package was put together by Ed Bartosh + on Sun Oct 27 22:32:19 EEST 2012. + +Files: * +Copyright: © 2012-2014 Intel, Inc. +License: GPL-2 + +Files: debian/* +Copyright: + © 2012-2013 Intel, Inc. + © 2014-2018 Simon McVittie +License: GPL-2 + +Files: debian/tests/test-webserver.sh +Copyright: + © 2017 Red Hat Inc. 
+License: LGPL-2.1+ + +License: GPL-2 + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + . + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. +Comment: + On Debian systems, the full text of the GPL v2 can be found + in /usr/share/common-licenses/GPL-2. + +License: LGPL-2.1+ + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + . + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + . + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +Comment: + On Debian systems, the full text of the GNU Lesser General Public License + version 2.1 can be found in the file '/usr/share/common-licenses/LGPL-2.1'. 
diff --git a/gbp.conf b/gbp.conf new file mode 100644 index 0000000..76af9f5 --- /dev/null +++ b/gbp.conf @@ -0,0 +1,6 @@ +[DEFAULT] +debian-branch = debian/master +upstream-branch = upstream/latest +pristine-tar = True +upstream-vcs-tag = v%(version)s +patch-numbers = False diff --git a/manpages b/manpages new file mode 100644 index 0000000..8a9b6db --- /dev/null +++ b/manpages @@ -0,0 +1 @@ +docs/man1/bmaptool.1 diff --git a/patches/Filemap-catch-StopIteration-from-next-iterator.patch b/patches/Filemap-catch-StopIteration-from-next-iterator.patch new file mode 100644 index 0000000..2c5a627 --- /dev/null +++ b/patches/Filemap-catch-StopIteration-from-next-iterator.patch @@ -0,0 +1,32 @@ +From: Simon McVittie +Date: Mon, 10 Dec 2018 15:23:47 +0000 +Subject: Filemap: catch StopIteration from next(iterator) + +In Python >= 3.7, if code in a generator raises StopIteration, it is +transformed into a RuntimeError instead of terminating the generator +gracefully. + +Bug: https://github.com/intel/bmap-tools/issues/57 +Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=915686 +Forwarded: https://github.com/intel/bmap-tools/pull/58 +--- + bmaptools/Filemap.py | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/bmaptools/Filemap.py b/bmaptools/Filemap.py +index 3e56798..e06e654 100644 +--- a/bmaptools/Filemap.py ++++ b/bmaptools/Filemap.py +@@ -476,7 +476,11 @@ class FilemapFiemap(_FilemapBase): + _log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + iterator = self._do_get_mapped_ranges(start, count) +- first_prev, last_prev = next(iterator) ++ ++ try: ++ first_prev, last_prev = next(iterator) ++ except StopIteration: ++ return + + for first, last in iterator: + if last_prev == first - 1: diff --git a/patches/series b/patches/series new file mode 100644 index 0000000..49b7bd0 --- /dev/null +++ b/patches/series @@ -0,0 +1 @@ +Filemap-catch-StopIteration-from-next-iterator.patch diff --git a/rules 
b/rules new file mode 100755 index 0000000..8c6b17f --- /dev/null +++ b/rules @@ -0,0 +1,11 @@ +#!/usr/bin/make -f + +%: + dh $@ --with=python3 --buildsystem=pybuild + +override_dh_auto_install: + dh_auto_install + install -d debian/bmap-tools/usr/share/doc/bmap-tools + install -m644 docs/RELEASE_NOTES debian/bmap-tools/usr/share/doc/bmap-tools/NEWS + install __main__.py debian/bmap-tools/usr/bin/bmaptool + sed -i -e '1s,.*,#!/usr/bin/python3,' debian/bmap-tools/usr/bin/bmaptool diff --git a/source/format b/source/format new file mode 100644 index 0000000..163aaf8 --- /dev/null +++ b/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/tests/control b/tests/control new file mode 100644 index 0000000..96130c1 --- /dev/null +++ b/tests/control @@ -0,0 +1,3 @@ +Tests: smoke +Depends: bmap-tools, dosfstools, python3 +Restrictions: allow-stderr diff --git a/tests/smoke b/tests/smoke new file mode 100755 index 0000000..667ac2f --- /dev/null +++ b/tests/smoke @@ -0,0 +1,25 @@ +#!/bin/sh + +set -eu + +here="$(dirname "$0")" +here="$(readlink -f "$here")" + +cd "${AUTOPKGTEST_TMP:-"${ADTTMP}"}" + +mkdir webroot +"$here/test-webserver.sh" webroot + +truncate -s10M webroot/filesystem.img +/sbin/mkfs.vfat webroot/filesystem.img +bmaptool create webroot/filesystem.img > webroot/filesystem.img.bmap +cat webroot/filesystem.img.bmap + +bmaptool copy webroot/filesystem.img filesystem.out +diff -s webroot/filesystem.img filesystem.out + +gzip -9n webroot/filesystem.img +bmaptool copy "$(cat httpd-address)/filesystem.img.gz" filesystem.out2 +diff -s filesystem.out filesystem.out2 + +kill "$(cat httpd-pid)" diff --git a/tests/test-webserver.sh b/tests/test-webserver.sh new file mode 100755 index 0000000..a1c778e --- /dev/null +++ b/tests/test-webserver.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Taken from Flatpak + +# Copyright 2017 Red Hat, Inc. 
+# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# On Debian systems, the full text of the GNU Lesser General Public License +# version 2.1 can be found in the file '/usr/share/common-licenses/LGPL-2.1'. + +set -euo pipefail + +dir=$1 +test_tmpdir=$(pwd) + +cd ${dir} +env PYTHONUNBUFFERED=1 setsid python3 -m http.server 0 >${test_tmpdir}/httpd-output & +child_pid=$! + +for x in $(seq 50); do + # Snapshot the output + cp ${test_tmpdir}/httpd-output{,.tmp} + # If it's non-empty, see whether it matches our regexp + if test -s ${test_tmpdir}/httpd-output.tmp; then + sed -e 's,Serving HTTP on 0.0.0.0 port \([0-9]*\) .*,\1,' < ${test_tmpdir}/httpd-output.tmp > ${test_tmpdir}/httpd-port + if ! 
cmp ${test_tmpdir}/httpd-output.tmp ${test_tmpdir}/httpd-port 1>/dev/null; then + # If so, we've successfully extracted the port + break + fi + fi + sleep 0.1 +done +port=$(cat ${test_tmpdir}/httpd-port) +echo "http://127.0.0.1:${port}" > ${test_tmpdir}/httpd-address +echo "$child_pid" > ${test_tmpdir}/httpd-pid diff --git a/watch b/watch new file mode 100644 index 0000000..0063adb --- /dev/null +++ b/watch @@ -0,0 +1,2 @@ +version=4 +https://github.com/intel/@PACKAGE@/tags .*/v@ANY_VERSION@@ARCHIVE_EXT@ -- cgit v1.2.3 From b7d910d07e4afb34a3782b778943680d441cb343 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 10 Dec 2018 15:23:47 +0000 Subject: Filemap: catch StopIteration from next(iterator) In Python >= 3.7, if code in a generator raises StopIteration, it is transformed into a RuntimeError instead of terminating the generator gracefully. Bug: https://github.com/intel/bmap-tools/issues/57 Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=915686 Forwarded: https://github.com/intel/bmap-tools/pull/58 Gbp-Pq: Name Filemap-catch-StopIteration-from-next-iterator.patch --- bmaptools/Filemap.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bmaptools/Filemap.py b/bmaptools/Filemap.py index 3e56798..e06e654 100644 --- a/bmaptools/Filemap.py +++ b/bmaptools/Filemap.py @@ -476,7 +476,11 @@ class FilemapFiemap(_FilemapBase): _log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))" % (start, count, start + count - 1)) iterator = self._do_get_mapped_ranges(start, count) - first_prev, last_prev = next(iterator) + + try: + first_prev, last_prev = next(iterator) + except StopIteration: + return for first, last in iterator: if last_prev == first - 1: -- cgit v1.2.3 From 6ed6fc95c4676a5e69874ec580ce5d9ed989dca2 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 10 Dec 2018 15:23:47 +0000 Subject: Filemap: catch StopIteration from next(iterator) In Python >= 3.7, if code in a generator raises StopIteration, it is transformed into 
a RuntimeError instead of terminating the generator gracefully. Bug: https://github.com/intel/bmap-tools/issues/57 Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=915686 Forwarded: https://github.com/intel/bmap-tools/pull/58 Applied-upstream: 3.6, commit:2d3d0aeead0ac1b1f5e9fa5ef351aac8b14b5da9 Gbp-Pq: Name Filemap-catch-StopIteration-from-next-iterator.patch --- bmaptools/Filemap.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bmaptools/Filemap.py b/bmaptools/Filemap.py index 3e56798..e06e654 100644 --- a/bmaptools/Filemap.py +++ b/bmaptools/Filemap.py @@ -476,7 +476,11 @@ class FilemapFiemap(_FilemapBase): _log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))" % (start, count, start + count - 1)) iterator = self._do_get_mapped_ranges(start, count) - first_prev, last_prev = next(iterator) + + try: + first_prev, last_prev = next(iterator) + except StopIteration: + return for first, last in iterator: if last_prev == first - 1: -- cgit v1.2.3 From f7e0ef7a9ab0ba1b305b14bcd5f6f7052a6bdb7e Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 24 Jun 2019 09:12:03 +0100 Subject: test_api_base: Skip test if filesystem is unsuitable When run on disorderfs (an artificial FUSE filesystem used by the Reproducible Builds project to detect filesystem order dependencies), we cannot map the file to detect holes. The same is likely to be true for other simple FUSE filesystems. Signed-off-by: Simon McVittie Forwarded: https://github.com/intel/bmap-tools/pull/59 Applied-upstream: 3.6, commit:0a3609b29250822aef918cb2913cc6360b9053a0 Gbp-Pq: Name test_api_base-Skip-test-if-filesystem-is-unsuitable.patch --- tests/test_api_base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_api_base.py b/tests/test_api_base.py index dfad251..093ccf0 100644 --- a/tests/test_api_base.py +++ b/tests/test_api_base.py @@ -135,6 +135,12 @@ def _do_test(image, image_size, delete=True): have to be automatically deleted. 
""" + try: + Filemap.filemap(image) + except Filemap.ErrorNotSupp as e: + sys.stderr.write('%s\n' % e) + return + # Make sure the temporary files start with the same name as 'image' in # order to simplify debugging. prefix = os.path.splitext(os.path.basename(image))[0] + '.' -- cgit v1.2.3 From 01de0f9a24e9082e46f975359753d20dcdaf238d Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Wed, 3 Feb 2021 09:15:05 +0000 Subject: Correct parameter name when documenting --psplash-pipe in the man page Signed-off-by: Simon McVittie Forwarded: https://github.com/intel/bmap-tools/pull/80 Applied-upstream: 3.7, commit:15e6a6366d16f11e836754f147635fb963340719 Gbp-Pq: Name Correct-parameter-name-when-documenting-psplash-pipe-in-t.patch --- docs/man1/bmaptool.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/man1/bmaptool.1 b/docs/man1/bmaptool.1 index dedff9b..6d4ddc2 100644 --- a/docs/man1/bmaptool.1 +++ b/docs/man1/bmaptool.1 @@ -209,7 +209,7 @@ IMAGE matches the checksums. .RE .PP -\-\-no-sig-verify +\-\-psplash\-pipe PATH .RS 2 Write progress to a psplash pipe. .RE -- cgit v1.2.3 From 1d74baa0e85170639854eff97cde13b9ed9169bf Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Wed, 3 Feb 2021 09:15:31 +0000 Subject: Expand documentation of --psplash-pipe to specify what is reported Signed-off-by: Simon McVittie Forwarded: https://github.com/intel/bmap-tools/pull/80 Applied-upstream: 3.7, commit:a8a8a2adfcd816d253cf203c6079ced8bbbbb4e7 Gbp-Pq: Name Expand-documentation-of-psplash-pipe-to-specify-what-is-r.patch --- docs/man1/bmaptool.1 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/man1/bmaptool.1 b/docs/man1/bmaptool.1 index 6d4ddc2..e6bbb2f 100644 --- a/docs/man1/bmaptool.1 +++ b/docs/man1/bmaptool.1 @@ -211,7 +211,9 @@ IMAGE matches the checksums. .PP \-\-psplash\-pipe PATH .RS 2 -Write progress to a psplash pipe. +Write periodic machine-readable progress reports to a fifo in the format +used by \fBpsplash\fR. 
Each progress report consists of "PROGRESS" followed +by a space, an integer percentage and a newline. .RE .RE -- cgit v1.2.3 From 97aed8d756e14ad2ce718bb6613a163f9fc5a1b5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 10 Mar 2021 14:09:34 +0200 Subject: TransRead: kill subprocesses Kill and wait for subprocesses when destroying TransRead objects. This gets rid of the following warning (observed when running self-tests): /usr/lib64/python3.9/subprocess.py:1048: ResourceWarning: subprocess 140912 is still running _warn("subprocess %s is still running" % self.pid, Origin: upstream, 3.7, commit:d17e2eaea318444c7370298739da3c5a3969cc6d Signed-off-by: Artem Bityutskiy Gbp-Pq: Name TransRead-kill-subprocesses.patch --- bmaptools/TransRead.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/bmaptools/TransRead.py b/bmaptools/TransRead.py index da3c8b5..1d0b582 100644 --- a/bmaptools/TransRead.py +++ b/bmaptools/TransRead.py @@ -188,14 +188,21 @@ class TransRead(object): """The class destructor which closes opened files.""" self._done = True - for child in self._child_processes: - child.kill() + if getattr(self, "_f_objs"): + for file_obj in self._f_objs: + file_obj.close() + self._f_objs = None - if self._rthread: + if getattr(self, "_rthread"): self._rthread.join() - - for file_obj in self._f_objs: - file_obj.close() + self._rthread = None + + if getattr(self, "_child_processes"): + for child in self._child_processes: + if child.poll() is None: + child.kill() + child.wait() + self._child_processes = [] def _read_thread(self, f_from, f_to): """ -- cgit v1.2.3 From fb1d7c312722af146eed463f86ff9f2e0ed1341e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 10 Mar 2021 14:50:59 +0200 Subject: Do not use subprocess pipe We use the 'subprocess' module for running external processes, and in few places we create sub-processes with the 'stderr=subprocess.PIPE' argument. 
Howerver, we never read from the pipe, which means that it may get filled and block the external program. This is dangerous and may lead to deadlock situations. This patch fixes the issue by removing the argument. If we do not read sub-process's 'stderr', it is OK for it to inherit it from the main program, so the error message will just go to bmaptool's standare error stream. Origin: upstream, 3.7, commit:d77f3e9a6e496ba8d460f27bfef02aec45181b78 Signed-off-by: Artem Bityutskiy Gbp-Pq: Name Do-not-use-subprocess-pipe.patch --- bmaptools/TransRead.py | 6 ++---- tests/test_api_base.py | 4 +--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/bmaptools/TransRead.py b/bmaptools/TransRead.py index 1d0b582..cdfd37e 100644 --- a/bmaptools/TransRead.py +++ b/bmaptools/TransRead.py @@ -419,8 +419,7 @@ class TransRead(object): child_process = subprocess.Popen(args, shell=True, bufsize=1024 * 1024, stdin=child_stdin, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stdout=subprocess.PIPE) if child_stdin == subprocess.PIPE: # A separate reader thread is created only when we are reading via @@ -490,8 +489,7 @@ class TransRead(object): # host command = "test -f " + path + " && test -r " + path child_process = subprocess.Popen(popen_args + [command], - bufsize=1024 * 1024, - stdout=subprocess.PIPE) + bufsize=1024 * 1024) child_process.wait() if child_process.returncode != 0: raise Error("\"%s\" on \"%s\" cannot be read: make sure it " diff --git a/tests/test_api_base.py b/tests/test_api_base.py index ea996c7..11adeaa 100644 --- a/tests/test_api_base.py +++ b/tests/test_api_base.py @@ -116,9 +116,7 @@ def _generate_compressed_files(file_path, delete=True): args = archiver + " " + options + " " + file_path else: args = decompressor + " " + options + " " + file_path - child_process = subprocess.Popen(args, shell=True, - stderr=subprocess.PIPE, - stdout=tmp_file_obj) + child_process = subprocess.Popen(args, shell=True, stdout=tmp_file_obj) child_process.wait() 
tmp_file_obj.flush() yield tmp_file_obj.name -- cgit v1.2.3 From 473a1e60b20e54280f7dddea7698692b49eaefa1 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 10 Mar 2021 15:33:48 +0200 Subject: TransRead: hide useless message The previous patch (stop using 'subprocess.PIPE') addes a side-effect - now we see 'tar' the following tar message: tar: Removing leading `/' from member names' This patch gets rid of them. Origin: upstream, 3.7, commit:f41e5529471ff94fc84280338a0e13e4862daa63 Signed-off-by: Artem Bityutskiy Gbp-Pq: Name TransRead-hide-useless-message.patch --- bmaptools/TransRead.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bmaptools/TransRead.py b/bmaptools/TransRead.py index cdfd37e..37bda8b 100644 --- a/bmaptools/TransRead.py +++ b/bmaptools/TransRead.py @@ -395,6 +395,11 @@ class TransRead(object): self.size = os.fstat(self._f_objs[-1].fileno()).st_size return + if archiver == "tar": + # This will get rid of messages like: + # tar: Removing leading `/' from member names'. + args += " -P -C /" + # Make sure decompressor and the archiver programs are available if not BmapHelpers.program_is_available(decompressor): raise Error("the \"%s\" program is not available but it is " -- cgit v1.2.3 From c9598ee6bd8299adf92fc75101c66127e3160fca Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 11 Mar 2021 10:48:38 +0200 Subject: tests: fix test_bmap_helpers on non-ZFS One test failed when running with on a system that does not have ZFS. 
Origin: upstream, 3.7, commit:3a84ba28364c5686b6b1ce0742a4b70d8f8b37c3 Signed-off-by: Artem Bityutskiy Gbp-Pq: Name tests-fix-test_bmap_helpers-on-non-ZFS.patch --- tests/test_bmap_helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_bmap_helpers.py b/tests/test_bmap_helpers.py index 8516164..1617957 100644 --- a/tests/test_bmap_helpers.py +++ b/tests/test_bmap_helpers.py @@ -102,7 +102,8 @@ class TestBmapHelpers(unittest.TestCase): mock_open.side_effect = IOError with self.assertRaises(BmapHelpers.Error): - BmapHelpers.is_zfs_configuration_compatible() + if not BmapHelpers.is_zfs_configuration_compatible(): + raise BmapHelpers.Error def test_is_zfs_configuration_compatible_notinstalled(self): """Check compatiblilty check passes when zfs not installed""" -- cgit v1.2.3 From d98071162295759ace2c7e28c6bd0a1c94afba77 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Thu, 28 Oct 2021 11:03:09 +0100 Subject: tests: Use unittest.mock from Python standard library if possible This avoids an unnecessary external dependency when using Python >= 3.3. Signed-off-by: Simon McVittie Forwarded: https://github.com/intel/bmap-tools/pull/87 Applied-upstream: 3.7, commit:a1ca1172f259f32ff9eb0469567be1a9085cca88 Gbp-Pq: Name tests-Use-unittest.mock-from-Python-standard-library-if-p.patch --- requirements-test.txt | 2 +- tests/test_bmap_helpers.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/requirements-test.txt b/requirements-test.txt index 1cc6bbb..cea340a 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,4 +1,4 @@ six nose backports.tempfile -mock \ No newline at end of file +mock ; python_version < '3.3' diff --git a/tests/test_bmap_helpers.py b/tests/test_bmap_helpers.py index 1617957..47b3862 100644 --- a/tests/test_bmap_helpers.py +++ b/tests/test_bmap_helpers.py @@ -21,7 +21,10 @@ This test verifies 'BmapHelpers' module functionality. 
import os import sys import tempfile -from mock import patch, mock +try: + from unittest.mock import patch, mock +except ImportError: # for Python < 3.3 + from mock import patch, mock from backports import tempfile as btempfile from bmaptools import BmapHelpers -- cgit v1.2.3 From ec3f40f161a2068f073e666d2a3ecdeba98a8f68 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Thu, 28 Oct 2021 11:05:52 +0100 Subject: tests: Try to use TemporaryDirectory from Python standard library This avoids an unnecessary external dependency with Python >= 3.2. Signed-off-by: Simon McVittie Forwarded: https://github.com/intel/bmap-tools/pull/87 Applied-upstream: 3.7, commit:dfba9f9c664c240bbf339189bf7abd7314bcafbc Gbp-Pq: Name tests-Try-to-use-TemporaryDirectory-from-Python-standard-.patch --- requirements-test.txt | 2 +- tests/test_bmap_helpers.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/requirements-test.txt b/requirements-test.txt index cea340a..f83802d 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,4 +1,4 @@ six nose -backports.tempfile +backports.tempfile ; python_version < '3.2' mock ; python_version < '3.3' diff --git a/tests/test_bmap_helpers.py b/tests/test_bmap_helpers.py index 47b3862..56b079e 100644 --- a/tests/test_bmap_helpers.py +++ b/tests/test_bmap_helpers.py @@ -25,7 +25,10 @@ try: from unittest.mock import patch, mock except ImportError: # for Python < 3.3 from mock import patch, mock -from backports import tempfile as btempfile +try: + from tempfile import TemporaryDirectory +except ImportError: # for Python < 3.2 + from backports.tempfile import TemporaryDirectory from bmaptools import BmapHelpers @@ -58,7 +61,7 @@ class TestBmapHelpers(unittest.TestCase): def test_get_file_system_type_symlink(self): """Check a file system type is returned when used with a symlink""" - with btempfile.TemporaryDirectory(prefix="testdir_", dir=".") as directory: + with TemporaryDirectory(prefix="testdir_", dir=".") as 
directory: fobj = tempfile.NamedTemporaryFile("r", prefix="testfile_", delete=False, dir=directory, suffix=".img") lnk = os.path.join(directory, "test_symlink") -- cgit v1.2.3 From 5b434c4c5b9588aef3a285779e70c2b5441f63b0 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Thu, 28 Oct 2021 11:16:39 +0100 Subject: tests: Pass -c -k options to zstd, too Otherwise we get interactive prompts during testing, like this: zstd: /*stdin*\: unexpected end of file zstd: /*stdin*\: unexpected end of file zstd: /*stdin*\: unexpected end of file zstd: /*stdin*\: unexpected end of file zstd: .../.pybuild/cpython3_3.9/build/4Khole_idts5mgb.img.zst already exists; overwrite (y/n) ? Signed-off-by: Simon McVittie Forwarded: https://github.com/intel/bmap-tools/pull/88 Applied-upstream: 3.7, commit:18f21738a89a30b335421dc1292bde81e56853de Gbp-Pq: Name tests-Pass-c-k-options-to-zstd-too.patch --- tests/test_api_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_api_base.py b/tests/test_api_base.py index 11adeaa..39b83fa 100644 --- a/tests/test_api_base.py +++ b/tests/test_api_base.py @@ -91,7 +91,7 @@ def _generate_compressed_files(file_path, delete=True): ("xz", None, ".xz", "-c -k"), ("lzop", None, ".lzo", "-c -k"), ("lz4", None, ".lz4", "-c -k"), - ("zstd", None, ".zst", ""), + ("zstd", None, ".zst", "-c -k"), # The "-P -C /" trick is used to avoid silly warnings: # "tar: Removing leading `/' from member names" ("bzip2", "tar", ".tar.bz2", "-c -j -O -P -C /"), -- cgit v1.2.3 From 479a6f8e86f140dbefd43fec2192f0a291491568 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Thu, 28 Oct 2021 12:23:30 +0100 Subject: tests: Fix import pattern for mock objects The legacy mock module contains a mock.mock submodule, but unittest.mock does not contain a redundant unittest.mock.mock. This bug was masked by the transparent fallback to the legacy mock module. The actual test only uses mock.patch(), so we can simplify by just importing the one member that we need. 
Fixes: a1ca1172 "tests: Use unittest.mock from Python standard library if possible" Signed-off-by: Simon McVittie Forwarded: https://github.com/intel/bmap-tools/pull/89 Applied-upstream: 3.7, commit:47908b5389d1f3de9306c0030856b3d3180ade86 Gbp-Pq: Name tests-Fix-import-pattern-for-mock-objects.patch --- tests/test_bmap_helpers.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_bmap_helpers.py b/tests/test_bmap_helpers.py index 56b079e..36c4557 100644 --- a/tests/test_bmap_helpers.py +++ b/tests/test_bmap_helpers.py @@ -22,9 +22,9 @@ import os import sys import tempfile try: - from unittest.mock import patch, mock + from unittest.mock import patch except ImportError: # for Python < 3.3 - from mock import patch, mock + from mock import patch try: from tempfile import TemporaryDirectory except ImportError: # for Python < 3.2 @@ -76,7 +76,7 @@ class TestBmapHelpers(unittest.TestCase): delete=True, dir=".", suffix=".txt") as fobj: fobj.write("1") fobj.flush() - mockobj = mock.patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) + mockobj = patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) with mockobj: self.assertTrue(BmapHelpers.is_zfs_configuration_compatible()) @@ -88,7 +88,7 @@ class TestBmapHelpers(unittest.TestCase): delete=True, dir=".", suffix=".txt") as fobj: fobj.write("0") fobj.flush() - mockobj = mock.patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) + mockobj = patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) with mockobj: self.assertFalse(BmapHelpers.is_zfs_configuration_compatible()) @@ -97,7 +97,7 @@ class TestBmapHelpers(unittest.TestCase): with tempfile.NamedTemporaryFile("a", prefix="testfile_", delete=True, dir=".", suffix=".txt") as fobj: - mockobj = mock.patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) + mockobj = patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) with self.assertRaises(BmapHelpers.Error): with mockobj: 
BmapHelpers.is_zfs_configuration_compatible() @@ -116,7 +116,7 @@ class TestBmapHelpers(unittest.TestCase): directory = os.path.dirname(__file__) filepath = os.path.join(directory, "BmapHelpers/file/does/not/exist") - mockobj = mock.patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", filepath) + mockobj = patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", filepath) with mockobj: self.assertFalse(BmapHelpers.is_zfs_configuration_compatible()) @@ -128,7 +128,7 @@ class TestBmapHelpers(unittest.TestCase): delete=True, dir=".", suffix=".img") as fobj: fobj.write("1") fobj.flush() - mockobj = mock.patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) + mockobj = patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) with mockobj: self.assertTrue(BmapHelpers.is_compatible_file_system(fobj.name)) @@ -140,7 +140,7 @@ class TestBmapHelpers(unittest.TestCase): delete=True, dir=".", suffix=".img") as fobj: fobj.write("0") fobj.flush() - mockobj = mock.patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) + mockobj = patch.object(BmapHelpers, "ZFS_COMPAT_PARAM_PATH", fobj.name) with mockobj: self.assertFalse(BmapHelpers.is_compatible_file_system(fobj.name)) -- cgit v1.2.3