summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitri John Ledkov <xnox@ubuntu.com>2016-08-11 12:45:00 +0100
committerDimitri John Ledkov <xnox@ubuntu.com>2016-08-11 12:45:00 +0100
commitffd998be34a1a3d208106d051e85058ff9ac9667 (patch)
tree3f93a4a7362dcbdbadf5086e9404dc58ea22200a
parent3d69435ee3292b4b1db2d61c4784789d75883821 (diff)
Imported Upstream version 4.7
-rw-r--r--CHANGES18
-rw-r--r--Documentation/btrfs-balance.asciidoc153
-rw-r--r--Documentation/btrfs-convert.asciidoc33
-rw-r--r--Documentation/btrfs-filesystem.asciidoc23
-rw-r--r--Documentation/btrfs-man5.asciidoc34
-rw-r--r--Documentation/btrfs-scrub.asciidoc32
-rw-r--r--Documentation/btrfs-send.asciidoc1
-rw-r--r--Documentation/btrfs-subvolume.asciidoc47
-rw-r--r--Documentation/btrfs.asciidoc31
-rw-r--r--btrfs-corrupt-block.c51
-rw-r--r--btrfs-image.c1
-rw-r--r--btrfs-map-logical.c5
-rw-r--r--btrfstune.c1
-rw-r--r--cmds-balance.c136
-rw-r--r--cmds-check.c360
-rw-r--r--cmds-device.c7
-rw-r--r--cmds-fi-du.c9
-rw-r--r--cmds-fi-usage.c1
-rw-r--r--cmds-filesystem.c12
-rw-r--r--cmds-inspect.c4
-rw-r--r--cmds-qgroup.c2
-rw-r--r--cmds-quota.c1
-rw-r--r--cmds-replace.c4
-rw-r--r--cmds-scrub.c2
-rw-r--r--cmds-subvolume.c5
-rwxr-xr-xconfigure18
-rw-r--r--ctree.h10
-rw-r--r--disk-io.c2
-rw-r--r--extent-tree.c4
-rw-r--r--extent_io.c18
-rw-r--r--extent_io.h53
-rw-r--r--ioctl.h2
-rw-r--r--kerncompat.h7
-rw-r--r--mkfs.c31
-rw-r--r--print-tree.c2
-rw-r--r--qgroup-verify.c508
-rw-r--r--qgroup-verify.h3
-rw-r--r--raid6.c16
-rw-r--r--repair.c2
-rw-r--r--repair.h2
-rwxr-xr-xtests/clean-tests.sh2
-rwxr-xr-xtests/convert-tests/006-large-hole-extent/test.sh23
-rwxr-xr-xtests/convert-tests/007-unsupported-block-sizes/test.sh21
-rwxr-xr-xtests/fuzz-tests/001-simple-unmounted/test.sh2
-rwxr-xr-xtests/misc-tests/006-image-on-missing-device/test.sh4
-rwxr-xr-xtests/test-console.sh2
-rw-r--r--utils.c21
-rw-r--r--utils.h8
-rwxr-xr-xversion.sh2
-rw-r--r--volumes.c15
50 files changed, 1341 insertions, 410 deletions
diff --git a/CHANGES b/CHANGES
index db900588..eced7a98 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,21 @@
+btrfs-progs-4.7 (2016-07-29)
+ * convert: fix creating discontig extents
+ * check: speed up traversing heavily reflinked extents within a file
+ * check: verify qgroups of higher levels
+ * check: repair can now fix wrong qgroup numbers
+ * balance: new option to run in the background
+ * defrag: default extent target size changed to 32MiB
+ * du: silently skip non-btrfs dirs/files
+ * documentation updates: btrfs(5), btrfs(8), balance, subvolume, scrub,
+ filesystem, convert
+ * bugfixes:
+ * unaligned access (reported for sparc64) in raid56 parity calculations
+ * use /bin/bash
+ * other stability fixes and cleanups
+ * more tests
+
btrfs-progs-4.6.1 (2016-06-24)
- * fi resize: negative resize argument accepted again (broken
+ * fi resize: negative resize argument accepted again
* qgroup rescan: fix skipping when rescan is in progress
* mkfs: initialize stripesize to correct value
* testsuite updates, mostly convert tests
diff --git a/Documentation/btrfs-balance.asciidoc b/Documentation/btrfs-balance.asciidoc
index 7df40b9c..c456898e 100644
--- a/Documentation/btrfs-balance.asciidoc
+++ b/Documentation/btrfs-balance.asciidoc
@@ -51,17 +51,32 @@ NOTE: A short syntax *btrfs balance <path>* works due to backward compatibility
but is deprecated and should not be used anymore. Use *btrfs balance start*
command instead.
+PERFORMANCE IMPLICATIONS
+------------------------
+
+Balance operation is intense namely in the IO respect, but can be also CPU
+intense. It affects other actions on the filesystem. There are typically lots
+of data being copied from one location to another, and lots of metadata get
+updated.
+
+Depending on the actual block group layout, it can be also seek-heavy. The
+performance on rotational devices is noticeably worse than on SSDs or fast
+arrays.
+
SUBCOMMAND
----------
*cancel* <path>::
-cancel running or paused balance
+cancel running or paused balance, the command will block and wait until the
+actually processed blockgroup is finished
*pause* <path>::
pause running balance operation, this will store the state of the balance
progress and used filters to the filesystem
*resume* <path>::
-resume interrupted balance
+resume interrupted balance, the balance status must be stored on the filesystem
+from previous run, eg. after it was forcibly interrupted and mounted again with
+'skip_balance'
*start* [options] <path>::
start the balance operation according to the specified filters, no filters
@@ -73,6 +88,10 @@ filesystem size. To prevent starting a full balance by accident, the user is
warned and has a few seconds to cancel the operation before it starts. The
warning and delay can be skipped with '--full-balance' option.
+
+Please note that the filters must be written together with the '-d', '-m' and
+'-s' options, because they're optional and bare '-d' etc also work and mean no
+filters.
++
`Options`
+
-d[<filters>]::::
@@ -85,6 +104,9 @@ act on system chunks (requires '-f'), see `FILTERS` section for details about 'f
be verbose and print balance filter arguments
-f::::
force reducing of metadata integrity, eg. when going from 'raid1' to 'single'
+--background|--bg::::
+run the balance operation asynchronously in the background, uses `fork`(2) to
+start the process that calls the kernel ioctl
*status* [-v] <path>::
Show status of running or paused balance.
@@ -94,7 +116,7 @@ If '-v' option is given, output will be verbose.
FILTERS
-------
From kernel 3.3 onwards, btrfs balance can limit its action to a subset of the
-full filesystem, and can be used to change the replication configuration (e.g.
+whole filesystem, and can be used to change the replication configuration (e.g.
moving data from single to RAID1). This functionality is accessed through the
'-d', '-m' or '-s' options to btrfs balance start, which filter on data,
metadata and system blocks respectively.
@@ -140,6 +162,9 @@ parameters.
+
NOTE: starting with kernel 4.5, the 'data' chunks can be converted to/from the
'DUP' profile on a single device.
++
+NOTE: starting with kernel 4.6, all profiles can be converted to/from 'DUP' on
+multi-device filesystems.
*limit=<number>*::
*limit=<range>*::
@@ -206,6 +231,128 @@ Conversion to profiles based on striping (RAID0, RAID5/6) require the work
space on each device. An interrupted balance may leave partially filled block
groups that might consume the work space.
+EXAMPLES
+--------
+
+A more comprehensive example when going from one to multiple devices, and back,
+can be found in section 'TYPICAL USECASES' of `btrfs-device`(8).
+
+MAKING BLOCK GROUP LAYOUT MORE COMPACT
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The layout of block groups is not normally visible, most tools report only
+summarized numbers of free or used space, but there are still some hints
+provided.
+
+Let's use the following real life example and start with the output:
+
+--------------------
+$ btrfs fi df /path
+Data, single: total=75.81GiB, used=64.44GiB
+System, RAID1: total=32.00MiB, used=20.00KiB
+Metadata, RAID1: total=15.87GiB, used=8.84GiB
+GlobalReserve, single: total=512.00MiB, used=0.00B
+--------------------
+
+Roughly calculating for data, '75G - 64G = 11G', the used/total ratio is
+about '85%'. How can we interpret that:
+
+* chunks are filled by 85% on average, ie. the 'usage' filter with anything
+ smaller than 85 will likely not affect anything
+* in a more realistic scenario, the space is distributed unevenly, we can
+ assume there are completely used chunks and the remaining are partially filled
+
+Compacting the layout could be used on both. In the former case it would spread
+data of a given chunk to the others and remove it. Here we can estimate that
+roughly 850 MiB of data have to be moved (85% of a 1 GiB chunk).
+
+In the latter case, targeting the partially used chunks will have to move less
+data and thus will be faster. A typical filter command would look like:
+
+--------------------
+# btrfs balance start -dusage=50 /path
+Done, had to relocate 2 out of 97 chunks
+
+$ btrfs fi df /path
+Data, single: total=74.03GiB, used=64.43GiB
+System, RAID1: total=32.00MiB, used=20.00KiB
+Metadata, RAID1: total=15.87GiB, used=8.84GiB
+GlobalReserve, single: total=512.00MiB, used=0.00B
+--------------------
+
+As you can see, the 'total' amount of data is decreased by just 1 GiB, which is
+an expected result. Let's see what will happen when we increase the estimated
+usage filter.
+
+--------------------
+# btrfs balance start -dusage=85 /path
+Done, had to relocate 13 out of 95 chunks
+
+$ btrfs fi df /path
+Data, single: total=68.03GiB, used=64.43GiB
+System, RAID1: total=32.00MiB, used=20.00KiB
+Metadata, RAID1: total=15.87GiB, used=8.85GiB
+GlobalReserve, single: total=512.00MiB, used=0.00B
+--------------------
+
+Now the used/total ratio is about 94% and we moved about '74G - 68G = 6G' of
+data to the remaining blockgroups, ie. the 6GiB are now free of filesystem
+structures, and can be reused for new data or metadata block groups.
+
+We can do a similar exercise with the metadata block groups, but this should
+not be typically necessary, unless the used/total ratio is really off. Here
+the ratio is roughly 50% but the difference as an absolute number is "a few
+gigabytes", which can be considered normal for a workload with snapshots or
+reflinks updated frequently.
+
+--------------------
+# btrfs balance start -musage=50 /path
+Done, had to relocate 4 out of 89 chunks
+
+$ btrfs fi df /path
+Data, single: total=68.03GiB, used=64.43GiB
+System, RAID1: total=32.00MiB, used=20.00KiB
+Metadata, RAID1: total=14.87GiB, used=8.85GiB
+GlobalReserve, single: total=512.00MiB, used=0.00B
+--------------------
+
+Just 1 GiB decrease, which possibly means there are block groups with good
+utilization. Making the metadata layout more compact would in turn require
+updating more metadata structures, ie. lots of IO. As running out of metadata
+space is a more severe problem, it's not necessary to keep the utilization
+ratio too high. For the purpose of this example, let's see the effects of
+further compaction:
+
+--------------------
+# btrfs balance start -musage=70 /path
+Done, had to relocate 13 out of 88 chunks
+
+$ btrfs fi df .
+Data, single: total=68.03GiB, used=64.43GiB
+System, RAID1: total=32.00MiB, used=20.00KiB
+Metadata, RAID1: total=11.97GiB, used=8.83GiB
+GlobalReserve, single: total=512.00MiB, used=0.00B
+--------------------
+
+GETTING RID OF COMPLETELY UNUSED BLOCK GROUPS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Normally the balance operation needs a work space, to temporarily move the
+data before the old block groups get removed. If there's no work space, it
+ends with 'no space left'.
+
+There's a special case when the block groups are completely unused, possibly
+left after removing lots of files or deleting snapshots. Removing empty block
+groups is automatic since 3.18. The same can be achieved manually with a
+notable exception that this operation does not require the work space. Thus it
+can be used to reclaim unused block groups to make them available.
+
+--------------------
+# btrfs balance start -dusage=0 /path
+--------------------
+
+This should lead to decrease in the 'total' numbers in the *btrfs fi df* output.
+
EXIT STATUS
-----------
*btrfs balance* returns a zero exit status if it succeeds. Non zero is
diff --git a/Documentation/btrfs-convert.asciidoc b/Documentation/btrfs-convert.asciidoc
index ab3577db..ecc157cd 100644
--- a/Documentation/btrfs-convert.asciidoc
+++ b/Documentation/btrfs-convert.asciidoc
@@ -3,7 +3,7 @@ btrfs-convert(8)
NAME
----
-btrfs-convert - convert from ext2/3/4 filesystem to btrfs
+btrfs-convert - convert from ext2/3/4 filesystem to btrfs in-place
SYNOPSIS
--------
@@ -28,6 +28,11 @@ steps to transform the btrfs filesystem to a more compact layout. The
conversion inherits the original data block fragmentation and the metadata
blocks are bound to the original free space layout.
+Due to different constraints, it's possible to convert only filesystems that
+have a supported data block size (ie. the same that would be valid for
+'mkfs.btrfs'). This is typically the system page size (4KiB on x86_64
+machines).
+
**REMOVE THE ORIGINAL FILESYSTEM METADATA**
By removing the 'ext2_saved' subvolume, all metadata of the original filesystem
@@ -36,7 +41,7 @@ will be removed:
# btrfs subvolume delete /mnt/ext2_saved
At this point it's not possible to do rollback. The filesystem is usable but may
-be impacted by the fragmentation.
+be impacted by the fragmentation inherited from the original filesystem.
**MAKE FILE DATA MORE CONTIGUOUS**
@@ -45,8 +50,8 @@ filesystem. This will attempt to make file extents more contiguous.
# btrfs filesystem defrag -v -r -f -t 32M /mnt/btrfs
-Verbose recursive defragmentation ('-v', '-r'), flush data per-file ('-f') with target
-extent size 32M ('-t').
+Verbose recursive defragmentation ('-v', '-r'), flush data per-file ('-f') with
+target extent size 32MiB ('-t').
**ATTEMPT TO MAKE BTRFS METADATA MORE COMPACT**
@@ -54,18 +59,19 @@ Optional but recommended step.
The metadata block groups after conversion may be smaller than the default size
(256MiB or 1GiB). Running a balance will attempt to merge the block groups.
-This depends on the free space layout (and fragmentation) and may fail. This is
-a soft error leaving the filesystem usable but the block group layout may
-remain unchanged.
+This depends on the free space layout (and fragmentation) and may fail due to
+lack of enough work space. This is a soft error leaving the filesystem usable
+but the block group layout may remain unchanged.
-Note that balance operation takes a lot of time.
+Note that balance operation takes a lot of time, please see also
+`btrfs-balance`(8).
# btrfs balance start -m /mnt/btrfs
OPTIONS
-------
-d|--no-datasum::
-disable data checksum calculations and set NODATASUM file flag, this can speed
+disable data checksum calculations and set the NODATASUM file flag, this can speed
up the conversion
-i|--no-xattr::
ignore xattrs and ACLs of files
@@ -84,17 +90,20 @@ set filesystem label during conversion
-L|--copy-label::
use label from the converted filesystem
-O|--features <feature1>[,<feature2>...]::
-A list of filesystem features turned on at btrfs-convert time. Not all features
+A list of filesystem features turned on at conversion time. Not all features
are supported by old kernels. To disable a feature, prefix it with '^'.
+Description of the features is in section 'FILESYSTEM FEATURES' of
+`mkfs.btrfs`(8).
+
To see all available features that btrfs-convert supports run:
+
+btrfs-convert -O list-all+
+
-p|--progress::
-show progress of conversion, on by default
+show progress of conversion (a heartbeat indicator and number of inodes
+processed), on by default
--no-progress::
-disable detailed progress and show only the main phases of conversion
+disable progress and show only the main phases of conversion
EXIT STATUS
-----------
diff --git a/Documentation/btrfs-filesystem.asciidoc b/Documentation/btrfs-filesystem.asciidoc
index dc032faa..9782af9b 100644
--- a/Documentation/btrfs-filesystem.asciidoc
+++ b/Documentation/btrfs-filesystem.asciidoc
@@ -11,9 +11,10 @@ SYNOPSIS
DESCRIPTION
-----------
-*btrfs filesystem* is used to do the whole filesystem level tasks, including
-all the regular filesystem operations like resizing, space stats, label
-setting/getting, and defragmentation.
+*btrfs filesystem* is used to perform several whole filesystem level tasks,
+including all the regular filesystem operations like resizing, space stats,
+label setting/getting, and defragmentation. There are other whole filesystem
+tasks like scrub or balance that are grouped in separate commands.
SUBCOMMAND
----------
@@ -116,15 +117,23 @@ compression. See also section 'EXAMPLES'.
-r::::
defragment files recursively in given directories
-f::::
-flush data for each file before going to the next file. This will limit the amount
-of dirty data to current file, otherwise the amount cumulates from several files
-and may increase system load.
+flush data for each file before going to the next file.
++
+This will limit the amount of dirty data to current file, otherwise the amount
+cumulates from several files and will increase system load. This can also lead
+to ENOSPC if there's too much dirty data to write and it's not possible to make
+the reservations for the new data (ie. how the COW design works).
++
-s <start>[kKmMgGtTpPeE]::::
defragmentation will start from the given offset, default is beginning of a file
-l <len>[kKmMgGtTpPeE]::::
defragment only up to 'len' bytes, default is the file size
-t <size>[kKmMgGtTpPeE]::::
-target extent size, do not touch extents bigger than 'size'
+target extent size, do not touch extents bigger than 'size', default: 32M
++
+The value is only advisory and the final size of the extents may differ,
+depending on the state of the free space and fragmentation or other internal
+logic. Reasonable values are from tens to hundreds of megabytes.
*du* [options] <path> [<path>..]::
Calculate disk usage of the target files using FIEMAP. For individual
diff --git a/Documentation/btrfs-man5.asciidoc b/Documentation/btrfs-man5.asciidoc
index 7dd323fa..467f11bf 100644
--- a/Documentation/btrfs-man5.asciidoc
+++ b/Documentation/btrfs-man5.asciidoc
@@ -14,6 +14,8 @@ tools. Currently covers:
2. file attributes
+3. control device
+
MOUNT OPTIONS
-------------
@@ -455,11 +457,43 @@ When set on a directory, all newly created files will inherit this attribute.
No other attributes are supported. For the complete list please refer to the
`chattr`(1) manual page.
+CONTROL DEVICE
+--------------
+
+There's a character special device `/dev/btrfs-control` with major and minor
+numbers 10 and 234 (the device can be found under the 'misc' category).
+
+--------------------
+$ ls -l /dev/btrfs-control
+crw------- 1 root root 10, 234 Jan 1 12:00 /dev/btrfs-control
+--------------------
+
+The device accepts some ioctl calls that can perform the following actions on
+the filesystem module:
+
+* scan devices for btrfs filesystem (ie. to let multi-device filesystems mount
+ automatically) and register them with the kernel module
+* similar to scan, but also wait until the device scanning process is finished
+ for a given filesystem
+* get the supported features (can be also found under '/sys/fs/btrfs/features')
+
+
+The device is usually created by ..., but can be created manually:
+
+--------------------
+# mknod --mode=600 /dev/btrfs-control c 10 234
+--------------------
+
+The device is not strictly required but the device scanning will not work and a
+workaround would need to be used to mount a multi-device filesystem. The mount
+option 'device' can trigger the device scanning during mount.
+
SEE ALSO
--------
`acl`(5),
`btrfs`(8),
`chattr`(1),
`fstrim`(8),
+`ioctl`(2),
`mkfs.btrfs`(8),
`mount`(8)
diff --git a/Documentation/btrfs-scrub.asciidoc b/Documentation/btrfs-scrub.asciidoc
index 83ddcaa7..40e793c2 100644
--- a/Documentation/btrfs-scrub.asciidoc
+++ b/Documentation/btrfs-scrub.asciidoc
@@ -12,16 +12,30 @@ SYNOPSIS
DESCRIPTION
-----------
*btrfs scrub* is used to scrub a btrfs filesystem, which will read all data
-and metadata blocks from all disks and verify checksums. Automatically repair
+and metadata blocks from all devices and verify checksums. Automatically repair
corrupted blocks if there's a correct copy available.
+NOTE: Scrub is not a filesystem checker (fsck) and does not verify nor repair
+structural damage in the filesystem.
+
+The user is supposed to run it manually or via a periodic system service. The
+recommended period is a month but could be less. The estimated device bandwidth
+utilization is about 80% on an idle filesystem. The IO priority class is by
+default 'idle' so background scrub should not interfere with normal filesystem
+operation significantly.
+
+The scrubbing status is recorded in '/var/lib/btrfs/' in textual files named
+'scrub.status.UUID' for a filesystem identified by the given UUID. (An
+intermediate progress is communicated through a named pipe in file
+'scrub.progress.UUID' in the same directory.) The status file is updated
+periodically every 5 seconds. A resumed scrub will continue from the last
+saved position.
+
SUBCOMMAND
----------
*cancel* <path>|<device>::
If a scrub is running on the filesystem identified by 'path>' cancel it.
+
-Progress is saved in the scrub progress file ('/var/lib/btrfs/scrub.status.UUID')
-and scrubbing can be resumed later using the *btrfs scrub resume* command.
If a 'device' is specified, the corresponding filesystem is found and
*btrfs scrub cancel* behaves as if it was called on that filesystem.
@@ -40,9 +54,6 @@ Start a scrub on all devices of the filesystem identified by 'path' or on
a single 'device'. If a scrub is already running, the new one fails.
+
Without options, scrub is started as a background process.
-Progress can be obtained with the *btrfs scrub status* command. Scrubbing
-involves reading all data from all disks and verifying checksums. Errors are
-corrected along the way if possible.
+
The default IO priority of scrub is the idle class. The priority can be
configured similar to the `ionice`(1) syntax using '-c' and '-n' options.
@@ -52,11 +63,11 @@ configured similar to the `ionice`(1) syntax using '-c' and '-n' options.
-B::::
do not background and print scrub statistics when finished
-d::::
-print separate statistics for each device of the filesystem ('-B' only)
+print separate statistics for each device of the filesystem ('-B' only) at the end
-q::::
be quiet, omit error messages and statistics
-r::::
-read only mode, do not attempt to correct anything, can be run on a read-only
+run in read-only mode, do not attempt to correct anything, can be run on a read-only
filesystem
-R::::
print raw statistics per-device instead of a summary
@@ -66,8 +77,8 @@ set IO priority class (see `ionice`(1) manpage)
set IO priority classdata (see `ionice`(1) manpage)
-f::::
force starting new scrub even if a scrub is already running,
-this is useful when scrub status file is damaged and reports a running
-scrub although it is not
+this can be useful when scrub status file is damaged and reports a running
+scrub although it is not, but should not normally be necessary
*status* [-d] <path>|<device>::
Show status of a running scrub for the filesystem identified by 'path' or
@@ -95,3 +106,4 @@ further details.
SEE ALSO
--------
`mkfs.btrfs`(8),
+`ionice`(1)
diff --git a/Documentation/btrfs-send.asciidoc b/Documentation/btrfs-send.asciidoc
index 47b0b047..96659eed 100644
--- a/Documentation/btrfs-send.asciidoc
+++ b/Documentation/btrfs-send.asciidoc
@@ -53,6 +53,7 @@ send in 'NO_FILE_DATA' mode
The output stream does not contain any file
data and thus cannot be used to transfer changes. This mode is faster and
useful to show the differences in metadata.
+
-v|--verbose::
enable verbose output, print generated commands in a readable form, (each
occurrence of this option increases the verbosity level)
diff --git a/Documentation/btrfs-subvolume.asciidoc b/Documentation/btrfs-subvolume.asciidoc
index fb64aa4a..2044b07d 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -3,7 +3,7 @@ btrfs-subvolume(8)
NAME
----
-btrfs-subvolume - control btrfs subvolume(s)
+btrfs-subvolume - manage btrfs subvolumes
SYNOPSIS
--------
@@ -11,35 +11,39 @@ SYNOPSIS
DESCRIPTION
-----------
-*btrfs subvolume* is used to control the filesystem to create/delete/list/show
-subvolumes and snapshots.
+*btrfs subvolume* is used to create/delete/list/show btrfs subvolumes and
+snapshots.
SUBVOLUME AND SNAPSHOT
----------------------
-A subvolume in btrfs is not like an LVM logical volume, which is quite
-independent from each other, a btrfs subvolume has its hierarchy and relations
-between other subvolumes.
-A subvolume in btrfs can be accessed in two ways.
+A subvolume is a part of a filesystem with its own independent
+file/directory hierarchy. Subvolumes can share file extents. A snapshot is
+also a subvolume, but with a given initial content of the original subvolume.
-1. From the parent subvolume +
-When accessing from the parent subvolume, the subvolume can be used just
-like a directory. It can have child subvolumes and its own files/directories.
+NOTE: A subvolume in btrfs is not like an LVM logical volume, which is a
+block-level snapshot while btrfs subvolumes are file extent-based.
-2. Separate mounted filesystem +
-When `mount`(8) using 'subvol' or 'subvolid' mount option, one can access
-files/directories/subvolumes inside it, but nothing in parent subvolumes.
+A subvolume looks like a normal directory, with some additional operations
+described below. Subvolumes can be renamed or moved, nesting subvolumes is not
+restricted but has some implications regarding snapshotting.
-Also every btrfs filesystem has a default subvolume as its initially top-level
-subvolume, whose subvolume id is 5. (0 is also acceptable as an alias.)
+A subvolume in btrfs can be accessed in two ways:
-A btrfs snapshot is much like a subvolume, but shares its data(and metadata)
-with other subvolume/snapshot. Due to the capabilities of COW, modifications
-inside a snapshot will only show in a snapshot but not in its source subvolume.
+* like any other directory that is accessible to the user
+* like a separately mounted filesystem (options 'subvol' or 'subvolid')
-Although in btrfs, subvolumes/snapshots are treated as directories, only
-subvolume/snapshot can be the source of a snapshot, snapshot can not be made
-from normal directories.
+In the latter case the parent directory is not visible and accessible. This is
+similar to a bind mount, and in fact the subvolume mount does exactly that.
+
+A freshly created filesystem is also a subvolume, called 'top-level',
+internally has an id 5. This subvolume cannot be removed or replaced by another
+subvolume. This is also the subvolume that will be mounted by default, unless
+the default subvolume has been changed (see subcommand 'set-default').
+
+A snapshot is a subvolume like any other, with given initial content. By
+default, snapshots are created read-write. File modifications in a snapshot
+do not affect the files in the original subvolume.
SUBCOMMAND
-----------
@@ -178,5 +182,6 @@ further details.
SEE ALSO
--------
`mkfs.btrfs`(8),
+`mount`(8),
`btrfs-quota`(8),
`btrfs-qgroup`(8),
diff --git a/Documentation/btrfs.asciidoc b/Documentation/btrfs.asciidoc
index 6a77a852..1c37abf8 100644
--- a/Documentation/btrfs.asciidoc
+++ b/Documentation/btrfs.asciidoc
@@ -3,7 +3,7 @@ btrfs(8)
NAME
----
-btrfs - control a btrfs filesystem
+btrfs - a toolbox to manage btrfs filesystems
SYNOPSIS
--------
@@ -15,6 +15,10 @@ The *btrfs* utility is a toolbox for managing btrfs filesystems. There are
command groups to work with subvolumes, devices, for whole filesystem or other
specific actions. See section *COMMANDS*.
+There are also standalone tools for some tasks like *btrfs-convert* or
+*btrfstune* that were separate historically and/or haven't been merged to the
+main utility. See section 'STANDALONE TOOLS' for more details.
+
COMMAND SYNTAX
--------------
@@ -96,6 +100,27 @@ COMMANDS
Create/delete/list/manage btrfs subvolume. +
See `btrfs-subvolume`(8) for details.
+STANDALONE TOOLS
+----------------
+
+There are several standalone tools to provide certain functionality. If the
+functionality proves to be useful, the standalone tools are declared obsolete
+and their functionality copied to the main tool. The deprecation period is long
+(years) and the obsolete binaries are still provided.
+
+Tools that are still in active use without an equivalent in *btrfs*:
+
+*btrfs-convert*:: in-place conversion from ext2/3/4 filesystems to btrfs
+*btrfstune*:: tweak some filesystem properties on an unmounted filesystem
+*btrfs-select-super*:: rescue tool to overwrite primary superblock from a spare copy
+*btrfs-find-root*:: rescue helper to find tree roots in a filesystem
+
+Deprecated and obsolete tools:
+
+*btrfs-debug-tree*:: moved to *btrfs inspect-internal dump-tree*
+*btrfs-show-super*:: moved to *btrfs inspect-internal dump-super*
+*btrfs-zero-log*:: moved to *btrfs rescue zero-log*
+
EXIT STATUS
-----------
*btrfs* returns a zero exit status if it succeeds. Non zero is returned in
@@ -109,9 +134,9 @@ further details.
SEE ALSO
--------
-`mkfs.btrfs`(8), `ionice`(1),
`btrfs-balance`(8),
`btrfs-check`(8),
+`btrfs-convert`(8),
`btrfs-device`(8),
`btrfs-filesystem`(8),
`btrfs-inspect-internal`(8),
@@ -125,3 +150,5 @@ SEE ALSO
`btrfs-scrub`(8),
`btrfs-send`(8),
`btrfs-subvolume`(8),
+`btrfstune`(8),
+`mkfs.btrfs`(8)
diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c
index 020afab5..a488e479 100644
--- a/btrfs-corrupt-block.c
+++ b/btrfs-corrupt-block.c
@@ -34,27 +34,28 @@
#define FIELD_BUF_LEN 80
-static struct extent_buffer *debug_corrupt_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize, u64 copy)
+static int debug_corrupt_block(struct extent_buffer *eb,
+ struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 copy)
{
int ret;
- struct extent_buffer *eb;
u64 length;
struct btrfs_multi_bio *multi = NULL;
struct btrfs_device *device;
int num_copies;
int mirror_num = 1;
- eb = btrfs_find_create_tree_block(root->fs_info, bytenr, blocksize);
- if (!eb)
- return NULL;
-
length = blocksize;
while (1) {
ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
eb->start, &length, &multi,
mirror_num, NULL);
- BUG_ON(ret);
+ if (ret) {
+ error("cannot map block %llu length %llu mirror %d: %d",
+ (unsigned long long)eb->start,
+ (unsigned long long)length,
+ mirror_num, ret);
+ return ret;
+ }
device = multi->stripes[0].dev;
eb->fd = device->fd;
device->total_ios++;
@@ -68,10 +69,22 @@ static struct extent_buffer *debug_corrupt_block(struct btrfs_root *root,
if (!copy || mirror_num == copy) {
ret = read_extent_from_disk(eb, 0, eb->len);
+ if (ret < 0) {
+ error("cannot read eb bytenr %llu: %s",
+ (unsigned long long)eb->dev_bytenr,
+ strerror(-ret));
+ return ret;
+ }
printf("corrupting %llu copy %d\n", eb->start,
mirror_num);
memset(eb->data, 0, eb->len);
- write_extent_to_disk(eb);
+ ret = write_extent_to_disk(eb);
+ if (ret < 0) {
+ error("cannot write eb bytenr %llu: %s",
+ (unsigned long long)eb->dev_bytenr,
+ strerror(-ret));
+ return ret;
+ }
fsync(eb->fd);
}
@@ -84,7 +97,8 @@ static struct extent_buffer *debug_corrupt_block(struct btrfs_root *root,
if (mirror_num > num_copies)
break;
}
- return eb;
+
+ return 0;
}
static void print_usage(int ret)
@@ -1018,7 +1032,6 @@ int main(int argc, char **argv)
struct cache_tree root_cache;
struct btrfs_key key;
struct btrfs_root *root;
- struct extent_buffer *eb;
char *dev;
/* chunk offset can be 0,so change to (u64)-1 */
u64 logical = (u64)-1;
@@ -1295,8 +1308,20 @@ int main(int argc, char **argv)
if (corrupt_block_keys) {
corrupt_keys_in_block(root, logical);
} else {
- eb = debug_corrupt_block(root, logical,
- root->sectorsize, copy);
+ struct extent_buffer *eb;
+
+ eb = btrfs_find_create_tree_block(root->fs_info,
+ logical, root->sectorsize);
+ if (!eb) {
+ error(
+ "not enough memory to allocate extent buffer for bytenr %llu",
+ (unsigned long long)logical);
+ ret = 1;
+ goto out_close;
+ }
+
+ debug_corrupt_block(eb, root, logical, root->sectorsize,
+ copy);
free_extent_buffer(eb);
}
logical += root->sectorsize;
diff --git a/btrfs-image.c b/btrfs-image.c
index 6feeb46c..af5437b4 100644
--- a/btrfs-image.c
+++ b/btrfs-image.c
@@ -2598,6 +2598,7 @@ static int restore_metadump(const char *input, FILE *out, int old_restore,
if (stat(target, &st)) {
fprintf(stderr, "statting %s failed\n", target);
close_ctree(info->chunk_root);
+ free(cluster);
return 1;
}
diff --git a/btrfs-map-logical.c b/btrfs-map-logical.c
index fd0286d5..f421a50f 100644
--- a/btrfs-map-logical.c
+++ b/btrfs-map-logical.c
@@ -329,6 +329,11 @@ int main(int argc, char **argv)
goto out_close_fd;
if (ret > 0)
break;
+ /* check again if there is overlap. */
+ if (cur_logical + cur_len < logical ||
+ cur_logical >= logical + bytes)
+ break;
+
real_logical = max(logical, cur_logical);
real_len = min(logical + bytes, cur_logical + cur_len) -
real_logical;
diff --git a/btrfstune.c b/btrfstune.c
index f731e3d2..ac559fb9 100644
--- a/btrfstune.c
+++ b/btrfstune.c
@@ -399,7 +399,6 @@ int main(int argc, char *argv[])
int ret;
u64 super_flags = 0;
- optind = 1;
while(1) {
static const struct option long_options[] = {
{ "help", no_argument, NULL, GETOPT_VAL_HELP},
diff --git a/cmds-balance.c b/cmds-balance.c
index 708bbf45..f17345ed 100644
--- a/cmds-balance.c
+++ b/cmds-balance.c
@@ -20,6 +20,9 @@
#include <unistd.h>
#include <getopt.h>
#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
#include <errno.h>
#include "kerncompat.h"
@@ -427,7 +430,6 @@ static int do_balance(const char *path, struct btrfs_ioctl_balance_args *args,
{
int fd;
int ret;
- int e;
DIR *dirstream = NULL;
fd = btrfs_open_dir(path, &dirstream, 1);
@@ -453,30 +455,28 @@ static int do_balance(const char *path, struct btrfs_ioctl_balance_args *args,
}
ret = ioctl(fd, BTRFS_IOC_BALANCE_V2, args);
- e = errno;
-
if (ret < 0) {
/*
* older kernels don't have the new balance ioctl, try the
* old one. But, the old one doesn't know any filters, so
* don't fall back if they tried to use the fancy new things
*/
- if (e == ENOTTY && !(flags & BALANCE_START_FILTERS)) {
+ if (errno == ENOTTY && !(flags & BALANCE_START_FILTERS)) {
ret = do_balance_v1(fd);
if (ret == 0)
goto out;
- e = errno;
}
- if (e == ECANCELED) {
+ if (errno == ECANCELED) {
if (args->state & BTRFS_BALANCE_STATE_PAUSE_REQ)
fprintf(stderr, "balance paused by user\n");
if (args->state & BTRFS_BALANCE_STATE_CANCEL_REQ)
fprintf(stderr, "balance canceled by user\n");
ret = 0;
} else {
- error("error during balancing '%s': %s", path, strerror(e));
- if (e != EINPROGRESS)
+ error("error during balancing '%s': %s", path,
+ strerror(errno));
+ if (errno != EINPROGRESS)
fprintf(stderr,
"There may be more info in syslog - try dmesg | tail\n");
ret = 1;
@@ -510,6 +510,8 @@ static const char * const cmd_balance_start_usage[] = {
"-v be verbose",
"-f force reducing of metadata integrity",
"--full-balance do not print warning and do not delay start",
+ "--background|--bg",
+ " run the balance as a background process",
NULL
};
@@ -520,14 +522,15 @@ static int cmd_balance_start(int argc, char **argv)
&args.meta, NULL };
int force = 0;
int verbose = 0;
+ int background = 0;
unsigned start_flags = 0;
int i;
memset(&args, 0, sizeof(args));
- optind = 1;
while (1) {
- enum { GETOPT_VAL_FULL_BALANCE = 256 };
+ enum { GETOPT_VAL_FULL_BALANCE = 256,
+ GETOPT_VAL_BACKGROUND = 257 };
static const struct option longopts[] = {
{ "data", optional_argument, NULL, 'd'},
{ "metadata", optional_argument, NULL, 'm' },
@@ -536,6 +539,9 @@ static int cmd_balance_start(int argc, char **argv)
{ "verbose", no_argument, NULL, 'v' },
{ "full-balance", no_argument, NULL,
GETOPT_VAL_FULL_BALANCE },
+ { "background", no_argument, NULL,
+ GETOPT_VAL_BACKGROUND },
+ { "bg", no_argument, NULL, GETOPT_VAL_BACKGROUND },
{ NULL, 0, NULL, 0 }
};
@@ -574,6 +580,9 @@ static int cmd_balance_start(int argc, char **argv)
case GETOPT_VAL_FULL_BALANCE:
start_flags |= BALANCE_START_NOWARN;
break;
+ case GETOPT_VAL_BACKGROUND:
+ background = 1;
+ break;
default:
usage(cmd_balance_start_usage);
}
@@ -626,6 +635,40 @@ static int cmd_balance_start(int argc, char **argv)
args.flags |= BTRFS_BALANCE_FORCE;
if (verbose)
dump_ioctl_balance_args(&args);
+ if (background) {
+ switch (fork()) {
+ case (-1):
+ error("unable to fork to run balance in background");
+ return 1;
+ case (0):
+ setsid();
+ switch(fork()) {
+ case (-1):
+ error(
+ "unable to fork to run balance in background");
+ exit(1);
+ case (0):
+ /*
+ * Read the return value to silence compiler
+ * warning. Change to / should succeed and
+ * we're not in a security-sensitive context.
+ */
+ i = chdir("/");
+ close(0);
+ close(1);
+ close(2);
+ open("/dev/null", O_RDONLY);
+ open("/dev/null", O_WRONLY);
+ open("/dev/null", O_WRONLY);
+ break;
+ default:
+ exit(0);
+ }
+ break;
+ default:
+ exit(0);
+ }
+ }
return do_balance(argv[optind], &args, start_flags);
}
@@ -641,7 +684,6 @@ static int cmd_balance_pause(int argc, char **argv)
const char *path;
int fd;
int ret;
- int e;
DIR *dirstream = NULL;
clean_args_no_options(argc, argv, cmd_balance_pause_usage);
@@ -656,19 +698,17 @@ static int cmd_balance_pause(int argc, char **argv)
return 1;
ret = ioctl(fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_PAUSE);
- e = errno;
- close_file_or_dir(fd, dirstream);
-
if (ret < 0) {
error("balance pause on '%s' failed: %s", path,
- (e == ENOTCONN) ? "Not running" : strerror(e));
- if (e == ENOTCONN)
- return 2;
+ (errno == ENOTCONN) ? "Not running" : strerror(errno));
+ if (errno == ENOTCONN)
+ ret = 2;
else
- return 1;
+ ret = 1;
}
- return 0;
+ close_file_or_dir(fd, dirstream);
+ return ret;
}
static const char * const cmd_balance_cancel_usage[] = {
@@ -682,7 +722,6 @@ static int cmd_balance_cancel(int argc, char **argv)
const char *path;
int fd;
int ret;
- int e;
DIR *dirstream = NULL;
clean_args_no_options(argc, argv, cmd_balance_cancel_usage);
@@ -697,19 +736,17 @@ static int cmd_balance_cancel(int argc, char **argv)
return 1;
ret = ioctl(fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_CANCEL);
- e = errno;
- close_file_or_dir(fd, dirstream);
-
if (ret < 0) {
error("balance cancel on '%s' failed: %s", path,
- (e == ENOTCONN) ? "Not in progress" : strerror(e));
- if (e == ENOTCONN)
- return 2;
+ (errno == ENOTCONN) ? "Not in progress" : strerror(errno));
+ if (errno == ENOTCONN)
+ ret = 2;
else
- return 1;
+ ret = 1;
}
- return 0;
+ close_file_or_dir(fd, dirstream);
+ return ret;
}
static const char * const cmd_balance_resume_usage[] = {
@@ -725,7 +762,6 @@ static int cmd_balance_resume(int argc, char **argv)
DIR *dirstream = NULL;
int fd;
int ret;
- int e;
clean_args_no_options(argc, argv, cmd_balance_resume_usage);
@@ -742,28 +778,25 @@ static int cmd_balance_resume(int argc, char **argv)
args.flags |= BTRFS_BALANCE_RESUME;
ret = ioctl(fd, BTRFS_IOC_BALANCE_V2, &args);
- e = errno;
- close_file_or_dir(fd, dirstream);
-
if (ret < 0) {
- if (e == ECANCELED) {
+ if (errno == ECANCELED) {
if (args.state & BTRFS_BALANCE_STATE_PAUSE_REQ)
fprintf(stderr, "balance paused by user\n");
if (args.state & BTRFS_BALANCE_STATE_CANCEL_REQ)
fprintf(stderr, "balance canceled by user\n");
- } else if (e == ENOTCONN || e == EINPROGRESS) {
+ } else if (errno == ENOTCONN || errno == EINPROGRESS) {
error("balance resume on '%s' failed: %s", path,
- (e == ENOTCONN) ? "Not in progress" :
+ (errno == ENOTCONN) ? "Not in progress" :
"Already running");
- if (e == ENOTCONN)
- return 2;
+ if (errno == ENOTCONN)
+ ret = 2;
else
- return 1;
+ ret = 1;
} else {
error("error during balancing '%s': %s\n"
"There may be more info in syslog - try dmesg | tail",
- path, strerror(e));
- return 1;
+ path, strerror(errno));
+ ret = 1;
}
} else {
printf("Done, had to relocate %llu out of %llu chunks\n",
@@ -771,7 +804,8 @@ static int cmd_balance_resume(int argc, char **argv)
(unsigned long long)args.stat.considered);
}
- return 0;
+ close_file_or_dir(fd, dirstream);
+ return ret;
}
static const char * const cmd_balance_status_usage[] = {
@@ -796,9 +830,7 @@ static int cmd_balance_status(int argc, char **argv)
int fd;
int verbose = 0;
int ret;
- int e;
- optind = 1;
while (1) {
int opt;
static const struct option longopts[] = {
@@ -829,16 +861,15 @@ static int cmd_balance_status(int argc, char **argv)
return 2;
ret = ioctl(fd, BTRFS_IOC_BALANCE_PROGRESS, &args);
- e = errno;
- close_file_or_dir(fd, dirstream);
-
if (ret < 0) {
- if (e == ENOTCONN) {
+ if (errno == ENOTCONN) {
printf("No balance found on '%s'\n", path);
- return 0;
+ ret = 0;
+ goto out;
}
- error("balance status on '%s' failed: %s", path, strerror(e));
- return 2;
+ error("balance status on '%s' failed: %s", path, strerror(errno));
+ ret = 2;
+ goto out;
}
if (args.state & BTRFS_BALANCE_STATE_RUNNING) {
@@ -862,7 +893,10 @@ static int cmd_balance_status(int argc, char **argv)
if (verbose)
dump_ioctl_balance_args(&args);
- return 1;
+ ret = 1;
+out:
+ close_file_or_dir(fd, dirstream);
+ return ret;
}
static int cmd_balance_full(int argc, char **argv)
diff --git a/cmds-check.c b/cmds-check.c
index 9927fce6..fbeb3a4a 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -67,7 +67,6 @@ static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
-static int repair = 0;
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
@@ -76,7 +75,7 @@ static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
struct extent_backref {
- struct list_head list;
+ struct rb_node node;
unsigned int is_data:1;
unsigned int found_extent_tree:1;
unsigned int full_backref:1;
@@ -84,6 +83,11 @@ struct extent_backref {
unsigned int broken:1;
};
+/* Recover the extent_backref that embeds @node as its rb-tree link. */
+static inline struct extent_backref *
+rb_node_to_extent_backref(struct rb_node *node)
+{
+	struct extent_backref *owner;
+
+	owner = rb_entry(node, struct extent_backref, node);
+	return owner;
+}
+
struct data_backref {
struct extent_backref node;
union {
@@ -99,6 +103,61 @@ struct data_backref {
u32 found_ref;
};
+/*
+ * Convert a generic extent_backref into the data_backref that contains it
+ * (as member 'node').  No is_data check is performed here.
+ */
+static inline struct data_backref *
+to_data_backref(struct extent_backref *back)
+{
+	struct data_backref *dback;
+
+	dback = container_of(back, struct data_backref, node);
+	return dback;
+}
+
+/*
+ * Three-way comparison of two data backrefs, given by their rb-tree links.
+ *
+ * Keys are compared in this order: parent/root, then (unless the first
+ * backref is a full backref) owner, offset and bytes.  disk_bytenr and
+ * found_ref take part only when both sides have a non-zero found_ref.
+ *
+ * Returns 1, -1 or 0 when the first backref sorts after, before or equal
+ * to the second one.
+ */
+static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+	struct data_backref *lhs = to_data_backref(ext1);
+	struct data_backref *rhs = to_data_backref(ext2);
+
+	WARN_ON(!ext1->is_data);
+	WARN_ON(!ext2->is_data);
+
+	/* parent and root are a union, so this covers both */
+	if (lhs->parent != rhs->parent)
+		return lhs->parent > rhs->parent ? 1 : -1;
+
+	/* This is a full backref and the parents match. */
+	if (lhs->node.full_backref)
+		return 0;
+
+	if (lhs->owner != rhs->owner)
+		return lhs->owner > rhs->owner ? 1 : -1;
+
+	if (lhs->offset != rhs->offset)
+		return lhs->offset > rhs->offset ? 1 : -1;
+
+	if (lhs->bytes != rhs->bytes)
+		return lhs->bytes > rhs->bytes ? 1 : -1;
+
+	/* The remaining keys only count once both sides saw a real ref. */
+	if (!lhs->found_ref || !rhs->found_ref)
+		return 0;
+
+	if (lhs->disk_bytenr != rhs->disk_bytenr)
+		return lhs->disk_bytenr > rhs->disk_bytenr ? 1 : -1;
+
+	if (lhs->found_ref != rhs->found_ref)
+		return lhs->found_ref > rhs->found_ref ? 1 : -1;
+
+	return 0;
+}
+
/*
* Much like data_backref, just removed the undetermined members
* and change it to use list_head.
@@ -122,12 +181,59 @@ struct tree_backref {
};
};
+/*
+ * Convert a generic extent_backref into the tree_backref that contains it
+ * (as member 'node').  No is_data check is performed here.
+ */
+static inline struct tree_backref *
+to_tree_backref(struct extent_backref *back)
+{
+	struct tree_backref *tback;
+
+	tback = container_of(back, struct tree_backref, node);
+	return tback;
+}
+
+/*
+ * Three-way comparison of two tree backrefs, given by their rb-tree links.
+ * Only the parent/root value is used as the key.
+ *
+ * Returns 1, -1 or 0 when the first backref sorts after, before or equal
+ * to the second one.
+ */
+static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+	struct tree_backref *lhs = to_tree_backref(ext1);
+	struct tree_backref *rhs = to_tree_backref(ext2);
+
+	WARN_ON(ext1->is_data);
+	WARN_ON(ext2->is_data);
+
+	/* parent and root are a union, so this covers both */
+	if (lhs->parent == rhs->parent)
+		return 0;
+
+	return lhs->parent > rhs->parent ? 1 : -1;
+}
+
+/*
+ * Top-level comparator for the backref rb-tree.
+ *
+ * Backrefs are ordered first by is_data, then by full_backref.  When both
+ * flags agree, the comparison is delegated to the data or tree comparator
+ * according to the first backref's is_data flag.
+ */
+static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+
+	if (ext1->is_data != ext2->is_data)
+		return ext1->is_data > ext2->is_data ? 1 : -1;
+
+	if (ext1->full_backref != ext2->full_backref)
+		return ext1->full_backref > ext2->full_backref ? 1 : -1;
+
+	return ext1->is_data ? compare_data_backref(node1, node2) :
+			       compare_tree_backref(node1, node2);
+}
+
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };
struct extent_record {
struct list_head backrefs;
struct list_head dups;
+ struct rb_root backref_tree;
struct list_head list;
struct cache_extent cache;
struct btrfs_disk_key parent_key;
@@ -152,6 +258,11 @@ struct extent_record {
unsigned int wrong_chunk_type:1;
};
+/* Recover the extent_record that embeds @entry as its 'list' member. */
+static inline struct extent_record *
+to_extent_record(struct list_head *entry)
+{
+	struct extent_record *rec;
+
+	rec = container_of(entry, struct extent_record, list);
+	return rec;
+}
+
struct inode_backref {
struct list_head list;
unsigned int found_dir_item:1;
@@ -166,6 +277,11 @@ struct inode_backref {
char name[0];
};
+/* Recover the inode_backref that embeds @entry as its 'list' member. */
+static inline struct inode_backref *
+to_inode_backref(struct list_head *entry)
+{
+	struct inode_backref *backref;
+
+	backref = list_entry(entry, struct inode_backref, list);
+	return backref;
+}
+
struct root_item_record {
struct list_head list;
u64 objectid;
@@ -256,6 +372,11 @@ struct root_backref {
char name[0];
};
+/* Recover the root_backref that embeds @entry as its 'list' member. */
+static inline struct root_backref *
+to_root_backref(struct list_head *entry)
+{
+	struct root_backref *backref;
+
+	backref = list_entry(entry, struct root_backref, list);
+	return backref;
+}
+
struct root_record {
struct list_head backrefs;
struct cache_extent cache;
@@ -834,8 +955,7 @@ static void free_inode_rec(struct inode_record *rec)
return;
while (!list_empty(&rec->backrefs)) {
- backref = list_entry(rec->backrefs.next,
- struct inode_backref, list);
+ backref = to_inode_backref(rec->backrefs.next);
list_del(&backref->list);
free(backref);
}
@@ -1979,7 +2099,7 @@ static int check_root_dir(struct inode_record *rec)
goto out;
if (list_empty(&rec->backrefs))
goto out;
- backref = list_entry(rec->backrefs.next, struct inode_backref, list);
+ backref = to_inode_backref(rec->backrefs.next);
if (!backref->found_inode_ref)
goto out;
if (backref->index != 0 || backref->namelen != 2 ||
@@ -3116,8 +3236,7 @@ static void free_root_record(struct cache_extent *cache)
rec = container_of(cache, struct root_record, cache);
while (!list_empty(&rec->backrefs)) {
- backref = list_entry(rec->backrefs.next,
- struct root_backref, list);
+ backref = to_root_backref(rec->backrefs.next);
list_del(&backref->list);
free(backref);
}
@@ -3743,22 +3862,21 @@ out:
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
- struct list_head *cur = rec->backrefs.next;
+ struct rb_node *n;
struct extent_backref *back;
struct tree_backref *tback;
struct data_backref *dback;
u64 found = 0;
int err = 0;
- while(cur != &rec->backrefs) {
- back = list_entry(cur, struct extent_backref, list);
- cur = cur->next;
+ for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
+ back = rb_node_to_extent_backref(n);
if (!back->found_extent_tree) {
err = 1;
if (!print_errs)
goto out;
if (back->is_data) {
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
fprintf(stderr, "Backref %llu %s %llu"
" owner %llu offset %llu num_refs %lu"
" not found in extent tree\n",
@@ -3772,7 +3890,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
(unsigned long long)dback->offset,
(unsigned long)dback->num_refs);
} else {
- tback = (struct tree_backref *)back;
+ tback = to_tree_backref(back);
fprintf(stderr, "Backref %llu parent %llu"
" root %llu not found in extent tree\n",
(unsigned long long)rec->start,
@@ -3784,7 +3902,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
err = 1;
if (!print_errs)
goto out;
- tback = (struct tree_backref *)back;
+ tback = to_tree_backref(back);
fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
(unsigned long long)rec->start,
back->full_backref ? "parent" : "root",
@@ -3793,7 +3911,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
(unsigned long long)tback->root, back);
}
if (back->is_data) {
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
if (dback->found_ref != dback->num_refs) {
err = 1;
if (!print_errs)
@@ -3837,7 +3955,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
if (!back->is_data) {
found += 1;
} else {
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
found += dback->found_ref;
}
}
@@ -3855,17 +3973,16 @@ out:
return err;
}
-static int free_all_extent_backrefs(struct extent_record *rec)
+/*
+ * Callback handed to rb_free_nodes(): releases the extent_backref that
+ * embeds @node.
+ */
+static void __free_one_backref(struct rb_node *node)
{
- struct extent_backref *back;
- struct list_head *cur;
- while (!list_empty(&rec->backrefs)) {
- cur = rec->backrefs.next;
- back = list_entry(cur, struct extent_backref, list);
- list_del(cur);
- free(back);
- }
- return 0;
+ struct extent_backref *back = rb_node_to_extent_backref(node);
+
+ free(back);
+}
+
+/*
+ * Release the backrefs stored in rec->backref_tree by passing the tree and
+ * __free_one_backref() to rb_free_nodes().  Unlike the list-based version
+ * it replaces, this one returns nothing.
+ * NOTE(review): presumably rb_free_nodes() visits every node and leaves the
+ * tree empty; confirm against its definition.
+ */
+static void free_all_extent_backrefs(struct extent_record *rec)
+{
+ rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
@@ -3905,7 +4022,7 @@ static int check_owner_ref(struct btrfs_root *root,
struct extent_record *rec,
struct extent_buffer *buf)
{
- struct extent_backref *node;
+ struct extent_backref *node, *tmp;
struct tree_backref *back;
struct btrfs_root *ref_root;
struct btrfs_key key;
@@ -3915,14 +4032,15 @@ static int check_owner_ref(struct btrfs_root *root,
int found = 0;
int ret;
- list_for_each_entry(node, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(node, tmp,
+ &rec->backref_tree, node) {
if (node->is_data)
continue;
if (!node->found_ref)
continue;
if (node->full_backref)
continue;
- back = (struct tree_backref *)node;
+ back = to_tree_backref(node);
if (btrfs_header_owner(buf) == back->root)
return 0;
}
@@ -3960,18 +4078,16 @@ static int check_owner_ref(struct btrfs_root *root,
static int is_extent_tree_record(struct extent_record *rec)
{
- struct list_head *cur = rec->backrefs.next;
- struct extent_backref *node;
+ struct extent_backref *ref, *tmp;
struct tree_backref *back;
int is_extent = 0;
- while(cur != &rec->backrefs) {
- node = list_entry(cur, struct extent_backref, list);
- cur = cur->next;
- if (node->is_data)
+ rbtree_postorder_for_each_entry_safe(ref, tmp,
+ &rec->backref_tree, node) {
+ if (ref->is_data)
return 0;
- back = (struct tree_backref *)node;
- if (node->full_backref)
+ back = to_tree_backref(ref);
+ if (ref->full_backref)
return 0;
if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
is_extent = 1;
@@ -4345,32 +4461,31 @@ static int check_block(struct btrfs_root *root,
return ret;
}
+
+/*
+ * Find the tree backref of @rec that matches @parent or @root.
+ *
+ * A temporary key ('match') is built on the stack and looked up in
+ * rec->backref_tree with compare_extent_backref().
+ *
+ * Returns the matching tree_backref, or NULL when rb_search() finds nothing.
+ */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
u64 parent, u64 root)
{
- struct list_head *cur = rec->backrefs.next;
- struct extent_backref *node;
- struct tree_backref *back;
+ struct rb_node *node;
+ struct tree_backref *back = NULL;
+ struct tree_backref match = {
+ .node = {
+ .is_data = 0,
+ },
+ };
- while(cur != &rec->backrefs) {
- node = list_entry(cur, struct extent_backref, list);
- cur = cur->next;
- if (node->is_data)
- continue;
- back = (struct tree_backref *)node;
- if (parent > 0) {
- if (!node->full_backref)
- continue;
- if (parent == back->parent)
- return back;
- } else {
- if (node->full_backref)
- continue;
- if (back->root == root)
- return back;
- }
+ /*
+ * A non-zero parent selects a full backref keyed by parent; otherwise
+ * the key is a non-full backref keyed by root.
+ */
+ if (parent) {
+ match.parent = parent;
+ match.node.full_backref = 1;
+ } else {
+ match.root = root;
}
- return NULL;
+
+ /*
+ * NOTE(review): compare_extent_backref() takes two rb_node pointers and
+ * is cast to rb_compare_keys here; confirm rb_search() hands the key
+ * pointer to the comparator unchanged.
+ */
+ node = rb_search(&rec->backref_tree, &match.node.node,
+ (rb_compare_keys)compare_extent_backref, NULL);
+ if (node)
+ back = to_tree_backref(rb_node_to_extent_backref(node));
+
+ return back;
}
static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
@@ -4388,7 +4503,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
ref->root = root;
ref->node.full_backref = 0;
}
- list_add_tail(&ref->node.list, &rec->backrefs);
+ rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
return ref;
}
@@ -4399,35 +4514,32 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
int found_ref,
u64 disk_bytenr, u64 bytes)
{
- struct list_head *cur = rec->backrefs.next;
- struct extent_backref *node;
- struct data_backref *back;
+ struct rb_node *node;
+ struct data_backref *back = NULL;
+ struct data_backref match = {
+ .node = {
+ .is_data = 1,
+ },
+ .owner = owner,
+ .offset = offset,
+ .bytes = bytes,
+ .found_ref = found_ref,
+ .disk_bytenr = disk_bytenr,
+ };
- while(cur != &rec->backrefs) {
- node = list_entry(cur, struct extent_backref, list);
- cur = cur->next;
- if (!node->is_data)
- continue;
- back = (struct data_backref *)node;
- if (parent > 0) {
- if (!node->full_backref)
- continue;
- if (parent == back->parent)
- return back;
- } else {
- if (node->full_backref)
- continue;
- if (back->root == root && back->owner == owner &&
- back->offset == offset) {
- if (found_ref && node->found_ref &&
- (back->bytes != bytes ||
- back->disk_bytenr != disk_bytenr))
- continue;
- return back;
- }
- }
+ if (parent) {
+ match.parent = parent;
+ match.node.full_backref = 1;
+ } else {
+ match.root = root;
}
- return NULL;
+
+ node = rb_search(&rec->backref_tree, &match.node.node,
+ (rb_compare_keys)compare_extent_backref, NULL);
+ if (node)
+ back = to_data_backref(rb_node_to_extent_backref(node));
+
+ return back;
}
static struct data_backref *alloc_data_backref(struct extent_record *rec,
@@ -4456,7 +4568,7 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec,
ref->bytes = max_size;
ref->found_ref = 0;
ref->num_refs = 0;
- list_add_tail(&ref->node.list, &rec->backrefs);
+ rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
if (max_size > rec->max_size)
rec->max_size = max_size;
return ref;
@@ -4489,13 +4601,12 @@ static void check_extent_type(struct extent_record *rec)
* Check SYSTEM extent, as it's also marked as metadata, we can only
* make sure it's a SYSTEM extent by its backref
*/
- if (!list_empty(&rec->backrefs)) {
+ if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
struct extent_backref *node;
struct tree_backref *tback;
u64 bg_type;
- node = list_entry(rec->backrefs.next, struct extent_backref,
- list);
+ node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
if (node->is_data) {
/* tree block shouldn't have data backref */
rec->wrong_chunk_type = 1;
@@ -4545,6 +4656,7 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
INIT_LIST_HEAD(&rec->backrefs);
INIT_LIST_HEAD(&rec->dups);
INIT_LIST_HEAD(&rec->list);
+ rec->backref_tree = RB_ROOT;
memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
rec->cache.start = tmpl->start;
rec->cache.size = tmpl->nr;
@@ -6347,7 +6459,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
back->node.found_extent_tree = 0;
if (!back->node.found_extent_tree && back->node.found_ref) {
- list_del(&back->node.list);
+ rb_erase(&back->node.node, &rec->backref_tree);
free(back);
}
} else {
@@ -6366,7 +6478,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
back->node.found_extent_tree = 0;
}
if (!back->node.found_extent_tree && back->node.found_ref) {
- list_del(&back->node.list);
+ rb_erase(&back->node.node, &rec->backref_tree);
free(back);
}
}
@@ -6507,7 +6619,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
} else {
struct btrfs_disk_key copy_key;;
- tback = (struct tree_backref *)back;
+ tback = to_tree_backref(back);
bi = (struct btrfs_tree_block_info *)(ei + 1);
memset_extent_buffer(leaf, 0, (unsigned long)bi,
sizeof(*bi));
@@ -6536,7 +6648,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
u64 parent;
int i;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
if (back->full_backref)
parent = dback->parent;
else
@@ -6574,7 +6686,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
} else {
u64 parent;
- tback = (struct tree_backref *)back;
+ tback = to_tree_backref(back);
if (back->full_backref)
parent = tback->parent;
else
@@ -6803,7 +6915,7 @@ out:
static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
struct extent_record *rec)
{
- struct extent_backref *back;
+ struct extent_backref *back, *tmp;
struct data_backref *dback;
struct extent_entry *entry, *best = NULL;
LIST_HEAD(entries);
@@ -6819,11 +6931,12 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
if (rec->metadata)
return 0;
- list_for_each_entry(back, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
if (back->full_backref || !back->is_data)
continue;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
/*
* We only pay attention to backrefs that we found a real
@@ -6945,11 +7058,12 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
* Ok great we all agreed on an extent record, let's go find the real
* references and fix up the ones that don't match.
*/
- list_for_each_entry(back, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
if (back->full_backref || !back->is_data)
continue;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
/*
* Still ignoring backrefs that don't have a real ref attached
@@ -7013,7 +7127,7 @@ static int process_duplicates(struct btrfs_root *root,
*/
remove_cache_extent(extent_cache, &rec->cache);
- good = list_entry(rec->dups.next, struct extent_record, list);
+ good = to_extent_record(rec->dups.next);
list_del_init(&good->list);
INIT_LIST_HEAD(&good->backrefs);
INIT_LIST_HEAD(&good->dups);
@@ -7147,7 +7261,7 @@ static int delete_duplicate_records(struct btrfs_root *root,
ret = err;
out:
while (!list_empty(&delete_list)) {
- tmp = list_entry(delete_list.next, struct extent_record, list);
+ tmp = to_extent_record(delete_list.next);
list_del_init(&tmp->list);
if (tmp == rec)
continue;
@@ -7155,7 +7269,7 @@ out:
}
while (!list_empty(&rec->dups)) {
- tmp = list_entry(rec->dups.next, struct extent_record, list);
+ tmp = to_extent_record(rec->dups.next);
list_del_init(&tmp->list);
free(tmp);
}
@@ -7174,7 +7288,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
struct extent_record *rec)
{
struct btrfs_root *root;
- struct extent_backref *back;
+ struct extent_backref *back, *tmp;
struct data_backref *dback;
struct cache_extent *cache;
struct btrfs_file_extent_item *fi;
@@ -7182,12 +7296,13 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
u64 bytenr, bytes;
int ret;
- list_for_each_entry(back, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
/* Don't care about full backrefs (poor unloved backrefs) */
if (back->full_backref || !back->is_data)
continue;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
/* We found this one, we don't need to do a lookup */
if (dback->found_ref)
@@ -7270,7 +7385,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
{
struct btrfs_key key;
struct btrfs_root *dest_root;
- struct extent_backref *back;
+ struct extent_backref *back, *tmp;
struct data_backref *dback;
struct orphan_data_extent *orphan;
struct btrfs_path *path;
@@ -7282,11 +7397,12 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- list_for_each_entry(back, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
if (back->full_backref || !back->is_data ||
!back->found_extent_tree)
continue;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
if (dback->found_ref)
continue;
key.objectid = dback->root;
@@ -7349,9 +7465,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
struct btrfs_trans_handle *trans = NULL;
int ret;
struct btrfs_path *path;
- struct list_head *cur = rec->backrefs.next;
struct cache_extent *cache;
- struct extent_backref *back;
+ struct extent_backref *back, *tmp;
int allocated = 0;
u64 flags = 0;
@@ -7402,10 +7517,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
}
/* step three, recreate all the refs we did find */
- while(cur != &rec->backrefs) {
- back = list_entry(cur, struct extent_backref, list);
- cur = cur->next;
-
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
/*
* if we didn't find any references, don't create a
* new extent record
@@ -7660,8 +7773,7 @@ static int check_extent_refs(struct btrfs_root *root,
* belong to a different extent item and not the weird duplicate one.
*/
while (repair && !list_empty(&duplicate_extents)) {
- rec = list_entry(duplicate_extents.next, struct extent_record,
- list);
+ rec = to_extent_record(duplicate_extents.next);
list_del_init(&rec->list);
/* Sometimes we can find a backref before we find an actual
@@ -9543,6 +9655,7 @@ int cmd_check(int argc, char **argv)
int init_csum_tree = 0;
int readonly = 0;
int qgroup_report = 0;
+ int qgroups_repaired = 0;
enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
while(1) {
@@ -9698,7 +9811,7 @@ int cmd_check(int argc, char **argv)
uuidbuf);
ret = qgroup_verify_all(info);
if (ret == 0)
- ret = report_qgroups(1);
+ report_qgroups(1);
goto close_out;
}
if (subvolid) {
@@ -9852,6 +9965,10 @@ int cmd_check(int argc, char **argv)
err = qgroup_verify_all(info);
if (err)
goto out;
+ report_qgroups(0);
+ err = repair_qgroups(info, &qgroups_repaired);
+ if (err)
+ goto out;
}
if (!list_empty(&root->fs_info->recow_ebs)) {
@@ -9860,10 +9977,9 @@ int cmd_check(int argc, char **argv)
}
out:
/* Don't override original ret */
- if (ret)
- report_qgroups(0);
- else
- ret = report_qgroups(0);
+ if (!ret && qgroups_repaired)
+ ret = qgroups_repaired;
+
if (found_old_backref) { /*
* there was a disk format change when mixed
* backref was in testing tree. The old format
diff --git a/cmds-device.c b/cmds-device.c
index 1c886ad5..a939c56f 100644
--- a/cmds-device.c
+++ b/cmds-device.c
@@ -107,8 +107,9 @@ static int cmd_device_add(int argc, char **argv)
continue;
}
- res = btrfs_prepare_device(devfd, argv[i], 1, &dev_block_count,
- 0, discard);
+ res = btrfs_prepare_device(devfd, argv[i], &dev_block_count, 0,
+ PREP_DEVICE_ZERO_END | PREP_DEVICE_VERBOSE |
+ (discard ? PREP_DEVICE_DISCARD : 0));
close(devfd);
if (res) {
ret++;
@@ -257,7 +258,6 @@ static int cmd_device_scan(int argc, char **argv)
int all = 0;
int ret = 0;
- optind = 1;
while (1) {
int c;
static const struct option long_options[] = {
@@ -392,7 +392,6 @@ static int cmd_device_stats(int argc, char **argv)
__u64 flags = 0;
DIR *dirstream = NULL;
- optind = 1;
while ((c = getopt(argc, argv, "z")) != -1) {
switch (c) {
case 'z':
diff --git a/cmds-fi-du.c b/cmds-fi-du.c
index 12855a51..ec8e550f 100644
--- a/cmds-fi-du.c
+++ b/cmds-fi-du.c
@@ -389,8 +389,14 @@ static int du_walk_dir(struct du_dir_ctxt *ctxt, struct rb_root *shared_extents)
dirfd(dirstream),
shared_extents, &tot, &shr,
0);
- if (ret)
+ if (ret == -ENOTTY) {
+ continue;
+ } else if (ret) {
+ fprintf(stderr,
+ "failed to walk dir/file: %s :%s\n",
+ entry->d_name, strerror(-ret));
break;
+ }
ctxt->bytes_total += tot;
ctxt->bytes_shared += shr;
@@ -537,7 +543,6 @@ int cmd_filesystem_du(int argc, char **argv)
unit_mode = get_unit_mode_from_arg(&argc, argv, 1);
- optind = 1;
while (1) {
static const struct option long_options[] = {
{ "summarize", no_argument, NULL, 's'},
diff --git a/cmds-fi-usage.c b/cmds-fi-usage.c
index b26ece1d..04d68b18 100644
--- a/cmds-fi-usage.c
+++ b/cmds-fi-usage.c
@@ -922,7 +922,6 @@ int cmd_filesystem_usage(int argc, char **argv)
unit_mode = get_unit_mode_from_arg(&argc, argv, 1);
- optind = 1;
while (1) {
int c;
diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 9392a30b..00e4bfe4 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -968,7 +968,7 @@ static const char * const cmd_filesystem_defrag_usage[] = {
"-f flush data to disk immediately after defragmenting",
"-s start defragment only from byte onward",
"-l len defragment only up to len bytes",
- "-t size target extent size hint",
+ "-t size target extent size hint (default: 32M)",
NULL
};
@@ -1029,7 +1029,7 @@ static int cmd_filesystem_defrag(int argc, char **argv)
int flush = 0;
u64 start = 0;
u64 len = (u64)-1;
- u64 thresh = 0;
+ u64 thresh;
int i;
int recursive = 0;
int ret = 0;
@@ -1037,11 +1037,17 @@ static int cmd_filesystem_defrag(int argc, char **argv)
int compress_type = BTRFS_COMPRESS_NONE;
DIR *dirstream;
+ /*
+ * Kernel has a different default (256K) that is supposed to be safe,
+ * but it does not defragment very well. The 32M will likely lead to
+ * better results and is independent of the kernel default.
+ */
+ thresh = 32 * 1024 * 1024;
+
defrag_global_errors = 0;
defrag_global_verbose = 0;
defrag_global_errors = 0;
defrag_global_fancy_ioctl = 0;
- optind = 1;
while(1) {
int c = getopt(argc, argv, "vrc::fs:l:t:");
if (c < 0)
diff --git a/cmds-inspect.c b/cmds-inspect.c
index dd7b9dd2..4b7cea07 100644
--- a/cmds-inspect.c
+++ b/cmds-inspect.c
@@ -96,7 +96,6 @@ static int cmd_inspect_inode_resolve(int argc, char **argv)
int ret;
DIR *dirstream = NULL;
- optind = 1;
while (1) {
int c = getopt(argc, argv, "v");
if (c < 0)
@@ -151,7 +150,6 @@ static int cmd_inspect_logical_resolve(int argc, char **argv)
char *path_ptr;
DIR *dirstream = NULL;
- optind = 1;
while (1) {
int c = getopt(argc, argv, "Pvs:");
if (c < 0)
@@ -323,7 +321,7 @@ static int cmd_inspect_rootid(int argc, char **argv)
ret = lookup_ino_rootid(fd, &rootid);
if (ret) {
- error("rootid failed with ret=%d", ret);
+ error("failed to lookup root id: %s", strerror(-ret));
goto out;
}
diff --git a/cmds-qgroup.c b/cmds-qgroup.c
index 14418d45..a3bc939a 100644
--- a/cmds-qgroup.c
+++ b/cmds-qgroup.c
@@ -309,7 +309,6 @@ static int cmd_qgroup_show(int argc, char **argv)
unit_mode = get_unit_mode_from_arg(&argc, argv, 0);
- optind = 1;
while (1) {
int c;
static const struct option long_options[] = {
@@ -408,7 +407,6 @@ static int cmd_qgroup_limit(int argc, char **argv)
int exclusive = 0;
DIR *dirstream = NULL;
- optind = 1;
while (1) {
int c = getopt(argc, argv, "ce");
if (c < 0)
diff --git a/cmds-quota.c b/cmds-quota.c
index 568482fc..75c032b1 100644
--- a/cmds-quota.c
+++ b/cmds-quota.c
@@ -120,7 +120,6 @@ static int cmd_quota_rescan(int argc, char **argv)
DIR *dirstream = NULL;
int wait_for_completion = 0;
- optind = 1;
while (1) {
int c = getopt(argc, argv, "sw");
if (c < 0)
diff --git a/cmds-replace.c b/cmds-replace.c
index 6036e2f9..d1bf057e 100644
--- a/cmds-replace.c
+++ b/cmds-replace.c
@@ -254,8 +254,8 @@ static int cmd_replace_start(int argc, char **argv)
}
strncpy((char *)start_args.start.tgtdev_name, dstdev,
BTRFS_DEVICE_PATH_NAME_MAX);
- ret = btrfs_prepare_device(fddstdev, dstdev, 1, &dstdev_block_count, 0,
- 0);
+ ret = btrfs_prepare_device(fddstdev, dstdev, &dstdev_block_count, 0,
+ PREP_DEVICE_ZERO_END | PREP_DEVICE_VERBOSE);
if (ret)
goto leave_with_error;
diff --git a/cmds-scrub.c b/cmds-scrub.c
index 4a1d4751..c03bc5fb 100644
--- a/cmds-scrub.c
+++ b/cmds-scrub.c
@@ -1141,7 +1141,6 @@ static int scrub_start(int argc, char **argv, int resume)
int force = 0;
int nothing_to_resume = 0;
- optind = 1;
while ((c = getopt(argc, argv, "BdqrRc:n:f")) != -1) {
switch (c) {
case 'B':
@@ -1675,7 +1674,6 @@ static int cmd_scrub_status(int argc, char **argv)
int err = 0;
DIR *dirstream = NULL;
- optind = 1;
while ((c = getopt(argc, argv, "dR")) != -1) {
switch (c) {
case 'd':
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index e3d2cbce..be2d41ed 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -127,7 +127,6 @@ static int cmd_subvol_create(int argc, char **argv)
struct btrfs_qgroup_inherit *inherit = NULL;
DIR *dirstream = NULL;
- optind = 1;
while (1) {
int c = getopt(argc, argv, "c:i:v");
if (c < 0)
@@ -263,7 +262,6 @@ static int cmd_subvol_delete(int argc, char **argv)
int verbose = 0;
int commit_mode = 0;
- optind = 1;
while (1) {
int c;
static const struct option long_options[] = {
@@ -439,7 +437,6 @@ static int cmd_subvol_list(int argc, char **argv)
filter_set = btrfs_list_alloc_filter_set();
comparer_set = btrfs_list_alloc_comparer_set();
- optind = 1;
while(1) {
int c;
static const struct option long_options[] = {
@@ -616,7 +613,6 @@ static int cmd_subvol_snapshot(int argc, char **argv)
struct btrfs_qgroup_inherit *inherit = NULL;
DIR *dirstream1 = NULL, *dirstream2 = NULL;
- optind = 1;
memset(&args, 0, sizeof(args));
while (1) {
int c = getopt(argc, argv, "c:i:r");
@@ -1186,7 +1182,6 @@ static int cmd_subvol_sync(int argc, char **argv)
int id_count;
int sleep_interval = 1;
- optind = 1;
while (1) {
int c = getopt(argc, argv, "s:");
diff --git a/configure b/configure
index ccf63922..b2dd1f9f 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for btrfs-progs v4.6.1.
+# Generated by GNU Autoconf 2.69 for btrfs-progs v4.7.
#
# Report bugs to <linux-btrfs@vger.kernel.org>.
#
@@ -580,8 +580,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='btrfs-progs'
PACKAGE_TARNAME='btrfs-progs'
-PACKAGE_VERSION='v4.6.1'
-PACKAGE_STRING='btrfs-progs v4.6.1'
+PACKAGE_VERSION='v4.7'
+PACKAGE_STRING='btrfs-progs v4.7'
PACKAGE_BUGREPORT='linux-btrfs@vger.kernel.org'
PACKAGE_URL='http://btrfs.wiki.kernel.org'
@@ -1288,7 +1288,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures btrfs-progs v4.6.1 to adapt to many kinds of systems.
+\`configure' configures btrfs-progs v4.7 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1353,7 +1353,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of btrfs-progs v4.6.1:";;
+ short | recursive ) echo "Configuration of btrfs-progs v4.7:";;
esac
cat <<\_ACEOF
@@ -1462,7 +1462,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-btrfs-progs configure v4.6.1
+btrfs-progs configure v4.7
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1831,7 +1831,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by btrfs-progs $as_me v4.6.1, which was
+It was created by btrfs-progs $as_me v4.7, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -6634,7 +6634,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by btrfs-progs $as_me v4.6.1, which was
+This file was extended by btrfs-progs $as_me v4.7, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6697,7 +6697,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-btrfs-progs config.status v4.6.1
+btrfs-progs config.status v4.7
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/ctree.h b/ctree.h
index 9e3626f1..5cb5533a 100644
--- a/ctree.h
+++ b/ctree.h
@@ -898,7 +898,7 @@ struct btrfs_qgroup_status_item {
__le64 version;
__le64 generation;
__le64 flags;
- __le64 scan; /* progress during scanning */
+ __le64 rescan; /* progress during scanning */
} __attribute__ ((__packed__));
struct btrfs_block_group_item {
@@ -2125,8 +2125,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
generation, 64);
BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
flags, 64);
-BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
- scan, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
+ rescan, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_version,
struct btrfs_qgroup_status_item, version, 64);
@@ -2134,8 +2134,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_generation,
struct btrfs_qgroup_status_item, generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_flags,
struct btrfs_qgroup_status_item, flags, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_scan,
- struct btrfs_qgroup_status_item, scan, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_rescan,
+ struct btrfs_qgroup_status_item, rescan, 64);
/* btrfs_qgroup_info_item */
BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
diff --git a/disk-io.c b/disk-io.c
index fbce506a..3647ecca 100644
--- a/disk-io.c
+++ b/disk-io.c
@@ -125,7 +125,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
void btrfs_csum_final(u32 crc, char *result)
{
- *(__le32 *)result = ~cpu_to_le32(crc);
+ put_unaligned_le32(~crc, result);
}
static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
diff --git a/extent-tree.c b/extent-tree.c
index 5ca53fa9..a58da237 100644
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -3985,10 +3985,11 @@ static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans,
u64 extent_offset;
u64 num_bytes = *ret_num_bytes;
- num_bytes = min_t(u64, num_bytes, BTRFS_MAX_EXTENT_SIZE);
/*
* All supported file system should not use its 0 extent.
* As it's for hole
+ *
+ * And hole extent has no size limit, no need to loop.
*/
if (disk_bytenr == 0) {
ret = btrfs_insert_file_extent(trans, root, objectid,
@@ -3996,6 +3997,7 @@ static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans,
num_bytes, num_bytes);
return ret;
}
+ num_bytes = min_t(u64, num_bytes, BTRFS_MAX_EXTENT_SIZE);
path = btrfs_alloc_path();
if (!path)
diff --git a/extent_io.c b/extent_io.c
index c99d3627..7b9eb8ef 100644
--- a/extent_io.c
+++ b/extent_io.c
@@ -540,10 +540,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
struct extent_buffer *eb;
eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
- if (!eb) {
- BUG();
+ if (!eb)
return NULL;
- }
eb->start = bytenr;
eb->len = blocksize;
@@ -564,7 +562,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
struct extent_buffer *new;
new = __alloc_extent_buffer(NULL, src->start, src->len);
- if (new == NULL)
+ if (!new)
return NULL;
copy_extent_buffer(new, src, 0, 0, src->len);
@@ -774,7 +772,11 @@ int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
this_len = min(this_len, (u64)info->tree_root->nodesize);
eb = malloc(sizeof(struct extent_buffer) + this_len);
- BUG_ON(!eb);
+ if (!eb) {
+ fprintf(stderr, "cannot allocate memory for eb\n");
+ ret = -ENOMEM;
+ goto out;
+ }
memset(eb, 0, sizeof(struct extent_buffer) + this_len);
eb->start = offset;
@@ -825,6 +827,10 @@ int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
multi = NULL;
}
return 0;
+
+out:
+ kfree(raid_map);
+ return ret;
}
int set_extent_buffer_dirty(struct extent_buffer *eb)
@@ -889,5 +895,5 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
unsigned long nr)
{
- return test_bit(nr, (unsigned long *)(eb->data + start));
+ return le_test_bit(nr, (u8 *)eb->data + start);
}
diff --git a/extent_io.h b/extent_io.h
index a9a73535..d9594c32 100644
--- a/extent_io.h
+++ b/extent_io.h
@@ -29,25 +29,44 @@
#include <btrfs/list.h>
#endif /* BTRFS_FLAT_INCLUDES */
-#define EXTENT_DIRTY 1
-#define EXTENT_WRITEBACK (1 << 1)
-#define EXTENT_UPTODATE (1 << 2)
-#define EXTENT_LOCKED (1 << 3)
-#define EXTENT_NEW (1 << 4)
-#define EXTENT_DELALLOC (1 << 5)
-#define EXTENT_DEFRAG (1 << 6)
-#define EXTENT_DEFRAG_DONE (1 << 7)
-#define EXTENT_BUFFER_FILLED (1 << 8)
-#define EXTENT_CSUM (1 << 9)
-#define EXTENT_BAD_TRANSID (1 << 10)
-#define EXTENT_BUFFER_DUMMY (1 << 11)
+#define EXTENT_DIRTY (1U << 0)
+#define EXTENT_WRITEBACK (1U << 1)
+#define EXTENT_UPTODATE (1U << 2)
+#define EXTENT_LOCKED (1U << 3)
+#define EXTENT_NEW (1U << 4)
+#define EXTENT_DELALLOC (1U << 5)
+#define EXTENT_DEFRAG (1U << 6)
+#define EXTENT_DEFRAG_DONE (1U << 7)
+#define EXTENT_BUFFER_FILLED (1U << 8)
+#define EXTENT_CSUM (1U << 9)
+#define EXTENT_BAD_TRANSID (1U << 10)
+#define EXTENT_BUFFER_DUMMY (1U << 11)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
-#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
-#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
-#define BLOCK_GROUP_SYSTEM EXTENT_NEW
+#define BLOCK_GROUP_DATA (1U << 1)
+#define BLOCK_GROUP_METADATA (1U << 2)
+#define BLOCK_GROUP_SYSTEM (1U << 4)
-#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
+#define BLOCK_GROUP_DIRTY (1U)
+
+/*
+ * The extent buffer bitmap operations are done with byte granularity instead of
+ * word granularity for two reasons:
+ * 1. The bitmaps must be little-endian on disk.
+ * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
+ * single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+ ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+ (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+static inline int le_test_bit(int nr, const u8 *addr)
+{
+ return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
+}
struct btrfs_fs_info;
@@ -76,7 +95,7 @@ struct extent_buffer {
struct list_head lru;
struct list_head recow;
int refs;
- int flags;
+ u32 flags;
int fd;
char data[];
};
diff --git a/ioctl.h b/ioctl.h
index 5f18bcb3..620dd3d2 100644
--- a/ioctl.h
+++ b/ioctl.h
@@ -685,7 +685,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
- struct btrfs_ioctl_ino_path_args)
+ struct btrfs_ioctl_logical_ino_args)
#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
struct btrfs_ioctl_received_subvol_args)
#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
diff --git a/kerncompat.h b/kerncompat.h
index 378f0552..8b9a84c5 100644
--- a/kerncompat.h
+++ b/kerncompat.h
@@ -55,7 +55,8 @@
#define gfp_t int
#define get_cpu_var(p) (p)
#define __get_cpu_var(p) (p)
-#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
+#define BITS_PER_BYTE 8
+#define BITS_PER_LONG (__SIZEOF_LONG__ * BITS_PER_BYTE)
#define __GFP_BITS_SHIFT 20
#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1))
#define GFP_KERNEL 0
@@ -336,15 +337,19 @@ struct __una_u64 { __le64 x; } __attribute__((__packed__));
#define get_unaligned_le8(p) (*((u8 *)(p)))
#define get_unaligned_8(p) (*((u8 *)(p)))
#define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val))
+#define put_unaligned_8(val,p) ((*((u8 *)(p))) = (val))
#define get_unaligned_le16(p) le16_to_cpu(((const struct __una_u16 *)(p))->x)
#define get_unaligned_16(p) (((const struct __una_u16 *)(p))->x)
#define put_unaligned_le16(val,p) (((struct __una_u16 *)(p))->x = cpu_to_le16(val))
+#define put_unaligned_16(val,p) (((struct __una_u16 *)(p))->x = (val))
#define get_unaligned_le32(p) le32_to_cpu(((const struct __una_u32 *)(p))->x)
#define get_unaligned_32(p) (((const struct __una_u32 *)(p))->x)
#define put_unaligned_le32(val,p) (((struct __una_u32 *)(p))->x = cpu_to_le32(val))
+#define put_unaligned_32(val,p) (((struct __una_u32 *)(p))->x = (val))
#define get_unaligned_le64(p) le64_to_cpu(((const struct __una_u64 *)(p))->x)
#define get_unaligned_64(p) (((const struct __una_u64 *)(p))->x)
#define put_unaligned_le64(val,p) (((struct __una_u64 *)(p))->x = cpu_to_le64(val))
+#define put_unaligned_64(val,p) (((struct __una_u64 *)(p))->x = (val))
#ifndef true
#define true 1
diff --git a/mkfs.c b/mkfs.c
index 697bdc2e..f30f0574 100644
--- a/mkfs.c
+++ b/mkfs.c
@@ -1318,10 +1318,27 @@ static int cleanup_temp_chunks(struct btrfs_fs_info *fs_info,
if (is_temp_block_group(path->nodes[0], bgi,
data_profile, meta_profile,
sys_profile)) {
+ u64 flags = btrfs_disk_block_group_flags(path->nodes[0],
+ bgi);
+
ret = btrfs_free_block_group(trans, fs_info,
found_key.objectid, found_key.offset);
if (ret < 0)
goto out;
+
+ if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
+ BTRFS_BLOCK_GROUP_DATA)
+ alloc->data -= found_key.offset;
+ else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
+ BTRFS_BLOCK_GROUP_METADATA)
+ alloc->metadata -= found_key.offset;
+ else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
+ BTRFS_BLOCK_GROUP_SYSTEM)
+ alloc->system -= found_key.offset;
+ else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
+ (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_DATA))
+ alloc->mixed -= found_key.offset;
}
btrfs_release_path(path);
key.objectid = found_key.objectid + found_key.offset;
@@ -1619,8 +1636,11 @@ int main(int argc, char **argv)
strerror(errno));
exit(1);
}
- ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count,
- block_count, discard);
+ ret = btrfs_prepare_device(fd, file, &dev_block_count,
+ block_count,
+ (zero_end ? PREP_DEVICE_ZERO_END : 0) |
+ (discard ? PREP_DEVICE_DISCARD : 0) |
+ (verbose ? PREP_DEVICE_VERBOSE : 0));
if (ret) {
close(fd);
exit(1);
@@ -1750,8 +1770,11 @@ int main(int argc, char **argv)
close(fd);
continue;
}
- ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count,
- block_count, discard);
+ ret = btrfs_prepare_device(fd, file, &dev_block_count,
+ block_count,
+ (verbose ? PREP_DEVICE_VERBOSE : 0) |
+ (zero_end ? PREP_DEVICE_ZERO_END : 0) |
+ (discard ? PREP_DEVICE_DISCARD : 0));
if (ret) {
close(fd);
exit(1);
diff --git a/print-tree.c b/print-tree.c
index 746f25be..9f9e11e2 100644
--- a/print-tree.c
+++ b/print-tree.c
@@ -1037,7 +1037,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_qgroup_status_generation(l, qg_status),
flags_str,
(unsigned long long)
- btrfs_qgroup_status_scan(l, qg_status));
+ btrfs_qgroup_status_rescan(l, qg_status));
break;
case BTRFS_QGROUP_RELATION_KEY:
break;
diff --git a/qgroup-verify.c b/qgroup-verify.c
index 6ca95eb8..66eb870a 100644
--- a/qgroup-verify.c
+++ b/qgroup-verify.c
@@ -29,13 +29,16 @@
#include "utils.h"
#include "ulist.h"
#include "rbtree-utils.h"
+#include "transaction.h"
+#include "repair.h"
#include "qgroup-verify.h"
/*#define QGROUP_VERIFY_DEBUG*/
static unsigned long tot_extents_scanned = 0;
-static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive);
+struct qgroup_count;
+static struct qgroup_count *find_count(u64 qgroupid);
struct qgroup_info {
u64 referenced;
@@ -54,6 +57,19 @@ struct qgroup_count {
struct qgroup_info info;
struct rb_node rb_node;
+
+ /* Parents when we are a child group */
+ struct list_head groups;
+
+ /*
+ * Children when we are a parent group (not currently used but
+ * maintained to mirror kernel handling of qgroups)
+ */
+ struct list_head members;
+
+ u64 cur_refcnt;
+
+ struct list_head bad_list;
};
static struct counts_tree {
@@ -63,9 +79,44 @@ static struct counts_tree {
unsigned int qgroup_inconsist:1;
} counts = { .root = RB_ROOT };
+static LIST_HEAD(bad_qgroups);
+
static struct rb_root by_bytenr = RB_ROOT;
/*
+ * Glue structure to represent the relations between qgroups. Mirrored
+ * from kernel.
+ */
+struct btrfs_qgroup_list {
+ struct list_head next_group;
+ struct list_head next_member;
+ struct qgroup_count *group; /* Parent group */
+ struct qgroup_count *member;
+};
+
+/* Allow us to reset ref counts during accounting without zeroing each group. */
+static u64 qgroup_seq = 1ULL;
+
+static inline void update_cur_refcnt(struct qgroup_count *c)
+{
+ if (c->cur_refcnt < qgroup_seq)
+ c->cur_refcnt = qgroup_seq;
+ c->cur_refcnt++;
+}
+
+static inline u64 group_get_cur_refcnt(struct qgroup_count *c)
+{
+ if (c->cur_refcnt < qgroup_seq)
+ return 0;
+ return c->cur_refcnt - qgroup_seq;
+}
+
+static void inc_qgroup_seq(int root_count)
+{
+ qgroup_seq += root_count + 1;
+}
+
+/*
* List of interior tree blocks. We walk this list after loading the
* extent tree to resolve implied refs. For each interior node we'll
* place a shared ref in the ref tree against each child object. This
@@ -267,10 +318,11 @@ FREE_RB_BASED_TREE(ref, free_ref_node);
/*
* Resolves all the possible roots for the ref at parent.
*/
-static void find_parent_roots(struct ulist *roots, u64 parent)
+static int find_parent_roots(struct ulist *roots, u64 parent)
{
struct ref *ref;
struct rb_node *node;
+ int ret;
/*
* Search the rbtree for the first ref with bytenr == parent.
@@ -296,15 +348,134 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
}
do {
- if (ref->root)
- ulist_add(roots, ref->root, 0, 0);
- else
- find_parent_roots(roots, ref->parent);
+ if (ref->root) {
+ if (is_fstree(ref->root)) {
+ ret = ulist_add(roots, ref->root, 0, 0);
+ if (ret < 0)
+ goto out;
+ }
+ } else {
+ ret = find_parent_roots(roots, ref->parent);
+ if (ret < 0)
+ goto out;
+ }
node = rb_next(node);
if (node)
ref = rb_entry(node, struct ref, bytenr_node);
} while (node && ref->bytenr == parent);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes)
+{
+ int ret;
+ u64 id, nr_roots, nr_refs;
+ struct qgroup_count *count;
+ struct ulist *counts = ulist_alloc(0);
+ struct ulist *tmp = ulist_alloc(0);
+ struct ulist_iterator uiter;
+ struct ulist_iterator tmp_uiter;
+ struct ulist_node *unode;
+ struct ulist_node *tmp_unode;
+ struct btrfs_qgroup_list *glist;
+
+ if (!counts || !tmp) {
+ ulist_free(counts);
+ ulist_free(tmp);
+ return ENOMEM;
+ }
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ BUG_ON(unode->val == 0ULL);
+
+ /*
+ * For each root, find their corresponding tracking group and
+ * add it to our qgroups list.
+ */
+ count = find_count(unode->val);
+ if (!count)
+ continue;
+
+ BUG_ON(!is_fstree(unode->val));
+ ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Now we look for parents (and parents of those...). Use a tmp
+ * ulist here to avoid re-walking (and re-incrementing) our
+ * already added items on every loop iteration.
+ */
+ ulist_reinit(tmp);
+ ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0);
+ if (ret < 0)
+ goto out;
+
+ ULIST_ITER_INIT(&tmp_uiter);
+ while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+ /* Bump the refcount on a node every time we see it. */
+ count = u64_to_ptr(tmp_unode->aux);
+ update_cur_refcnt(count);
+
+ list_for_each_entry(glist, &count->groups, next_group) {
+ struct qgroup_count *parent;
+ parent = glist->group;
+ id = parent->qgroupid;
+
+ BUG_ON(!count);
+
+ ret = ulist_add(counts, id, ptr_to_u64(parent),
+ 0);
+ if (ret < 0)
+ goto out;
+ ret = ulist_add(tmp, id, ptr_to_u64(parent),
+ 0);
+ if (ret < 0)
+ goto out;
+ }
+ }
+ }
+
+ /*
+ * Now that we have gathered up and counted all the groups, we
+ * can add bytes for this ref.
+ */
+ nr_roots = roots->nnodes;
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(counts, &uiter))) {
+ count = u64_to_ptr(unode->aux);
+
+ nr_refs = group_get_cur_refcnt(count);
+ if (nr_refs) {
+ count->info.referenced += num_bytes;
+ count->info.referenced_compressed += num_bytes;
+
+ if (nr_refs == nr_roots) {
+ count->info.exclusive += num_bytes;
+ count->info.exclusive_compressed += num_bytes;
+ }
+ }
+#ifdef QGROUP_VERIFY_DEBUG
+ printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu,"
+ " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes,
+ btrfs_qgroup_level(count->qgroupid),
+ btrfs_qgroup_subvid(count->qgroupid),
+ count->info.referenced, count->info.exclusive, nr_refs,
+ nr_roots);
+#endif
+ }
+
+ inc_qgroup_seq(roots->nnodes);
+ ret = 0;
+out:
+ ulist_free(counts);
+ ulist_free(tmp);
+ return ret;
}
static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
@@ -318,18 +489,16 @@ static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
* - resolve all possible roots for shared refs, insert each
* of those into ref_roots ulist (this is a recursive process)
*
- * - Walk ref_roots ulist, adding extent bytes to each qgroup count that
- * cooresponds to a found root.
+ * - With all roots resolved we can account the ref - this is done in
+ * account_one_extent().
*/
-static void account_all_refs(int do_qgroups, u64 search_subvol)
+static int account_all_refs(int do_qgroups, u64 search_subvol)
{
- int exclusive;
struct ref *ref;
struct rb_node *node;
u64 bytenr, num_bytes;
struct ulist *roots = ulist_alloc(0);
- struct ulist_iterator uiter;
- struct ulist_node *unode;
+ int ret;
node = rb_first(&by_bytenr);
while (node) {
@@ -347,10 +516,16 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
do {
BUG_ON(ref->bytenr != bytenr);
BUG_ON(ref->num_bytes != num_bytes);
- if (ref->root)
- ulist_add(roots, ref->root, 0, 0);
- else
- find_parent_roots(roots, ref->parent);
+ if (ref->root) {
+ if (is_fstree(ref->root)) {
+ if (ulist_add(roots, ref->root, 0, 0) < 0)
+ goto enomem;
+ }
+ } else {
+ ret = find_parent_roots(roots, ref->parent);
+ if (ret < 0)
+ goto enomem;
+ }
/*
* When we leave this inner loop, node is set
@@ -362,29 +537,22 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
ref = rb_entry(node, struct ref, bytenr_node);
} while (node && ref->bytenr == bytenr);
- /*
- * Now that we have all roots, we can properly account
- * this extent against the corresponding qgroups.
- */
- if (roots->nnodes == 1)
- exclusive = 1;
- else
- exclusive = 0;
-
if (search_subvol)
print_subvol_info(search_subvol, bytenr, num_bytes,
roots);
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(roots, &uiter))) {
- BUG_ON(unode->val == 0ULL);
- /* We only want to account fs trees */
- if (is_fstree(unode->val) && do_qgroups)
- add_bytes(unode->val, num_bytes, exclusive);
- }
+ if (!do_qgroups)
+ continue;
+
+ if (account_one_extent(roots, bytenr, num_bytes))
+ goto enomem;
}
ulist_free(roots);
+ return 0;
+enomem:
+ error("Out of memory while accounting refs for qgroups");
+ return -ENOMEM;
}
static u64 resolve_one_root(u64 bytenr)
@@ -668,6 +836,9 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
item->exclusive_compressed =
btrfs_qgroup_info_exclusive_compressed(leaf, disk);
+ INIT_LIST_HEAD(&c->groups);
+ INIT_LIST_HEAD(&c->members);
+ INIT_LIST_HEAD(&c->bad_list);
if (insert_count(c)) {
free(c);
@@ -677,29 +848,30 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
return c;
}
-static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive)
+static int add_qgroup_relation(u64 memberid, u64 parentid)
{
- struct qgroup_count *count = find_count(root_objectid);
- struct qgroup_info *qg;
+ struct qgroup_count *member;
+ struct qgroup_count *parent;
+ struct btrfs_qgroup_list *list;
- BUG_ON(num_bytes < 4096); /* Random sanity check. */
+ if (memberid > parentid)
+ return 0;
- if (!count)
- return;
+ member = find_count(memberid);
+ parent = find_count(parentid);
+ if (!member || !parent)
+ return -ENOENT;
- qg = &count->info;
+ list = calloc(1, sizeof(*list));
+ if (!list)
+ return -ENOMEM;
- qg->referenced += num_bytes;
- /*
- * count of compressed bytes is unimplemented, so we do the
- * same as kernel.
- */
- qg->referenced_compressed += num_bytes;
+ list->group = parent;
+ list->member = member;
+ list_add_tail(&list->next_group, &member->groups);
+ list_add_tail(&list->next_member, &parent->members);
- if (exclusive) {
- qg->exclusive += num_bytes;
- qg->exclusive_compressed += num_bytes;
- }
+ return 0;
}
static void read_qgroup_status(struct btrfs_path *path,
@@ -733,11 +905,18 @@ static int load_quota_info(struct btrfs_fs_info *info)
struct btrfs_qgroup_info_item *item;
struct qgroup_count *count;
int i, nr;
+ int search_relations = 0;
+loop:
+ /*
+ * Do 2 passes, the first allocates group counts and reads status
+ * items. The 2nd pass picks up relation items and glues them to their
+ * respective count structures.
+ */
btrfs_init_path(&path);
key.offset = 0;
- key.objectid = 0;
+ key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY;
key.type = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
@@ -754,17 +933,26 @@ static int load_quota_info(struct btrfs_fs_info *info)
btrfs_item_key(leaf, &disk_key, i);
btrfs_disk_key_to_cpu(&key, &disk_key);
+ if (search_relations) {
+ if (key.type == BTRFS_QGROUP_RELATION_KEY) {
+ ret = add_qgroup_relation(key.objectid,
+ key.offset);
+ if (ret) {
+ error("out of memory");
+ goto out;
+ }
+ }
+ continue;
+ }
+
if (key.type == BTRFS_QGROUP_STATUS_KEY) {
read_qgroup_status(&path, &counts);
continue;
}
- if (key.type == BTRFS_QGROUP_RELATION_KEY)
- printf("Ignoring qgroup relation key %llu\n",
- key.objectid);
/*
- * Ignore: BTRFS_QGROUP_LIMIT_KEY,
- * BTRFS_QGROUP_RELATION_KEY
+ * At this point, we can ignore anything that
+ * isn't a qgroup info.
*/
if (key.type != BTRFS_QGROUP_INFO_KEY)
continue;
@@ -796,6 +984,12 @@ static int load_quota_info(struct btrfs_fs_info *info)
ret = 0;
btrfs_release_path(&path);
+
+ if (!search_relations) {
+ search_relations = 1;
+ goto loop;
+ }
+
out:
return ret;
}
@@ -1040,6 +1234,11 @@ static void print_fields_signed(long long bytes,
prefix, type, bytes, type, bytes_compressed);
}
+static inline int qgroup_printable(struct qgroup_count *c)
+{
+ return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid));
+}
+
static int report_qgroup_difference(struct qgroup_count *count, int verbose)
{
int is_different;
@@ -1050,9 +1249,10 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose)
is_different = excl_diff || ref_diff;
- if (verbose || (is_different && count->subvol_exists)) {
- printf("Counts for qgroup id: %llu %s\n",
- (unsigned long long)count->qgroupid,
+ if (verbose || (is_different && qgroup_printable(count))) {
+ printf("Counts for qgroup id: %llu/%llu %s\n",
+ btrfs_qgroup_level(count->qgroupid),
+ btrfs_qgroup_subvid(count->qgroupid),
is_different ? "are different" : "");
print_fields(info->referenced, info->referenced_compressed,
@@ -1070,44 +1270,65 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose)
print_fields_signed(excl_diff, excl_diff,
"diff:", "exclusive");
}
- return (is_different && count->subvol_exists);
+
+ return is_different;
}
-int report_qgroups(int all)
+void report_qgroups(int all)
{
struct rb_node *node;
struct qgroup_count *c;
- int ret = 0;
- if (counts.rescan_running) {
+ if (!repair && counts.rescan_running) {
if (all) {
printf(
- "Qgroup rescan is running, qgroup counts difference is expected\n");
+ "Qgroup rescan is running, a difference in qgroup counts is expected\n");
} else {
printf(
- "Qgroup rescan is running, ignore qgroup check\n");
- return ret;
+ "Qgroup rescan is running, qgroups will not be printed.\n");
+ return;
}
}
if (counts.qgroup_inconsist && !counts.rescan_running)
- fprintf(stderr, "Qgroup is already inconsistent before checking\n");
+ fprintf(stderr, "Qgroup are marked as inconsistent.\n");
node = rb_first(&counts.root);
while (node) {
c = rb_entry(node, struct qgroup_count, rb_node);
- ret |= report_qgroup_difference(c, all);
+
+ if (report_qgroup_difference(c, all))
+ list_add_tail(&c->bad_list, &bad_qgroups);
+
node = rb_next(node);
}
- return ret;
}
void free_qgroup_counts(void)
{
struct rb_node *node;
struct qgroup_count *c;
+ struct btrfs_qgroup_list *glist, *tmpglist;
+
node = rb_first(&counts.root);
while (node) {
c = rb_entry(node, struct qgroup_count, rb_node);
+
+ list_del(&c->bad_list);
+
+ list_for_each_entry_safe(glist, tmpglist, &c->groups,
+ next_group) {
+ list_del(&glist->next_group);
+ list_del(&glist->next_member);
+ free(glist);
+ }
+ list_for_each_entry_safe(glist, tmpglist, &c->members,
+ next_group) {
+ list_del(&glist->next_group);
+ list_del(&glist->next_member);
+ free(glist);
+ }
+
node = rb_next(node);
+
rb_erase(&c->rb_node, &counts.root);
free(c);
}
@@ -1148,7 +1369,7 @@ int qgroup_verify_all(struct btrfs_fs_info *info)
goto out;
}
- account_all_refs(1, 0);
+ ret = account_all_refs(1, 0);
out:
/*
@@ -1220,7 +1441,7 @@ int print_extent_state(struct btrfs_fs_info *info, u64 subvol)
}
printf("Offset\t\tLen\tRoot Refs\tRoots\n");
- account_all_refs(0, subvol);
+ ret = account_all_refs(0, subvol);
out:
free_tree_blocks();
@@ -1228,3 +1449,150 @@ out:
return ret;
}
+/*
+ * Overwrite the on-disk info item of one qgroup with the counters held in
+ * count->info and stamp it with the current transaction id.
+ *
+ * @info:  filesystem being repaired; the item is looked up in
+ *         info->quota_root
+ * @count: qgroup whose BTRFS_QGROUP_INFO_KEY item is rewritten
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, the error
+ * from btrfs_start_transaction(), -ENOENT if the item does not exist, or
+ * another non-zero error from the tree search or the commit.
+ */
+static int repair_qgroup_info(struct btrfs_fs_info *info,
+			      struct qgroup_count *count)
+{
+	int ret;
+	int commit_ret;
+	struct btrfs_root *root = info->quota_root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct btrfs_qgroup_info_item *info_item;
+	struct btrfs_key key;
+
+	printf("Repair qgroup %llu/%llu\n", btrfs_qgroup_level(count->qgroupid),
+	       btrfs_qgroup_subvid(count->qgroupid));
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	key.objectid = 0;
+	key.type = BTRFS_QGROUP_INFO_KEY;
+	key.offset = count->qgroupid;
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret) {
+		/* No trailing newline, matching the other error() callers. */
+		error("Could not find disk item for qgroup %llu/%llu.",
+		      btrfs_qgroup_level(count->qgroupid),
+		      btrfs_qgroup_subvid(count->qgroupid));
+		/* A positive result means "no exact match"; report -ENOENT. */
+		if (ret > 0)
+			ret = -ENOENT;
+		goto out;
+	}
+
+	info_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				   struct btrfs_qgroup_info_item);
+
+	btrfs_set_qgroup_info_generation(path->nodes[0], info_item,
+					 trans->transid);
+
+	btrfs_set_qgroup_info_referenced(path->nodes[0], info_item,
+					 count->info.referenced);
+	btrfs_set_qgroup_info_referenced_compressed(path->nodes[0], info_item,
+					count->info.referenced_compressed);
+
+	btrfs_set_qgroup_info_exclusive(path->nodes[0], info_item,
+					count->info.exclusive);
+	btrfs_set_qgroup_info_exclusive_compressed(path->nodes[0], info_item,
+					count->info.exclusive_compressed);
+
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+out:
+	/*
+	 * The transaction is committed on both the success and the error
+	 * path.  A commit failure is reported unless an earlier error is
+	 * already pending, so a repair that never reached disk is not
+	 * counted as successful.
+	 */
+	commit_ret = btrfs_commit_transaction(trans, root);
+	if (!ret)
+		ret = commit_ret;
+	btrfs_free_path(path);
+
+	return ret;
+}
+
+/*
+ * Rewrite the qgroup status item: the flags are set to exactly
+ * BTRFS_QGROUP_STATUS_FLAG_ON, the rescan field is reset to 0 and the
+ * generation is set to the current transaction id.
+ *
+ * @info: filesystem being repaired; the item is looked up in
+ *        info->quota_root
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, the error
+ * from btrfs_start_transaction(), -ENOENT if the status item does not
+ * exist, or another non-zero error from the tree search or the commit.
+ */
+static int repair_qgroup_status(struct btrfs_fs_info *info)
+{
+	int ret;
+	int commit_ret;
+	struct btrfs_root *root = info->quota_root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_qgroup_status_item *status_item;
+
+	printf("Repair qgroup status item\n");
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		return PTR_ERR(trans);
+	}
+
+	key.objectid = 0;
+	key.type = BTRFS_QGROUP_STATUS_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret) {
+		/* No trailing newline, matching the other error() callers. */
+		error("Could not find qgroup status item");
+		/* A positive result means "no exact match"; report -ENOENT. */
+		if (ret > 0)
+			ret = -ENOENT;
+		goto out;
+	}
+
+	status_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				     struct btrfs_qgroup_status_item);
+	/* Writing only FLAG_ON drops every other flag previously stored. */
+	btrfs_set_qgroup_status_flags(path->nodes[0], status_item,
+				      BTRFS_QGROUP_STATUS_FLAG_ON);
+	btrfs_set_qgroup_status_rescan(path->nodes[0], status_item, 0);
+	btrfs_set_qgroup_status_generation(path->nodes[0], status_item,
+					   trans->transid);
+
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+out:
+	/*
+	 * The transaction is committed on both the success and the error
+	 * path.  A commit failure is reported unless an earlier error is
+	 * already pending.
+	 */
+	commit_ret = btrfs_commit_transaction(trans, root);
+	if (!ret)
+		ret = commit_ret;
+	btrfs_free_path(path);
+
+	return ret;
+}
+
+/*
+ * Rewrite the on-disk info item of every qgroup linked on bad_qgroups and
+ * then, if anything was rewritten or the status flags call for it, the
+ * qgroup status item.
+ *
+ * @info:     filesystem whose quota tree is repaired
+ * @repaired: reset to 0 on entry; on return holds the number of items
+ *            rewritten (qgroup info items plus the status item)
+ *
+ * Does nothing and returns 0 when the global 'repair' flag is clear.
+ * Otherwise returns 0 on success or the first error hit; qgroups that were
+ * not repaired stay on bad_qgroups.
+ */
+int repair_qgroups(struct btrfs_fs_info *info, int *repaired)
+{
+	/*
+	 * Must start at 0: with an empty bad_qgroups list and no status
+	 * repair needed, neither assignment below runs before 'out'.
+	 */
+	int ret = 0;
+	struct qgroup_count *count, *tmpcount;
+
+	*repaired = 0;
+
+	if (!repair)
+		return 0;
+
+	list_for_each_entry_safe(count, tmpcount, &bad_qgroups, bad_list) {
+		ret = repair_qgroup_info(info, count);
+		if (ret)
+			goto out;
+
+		(*repaired)++;
+
+		/* Re-initialise the node so a later list_del() on it is safe. */
+		list_del_init(&count->bad_list);
+	}
+
+	/*
+	 * Do this step last as we want the latest transaction id on
+	 * our qgroup status to avoid a (useless) warning after
+	 * mount.
+	 */
+	if (*repaired || counts.qgroup_inconsist || counts.rescan_running) {
+		ret = repair_qgroup_status(info);
+		if (ret)
+			goto out;
+
+		(*repaired)++;
+	}
+
+out:
+	return ret;
+}
diff --git a/qgroup-verify.h b/qgroup-verify.h
index 0f8ff9b3..d7d83a46 100644
--- a/qgroup-verify.h
+++ b/qgroup-verify.h
@@ -23,7 +23,8 @@
#include "ctree.h"
int qgroup_verify_all(struct btrfs_fs_info *info);
-int report_qgroups(int all);
+void report_qgroups(int all);
+int repair_qgroups(struct btrfs_fs_info *info, int *repaired);
int print_extent_state(struct btrfs_fs_info *info, u64 subvol);
diff --git a/raid6.c b/raid6.c
index a6ee483f..833df5f3 100644
--- a/raid6.c
+++ b/raid6.c
@@ -11,6 +11,10 @@
* ----------------------------------------------------------------------- */
/*
+ * Added helpers for unaligned native int access
+ */
+
+/*
* raid6int1.c
*
* 1-way unrolled portable integer math RAID-6 instruction set
@@ -33,11 +37,15 @@
# define NSIZE 8
# define NSHIFT 3
typedef uint64_t unative_t;
+#define put_unaligned_native(val,p) put_unaligned_64((val),(p))
+#define get_unaligned_native(p) get_unaligned_64((p))
#else
# define NBYTES(x) ((x) * 0x01010101U)
# define NSIZE 4
# define NSHIFT 2
typedef uint32_t unative_t;
+#define put_unaligned_native(val,p) put_unaligned_32((val),(p))
+#define get_unaligned_native(p) get_unaligned_32((p))
#endif
/*
@@ -84,9 +92,9 @@ void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs)
q = dptr[z0+2]; /* RS syndrome */
for ( d = 0 ; d < bytes ; d += NSIZE*1 ) {
- wq0 = wp0 = *(unative_t *)&dptr[z0][d+0*NSIZE];
+ wq0 = wp0 = get_unaligned_native(&dptr[z0][d+0*NSIZE]);
for ( z = z0-1 ; z >= 0 ; z-- ) {
- wd0 = *(unative_t *)&dptr[z][d+0*NSIZE];
+ wd0 = get_unaligned_native(&dptr[z][d+0*NSIZE]);
wp0 ^= wd0;
w20 = MASK(wq0);
w10 = SHLBYTE(wq0);
@@ -94,8 +102,8 @@ void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs)
w10 ^= w20;
wq0 = w10 ^ wd0;
}
- *(unative_t *)&p[d+NSIZE*0] = wp0;
- *(unative_t *)&q[d+NSIZE*0] = wq0;
+ put_unaligned_native(wp0, &p[d+NSIZE*0]);
+ put_unaligned_native(wq0, &q[d+NSIZE*0]);
}
}
diff --git a/repair.c b/repair.c
index 4f747421..07a1232e 100644
--- a/repair.c
+++ b/repair.c
@@ -21,6 +21,8 @@
#include "utils.h"
#include "repair.h"
+int repair = 0;
+
int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
struct btrfs_key *first_key,
u64 start, u64 len, int level)
diff --git a/repair.h b/repair.h
index 3fc0e8b6..355bbf2c 100644
--- a/repair.h
+++ b/repair.h
@@ -21,6 +21,8 @@
#include "ctree.h"
+extern int repair; /* repair mode */
+
struct btrfs_corrupt_block {
struct cache_extent cache;
struct btrfs_key key;
diff --git a/tests/clean-tests.sh b/tests/clean-tests.sh
index f7fefdda..7f18e6f0 100755
--- a/tests/clean-tests.sh
+++ b/tests/clean-tests.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# remove all intermediate files from tests
SCRIPT_DIR=$(dirname $(readlink -f $0))
diff --git a/tests/convert-tests/006-large-hole-extent/test.sh b/tests/convert-tests/006-large-hole-extent/test.sh
new file mode 100755
index 00000000..d3bc093c
--- /dev/null
+++ b/tests/convert-tests/006-large-hole-extent/test.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Create a base image with large hole extent, then convert to btrfs,
+# check the converted image.
+# Check if btrfs-convert can handle such large hole.
+# Fast, narrowly targeted regression test: no option combinations and no
+# checksum verification.
+
+source $TOP/tests/common
+source $TOP/tests/common.convert
+
+setup_root_helper
+prepare_test_dev 512M
+check_prereq btrfs-convert
+
+default_mke2fs="mke2fs -t ext4 -b 4096"
+convert_test_preamble '' 'large hole extent test' 16k "$default_mke2fs"
+convert_test_prep_fs $default_mke2fs
+
+run_check $SUDO_HELPER dd if=/dev/zero of=$TEST_MNT/file bs=1M \
+ count=1 seek=1024 > /dev/null 2>&1
+
+run_check_umount_test_dev
+convert_test_do_convert
diff --git a/tests/convert-tests/007-unsupported-block-sizes/test.sh b/tests/convert-tests/007-unsupported-block-sizes/test.sh
new file mode 100755
index 00000000..9ba17751
--- /dev/null
+++ b/tests/convert-tests/007-unsupported-block-sizes/test.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Check that converting with block sizes smaller than 4k fails, as expected
+
+source $TOP/tests/common
+source $TOP/tests/common.convert
+
+setup_root_helper
+prepare_test_dev 512M
+check_prereq btrfs-convert
+
+for bs in 1024 2048; do
+ default_mke2fs="mke2fs -t ext4 -b $bs"
+ convert_test_preamble '' "unsupported block size $bs" 16k "$default_mke2fs"
+ convert_test_prep_fs $default_mke2fs
+
+ run_check $SUDO_HELPER dd if=/dev/zero of=$TEST_MNT/file bs=1M \
+ count=1 seek=1024 > /dev/null 2>&1
+
+ run_check_umount_test_dev
+ run_mustfail "$bs block converted" $TOP/btrfs-convert $TEST_DEV
+done
diff --git a/tests/fuzz-tests/001-simple-unmounted/test.sh b/tests/fuzz-tests/001-simple-unmounted/test.sh
index bf01a3a4..98fe7b0c 100755
--- a/tests/fuzz-tests/001-simple-unmounted/test.sh
+++ b/tests/fuzz-tests/001-simple-unmounted/test.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# iterate over all fuzzed images and run 'btrfs check'
diff --git a/tests/misc-tests/006-image-on-missing-device/test.sh b/tests/misc-tests/006-image-on-missing-device/test.sh
index 8680a707..b22a95d7 100755
--- a/tests/misc-tests/006-image-on-missing-device/test.sh
+++ b/tests/misc-tests/006-image-on-missing-device/test.sh
@@ -61,12 +61,12 @@ test_run()
run_check $SUDO_HELPER umount $TEST_MNT
test_image_dump
- run_check btrfs fi show $dev1
+ run_check $TOP/btrfs filesystem show $dev1
# create a degraded raid1 filesystem, check must succeed
# btrfs-image must not loop
run_mayfail wipefs -a $dev2
run_check $SUDO_HELPER losetup -d $dev2
- run_check btrfs fi show $dev1
+ run_check $TOP/btrfs filesystem show $dev1
test_image_dump
}
diff --git a/tests/test-console.sh b/tests/test-console.sh
index cc1cdf3c..365cc971 100755
--- a/tests/test-console.sh
+++ b/tests/test-console.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# a shell with test environment set up, logged commands and output
LANG=C
diff --git a/utils.c b/utils.c
index 578fdb04..79bf9466 100644
--- a/utils.c
+++ b/utils.c
@@ -1682,8 +1682,8 @@ out:
return ret;
}
-int btrfs_prepare_device(int fd, const char *file, int zero_end,
- u64 *block_count_ret, u64 max_block_count, int discard)
+int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
+ u64 max_block_count, unsigned opflags)
{
u64 block_count;
struct stat st;
@@ -1703,15 +1703,16 @@ int btrfs_prepare_device(int fd, const char *file, int zero_end,
if (max_block_count)
block_count = min(block_count, max_block_count);
- if (discard) {
+ if (opflags & PREP_DEVICE_DISCARD) {
/*
* We intentionally ignore errors from the discard ioctl. It
* is not necessary for the mkfs functionality but just an
* optimization.
*/
if (discard_range(fd, 0, 0) == 0) {
- printf("Performing full device TRIM (%s) ...\n",
- pretty_size(block_count));
+ if (opflags & PREP_DEVICE_VERBOSE)
+ printf("Performing full device TRIM (%s) ...\n",
+ pretty_size(block_count));
discard_blocks(fd, 0, block_count);
}
}
@@ -1720,7 +1721,7 @@ int btrfs_prepare_device(int fd, const char *file, int zero_end,
for (i = 0 ; !ret && i < BTRFS_SUPER_MIRROR_MAX; i++)
ret = zero_dev_clamped(fd, btrfs_sb_offset(i),
BTRFS_SUPER_INFO_SIZE, block_count);
- if (!ret && zero_end)
+ if (!ret && (opflags & PREP_DEVICE_ZERO_END))
ret = zero_dev_clamped(fd, block_count - ZERO_DEV_BYTES,
ZERO_DEV_BYTES, block_count);
@@ -2815,6 +2816,8 @@ path:
if (fd < 0)
goto err;
ret = lookup_ino_rootid(fd, &id);
+ if (ret)
+ error("failed to lookup root id: %s", strerror(-ret));
close(fd);
if (ret < 0)
goto err;
@@ -3497,10 +3500,8 @@ int lookup_ino_rootid(int fd, u64 *rootid)
args.objectid = BTRFS_FIRST_FREE_OBJECTID;
ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
- if (ret < 0) {
- error("failed to lookup root id: %s", strerror(errno));
- return ret;
- }
+ if (ret < 0)
+ return -errno;
*rootid = args.treeid;
diff --git a/utils.h b/utils.h
index 98bfb34d..58121ced 100644
--- a/utils.h
+++ b/utils.h
@@ -147,12 +147,16 @@ struct btrfs_convert_context {
void *fs_data;
};
+#define PREP_DEVICE_ZERO_END (1U << 0)
+#define PREP_DEVICE_DISCARD (1U << 1)
+#define PREP_DEVICE_VERBOSE (1U << 2)
+
int make_btrfs(int fd, struct btrfs_mkfs_config *cfg,
struct btrfs_convert_context *cctx);
int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid);
-int btrfs_prepare_device(int fd, const char *file, int zero_end,
- u64 *block_count_ret, u64 max_block_count, int discard);
+int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
+ u64 max_block_count, unsigned opflags);
int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int fd, char *path,
u64 block_count, u32 io_width, u32 io_align,
diff --git a/version.sh b/version.sh
index cdb847cc..ed2a0c73 100755
--- a/version.sh
+++ b/version.sh
@@ -6,7 +6,7 @@
# Copyright 2008, Oracle
# Released under the GNU GPLv2
-v="v4.6.1"
+v="v4.7"
opt=$1
diff --git a/volumes.c b/volumes.c
index ccfa732e..a356d740 100644
--- a/volumes.c
+++ b/volumes.c
@@ -227,6 +227,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags)
fd = open(device->name, flags);
if (fd < 0) {
ret = -errno;
+ error("cannot open device '%s': %s", device->name,
+ strerror(errno));
goto fail;
}
@@ -1759,6 +1761,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
map->stripes[i].dev = fill_missing_device(devid);
printf("warning, device %llu is missing\n",
(unsigned long long)devid);
+ list_add(&map->stripes[i].dev->dev_list,
+ &root->fs_info->fs_devices->devices);
}
}
@@ -2152,9 +2156,14 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
ebs[multi->num_stripes - 1] = p_eb;
memcpy(p_eb->data, ebs[0]->data, stripe_len);
for (j = 1; j < multi->num_stripes - 1; j++) {
- for (i = 0; i < stripe_len; i += sizeof(unsigned long)) {
- *(unsigned long *)(p_eb->data + i) ^=
- *(unsigned long *)(ebs[j]->data + i);
+ for (i = 0; i < stripe_len; i += sizeof(u64)) {
+ u64 p_eb_data;
+ u64 ebs_data;
+
+ p_eb_data = get_unaligned_64(p_eb->data + i);
+ ebs_data = get_unaligned_64(ebs[j]->data + i);
+ p_eb_data ^= ebs_data;
+ put_unaligned_64(p_eb_data, p_eb->data + i);
}
}
}